diff options
author | Jon Bratseth <bratseth@verizonmedia.com> | 2019-11-18 13:16:54 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@verizonmedia.com> | 2019-11-18 13:16:54 +0100 |
commit | 34e83218cc7b210137b633bc3e99f9b99dda3e19 (patch) | |
tree | e3c8862607596cf4b481e1c615db6f4586ddfa17 /node-repository | |
parent | 2d07304049b00d679403b143adee7b481b099bb8 (diff) |
Do rebalancing deployments in the Rebalancer
This avoids cases where the Rebalancer chooses a move
which turns out not to be legal when actually making
the redeployment, causing another node to be selected
as the target.
Diffstat (limited to 'node-repository')
8 files changed, 180 insertions, 48 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index 4adb8fc247f..610c6ff999e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -128,6 +128,10 @@ public final class Node { /** Returns the parent hostname for this node if this node is a docker container or a VM (i.e. it has a parent host). Otherwise, empty **/ public Optional<String> parentHostname() { return parentHostname; } + public boolean hasParent(String hostname) { + return parentHostname.isPresent() && parentHostname.get().equals(hostname); + } + /** Returns the flavor of this node */ public Flavor flavor() { return flavor; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index 94e3318ac9c..90daa0ad279 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -84,7 +84,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource)); capacityReportMaintainer = new CapacityReportMaintainer(nodeRepository, metric, defaults.capacityReportInterval); osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval); - rebalancer = new Rebalancer(nodeRepository, provisionServiceProvider.getHostResourcesCalculator(), provisionServiceProvider.getHostProvisioner(), metric, clock, defaults.rebalancerInterval); + rebalancer = new Rebalancer(deployer, nodeRepository, 
provisionServiceProvider.getHostResourcesCalculator(), provisionServiceProvider.getHostProvisioner(), metric, clock, defaults.rebalancerInterval); // The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now infrastructureProvisioner.maintain(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java index 3800e48b4bf..ef1afde5601 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java @@ -1,9 +1,17 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationLockException; +import com.yahoo.config.provision.Deployer; +import com.yahoo.config.provision.Deployment; +import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.TransientException; import com.yahoo.jdisc.Metric; +import com.yahoo.jdisc.core.Main; +import com.yahoo.log.LogLevel; import com.yahoo.transaction.Mutex; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; @@ -13,25 +21,34 @@ import com.yahoo.vespa.hosted.provision.provisioning.DockerHostCapacity; import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; import com.yahoo.vespa.hosted.provision.provisioning.NodePrioritizer; +import com.yahoo.yolean.Exceptions; +import java.io.Closeable; +import java.io.IOException; import java.time.Clock; import 
java.time.Duration; +import java.util.List; import java.util.Optional; +import java.util.Set; +import java.util.logging.Logger; public class Rebalancer extends Maintainer { + private final Deployer deployer; private final HostResourcesCalculator hostResourcesCalculator; private final Optional<HostProvisioner> hostProvisioner; private final Metric metric; private final Clock clock; - public Rebalancer(NodeRepository nodeRepository, + public Rebalancer(Deployer deployer, + NodeRepository nodeRepository, HostResourcesCalculator hostResourcesCalculator, Optional<HostProvisioner> hostProvisioner, Metric metric, Clock clock, Duration interval) { super(nodeRepository, interval); + this.deployer = deployer; this.hostResourcesCalculator = hostResourcesCalculator; this.hostProvisioner = hostProvisioner; this.metric = metric; @@ -51,7 +68,11 @@ public class Rebalancer extends Maintainer { Move bestMove = findBestMove(allNodes); if (bestMove == Move.none) return; - markWantToRetire(bestMove.node); + boolean couldMarkRetired = markWantToRetire(bestMove.node, true); + if ( ! couldMarkRetired) return; + boolean success = deployTo(bestMove); + if ( ! 
success) + markWantToRetire(bestMove.node, false); } /** We do this here rather than in MetricsReporter because it is expensive and frequent updates are unnecessary */ @@ -90,20 +111,39 @@ public class Rebalancer extends Maintainer { double skewReductionAtToHost = skewReductionByAdding(node, toHost, capacity); double netSkewReduction = skewReductionAtFromHost + skewReductionAtToHost; if (netSkewReduction > bestMove.netSkewReduction) - bestMove = new Move(node, netSkewReduction); + bestMove = new Move(node, toHost, netSkewReduction); } } return bestMove; } - private void markWantToRetire(Node node) { + private boolean markWantToRetire(Node node, boolean wantToRetire) { try (Mutex lock = nodeRepository().lock(node)) { Optional<Node> nodeToMove = nodeRepository().getNode(node.hostname()); - if (nodeToMove.isEmpty()) return; - if (nodeToMove.get().state() != Node.State.active) return; + if (nodeToMove.isEmpty()) return false; + if (nodeToMove.get().state() != Node.State.active) return false; - nodeRepository().write(nodeToMove.get().withWantToRetire(true, Agent.system, clock.instant()), lock); - log.info("Marked " + nodeToMove.get() + " as want to retire to reduce allocation skew"); + nodeRepository().write(nodeToMove.get().withWantToRetire(wantToRetire, Agent.system, clock.instant()), lock); + return true; + } + } + + /** + * Try a redeployment to effect the chosen move. + * If it can not be done, that's ok; we'll try this or another move later. + * + * @return true if the move was done, false if it couldn't be + */ + private boolean deployTo(Move move) { + ApplicationId application = move.node.allocation().get().owner(); + try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, nodeRepository())) { + if ( ! 
deployment.prepare()) return false; + if (nodeRepository().getNodes(application, Node.State.reserved).stream().noneMatch(node -> node.hasParent(move.toHost.hostname()))) + return false; // Deployment is not moving the from node to the target we identified for some reason + + if ( ! deployment.activate()) return false; + log.info("Rebalancer redeployed " + application + " to " + move); + return true; } } @@ -123,20 +163,86 @@ public class Rebalancer extends Maintainer { private static class Move { - static final Move none = new Move(null, 0); + static final Move none = new Move(null, null, 0); final Node node; + final Node toHost; final double netSkewReduction; - Move(Node node, double netSkewReduction) { + Move(Node node, Node toHost, double netSkewReduction) { this.node = node; + this.toHost = toHost; this.netSkewReduction = netSkewReduction; } @Override public String toString() { - return "move: " + - ( node == null ? "none" : node.hostname() + ", skew reduction " + netSkewReduction ); + return "move " + + ( node == null ? 
"none" : + (node.hostname() + " to " + toHost + " [skew reduction " + netSkewReduction + "]")); + } + + } + + private static class MaintenanceDeployment implements Closeable { + + private static final Logger log = Logger.getLogger(MaintenanceDeployment.class.getName()); + + private final ApplicationId application; + private final Optional<Mutex> lock; + private final Optional<Deployment> deployment; + + public MaintenanceDeployment(ApplicationId application, Deployer deployer, NodeRepository nodeRepository) { + this.application = application; + lock = tryLock(application, nodeRepository); + deployment = tryDeployment(lock, application, deployer, nodeRepository); + } + + private Optional<Mutex> tryLock(ApplicationId application, NodeRepository nodeRepository) { + try { + // Use a short lock to avoid interfering with change deployments + return Optional.of(nodeRepository.lock(application, Duration.ofSeconds(1))); + } + catch (ApplicationLockException e) { + return Optional.empty(); + } + } + + private Optional<Deployment> tryDeployment(Optional<Mutex> lock, + ApplicationId application, + Deployer deployer, + NodeRepository nodeRepository) { + if (lock.isEmpty()) return Optional.empty(); + if (nodeRepository.getNodes(application, Node.State.active).isEmpty()) return Optional.empty(); + return deployer.deployFromLocalActive(application); + } + + public boolean prepare() { + return doStep(() -> deployment.get().prepare()); + } + + public boolean activate() { + return doStep(() -> deployment.get().activate()); + } + + private boolean doStep(Runnable action) { + if ( deployment.isEmpty()) return false; + try { + action.run(); + return true; + } catch (TransientException e) { + log.log(LogLevel.INFO, "Failed to deploy " + application + " with a transient error: " + + Exceptions.toMessageString(e)); + return false; + } catch (RuntimeException e) { + log.log(LogLevel.WARNING, "Exception on maintenance deploy of " + application, e); + return false; + } + } + + @Override + 
public void close() { + lock.ifPresent(l -> l.close()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java index 11df1aa8c35..a885b26b6cd 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java @@ -122,7 +122,7 @@ class Activator { // Note: log parent hosts not ready, but do not add to exception (to avoid leaking hostnames) logger.log(LogLevel.INFO, application + ": Parent hosts not ready: " + nonActiveHosts); throw new ParentHostUnavailableException("Waiting for hosts to finish booting: " + - numNonActive + "/" + parentHostnames.size() + " left."); + numNonActive + "/" + parentHostnames.size() + " left."); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java index d5c7626b757..a56ace07c82 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java @@ -57,6 +57,7 @@ public class InfraDeployerImpl implements InfraDeployer { } private class InfraDeployment implements Deployment { + private final InfraApplicationApi application; private boolean prepared = false; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index e381fdc654e..1c3cd40bc02 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ 
b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -96,14 +96,14 @@ class NodeAllocation { */ List<Node> offer(List<PrioritizableNode> nodesPrioritized) { List<Node> accepted = new ArrayList<>(); - for (PrioritizableNode offeredPriority : nodesPrioritized) { - Node offered = offeredPriority.node; + for (PrioritizableNode node : nodesPrioritized) { + Node offered = node.node; if (offered.allocation().isPresent()) { ClusterMembership membership = offered.allocation().get().membership(); if ( ! offered.allocation().get().owner().equals(application)) continue; // wrong application if ( ! membership.cluster().equalsIgnoringGroupAndVespaVersion(cluster)) continue; // wrong cluster id/type - if ((! offeredPriority.isSurplusNode || saturated()) && ! membership.cluster().group().equals(cluster.group())) continue; // wrong group and we can't or have no reason to change it + if ((! node.isSurplusNode || saturated()) && ! membership.cluster().group().equals(cluster.group())) continue; // wrong group and we can't or have no reason to change it if ( offered.allocation().get().isRemovable()) continue; // don't accept; causes removal if ( indexes.contains(membership.index())) continue; // duplicate index (just to be sure) @@ -114,12 +114,11 @@ class NodeAllocation { if (offered.status().wantToRetire()) wantToRetireNode = true; if (requestedNodes.isExclusive() && ! hostsOnly(application.tenant(), offered.parentHostname())) wantToRetireNode = true; - if (( ! saturated() && hasCompatibleFlavor(offered)) || acceptToRetire(offered)) - accepted.add(acceptNode(offeredPriority, wantToRetireNode)); + accepted.add(acceptNode(node, wantToRetireNode)); } else { - accepted.add(acceptNode(offeredPriority, false)); + accepted.add(acceptNode(node, false)); } } else if ( ! 
saturated() && hasCompatibleFlavor(offered)) { @@ -138,11 +137,11 @@ class NodeAllocation { if (offered.status().wantToRetire()) { continue; } - offeredPriority.node = offered.allocate(application, + node.node = offered.allocate(application, ClusterMembership.from(cluster, highestIndex.add(1)), - requestedNodes.resources().orElse(offeredPriority.node.flavor().resources()), + requestedNodes.resources().orElse(node.node.flavor().resources()), clock.instant()); - accepted.add(acceptNode(offeredPriority, false)); + accepted.add(acceptNode(node, false)); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java index e628e823025..898e1343ea3 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java @@ -73,7 +73,7 @@ public class MockDeployer implements Deployer { } try { return Optional.ofNullable(applications.get(id)) - .map(application -> new MockDeployment(provisioner, application)); + .map(application -> new MockDeployment(provisioner, application)); } finally { lock.unlock(); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java index e8917c891f9..fc0e0e68570 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterSpec; import 
com.yahoo.config.provision.Environment; import com.yahoo.config.provision.Flavor; @@ -12,16 +13,19 @@ import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.Zone; import com.yahoo.config.provisioning.FlavorsConfig; +import com.yahoo.transaction.NestedTransaction; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; +import com.yahoo.vespa.hosted.provision.testutils.MockDeployer; import org.junit.Test; import java.time.Duration; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Optional; import static org.junit.Assert.assertEquals; @@ -35,60 +39,78 @@ public class RebalancerTest { @Test public void testRebalancing() { + ApplicationId cpuApp = makeApplicationId("t1", "a1"); + ApplicationId memApp = makeApplicationId("t2", "a2"); + NodeResources cpuResources = new NodeResources(8, 4, 1, 0.1); + NodeResources memResources = new NodeResources(4, 9, 1, 0.1); + ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.perf, RegionName.from("us-east"))).flavorsConfig(flavorsConfig()).build(); MetricsReporterTest.TestMetric metric = new MetricsReporterTest.TestMetric(); - Rebalancer rebalancer = new Rebalancer(tester.nodeRepository(), + + Map<ApplicationId, MockDeployer.ApplicationContext> apps = Map.of( + cpuApp, new MockDeployer.ApplicationContext(cpuApp, clusterSpec("c"), Capacity.fromCount(1, cpuResources), 1), + memApp, new MockDeployer.ApplicationContext(memApp, clusterSpec("c"), Capacity.fromCount(1, memResources), 1)); + MockDeployer deployer = new MockDeployer(tester.provisioner(), tester.clock(), apps); + + Rebalancer rebalancer = new Rebalancer(deployer, + 
tester.nodeRepository(), new IdentityHostResourcesCalculator(), Optional.empty(), metric, tester.clock(), Duration.ofMinutes(1)); - NodeResources cpuResources = new NodeResources(8, 4, 1, 0.1); - NodeResources memResources = new NodeResources(4, 9, 1, 0.1); tester.makeReadyNodes(3, "flt", NodeType.host, 8); tester.deployZoneApp(); // Cpu heavy application - causing 1 of these nodes to be skewed - ApplicationId cpuApp = makeApplicationId("t1", "a1"); deployApp(cpuApp, clusterSpec("c"), cpuResources, tester, 1); - String cpuSkewedNodeHostname = tester.nodeRepository().getNodes(cpuApp).get(0).hostname(); + Node cpuSkewedNode = tester.nodeRepository().getNodes(cpuApp).get(0); rebalancer.maintain(); assertFalse("No better place to move the skewed node, so no action is taken", - tester.nodeRepository().getNode(cpuSkewedNodeHostname).get().status().wantToRetire()); + tester.nodeRepository().getNode(cpuSkewedNode.hostname()).get().status().wantToRetire()); assertEquals(0.00325, metric.values.get("hostedVespa.docker.skew").doubleValue(), 0.00001); - tester.makeReadyNodes(1, "cpu", NodeType.host, 8); + Node newCpuHost = tester.makeReadyNodes(1, "cpu", NodeType.host, 8).get(0); + tester.deployZoneApp(); rebalancer.maintain(); - assertTrue("We can now move the node to the cpu skewed host to reduce skew", - tester.nodeRepository().getNode(cpuSkewedNodeHostname).get().status().wantToRetire()); - assertEquals("We're not actually moving the node here so skew remains steady", - 0.00325, metric.values.get("hostedVespa.docker.skew").doubleValue(), 0.00001); + assertTrue("Rebalancer retired the node we wanted to move away from", + tester.nodeRepository().getNode(cpuSkewedNode.hostname()).get().allocation().get().membership().retired()); + assertTrue("... 
and added a node on the new host instead", + tester.nodeRepository().getNodes(cpuApp, Node.State.active).stream().anyMatch(node -> node.hasParent(newCpuHost.hostname()))); + assertEquals("Skew is reduced", + 0.00244, metric.values.get("hostedVespa.docker.skew").doubleValue(), 0.00001); - ApplicationId memApp = makeApplicationId("t2", "a2"); deployApp(memApp, clusterSpec("c"), memResources, tester, 1); assertEquals("Assigned to a flat node as that causes least skew", "flt", tester.nodeRepository().list().parentOf(tester.nodeRepository().getNodes(memApp).get(0)).get().flavor().name()); - String memSkewedNodeHostname = tester.nodeRepository().getNodes(memApp).get(0).hostname(); - tester.makeReadyNodes(1, "mem", NodeType.host, 8); rebalancer.maintain(); assertEquals("Deploying the mem skewed app increased skew", - 0.00752, metric.values.get("hostedVespa.docker.skew").doubleValue(), 0.00001); - assertFalse("The mem skewed node is not set want to retire as the cpu skewed node still is", - tester.nodeRepository().getNode(memSkewedNodeHostname).get().status().wantToRetire()); + 0.00734, metric.values.get("hostedVespa.docker.skew").doubleValue(), 0.00001); + + Node memSkewedNode = tester.nodeRepository().getNodes(memApp).get(0); + Node newMemHost = tester.makeReadyNodes(1, "mem", NodeType.host, 8).get(0); + tester.deployZoneApp(); + + rebalancer.maintain(); + assertFalse("No rebalancing happens because cpuSkewedNode is still retired", + tester.nodeRepository().getNode(memSkewedNode.hostname()).get().allocation().get().membership().retired()); + + NestedTransaction tx = new NestedTransaction(); + tester.nodeRepository().deactivate(List.of(cpuSkewedNode), tx); + tx.commit(); - Node cpuSkewedNode = tester.nodeRepository().getNode(cpuSkewedNodeHostname).get(); - tester.nodeRepository().write(cpuSkewedNode.withWantToRetire(false, Agent.system, tester.clock().instant()), - tester.nodeRepository().lock(cpuSkewedNode)); rebalancer.maintain(); - assertTrue("The mem skewed node is 
now scheduled for moving", - tester.nodeRepository().getNode(memSkewedNodeHostname).get().status().wantToRetire()); - assertFalse("(The cpu skewed node is not because it causes slightly less skew)", - tester.nodeRepository().getNode(cpuSkewedNodeHostname).get().status().wantToRetire()); + assertTrue("Rebalancer retired the node we wanted to move away from", + tester.nodeRepository().getNode(memSkewedNode.hostname()).get().allocation().get().membership().retired()); + assertTrue("... and added a node on the new host instead", + tester.nodeRepository().getNodes(memApp, Node.State.active).stream().anyMatch(node -> node.hasParent(newMemHost.hostname()))); + assertEquals("Skew is reduced", + 0.00587, metric.values.get("hostedVespa.docker.skew").doubleValue(), 0.00001); } private ClusterSpec clusterSpec(String clusterId) { |