summaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@verizonmedia.com>2019-11-18 13:16:54 +0100
committerJon Bratseth <bratseth@verizonmedia.com>2019-11-18 13:16:54 +0100
commit34e83218cc7b210137b633bc3e99f9b99dda3e19 (patch)
treee3c8862607596cf4b481e1c615db6f4586ddfa17 /node-repository
parent2d07304049b00d679403b143adee7b481b099bb8 (diff)
Do rebalancing deployments in the Rebalancer
This avoids cases where the Rebalancer chooses a move which turns out to not be legal when actually making the redeployment, causing another node to be selected as target.
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java130
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java1
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java17
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java70
8 files changed, 180 insertions, 48 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
index 4adb8fc247f..610c6ff999e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
@@ -128,6 +128,10 @@ public final class Node {
/** Returns the parent hostname for this node if this node is a docker container or a VM (i.e. it has a parent host). Otherwise, empty **/
public Optional<String> parentHostname() { return parentHostname; }
+ public boolean hasParent(String hostname) {
+ return parentHostname.isPresent() && parentHostname.get().equals(hostname);
+ }
+
/** Returns the flavor of this node */
public Flavor flavor() { return flavor; }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index 94e3318ac9c..90daa0ad279 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -84,7 +84,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource));
capacityReportMaintainer = new CapacityReportMaintainer(nodeRepository, metric, defaults.capacityReportInterval);
osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval);
- rebalancer = new Rebalancer(nodeRepository, provisionServiceProvider.getHostResourcesCalculator(), provisionServiceProvider.getHostProvisioner(), metric, clock, defaults.rebalancerInterval);
+ rebalancer = new Rebalancer(deployer, nodeRepository, provisionServiceProvider.getHostResourcesCalculator(), provisionServiceProvider.getHostProvisioner(), metric, clock, defaults.rebalancerInterval);
// The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now
infrastructureProvisioner.maintain();
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
index 3800e48b4bf..ef1afde5601 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
@@ -1,9 +1,17 @@
// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ApplicationLockException;
+import com.yahoo.config.provision.Deployer;
+import com.yahoo.config.provision.Deployment;
+import com.yahoo.config.provision.HostSpec;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
+import com.yahoo.config.provision.TransientException;
import com.yahoo.jdisc.Metric;
+import com.yahoo.jdisc.core.Main;
+import com.yahoo.log.LogLevel;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
@@ -13,25 +21,34 @@ import com.yahoo.vespa.hosted.provision.provisioning.DockerHostCapacity;
import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner;
import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
import com.yahoo.vespa.hosted.provision.provisioning.NodePrioritizer;
+import com.yahoo.yolean.Exceptions;
+import java.io.Closeable;
+import java.io.IOException;
import java.time.Clock;
import java.time.Duration;
+import java.util.List;
import java.util.Optional;
+import java.util.Set;
+import java.util.logging.Logger;
public class Rebalancer extends Maintainer {
+ private final Deployer deployer;
private final HostResourcesCalculator hostResourcesCalculator;
private final Optional<HostProvisioner> hostProvisioner;
private final Metric metric;
private final Clock clock;
- public Rebalancer(NodeRepository nodeRepository,
+ public Rebalancer(Deployer deployer,
+ NodeRepository nodeRepository,
HostResourcesCalculator hostResourcesCalculator,
Optional<HostProvisioner> hostProvisioner,
Metric metric,
Clock clock,
Duration interval) {
super(nodeRepository, interval);
+ this.deployer = deployer;
this.hostResourcesCalculator = hostResourcesCalculator;
this.hostProvisioner = hostProvisioner;
this.metric = metric;
@@ -51,7 +68,11 @@ public class Rebalancer extends Maintainer {
Move bestMove = findBestMove(allNodes);
if (bestMove == Move.none) return;
- markWantToRetire(bestMove.node);
+ boolean couldMarkRetired = markWantToRetire(bestMove.node, true);
+ if ( ! couldMarkRetired) return;
+ boolean success = deployTo(bestMove);
+ if ( ! success)
+ markWantToRetire(bestMove.node, false);
}
/** We do this here rather than in MetricsReporter because it is expensive and frequent updates are unnecessary */
@@ -90,20 +111,39 @@ public class Rebalancer extends Maintainer {
double skewReductionAtToHost = skewReductionByAdding(node, toHost, capacity);
double netSkewReduction = skewReductionAtFromHost + skewReductionAtToHost;
if (netSkewReduction > bestMove.netSkewReduction)
- bestMove = new Move(node, netSkewReduction);
+ bestMove = new Move(node, toHost, netSkewReduction);
}
}
return bestMove;
}
- private void markWantToRetire(Node node) {
+ private boolean markWantToRetire(Node node, boolean wantToRetire) {
try (Mutex lock = nodeRepository().lock(node)) {
Optional<Node> nodeToMove = nodeRepository().getNode(node.hostname());
- if (nodeToMove.isEmpty()) return;
- if (nodeToMove.get().state() != Node.State.active) return;
+ if (nodeToMove.isEmpty()) return false;
+ if (nodeToMove.get().state() != Node.State.active) return false;
- nodeRepository().write(nodeToMove.get().withWantToRetire(true, Agent.system, clock.instant()), lock);
- log.info("Marked " + nodeToMove.get() + " as want to retire to reduce allocation skew");
+ nodeRepository().write(nodeToMove.get().withWantToRetire(wantToRetire, Agent.system, clock.instant()), lock);
+ return true;
+ }
+ }
+
+ /**
+ * Try a redeployment to effect the chosen move.
+ * If it can't be done, that's ok; we'll try this or another move later.
+ *
+ * @return true if the move was done, false if it couldn't be
+ */
+ private boolean deployTo(Move move) {
+ ApplicationId application = move.node.allocation().get().owner();
+ try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, nodeRepository())) {
+ if ( ! deployment.prepare()) return false;
+ if (nodeRepository().getNodes(application, Node.State.reserved).stream().noneMatch(node -> node.hasParent(move.toHost.hostname())))
+ return false; // Deployment is not moving the from node to the target we identified for some reason
+
+ if ( ! deployment.activate()) return false;
+ log.info("Rebalancer redeployed " + application + " to " + move);
+ return true;
}
}
@@ -123,20 +163,86 @@ public class Rebalancer extends Maintainer {
private static class Move {
- static final Move none = new Move(null, 0);
+ static final Move none = new Move(null, null, 0);
final Node node;
+ final Node toHost;
final double netSkewReduction;
- Move(Node node, double netSkewReduction) {
+ Move(Node node, Node toHost, double netSkewReduction) {
this.node = node;
+ this.toHost = toHost;
this.netSkewReduction = netSkewReduction;
}
@Override
public String toString() {
- return "move: " +
- ( node == null ? "none" : node.hostname() + ", skew reduction " + netSkewReduction );
+ return "move " +
+ ( node == null ? "none" :
+ (node.hostname() + " to " + toHost + " [skew reduction " + netSkewReduction + "]"));
+ }
+
+ }
+
+ private static class MaintenanceDeployment implements Closeable {
+
+ private static final Logger log = Logger.getLogger(MaintenanceDeployment.class.getName());
+
+ private final ApplicationId application;
+ private final Optional<Mutex> lock;
+ private final Optional<Deployment> deployment;
+
+ public MaintenanceDeployment(ApplicationId application, Deployer deployer, NodeRepository nodeRepository) {
+ this.application = application;
+ lock = tryLock(application, nodeRepository);
+ deployment = tryDeployment(lock, application, deployer, nodeRepository);
+ }
+
+ private Optional<Mutex> tryLock(ApplicationId application, NodeRepository nodeRepository) {
+ try {
+ // Use a short lock to avoid interfering with change deployments
+ return Optional.of(nodeRepository.lock(application, Duration.ofSeconds(1)));
+ }
+ catch (ApplicationLockException e) {
+ return Optional.empty();
+ }
+ }
+
+ private Optional<Deployment> tryDeployment(Optional<Mutex> lock,
+ ApplicationId application,
+ Deployer deployer,
+ NodeRepository nodeRepository) {
+ if (lock.isEmpty()) return Optional.empty();
+ if (nodeRepository.getNodes(application, Node.State.active).isEmpty()) return Optional.empty();
+ return deployer.deployFromLocalActive(application);
+ }
+
+ public boolean prepare() {
+ return doStep(() -> deployment.get().prepare());
+ }
+
+ public boolean activate() {
+ return doStep(() -> deployment.get().activate());
+ }
+
+ private boolean doStep(Runnable action) {
+ if ( deployment.isEmpty()) return false;
+ try {
+ action.run();
+ return true;
+ } catch (TransientException e) {
+ log.log(LogLevel.INFO, "Failed to deploy " + application + " with a transient error: " +
+ Exceptions.toMessageString(e));
+ return false;
+ } catch (RuntimeException e) {
+ log.log(LogLevel.WARNING, "Exception on maintenance deploy of " + application, e);
+ return false;
+ }
+ }
+
+ @Override
+ public void close() {
+ lock.ifPresent(l -> l.close());
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java
index 11df1aa8c35..a885b26b6cd 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java
@@ -122,7 +122,7 @@ class Activator {
// Note: log parent hosts not ready, but do not add to exception (to avoid leaking hostnames)
logger.log(LogLevel.INFO, application + ": Parent hosts not ready: " + nonActiveHosts);
throw new ParentHostUnavailableException("Waiting for hosts to finish booting: " +
- numNonActive + "/" + parentHostnames.size() + " left.");
+ numNonActive + "/" + parentHostnames.size() + " left.");
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java
index d5c7626b757..a56ace07c82 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java
@@ -57,6 +57,7 @@ public class InfraDeployerImpl implements InfraDeployer {
}
private class InfraDeployment implements Deployment {
+
private final InfraApplicationApi application;
private boolean prepared = false;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
index e381fdc654e..1c3cd40bc02 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
@@ -96,14 +96,14 @@ class NodeAllocation {
*/
List<Node> offer(List<PrioritizableNode> nodesPrioritized) {
List<Node> accepted = new ArrayList<>();
- for (PrioritizableNode offeredPriority : nodesPrioritized) {
- Node offered = offeredPriority.node;
+ for (PrioritizableNode node : nodesPrioritized) {
+ Node offered = node.node;
if (offered.allocation().isPresent()) {
ClusterMembership membership = offered.allocation().get().membership();
if ( ! offered.allocation().get().owner().equals(application)) continue; // wrong application
if ( ! membership.cluster().equalsIgnoringGroupAndVespaVersion(cluster)) continue; // wrong cluster id/type
- if ((! offeredPriority.isSurplusNode || saturated()) && ! membership.cluster().group().equals(cluster.group())) continue; // wrong group and we can't or have no reason to change it
+ if ((! node.isSurplusNode || saturated()) && ! membership.cluster().group().equals(cluster.group())) continue; // wrong group and we can't or have no reason to change it
if ( offered.allocation().get().isRemovable()) continue; // don't accept; causes removal
if ( indexes.contains(membership.index())) continue; // duplicate index (just to be sure)
@@ -114,12 +114,11 @@ class NodeAllocation {
if (offered.status().wantToRetire()) wantToRetireNode = true;
if (requestedNodes.isExclusive() && ! hostsOnly(application.tenant(), offered.parentHostname()))
wantToRetireNode = true;
-
if (( ! saturated() && hasCompatibleFlavor(offered)) || acceptToRetire(offered))
- accepted.add(acceptNode(offeredPriority, wantToRetireNode));
+ accepted.add(acceptNode(node, wantToRetireNode));
}
else {
- accepted.add(acceptNode(offeredPriority, false));
+ accepted.add(acceptNode(node, false));
}
}
else if ( ! saturated() && hasCompatibleFlavor(offered)) {
@@ -138,11 +137,11 @@ class NodeAllocation {
if (offered.status().wantToRetire()) {
continue;
}
- offeredPriority.node = offered.allocate(application,
+ node.node = offered.allocate(application,
ClusterMembership.from(cluster, highestIndex.add(1)),
- requestedNodes.resources().orElse(offeredPriority.node.flavor().resources()),
+ requestedNodes.resources().orElse(node.node.flavor().resources()),
clock.instant());
- accepted.add(acceptNode(offeredPriority, false));
+ accepted.add(acceptNode(node, false));
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
index e628e823025..898e1343ea3 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
@@ -73,7 +73,7 @@ public class MockDeployer implements Deployer {
}
try {
return Optional.ofNullable(applications.get(id))
- .map(application -> new MockDeployment(provisioner, application));
+ .map(application -> new MockDeployment(provisioner, application));
} finally {
lock.unlock();
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java
index e8917c891f9..fc0e0e68570 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.component.Version;
import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.Capacity;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.Flavor;
@@ -12,16 +13,19 @@ import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.Zone;
import com.yahoo.config.provisioning.FlavorsConfig;
+import com.yahoo.transaction.NestedTransaction;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder;
import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
+import com.yahoo.vespa.hosted.provision.testutils.MockDeployer;
import org.junit.Test;
import java.time.Duration;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Optional;
import static org.junit.Assert.assertEquals;
@@ -35,60 +39,78 @@ public class RebalancerTest {
@Test
public void testRebalancing() {
+ ApplicationId cpuApp = makeApplicationId("t1", "a1");
+ ApplicationId memApp = makeApplicationId("t2", "a2");
+ NodeResources cpuResources = new NodeResources(8, 4, 1, 0.1);
+ NodeResources memResources = new NodeResources(4, 9, 1, 0.1);
+
ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.perf, RegionName.from("us-east"))).flavorsConfig(flavorsConfig()).build();
MetricsReporterTest.TestMetric metric = new MetricsReporterTest.TestMetric();
- Rebalancer rebalancer = new Rebalancer(tester.nodeRepository(),
+
+ Map<ApplicationId, MockDeployer.ApplicationContext> apps = Map.of(
+ cpuApp, new MockDeployer.ApplicationContext(cpuApp, clusterSpec("c"), Capacity.fromCount(1, cpuResources), 1),
+ memApp, new MockDeployer.ApplicationContext(memApp, clusterSpec("c"), Capacity.fromCount(1, memResources), 1));
+ MockDeployer deployer = new MockDeployer(tester.provisioner(), tester.clock(), apps);
+
+ Rebalancer rebalancer = new Rebalancer(deployer,
+ tester.nodeRepository(),
new IdentityHostResourcesCalculator(),
Optional.empty(),
metric,
tester.clock(),
Duration.ofMinutes(1));
- NodeResources cpuResources = new NodeResources(8, 4, 1, 0.1);
- NodeResources memResources = new NodeResources(4, 9, 1, 0.1);
tester.makeReadyNodes(3, "flt", NodeType.host, 8);
tester.deployZoneApp();
// Cpu heavy application - causing 1 of these nodes to be skewed
- ApplicationId cpuApp = makeApplicationId("t1", "a1");
deployApp(cpuApp, clusterSpec("c"), cpuResources, tester, 1);
- String cpuSkewedNodeHostname = tester.nodeRepository().getNodes(cpuApp).get(0).hostname();
+ Node cpuSkewedNode = tester.nodeRepository().getNodes(cpuApp).get(0);
rebalancer.maintain();
assertFalse("No better place to move the skewed node, so no action is taken",
- tester.nodeRepository().getNode(cpuSkewedNodeHostname).get().status().wantToRetire());
+ tester.nodeRepository().getNode(cpuSkewedNode.hostname()).get().status().wantToRetire());
assertEquals(0.00325, metric.values.get("hostedVespa.docker.skew").doubleValue(), 0.00001);
- tester.makeReadyNodes(1, "cpu", NodeType.host, 8);
+ Node newCpuHost = tester.makeReadyNodes(1, "cpu", NodeType.host, 8).get(0);
+ tester.deployZoneApp();
rebalancer.maintain();
- assertTrue("We can now move the node to the cpu skewed host to reduce skew",
- tester.nodeRepository().getNode(cpuSkewedNodeHostname).get().status().wantToRetire());
- assertEquals("We're not actually moving the node here so skew remains steady",
- 0.00325, metric.values.get("hostedVespa.docker.skew").doubleValue(), 0.00001);
+ assertTrue("Rebalancer retired the node we wanted to move away from",
+ tester.nodeRepository().getNode(cpuSkewedNode.hostname()).get().allocation().get().membership().retired());
+ assertTrue("... and added a node on the new host instead",
+ tester.nodeRepository().getNodes(cpuApp, Node.State.active).stream().anyMatch(node -> node.hasParent(newCpuHost.hostname())));
+ assertEquals("Skew is reduced",
+ 0.00244, metric.values.get("hostedVespa.docker.skew").doubleValue(), 0.00001);
- ApplicationId memApp = makeApplicationId("t2", "a2");
deployApp(memApp, clusterSpec("c"), memResources, tester, 1);
assertEquals("Assigned to a flat node as that causes least skew", "flt",
tester.nodeRepository().list().parentOf(tester.nodeRepository().getNodes(memApp).get(0)).get().flavor().name());
- String memSkewedNodeHostname = tester.nodeRepository().getNodes(memApp).get(0).hostname();
- tester.makeReadyNodes(1, "mem", NodeType.host, 8);
rebalancer.maintain();
assertEquals("Deploying the mem skewed app increased skew",
- 0.00752, metric.values.get("hostedVespa.docker.skew").doubleValue(), 0.00001);
- assertFalse("The mem skewed node is not set want to retire as the cpu skewed node still is",
- tester.nodeRepository().getNode(memSkewedNodeHostname).get().status().wantToRetire());
+ 0.00734, metric.values.get("hostedVespa.docker.skew").doubleValue(), 0.00001);
+
+ Node memSkewedNode = tester.nodeRepository().getNodes(memApp).get(0);
+ Node newMemHost = tester.makeReadyNodes(1, "mem", NodeType.host, 8).get(0);
+ tester.deployZoneApp();
+
+ rebalancer.maintain();
+ assertFalse("No rebalancing happens because cpuSkewedNode is still retired",
+ tester.nodeRepository().getNode(memSkewedNode.hostname()).get().allocation().get().membership().retired());
+
+ NestedTransaction tx = new NestedTransaction();
+ tester.nodeRepository().deactivate(List.of(cpuSkewedNode), tx);
+ tx.commit();
- Node cpuSkewedNode = tester.nodeRepository().getNode(cpuSkewedNodeHostname).get();
- tester.nodeRepository().write(cpuSkewedNode.withWantToRetire(false, Agent.system, tester.clock().instant()),
- tester.nodeRepository().lock(cpuSkewedNode));
rebalancer.maintain();
- assertTrue("The mem skewed node is now scheduled for moving",
- tester.nodeRepository().getNode(memSkewedNodeHostname).get().status().wantToRetire());
- assertFalse("(The cpu skewed node is not because it causes slightly less skew)",
- tester.nodeRepository().getNode(cpuSkewedNodeHostname).get().status().wantToRetire());
+ assertTrue("Rebalancer retired the node we wanted to move away from",
+ tester.nodeRepository().getNode(memSkewedNode.hostname()).get().allocation().get().membership().retired());
+ assertTrue("... and added a node on the new host instead",
+ tester.nodeRepository().getNodes(memApp, Node.State.active).stream().anyMatch(node -> node.hasParent(newMemHost.hostname())));
+ assertEquals("Skew is reduced",
+ 0.00587, metric.values.get("hostedVespa.docker.skew").doubleValue(), 0.00001);
}
private ClusterSpec clusterSpec(String clusterId) {