summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@verizonmedia.com> 2019-10-17 16:36:48 +0200
committer Jon Bratseth <bratseth@verizonmedia.com> 2019-10-17 16:36:48 +0200
commit 82d9136a2166d7262c8b2eaba0d09a10f4750422 (patch)
tree 04639ce412ba7c3c79f6290c00c4a9be203e00fd
parent 15c2736314ff5c94a695124ab271f1e046060029 (diff)
Schedule node balancing acts
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java 6
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java 4
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java 3
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java 8
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java 114
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java 3
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacity.java 27
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeSkewTest.java 45
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java 1
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java 4
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceTester.java 1
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java 115
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java 39
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java 13
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json 3
17 files changed, 344 insertions, 46 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
index 3d502d2b3da..64a98bba035 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
@@ -167,8 +167,10 @@ public final class Node {
*/
public Node withWantToRetire(boolean wantToRetire, Agent agent, Instant at) {
if (wantToRetire == status.wantToRetire()) return this;
- return with(status.withWantToRetire(wantToRetire))
- .with(history.with(new History.Event(History.Event.Type.wantToRetire, Agent.operator, at)));
+ Node node = this.with(status.withWantToRetire(wantToRetire));
+ if (wantToRetire)
+ node = node.with(history.with(new History.Event(History.Event.Type.wantToRetire, agent, at)));
+ return node;
}
/**
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
index e1de5be8648..869a06ac530 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
@@ -15,6 +15,7 @@ import java.util.List;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.stream.Collectors;
+import java.util.stream.Stream;
import static java.util.stream.Collectors.collectingAndThen;
@@ -132,6 +133,9 @@ public class NodeList implements Iterable<Node> {
/** Returns the immutable list of nodes in this */
public List<Node> asList() { return nodes; }
+ /** Returns the nodes of this as a stream */
+ public Stream<Node> stream() { return asList().stream(); }
+
public NodeList filter(Predicate<Node> predicate) {
return nodes.stream().filter(predicate).collect(collectingAndThen(Collectors.toList(), NodeList::wrap));
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
index 68123092dfa..072f2e765f4 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
@@ -427,7 +427,7 @@ public class NodeRepository extends AbstractComponent {
}
/**
- * Set a node dirty, which is in the provisioned, failed or parked state.
+ * Set a node dirty, allowed if it is in the provisioned, inactive, failed or parked state.
* Use this to clean newly provisioned nodes or to recycle failed nodes which have been repaired or put on hold.
*
* @throws IllegalArgumentException if the node has hardware failure
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java
index 013fd169f45..a6b88d50acb 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java
@@ -39,7 +39,8 @@ public class InactiveExpirer extends Expirer {
@Override
protected void expire(List<Node> expired) {
expired.forEach(node -> {
- if (node.status().wantToRetire()) {
+ if (node.status().wantToRetire() &&
+ node.history().event(History.Event.Type.wantToRetire).get().agent() == Agent.operator) {
nodeRepository.park(node.hostname(), false, Agent.system, "Expired by InactiveExpirer");
} else {
nodeRepository.setDirty(node, Agent.system, "Expired by InactiveExpirer");
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index 29c0544420a..c644626bf01 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -47,7 +47,9 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final Optional<DynamicProvisioningMaintainer> dynamicProvisioningMaintainer;
private final CapacityReportMaintainer capacityReportMaintainer;
private final OsUpgradeActivator osUpgradeActivator;
+ private final Rebalancer rebalancer;
+ @SuppressWarnings("unused")
@Inject
public NodeRepositoryMaintenance(NodeRepository nodeRepository, Deployer deployer, InfraDeployer infraDeployer,
HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor,
@@ -55,7 +57,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
ProvisionServiceProvider provisionServiceProvider,
FlagSource flagSource) {
this(nodeRepository, deployer, infraDeployer, hostLivenessTracker, serviceMonitor, zone, Clock.systemUTC(),
- orchestrator, metric, provisionServiceProvider, flagSource);
+ orchestrator, metric, provisionServiceProvider, flagSource);
}
public NodeRepositoryMaintenance(NodeRepository nodeRepository, Deployer deployer, InfraDeployer infraDeployer,
@@ -82,6 +84,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
new DynamicProvisioningMaintainer(nodeRepository, durationFromEnv("host_provisioner_interval").orElse(defaults.dynamicProvisionerInterval), hostProvisioner, flagSource));
capacityReportMaintainer = new CapacityReportMaintainer(nodeRepository, metric, durationFromEnv("capacity_report_interval").orElse(defaults.capacityReportInterval));
osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval);
+ rebalancer = new Rebalancer(nodeRepository, provisionServiceProvider.getHostResourcesCalculator(), clock, defaults.rebalancerInterval);
// The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now
infrastructureProvisioner.maintain();
@@ -105,6 +108,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
loadBalancerExpirer.ifPresent(Maintainer::deconstruct);
dynamicProvisioningMaintainer.ifPresent(Maintainer::deconstruct);
osUpgradeActivator.deconstruct();
+ rebalancer.deconstruct();
}
private static Optional<Duration> durationFromEnv(String envVariable) {
@@ -149,6 +153,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final Duration loadBalancerExpirerInterval;
private final Duration dynamicProvisionerInterval;
private final Duration osUpgradeActivatorInterval;
+ private final Duration rebalancerInterval;
private final NodeFailer.ThrottlePolicy throttlePolicy;
@@ -169,6 +174,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
reservationExpiry = Duration.ofMinutes(20); // Need to be long enough for deployment to be finished for all config model versions
dynamicProvisionerInterval = Duration.ofMinutes(5);
osUpgradeActivatorInterval = zone.system().isCd() ? Duration.ofSeconds(30) : Duration.ofMinutes(5);
+ rebalancerInterval = Duration.ofMinutes(40);
if (zone.environment().equals(Environment.prod) && ! zone.system().isCd()) {
inactiveExpiry = Duration.ofHours(4); // enough time for the application owner to discover and redeploy
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
index d2158dd08db..4c3134a9382 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
@@ -1,5 +1,117 @@
// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
-public class Rebalancer {
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.transaction.Mutex;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.node.Agent;
+import com.yahoo.vespa.hosted.provision.provisioning.DockerHostCapacity;
+import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
+
+import java.time.Clock;
+import java.time.Duration;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * A maintainer which looks for the single node move that reduces allocation skew the most
+ * and marks that node as wanting to retire. It does nothing while any active node is retired
+ * or already marked as wanting to retire, so it does not schedule a new move while an earlier
+ * retirement of an active node is still pending.
+ */
+public class Rebalancer extends Maintainer {
+
+    private final HostResourcesCalculator hostResourcesCalculator;
+    private final Clock clock;
+
+    public Rebalancer(NodeRepository nodeRepository, HostResourcesCalculator hostResourcesCalculator, Clock clock, Duration interval) {
+        super(nodeRepository, interval);
+        this.hostResourcesCalculator = hostResourcesCalculator;
+        this.clock = clock;
+    }
+
+    @Override
+    protected void maintain() {
+        // Work with an unlocked snapshot as this can take a long time and full consistency is not needed
+        NodeList allNodes = nodeRepository().list();
+
+        if ( ! zoneIsStable(allNodes)) return;
+
+        Move bestMove = findBestMove(allNodes);
+        if (bestMove == Move.none) return;
+        markWantToRetire(bestMove.node);
+    }
+
+    /** Returns true if no active node in the given snapshot is retired or marked as wanting to retire */
+    private boolean zoneIsStable(NodeList allNodes) {
+        List<Node> active = allNodes.state(Node.State.active).asList();
+        if (active.stream().anyMatch(node -> node.allocation().get().membership().retired())) return false;
+        if (active.stream().anyMatch(node -> node.status().wantToRetire())) return false;
+        return true;
+    }
+
+    /**
+     * Finds the move which reduces allocation skew the most and returns it.
+     * Every active node having a parent host is considered as a candidate, against every node of type host
+     * which is not its current parent and which has enough free capacity for the node's resources.
+     *
+     * @param allNodes the snapshot of all nodes to consider
+     * @return the best move, or Move.none if no move can be made to reduce skew
+     */
+    private Move findBestMove(NodeList allNodes) {
+        DockerHostCapacity capacity = new DockerHostCapacity(allNodes, hostResourcesCalculator);
+        // The candidate target hosts are the same for every node, so filter them once
+        List<Node> hosts = allNodes.nodeType(NodeType.host).asList();
+        Move bestMove = Move.none;
+        for (Node node : allNodes.state(Node.State.active).asList()) {
+            // Whether the node can be moved at all does not depend on the target host: Decide it once per node
+            if (node.parentHostname().isEmpty()) continue;
+            Optional<Node> fromHost = allNodes.parentOf(node);
+            if (fromHost.isEmpty()) continue; // Parent is not in this snapshot: Skip the node rather than fail the run
+
+            // The skew change at the current host is also independent of the target host
+            double skewReductionAtFromHost = skewReductionByRemoving(node, fromHost.get(), capacity);
+            for (Node toHost : hosts) {
+                if (toHost.hostname().equals(node.parentHostname().get())) continue;
+                if ( ! capacity.freeCapacityOf(toHost).satisfies(node.flavor().resources())) continue;
+
+                double netSkewReduction = skewReductionAtFromHost + skewReductionByAdding(node, toHost, capacity);
+                // Strictly greater: Move.none has reduction 0, so moves which do not reduce skew are never chosen
+                if (netSkewReduction > bestMove.netSkewReduction)
+                    bestMove = new Move(node, netSkewReduction);
+            }
+        }
+        return bestMove;
+    }
+
+    /**
+     * Marks the given node as wanting to retire, on behalf of Agent.system.
+     * The node is re-read under its lock since the caller worked on an unlocked snapshot;
+     * nothing is written if it no longer exists or is no longer active.
+     */
+    private void markWantToRetire(Node node) {
+        try (Mutex lock = nodeRepository().lock(node)) {
+            Optional<Node> nodeToMove = nodeRepository().getNode(node.hostname());
+            if (nodeToMove.isEmpty()) return;
+            if (nodeToMove.get().state() != Node.State.active) return;
+
+            nodeRepository().write(nodeToMove.get().withWantToRetire(true, Agent.system, clock.instant()), lock);
+            log.info("Marked " + nodeToMove.get() + " as want to retire to reduce allocation skew");
+        }
+    }
+
+    /** Returns skew before minus skew after at fromHost when the node's resources are added to its free capacity */
+    private double skewReductionByRemoving(Node node, Node fromHost, DockerHostCapacity capacity) {
+        NodeResources freeHostCapacity = capacity.freeCapacityOf(fromHost);
+        double skewBefore = Node.skew(fromHost.flavor().resources(), freeHostCapacity);
+        double skewAfter = Node.skew(fromHost.flavor().resources(), freeHostCapacity.add(node.flavor().resources().anySpeed()));
+        return skewBefore - skewAfter;
+    }
+
+    /** Returns skew before minus skew after at toHost when the node's resources are subtracted from its free capacity */
+    private double skewReductionByAdding(Node node, Node toHost, DockerHostCapacity capacity) {
+        NodeResources freeHostCapacity = capacity.freeCapacityOf(toHost);
+        double skewBefore = Node.skew(toHost.flavor().resources(), freeHostCapacity);
+        double skewAfter = Node.skew(toHost.flavor().resources(), freeHostCapacity.subtract(node.flavor().resources().anySpeed()));
+        return skewBefore - skewAfter;
+    }
+
+    /** A candidate node to move, with the net skew reduction the move is computed to give */
+    private static class Move {
+
+        /** The "no move" instance: No node and zero skew reduction. Compared by identity in maintain() */
+        static final Move none = new Move(null, 0);
+
+        final Node node;
+        final double netSkewReduction;
+
+        Move(Node node, double netSkewReduction) {
+            this.node = node;
+            this.netSkewReduction = netSkewReduction;
+        }
+
+        @Override
+        public String toString() {
+            return "move: " +
+                   ( node == null ? "none" : node.hostname() + ", skew reduction " + netSkewReduction );
+        }
+
+    }
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java
index b13728cb148..a3714ffe033 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java
@@ -6,6 +6,7 @@ import com.google.common.net.InetAddresses;
import com.google.common.primitives.UnsignedBytes;
import com.yahoo.vespa.hosted.provision.LockedNodeList;
import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.persistence.NameResolver;
import java.net.Inet4Address;
@@ -201,7 +202,7 @@ public class IP {
*
* @param nodes Locked list of all nodes in the repository
*/
- public Set<String> findUnused(LockedNodeList nodes) {
+ public Set<String> findUnused(NodeList nodes) {
var unusedAddresses = new LinkedHashSet<>(addresses);
nodes.filter(node -> node.ipConfig().primary().stream().anyMatch(addresses::contains))
.forEach(node -> unusedAddresses.removeAll(node.ipConfig().primary()));
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacity.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacity.java
index 112f118705b..9713615f77e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacity.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacity.java
@@ -5,6 +5,7 @@ import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.vespa.hosted.provision.LockedNodeList;
import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
import java.util.Objects;
@@ -18,10 +19,10 @@ import java.util.Objects;
*/
public class DockerHostCapacity {
- private final LockedNodeList allNodes;
+ private final NodeList allNodes;
private final HostResourcesCalculator hostResourcesCalculator;
- DockerHostCapacity(LockedNodeList allNodes, HostResourcesCalculator hostResourcesCalculator) {
+ public DockerHostCapacity(NodeList allNodes, HostResourcesCalculator hostResourcesCalculator) {
this.allNodes = Objects.requireNonNull(allNodes, "allNodes must be non-null");
this.hostResourcesCalculator = Objects.requireNonNull(hostResourcesCalculator, "hostResourcesCalculator must be non-null");
}
@@ -60,24 +61,28 @@ public class DockerHostCapacity {
}
/**
- * Calculate the remaining capacity for the dockerHost.
+ * Calculate the remaining capacity of a host.
*
- * @param dockerHost the host to find free capacity of.
+ * @param host the host to find free capacity of.
* @return a default (empty) capacity if not a docker host, otherwise the free/unallocated/rest capacity
*/
- NodeResources freeCapacityOf(Node dockerHost, boolean excludeInactive) {
+ public NodeResources freeCapacityOf(Node host) {
+ return freeCapacityOf(host, false);
+ }
+
+ NodeResources freeCapacityOf(Node host, boolean excludeInactive) {
// Only hosts have free capacity
- if (dockerHost.type() != NodeType.host) return new NodeResources(0, 0, 0, 0);
- NodeResources hostResources = hostResourcesCalculator.availableCapacityOf(dockerHost.flavor().resources());
+ if (host.type() != NodeType.host) return new NodeResources(0, 0, 0, 0);
+ NodeResources hostResources = hostResourcesCalculator.availableCapacityOf(host.flavor().resources());
// Subtract used resources without taking disk speed into account since existing allocations grandfathered in
// may not reflect the actual disk speed (as of May 2019). This (the 3 diskSpeed assignments below)
// can be removed when all node allocations accurately reflect the true host disk speed
- return allNodes.childrenOf(dockerHost).asList().stream()
+ return allNodes.childrenOf(host).asList().stream()
.filter(node -> !(excludeInactive && isInactiveOrRetired(node)))
- .map(node -> node.flavor().resources().withDiskSpeed(NodeResources.DiskSpeed.any))
- .reduce(hostResources.withDiskSpeed(NodeResources.DiskSpeed.any), NodeResources::subtract)
- .withDiskSpeed(dockerHost.flavor().resources().diskSpeed());
+ .map(node -> node.flavor().resources().anySpeed())
+ .reduce(hostResources.anySpeed(), NodeResources::subtract)
+ .withDiskSpeed(host.flavor().resources().diskSpeed());
}
private static boolean isInactiveOrRetired(Node node) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
index 97b615d493f..e437fd4211a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
@@ -62,7 +62,7 @@ public class NodeRepositoryProvisioner implements Provisioner {
this.zone = zone;
this.loadBalancerProvisioner = provisionServiceProvider.getLoadBalancerService().map(lbService -> new LoadBalancerProvisioner(nodeRepository, lbService, flagSource));
this.preparer = new Preparer(nodeRepository,
- zone.environment() == Environment.prod ? SPARE_CAPACITY_PROD : SPARE_CAPACITY_NONPROD,
+ zone.environment() == Environment.prod ? SPARE_CAPACITY_PROD : SPARE_CAPACITY_NONPROD,
provisionServiceProvider.getHostProvisioner(),
provisionServiceProvider.getHostResourcesCalculator(),
flagSource,
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeSkewTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeSkewTest.java
new file mode 100644
index 00000000000..85ff4fe4264
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeSkewTest.java
@@ -0,0 +1,45 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision;
+
+import com.yahoo.config.provision.NodeResources;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Verifies the values returned by Node.skew for a set of fixed resource combinations.
+ *
+ * @author bratseth
+ */
+public class NodeSkewTest {
+
+    /** Tolerance used when comparing computed skew to the expected value */
+    private static final double delta = 0.0001;
+
+    @Test
+    public void testNodeSkewComputation() {
+        // No skew
+        assertSkew(0, resources(6, 4, 2), resources(6, 4, 2));
+        assertSkew(0, resources(6, 4, 2), resources(0, 0, 0));
+        assertSkew(0, resources(6, 4, 2), resources(3, 2, 1));
+
+        // Extremely skewed
+        assertSkew(0.2222, resources(6, 4, 2), resources(0, 4, 0));
+        // A little less
+        assertSkew(0.1666, resources(6, 4, 2), resources(3, 4, 0));
+        // A little less
+        assertSkew(0.0555, resources(6, 4, 2), resources(3, 4, 1));
+        // The same, since being at half and full is equally skewed here
+        assertSkew(0.0555, resources(6, 4, 2), resources(3, 4, 2));
+        // Almost not skewed
+        assertSkew(0.0062, resources(6, 4, 2), resources(5, 4, 2));
+
+        // Skew is scale free
+        assertSkew(0.0201, resources(6, 4, 2), resources(1, 1, 1));
+        // - all dimensions twice as large
+        assertSkew(0.0201, resources(12, 8, 4), resources(2, 2, 2));
+        // - just one dimension twice as large
+        assertSkew(0.0201, resources(12, 4, 2), resources(2, 1, 1));
+    }
+
+    /** Asserts that Node.skew of the two given resources equals the expected value within the tolerance */
+    private void assertSkew(double expected, NodeResources first, NodeResources second) {
+        double actual = Node.skew(first, second);
+        assertEquals(expected, actual, delta);
+    }
+
+    /** Creates node resources with the given vcpu, memory and disk, and a fixed last argument of 1 */
+    private NodeResources resources(double vcpu, double memGb, double diskGb) {
+        return new NodeResources(vcpu, memGb, diskGb, 1);
+    }
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
index dc8650c122e..2be9580f78d 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
@@ -61,6 +61,7 @@ import static org.mockito.Mockito.when;
* @author freva
*/
public class DynamicProvisioningMaintainerTest {
+
private final HostProvisionerTester tester = new HostProvisionerTester();
private final HostProvisioner hostProvisioner = mock(HostProvisioner.class);
private final InMemoryFlagSource flagSource = new InMemoryFlagSource()
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java
index 2383f4529a7..d409927701c 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java
@@ -138,7 +138,7 @@ public class InactiveAndFailedExpirerTest {
// Flag one node for retirement and redeploy
{
Node toRetire = tester.getNodes(applicationId, Node.State.active).asList().get(0);
- tester.patchNode(toRetire.with(toRetire.status().withWantToRetire(true)));
+ tester.patchNode(toRetire.withWantToRetire(true, Agent.operator, tester.clock().instant()));
List<HostSpec> hostSpecs = tester.prepare(applicationId, cluster, Capacity.fromCount(2, nodeResources), 1);
tester.activate(applicationId, new HashSet<>(hostSpecs));
}
@@ -160,7 +160,7 @@ public class InactiveAndFailedExpirerTest {
Orchestrator orchestrator = mock(Orchestrator.class);
doThrow(new RuntimeException()).when(orchestrator).acquirePermissionToRemove(any());
new RetiredExpirer(tester.nodeRepository(), tester.orchestrator(), deployer, tester.clock(), Duration.ofDays(30),
- Duration.ofMinutes(10)).run();
+ Duration.ofMinutes(10)).run();
assertEquals(1, tester.nodeRepository().getNodes(Node.State.inactive).size());
// Inactive times out and one node is moved to parked
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceTester.java
index 798df66f6cd..afada3c6f0f 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceTester.java
@@ -66,4 +66,5 @@ public class MaintenanceTester {
.map(n -> n.withCurrentRebootGeneration(n.status().reboot().wanted(), Instant.now(clock)))
.collect(Collectors.toList());
}
+
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java
new file mode 100644
index 00000000000..2325bf3118a
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java
@@ -0,0 +1,115 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.component.Version;
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.Environment;
+import com.yahoo.config.provision.Flavor;
+import com.yahoo.config.provision.HostSpec;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.config.provision.RegionName;
+import com.yahoo.config.provision.Zone;
+import com.yahoo.config.provisioning.FlavorsConfig;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.node.Agent;
+import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder;
+import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
+import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
+import org.junit.Test;
+
+import java.time.Duration;
+import java.util.HashSet;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests that the Rebalancer marks a skewed node as wanting to retire once a better placed host exists,
+ * and that it takes no action while another active node is already marked as wanting to retire.
+ *
+ * @author bratseth
+ */
+public class RebalancerTest {
+
+    @Test
+    public void testRebalancing() {
+        ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.perf, RegionName.from("us-east"))).flavorsConfig(flavorsConfig()).build();
+        Rebalancer rebalancer = new Rebalancer(tester.nodeRepository(),
+                                               new IdentityHostResourcesCalculator(),
+                                               tester.clock(),
+                                               Duration.ofMinutes(1));
+
+        NodeResources cpuResources = new NodeResources(8, 4, 1, 0.1);
+        NodeResources memResources = new NodeResources(4, 9, 1, 0.1);
+
+        tester.makeReadyNodes(3, "flt", NodeType.host, 8);
+        tester.deployZoneApp(tester);
+
+        // Cpu heavy application - causing 1 of these nodes to be skewed
+        ApplicationId cpuApp = makeApplicationId("t1", "a1");
+        deployApp(cpuApp, clusterSpec("c"), cpuResources, tester, 1);
+        String cpuSkewedNodeHostname = tester.nodeRepository().getNodes(cpuApp).get(0).hostname();
+
+        rebalancer.maintain();
+        assertFalse("No better place to move the skewed node, so no action is taken",
+                    tester.nodeRepository().getNode(cpuSkewedNodeHostname).get().status().wantToRetire());
+
+        tester.makeReadyNodes(1, "cpu", NodeType.host, 8);
+
+        rebalancer.maintain();
+        assertTrue("We can now move the node to the cpu skewed host to reduce skew",
+                   tester.nodeRepository().getNode(cpuSkewedNodeHostname).get().status().wantToRetire());
+
+        ApplicationId memApp = makeApplicationId("t2", "a2");
+        deployApp(memApp, clusterSpec("c"), memResources, tester, 1);
+        assertEquals("Assigned to a flat node as that causes least skew", "flt",
+                     tester.nodeRepository().list().parentOf(tester.nodeRepository().getNodes(memApp).get(0)).get().flavor().name());
+        String memSkewedNodeHostname = tester.nodeRepository().getNodes(memApp).get(0).hostname();
+
+        tester.makeReadyNodes(1, "mem", NodeType.host, 8);
+        rebalancer.maintain();
+        assertFalse("The mem skewed node is not set want to retire as the cpu skewed node still is",
+                    tester.nodeRepository().getNode(memSkewedNodeHostname).get().status().wantToRetire());
+
+        // Clear want to retire on the cpu skewed node so the rebalancer considers the zone stable again.
+        // The node lock is held only for the write and released afterwards, instead of being left open.
+        Node cpuSkewedNode = tester.nodeRepository().getNode(cpuSkewedNodeHostname).get();
+        try (var lock = tester.nodeRepository().lock(cpuSkewedNode)) {
+            tester.nodeRepository().write(cpuSkewedNode.withWantToRetire(false, Agent.system, tester.clock().instant()), lock);
+        }
+        rebalancer.maintain();
+        assertTrue("The mem skewed node is now scheduled for moving",
+                   tester.nodeRepository().getNode(memSkewedNodeHostname).get().status().wantToRetire());
+        assertFalse("(The cpu skewed node is not because it causes slightly less skew)",
+                    tester.nodeRepository().getNode(cpuSkewedNodeHostname).get().status().wantToRetire());
+    }
+
+    /** Returns a request for a content cluster with the given id, on version 6.42 */
+    private ClusterSpec clusterSpec(String clusterId) {
+        return ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from(clusterId), Version.fromString("6.42"), false);
+    }
+
+    /** Returns the application id of the "default" instance of the given tenant and application */
+    private ApplicationId makeApplicationId(String tenant, String appName) {
+        return ApplicationId.from(tenant, appName, "default");
+    }
+
+    /** Prepares and activates the given application with nodeCount nodes having the given resources */
+    private void deployApp(ApplicationId id, ClusterSpec spec, NodeResources flavor, ProvisioningTester tester, int nodeCount) {
+        List<HostSpec> hostSpec = tester.prepare(id, spec, nodeCount, 1, flavor);
+        tester.activate(id, new HashSet<>(hostSpec));
+    }
+
+    /** Returns the three host flavors used by this test: "flt", "cpu" and "mem" */
+    private FlavorsConfig flavorsConfig() {
+        FlavorConfigBuilder b = new FlavorConfigBuilder();
+        b.addFlavor("flt", 30, 30, 40, 3, Flavor.Type.BARE_METAL);
+        b.addFlavor("cpu", 40, 20, 40, 3, Flavor.Type.BARE_METAL);
+        b.addFlavor("mem", 20, 40, 40, 3, Flavor.Type.BARE_METAL);
+        return b.build();
+    }
+
+    /** A calculator which makes all of a host's resources available, unchanged */
+    private static class IdentityHostResourcesCalculator implements HostResourcesCalculator {
+
+        @Override
+        public NodeResources availableCapacityOf(NodeResources hostResources) {
+            return hostResources;
+        }
+
+    }
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java
index 4037bc52064..1318a6105f8 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java
@@ -64,7 +64,7 @@ public class DynamicDockerAllocationTest {
public void relocate_nodes_from_spare_hosts() {
ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).flavorsConfig(flavorsConfig()).build();
tester.makeReadyNodes(4, "host-small", NodeType.host, 32);
- deployZoneApp(tester);
+ tester.deployZoneApp(tester);
List<Node> dockerHosts = tester.nodeRepository().getNodes(NodeType.host, Node.State.active);
NodeResources flavor = new NodeResources(1, 4, 10, 1);
@@ -107,7 +107,7 @@ public class DynamicDockerAllocationTest {
public void relocate_failed_nodes() {
ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).flavorsConfig(flavorsConfig()).build();
tester.makeReadyNodes(5, "host-small", NodeType.host, 32);
- deployZoneApp(tester);
+ tester.deployZoneApp(tester);
List<Node> dockerHosts = tester.nodeRepository().getNodes(NodeType.host, Node.State.active);
NodeResources resources = new NodeResources(1, 4, 10, 0.3);
@@ -156,7 +156,7 @@ public class DynamicDockerAllocationTest {
tester.makeReadyNodes(3, "flt", NodeType.host, 8); // cpu: 30, mem: 30
tester.makeReadyNodes(3, "cpu", NodeType.host, 8); // cpu: 40, mem: 20
tester.makeReadyNodes(3, "mem", NodeType.host, 8); // cpu: 20, mem: 40
- deployZoneApp(tester);
+ tester.deployZoneApp(tester);
NodeResources fltResources = new NodeResources(6, 6, 1, 0.1);
NodeResources cpuResources = new NodeResources(8, 4, 1, 0.1);
NodeResources memResources = new NodeResources(4, 8, 1, 0.1);
@@ -199,7 +199,7 @@ public class DynamicDockerAllocationTest {
public void do_not_relocate_nodes_from_spare_if_no_where_to_relocate_them() {
ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).flavorsConfig(flavorsConfig()).build();
tester.makeReadyNodes(2, "host-small", NodeType.host, 32);
- deployZoneApp(tester);
+ tester.deployZoneApp(tester);
List<Node> dockerHosts = tester.nodeRepository().getNodes(NodeType.host, Node.State.active);
NodeResources flavor = new NodeResources(1, 4, 10, 1);
@@ -226,7 +226,7 @@ public class DynamicDockerAllocationTest {
public void multiple_groups_are_on_separate_parent_hosts() {
ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).flavorsConfig(flavorsConfig()).build();
tester.makeReadyNodes(5, "host-small", NodeType.host, 32);
- deployZoneApp(tester);
+ tester.deployZoneApp(tester);
//Deploy an application having 6 nodes (3 nodes in 2 groups). We only have 5 docker hosts available
ApplicationId application1 = tester.makeApplicationId();
@@ -247,7 +247,7 @@ public class DynamicDockerAllocationTest {
// Setup test
ApplicationId application1 = tester.makeApplicationId();
tester.makeReadyNodes(5, "host-small", NodeType.host, 32);
- deployZoneApp(tester);
+ tester.deployZoneApp(tester);
NodeResources flavor = new NodeResources(1, 4, 10, 1);
// Deploy initial state (can max deploy 3 nodes due to redundancy requirements)
@@ -276,7 +276,7 @@ public class DynamicDockerAllocationTest {
public void non_prod_zones_do_not_have_spares() {
ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.perf, RegionName.from("us-east"))).flavorsConfig(flavorsConfig()).build();
tester.makeReadyNodes(3, "host-small", NodeType.host, 32);
- deployZoneApp(tester);
+ tester.deployZoneApp(tester);
ApplicationId application1 = tester.makeApplicationId();
List<HostSpec> hosts = tester.prepare(application1, clusterSpec("myContent.t1.a1"), 3, 1, new NodeResources(1, 4, 10, 1));
tester.activate(application1, ImmutableSet.copyOf(hosts));
@@ -289,7 +289,7 @@ public class DynamicDockerAllocationTest {
public void cd_uses_slow_disk_nodes_for_docker_hosts() {
ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(SystemName.cd, Environment.test, RegionName.from("us-east"))).flavorsConfig(flavorsConfig()).build();
tester.makeReadyNodes(4, new Flavor(new NodeResources(1, 8, 12, 1, NodeResources.DiskSpeed.slow)), NodeType.host, 10, true);
- deployZoneApp(tester);
+ tester.deployZoneApp(tester);
ApplicationId application1 = tester.makeApplicationId();
List<HostSpec> hosts = tester.prepare(application1, clusterSpec("myContent.t1.a1"), 3, 1, new NodeResources(1, 4, 10, 1));
tester.activate(application1, ImmutableSet.copyOf(hosts));
@@ -309,7 +309,7 @@ public class DynamicDockerAllocationTest {
public void provision_dual_stack_containers() {
ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).flavorsConfig(flavorsConfig()).build();
tester.makeReadyNodes(2, "host-large", NodeType.host, 10, true);
- deployZoneApp(tester);
+ tester.deployZoneApp(tester);
ApplicationId application = tester.makeApplicationId();
List<HostSpec> hosts = tester.prepare(application, clusterSpec("myContent.t1.a1"), 2, 1, new NodeResources(1, 4, 10, 1));
@@ -340,7 +340,7 @@ public class DynamicDockerAllocationTest {
ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).flavorsConfig(flavorsConfig()).build();
tester.makeReadyNodes(2, new Flavor(new NodeResources(1, 8, 12, 1, NodeResources.DiskSpeed.fast)), NodeType.host, 10, true);
tester.makeReadyNodes(2, new Flavor(new NodeResources(1, 8, 12, 1, NodeResources.DiskSpeed.slow)), NodeType.host, 10, true);
- deployZoneApp(tester);
+ tester.deployZoneApp(tester);
ApplicationId application = tester.makeApplicationId();
ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("test"), Version.fromString("1"), false);
@@ -357,7 +357,7 @@ public class DynamicDockerAllocationTest {
ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).flavorsConfig(flavorsConfig()).build();
tester.makeReadyNodes(2, new Flavor(new NodeResources(1, 8, 12, 1, NodeResources.DiskSpeed.fast)), NodeType.host, 10, true);
tester.makeReadyNodes(2, new Flavor(new NodeResources(1, 8, 12, 1, NodeResources.DiskSpeed.slow)), NodeType.host, 10, true);
- deployZoneApp(tester);
+ tester.deployZoneApp(tester);
ApplicationId application = tester.makeApplicationId();
ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("test"), Version.fromString("1"), false);
@@ -379,7 +379,7 @@ public class DynamicDockerAllocationTest {
ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.dev, RegionName.from("us-east"))).flavorsConfig(flavorsConfig()).build();
tester.makeReadyNodes(2, new Flavor(new NodeResources(1, 8, 12, 1, NodeResources.DiskSpeed.fast)), NodeType.host, 10, true);
tester.makeReadyNodes(2, new Flavor(new NodeResources(1, 8, 12, 1, NodeResources.DiskSpeed.slow)), NodeType.host, 10, true);
- deployZoneApp(tester);
+ tester.deployZoneApp(tester);
ApplicationId application = tester.makeApplicationId();
ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("test"), Version.fromString("1"), false);
@@ -398,7 +398,7 @@ public class DynamicDockerAllocationTest {
public void testSwitchingFromLegacyFlavorSyntaxToResourcesDoesNotCauseReallocation() {
ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).flavorsConfig(flavorsConfig()).build();
tester.makeReadyNodes(2, new Flavor(new NodeResources(5, 20, 140, 3)), NodeType.host, 10, true);
- deployZoneApp(tester);
+ tester.deployZoneApp(tester);
ApplicationId application = tester.makeApplicationId();
ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("test"), Version.fromString("1"), false);
@@ -453,18 +453,6 @@ public class DynamicDockerAllocationTest {
return b.build();
}
- private void deployZoneApp(ProvisioningTester tester) {
- ApplicationId applicationId = tester.makeApplicationId();
- List<HostSpec> list = tester.prepare(applicationId,
- ClusterSpec.request(ClusterSpec.Type.container,
- ClusterSpec.Id.from("node-admin"),
- Version.fromString("6.42"),
- false),
- Capacity.fromRequiredNodeType(NodeType.host),
- 1);
- tester.activate(applicationId, ImmutableSet.copyOf(list));
- }
-
private boolean isInactiveOrRetired(Node node) {
boolean isInactive = node.state().equals(Node.State.inactive);
boolean isRetired = false;
@@ -478,4 +466,5 @@ public class DynamicDockerAllocationTest {
private ClusterSpec clusterSpec(String clusterId) {
return ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from(clusterId), Version.fromString("6.42"), false);
}
+
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
index 6c5cc198c67..3ad598b1235 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
@@ -1,6 +1,7 @@
// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.provisioning;
+import com.google.common.collect.ImmutableSet;
import com.yahoo.component.Version;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ApplicationName;
@@ -397,6 +398,18 @@ public class ProvisioningTester {
return nodes;
}
+ /**
+ * Prepares a container cluster with id "node-admin" (version 6.42), with capacity taken from the
+ * required node type host, under an application id made by the tester, then activates the prepared host specs.
+ * NOTE(review): every call goes through the tester argument; the receiver itself is never used.
+ */
+ public void deployZoneApp(ProvisioningTester tester) {
+ ApplicationId zoneApp = tester.makeApplicationId();
+ ClusterSpec nodeAdmin = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("node-admin"), Version.fromString("6.42"), false);
+ List<HostSpec> prepared = tester.prepare(zoneApp, nodeAdmin, Capacity.fromRequiredNodeType(NodeType.host), 1);
+ tester.activate(zoneApp, ImmutableSet.copyOf(prepared));
+ }
+
/** Returns the hosts from the input list which are not retired */
public List<HostSpec> nonRetired(Collection<HostSpec> hosts) {
return hosts.stream().filter(host -> ! host.membership().get().retired()).collect(Collectors.toList());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json
index cfb39e7e5b1..02746f1c79a 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json
@@ -40,6 +40,9 @@
"name": "ProvisionedExpirer"
},
{
+ "name": "Rebalancer"
+ },
+ {
"name": "ReservationExpirer"
},
{