aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Polden <mpolden@mpolden.no>2021-02-15 13:22:27 +0100
committerMartin Polden <mpolden@mpolden.no>2021-02-15 14:08:30 +0100
commit9b5105f5c054346a2d0d1d9e2df94791e6bae802 (patch)
tree7e75bcd6b8d0b87d631ae7b553f6c4e54d1e65a9
parent85e8900d505ab7fa4423912b22b586770e2f8752 (diff)
Prevent node movers from increasing skew
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java12
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java8
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java9
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java46
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java52
5 files changed, 105 insertions, 22 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
index 97d305acac4..2e5a772efd9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
@@ -160,7 +160,7 @@ class MaintenanceDeployment implements Closeable {
try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository)) {
if ( ! deployment.isValid()) return false;
- boolean couldMarkRetiredNow = markWantToRetire(node, true, agent, nodeRepository);
+ boolean couldMarkRetiredNow = markPreferToRetire(node, true, agent, nodeRepository);
if ( ! couldMarkRetiredNow) return false;
Optional<Node> expectedNewNode = Optional.empty();
@@ -182,7 +182,7 @@ class MaintenanceDeployment implements Closeable {
return true;
}
finally {
- markWantToRetire(node, false, agent, nodeRepository); // Necessary if this failed, no-op otherwise
+ markPreferToRetire(node, false, agent, nodeRepository); // Necessary if this failed, no-op otherwise
// Immediately clean up if we reserved the node but could not activate or reserved a node on the wrong host
expectedNewNode.flatMap(node -> nodeRepository.nodes().node(node.hostname(), Node.State.reserved))
@@ -191,17 +191,17 @@ class MaintenanceDeployment implements Closeable {
}
}
- /** Returns true only if this operation changes the state of the wantToRetire flag */
- private boolean markWantToRetire(Node node, boolean wantToRetire, Agent agent, NodeRepository nodeRepository) {
+ /** Returns true only if this operation changes the state of the preferToRetire flag */
+ private boolean markPreferToRetire(Node node, boolean preferToRetire, Agent agent, NodeRepository nodeRepository) {
Optional<NodeMutex> nodeMutex = nodeRepository.nodes().lockAndGet(node);
if (nodeMutex.isEmpty()) return false;
try (var nodeLock = nodeMutex.get()) {
if (nodeLock.node().state() != Node.State.active) return false;
- if (nodeLock.node().status().wantToRetire() == wantToRetire) return false;
+ if (nodeLock.node().status().preferToRetire() == preferToRetire) return false;
- nodeRepository.nodes().write(nodeLock.node().withWantToRetire(wantToRetire, agent, nodeRepository.clock().instant()), nodeLock);
+ nodeRepository.nodes().write(nodeLock.node().withPreferToRetire(preferToRetire, agent, nodeRepository.clock().instant()), nodeLock);
return true;
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java
index 58711e14d7f..609d1f74526 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java
@@ -69,10 +69,10 @@ public abstract class NodeMover<MOVE> extends NodeRepositoryMaintainer {
/** Returns true if no active nodes are retiring or about to be retired */
static boolean zoneIsStable(NodeList allNodes) {
- NodeList active = allNodes.state(Node.State.active);
- if (active.stream().anyMatch(node -> node.allocation().get().membership().retired())) return false;
- if (active.stream().anyMatch(node -> node.status().wantToRetire())) return false;
- return true;
+ return allNodes.state(Node.State.active).stream()
+ .noneMatch(node -> node.allocation().get().membership().retired() ||
+ node.status().wantToRetire() ||
+ node.status().preferToRetire());
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
index bc164dc37e0..042d227f8f1 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
@@ -115,7 +115,7 @@ class NodeAllocation {
if ((! saturated() && hasCompatibleFlavor(candidate) && requestedNodes.acceptable(candidate)) || acceptToRetire) {
candidate = candidate.withNode();
if (candidate.isValid())
- accepted.add(acceptNode(candidate, shouldRetire(candidate), resizeable));
+ accepted.add(acceptNode(candidate, shouldRetire(candidate, nodesPrioritized), resizeable));
}
}
else if (! saturated() && hasCompatibleFlavor(candidate)) {
@@ -146,7 +146,7 @@ class NodeAllocation {
return accepted;
}
- private boolean shouldRetire(NodeCandidate candidate) {
+ private boolean shouldRetire(NodeCandidate candidate, List<NodeCandidate> candidates) {
if ( ! requestedNodes.considerRetiring())
return candidate.allocation().map(a -> a.membership().retired()).orElse(false); // don't second-guess if already retired
@@ -154,6 +154,7 @@ class NodeAllocation {
if (violatesParentHostPolicy(candidate)) return true;
if ( ! hasCompatibleFlavor(candidate)) return true;
if (candidate.wantToRetire()) return true;
+ if (candidate.preferToRetire() && !candidate.replacementIncreasesSkew(candidates)) return true;
if (violatesExclusivity(candidate)) return true;
return false;
}
@@ -226,13 +227,13 @@ class NodeAllocation {
return requestedNodes.isCompatible(candidate.flavor(), nodeRepository.flavors()) || candidate.isResizable;
}
- private Node acceptNode(NodeCandidate candidate, boolean wantToRetire, boolean resizeable) {
+ private Node acceptNode(NodeCandidate candidate, boolean shouldRetire, boolean resizeable) {
Node node = candidate.toNode();
if (node.allocation().isPresent()) // Record the currently requested resources
node = node.with(node.allocation().get().withRequestedResources(requestedNodes.resources().orElse(node.resources())));
- if (! wantToRetire) {
+ if (! shouldRetire) {
accepted++;
// We want to allocate new nodes rather than unretiring with resize, so count without those
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java
index 460b7a821e6..aad65ef5646 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java
@@ -77,6 +77,8 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat
public abstract boolean wantToRetire();
+ public abstract boolean preferToRetire();
+
public abstract Flavor flavor();
public abstract NodeCandidate allocate(ApplicationId owner, ClusterMembership membership, NodeResources requestedResources, Instant at);
@@ -97,6 +99,21 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat
/** Returns whether this node can - as far as we know - be used to run the application workload */
public abstract boolean isValid();
+ /** Returns whether replacing this with any of the reserved candidates will increase skew */
+ public boolean replacementIncreasesSkew(List<NodeCandidate> candidates) {
+ return candidates.stream()
+ .filter(candidate -> candidate.state() == Node.State.reserved)
+ .allMatch(reserved -> {
+ int switchPriority = switchPriority(reserved);
+ if (switchPriority < 0) {
+ return true;
+ } else if (switchPriority > 0) {
+ return false;
+ }
+ return hostPriority(reserved) < 0;
+ });
+ }
+
/**
* Compare this candidate to another
*
@@ -117,8 +134,8 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat
if (!other.isSurplus && this.isSurplus) return 1;
// Prefer node on exclusive switch
- if (this.exclusiveSwitch && !other.exclusiveSwitch) return -1;
- if (other.exclusiveSwitch && !this.exclusiveSwitch) return 1;
+ int switchPriority = switchPriority(other);
+ if (switchPriority != 0) return switchPriority;
// Choose reserved nodes from a previous allocation attempt (which exist in node repo)
if (this.isInNodeRepoAndReserved() && ! other.isInNodeRepoAndReserved()) return -1;
@@ -156,8 +173,7 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat
if ( ! lessThanHalfTheHost(this) && lessThanHalfTheHost(other)) return 1;
}
- int hostPriority = Double.compare(this.skewWithThis() - this.skewWithoutThis(),
- other.skewWithThis() - other.skewWithoutThis());
+ int hostPriority = hostPriority(other);
if (hostPriority != 0) return hostPriority;
// Choose cheapest node
@@ -188,6 +204,19 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat
return new ConcreteNodeCandidate(node, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable);
}
+ /** Returns the switch priority, based on switch exclusivity, of this compared to other */
+ private int switchPriority(NodeCandidate other) {
+ if (this.exclusiveSwitch && !other.exclusiveSwitch) return -1;
+ if (other.exclusiveSwitch && !this.exclusiveSwitch) return 1;
+ return 0;
+ }
+
+ /** Returns the host priority, based on allocation skew, of this compared to other */
+ private int hostPriority(NodeCandidate other) {
+ return Double.compare(this.skewWithThis() - this.skewWithoutThis(),
+ other.skewWithThis() - other.skewWithoutThis());
+ }
+
private boolean lessThanHalfTheHost(NodeCandidate node) {
var n = node.resources();
var h = node.parent.get().resources();
@@ -267,6 +296,9 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat
public boolean wantToRetire() { return node.status().wantToRetire(); }
@Override
+ public boolean preferToRetire() { return node.status().preferToRetire(); }
+
+ @Override
public Flavor flavor() { return node.flavor(); }
@Override
@@ -350,6 +382,9 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat
public boolean wantToRetire() { return false; }
@Override
+ public boolean preferToRetire() { return false; }
+
+ @Override
public Flavor flavor() { return new Flavor(resources); }
@Override
@@ -443,6 +478,9 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat
public boolean wantToRetire() { return false; }
@Override
+ public boolean preferToRetire() { return false; }
+
+ @Override
public Flavor flavor() { return new Flavor(resources); }
@Override
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java
index 9fddaab8b3b..f6db9a45a61 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java
@@ -23,9 +23,11 @@ import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
import com.yahoo.vespa.hosted.provision.testutils.MockDeployer;
+import com.yahoo.vespa.hosted.provision.testutils.MockDeployer.ApplicationContext;
import org.junit.Test;
import java.time.Duration;
+import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@@ -134,6 +136,30 @@ public class RebalancerTest {
tester.getNodes(cpuApp, Node.State.active).stream().anyMatch(node -> node.hasParent(newCpuHost.hostname())));
}
+ @Test
+ public void testRebalancingDoesNotReduceSwitchExclusivity() {
+ Capacity capacity = Capacity.from(new ClusterResources(4, 1, RebalancerTester.cpuResources), true, false);
+ Map<ApplicationId, ApplicationContext> apps = Map.of(cpuApp, new ApplicationContext(cpuApp, RebalancerTester.clusterSpec("c"), capacity));
+ RebalancerTester tester = new RebalancerTester(4, apps);
+
+ // Application is deployed and balanced across exclusive switches
+ tester.deployApp(cpuApp);
+ NodeList allNodes = tester.nodeRepository().nodes().list();
+ NodeList applicationNodes = allNodes.owner(cpuApp);
+ NodeList nodesOnExclusiveSwitch = applicationNodes.onExclusiveSwitch(allNodes.parentsOf(applicationNodes));
+ assertEquals(4, nodesOnExclusiveSwitch.size());
+
+ // Rebalancer does nothing
+ tester.assertNoMovesAfter(waitTimeAfterPreviousDeployment, cpuApp);
+
+ // Another host is provisioned on an existing switch, which can reduce skew
+ Node newCpuHost = tester.makeReadyNode("cpu");
+ tester.tester.patchNode(newCpuHost, (host) -> host.withSwitchHostname("switch0"));
+ tester.activateTenantHosts();
+
+ // Rebalancer does not move to new host, as this would violate switch exclusivity
+ tester.assertNoMovesAfter(waitTimeAfterPreviousDeployment, cpuApp);
+ }
static class RebalancerTester {
@@ -150,12 +176,19 @@ public class RebalancerTest {
private final Rebalancer rebalancer;
RebalancerTester() {
- Map<ApplicationId, MockDeployer.ApplicationContext> apps = Map.of(
- cpuApp, new MockDeployer.ApplicationContext(cpuApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, cpuResources))),
- memoryApp, new MockDeployer.ApplicationContext(memoryApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, memResources))));
+ this(3,
+ Map.of(cpuApp, new ApplicationContext(cpuApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, cpuResources))),
+ memoryApp, new ApplicationContext(memoryApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, memResources)))));
+ }
+
+ RebalancerTester(int hostCount, Map<ApplicationId, ApplicationContext> apps) {
deployer = new MockDeployer(tester.provisioner(), tester.clock(), apps);
rebalancer = new Rebalancer(deployer, tester.nodeRepository(), metric, Duration.ofMinutes(1));
- tester.makeReadyNodes(3, "flat", NodeType.host, 8);
+ List<Node> hosts = tester.makeReadyNodes(hostCount, "flat", NodeType.host, 8);
+ for (int i = 0; i < hosts.size(); i++) {
+ String switchHostname = "switch" + i;
+ tester.patchNode(hosts.get(i), (host) -> host.withSwitchHostname(switchHostname));
+ }
tester.activateTenantHosts();
}
@@ -194,6 +227,17 @@ public class RebalancerTest {
MockDeployer deployer() { return deployer; }
+ private void assertNoMovesAfter(Duration duration, ApplicationId app) {
+ tester.clock().advance(duration);
+ NodeList before = tester.nodeRepository().nodes().list(Node.State.active).owner(app)
+ .sortedBy(Comparator.comparing(Node::hostname));
+ maintain();
+ NodeList after = tester.nodeRepository().nodes().list(Node.State.active).owner(app)
+ .sortedBy(Comparator.comparing(Node::hostname));
+ assertEquals("Node allocation is unchanged", before.asList(), after.asList());
+ assertEquals("No nodes are retired", List.of(), after.retired().asList());
+ }
+
private FlavorsConfig flavorsConfig() {
FlavorConfigBuilder b = new FlavorConfigBuilder();
b.addFlavor("flat", 30, 30, 400, 3, Flavor.Type.BARE_METAL);