summaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2020-09-09 21:02:42 +0200
committerGitHub <noreply@github.com>2020-09-09 21:02:42 +0200
commit8cdd7f7babb2bda6c708c00af9911caafdc79fe0 (patch)
tree5686e64b625b7533ef54c8f6bad13634fd4daa71 /node-repository
parent3bbf27bf9d235bbaf01165afcb9e175c2bbaaa89 (diff)
Revert "Revert "Bratseth/allocation improvements" MERGEOK"
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java5
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Allocation.java6
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java47
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java12
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java58
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java6
10 files changed, 112 insertions, 32 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
index e146583ae04..b08dc6bbaf2 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
@@ -204,6 +204,11 @@ public final class Node {
return with(requireAllocation("Cannot unretire").unretire());
}
+ /** Returns a copy of this with removable set to the given value */
+ public Node removable(boolean removable) {
+ return with(requireAllocation("Cannot set removable").removable(removable));
+ }
+
/** Returns a copy of this with the restart generation set to generation */
public Node withRestart(Generation generation) {
Allocation allocation = requireAllocation("Cannot set restart generation");
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
index 983ba5165e3..4ec7ddd04c4 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
@@ -511,7 +511,7 @@ public class NodeRepository extends AbstractComponent {
public void setRemovable(ApplicationId application, List<Node> nodes) {
try (Mutex lock = lock(application)) {
List<Node> removableNodes =
- nodes.stream().map(node -> node.with(node.allocation().get().removable()))
+ nodes.stream().map(node -> node.with(node.allocation().get().removable(true)))
.collect(Collectors.toList());
write(removableNodes, lock);
}
@@ -641,7 +641,7 @@ public class NodeRepository extends AbstractComponent {
}
private Node move(Node node, State toState, Agent agent, Optional<String> reason) {
- if (toState == Node.State.active && ! node.allocation().isPresent())
+ if (toState == Node.State.active && node.allocation().isEmpty())
illegal("Could not set " + node + " active. It has no allocation.");
try (Mutex lock = lock(node)) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Allocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Allocation.java
index 30ef84c6927..b476a2bdefc 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Allocation.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Allocation.java
@@ -85,9 +85,9 @@ public class Allocation {
return new Allocation(owner, clusterMembership, requestedResources, generation, removable, networkPorts);
}
- /** Returns a copy of this allocation where removable is set to true */
- public Allocation removable() {
- return new Allocation(owner, clusterMembership, requestedResources, restartGeneration, true, networkPorts);
+ /** Returns a copy of this allocation where removable is set to the given value */
+ public Allocation removable(boolean removable) {
+ return new Allocation(owner, clusterMembership, requestedResources, restartGeneration, removable, networkPorts);
}
public Allocation with(ClusterMembership newMembership) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
index 37842115949..5a3584b6ff4 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
@@ -241,7 +241,7 @@ public class NodeSerializer {
}
private Optional<Allocation> allocationFromSlime(NodeResources assignedResources, Inspector object) {
- if ( ! object.valid()) return Optional.empty(); // TODO: Remove this line (and to the simplifications that follows) after November 2019
+ if ( ! object.valid()) return Optional.empty();
return Optional.of(new Allocation(applicationIdFromSlime(object),
clusterMembershipFromSlime(object),
NodeResourcesSerializer.optionalResourcesFromSlime(object.field(requestedResourcesKey))
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
index 1aa0f69dd9b..a37da10f5f0 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
@@ -104,27 +104,23 @@ class NodeAllocation {
Node offered = node.node;
if (offered.allocation().isPresent()) {
- ClusterMembership membership = offered.allocation().get().membership();
- if ( ! offered.allocation().get().owner().equals(application)) continue; // wrong application
+ Allocation allocation = offered.allocation().get();
+ ClusterMembership membership = allocation.membership();
+ if ( ! allocation.owner().equals(application)) continue; // wrong application
if ( ! membership.cluster().satisfies(cluster)) continue; // wrong cluster id/type
if ((! node.isSurplusNode || saturated()) && ! membership.cluster().group().equals(cluster.group())) continue; // wrong group and we can't or have no reason to change it
- if ( offered.allocation().get().isRemovable()) continue; // don't accept; causes removal
+ if ( offered.state() == Node.State.active && allocation.isRemovable()) continue; // don't accept; causes removal
if ( indexes.contains(membership.index())) continue; // duplicate index (just to be sure)
+ boolean resizeable = false;
+ boolean acceptToRetire = false;
if (requestedNodes.considerRetiring()) {
- boolean wantToRetireNode = false;
- if ( ! nodeResourceLimits.isWithinRealLimits(offered, cluster)) wantToRetireNode = true;
- if (violatesParentHostPolicy(this.nodes, offered)) wantToRetireNode = true;
- if ( ! hasCompatibleFlavor(node)) wantToRetireNode = true;
- if (offered.status().wantToRetire()) wantToRetireNode = true;
- if (requestedNodes.isExclusive() && ! hostsOnly(application.tenant(), application.application(), offered.parentHostname()))
- wantToRetireNode = true;
- if ((! saturated() && hasCompatibleFlavor(node)) || acceptToRetire(node))
- accepted.add(acceptNode(node, wantToRetireNode, node.isResizable));
- }
- else {
- accepted.add(acceptNode(node, false, false));
+ resizeable = node.isResizable;
+ acceptToRetire = acceptToRetire(node);
}
+
+ if ((! saturated() && hasCompatibleFlavor(node) && requestedNodes.acceptable(offered)) || acceptToRetire)
+ accepted.add(acceptNode(node, shouldRetire(node), resizeable));
}
else if (! saturated() && hasCompatibleFlavor(node)) {
if ( ! nodeResourceLimits.isWithinRealLimits(offered, cluster)) {
@@ -139,7 +135,7 @@ class NodeAllocation {
++rejectedDueToExclusivity;
continue;
}
- if ( requestedNodes.isExclusive() && ! hostsOnly(application.tenant(), application.application(), offered.parentHostname())) {
+ if ( requestedNodes.isExclusive() && ! hostsOnly(application, offered.parentHostname())) {
++rejectedDueToExclusivity;
continue;
}
@@ -157,6 +153,15 @@ class NodeAllocation {
return accepted;
}
+ private boolean shouldRetire(PrioritizableNode node) {
+ if ( ! requestedNodes.considerRetiring()) return false;
+ if ( ! nodeResourceLimits.isWithinRealLimits(node.node, cluster)) return true;
+ if (violatesParentHostPolicy(this.nodes, node.node)) return true;
+ if ( ! hasCompatibleFlavor(node)) return true;
+ if (node.node.status().wantToRetire()) return true;
+ if (requestedNodes.isExclusive() && ! hostsOnly(application, node.node.parentHostname())) return true;
+ return false;
+ }
private boolean violatesParentHostPolicy(Collection<PrioritizableNode> accepted, Node offered) {
return checkForClashingParentHost() && offeredNodeHasParentHostnameAlreadyAccepted(accepted, offered);
@@ -193,13 +198,13 @@ class NodeAllocation {
return true;
}
- /** Returns true if this host only hosts the given applicaton (in any instance) */
- private boolean hostsOnly(TenantName tenant, ApplicationName application, Optional<String> parentHostname) {
+ /** Returns true if this host only hosts the given application (in any instance) */
+ private boolean hostsOnly(ApplicationId application, Optional<String> parentHostname) {
if (parentHostname.isEmpty()) return true; // yes, as host is exclusive
for (Node nodeOnHost : allNodes.childrenOf(parentHostname.get())) {
if (nodeOnHost.allocation().isEmpty()) continue;
- if ( ! allocatedTo(tenant, application, nodeOnHost)) return false;
+ if ( ! allocatedTo(application.tenant(), application.application(), nodeOnHost)) return false;
}
return true;
}
@@ -256,8 +261,8 @@ class NodeAllocation {
if (resizeable && ! ( node.allocation().isPresent() && node.allocation().get().membership().retired()))
node = resize(node);
- if (node.state() != Node.State.active) // reactivated node - make sure its not retired
- node = node.unretire();
+ if (node.state() != Node.State.active) // reactivated node - wipe state that deactivated it
+ node = node.unretire().removable(false);
} else {
++wasRetiredJustNow;
node = node.retire(nodeRepository.clock().instant());
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
index 9971aae1714..f50c988edfd 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
@@ -60,6 +60,9 @@ public interface NodeSpec {
/** Returns whether the given node must be resized to match this spec */
boolean needsResize(Node node);
+ /** Returns true if there exist some circumstance where we may accept to have this node allocated */
+ boolean acceptable(Node node);
+
/**
* Returns true if a node with given current resources and current spare host resources can be resized
* in-place to resources in this spec.
@@ -157,6 +160,9 @@ public interface NodeSpec {
}
@Override
+ public boolean acceptable(Node node) { return true; }
+
+ @Override
public String toString() { return "request for " + count + " nodes with " + requestedNodeResources; }
}
@@ -211,6 +217,12 @@ public interface NodeSpec {
public boolean needsResize(Node node) { return false; }
@Override
+ public boolean acceptable(Node node) {
+ // Since we consume all offered nodes we should not accept previously deactivated nodes
+ return node.state() != Node.State.inactive;
+ }
+
+ @Override
public String toString() { return "request for all nodes of type '" + type + "'"; }
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
index 1137ae5ce2c..cb39e8fecce 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
@@ -105,7 +105,7 @@ class AutoscalingTester {
try (Mutex lock = nodeRepository().lock(application)){
for (Node node : nodeRepository().getNodes(application, Node.State.active)) {
if (node.allocation().get().membership().retired())
- nodeRepository().write(node.with(node.allocation().get().removable()), lock);
+ nodeRepository().write(node.with(node.allocation().get().removable(true)), lock);
}
}
deploy(application, cluster, resources);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java
index 5e4bfc2a7bc..dbbad0b8982 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java
@@ -179,7 +179,7 @@ public class NodeSerializerTest {
(copy.history().event(History.Event.Type.retired).get()).agent());
assertTrue(copy.allocation().get().membership().retired());
- Node removable = copy.with(node.allocation().get().removable());
+ Node removable = copy.with(node.allocation().get().removable(true));
Node removableCopy = nodeSerializer.fromJson(Node.State.provisioned, nodeSerializer.toJson(removable));
assertTrue(removableCopy.allocation().get().isRemovable());
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java
index cddc1fcb253..e566172b524 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java
@@ -20,6 +20,7 @@ import com.yahoo.config.provision.TenantName;
import com.yahoo.config.provision.Zone;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
import org.junit.Test;
import java.util.HashSet;
@@ -380,6 +381,63 @@ public class DockerProvisioningTest {
}
}
+ @Test
+ public void test_startup_redeployment_with_inactive_nodes() {
+ NodeResources r = new NodeResources(20, 40, 100, 4);
+ ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east")))
+ .flavors(List.of(new Flavor(r)))
+ .build();
+ tester.makeReadyHosts(5, r).deployZoneApp();
+
+ ApplicationId app1 = tester.makeApplicationId("app1");
+ ClusterSpec cluster1 = ClusterSpec.request(ClusterSpec.Type.container, new ClusterSpec.Id("cluster1")).vespaVersion("7").build();
+
+ tester.activate(app1, cluster1, Capacity.from(new ClusterResources(5, 1, r)));
+ tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, r)));
+
+ assertEquals(2, tester.getNodes(app1, Node.State.active).size());
+ assertEquals(3, tester.getNodes(app1, Node.State.inactive).size());
+
+ // Startup deployment: Not failable
+ tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, r), false, false));
+ // ... causes no change
+ assertEquals(2, tester.getNodes(app1, Node.State.active).size());
+ assertEquals(3, tester.getNodes(app1, Node.State.inactive).size());
+ }
+
+ @Test
+ public void inactive_container_nodes_are_reused() {
+ assertInactiveReuse(ClusterSpec.Type.container);
+ }
+
+ @Test
+ public void inactive_content_nodes_are_reused() {
+ assertInactiveReuse(ClusterSpec.Type.content);
+ }
+
+ private void assertInactiveReuse(ClusterSpec.Type clusterType) {
+ NodeResources r = new NodeResources(20, 40, 100, 4);
+ ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east")))
+ .flavors(List.of(new Flavor(r)))
+ .build();
+ tester.makeReadyHosts(4, r).deployZoneApp();
+
+ ApplicationId app1 = tester.makeApplicationId("app1");
+ ClusterSpec cluster1 = ClusterSpec.request(clusterType, new ClusterSpec.Id("cluster1")).vespaVersion("7").build();
+
+ tester.activate(app1, cluster1, Capacity.from(new ClusterResources(4, 1, r)));
+ tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, r)));
+
+ // Deactivate any retired nodes - usually done by the RetiredExpirer
+ tester.nodeRepository().setRemovable(app1, tester.getNodes(app1).retired().asList());
+ tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, r)));
+
+ assertEquals(2, tester.getNodes(app1, Node.State.inactive).size());
+ tester.activate(app1, cluster1, Capacity.from(new ClusterResources(4, 1, r)));
+ assertEquals(0, tester.getNodes(app1, Node.State.inactive).size());
+ }
+
+
private Set<String> hostsOf(NodeList nodes) {
return nodes.asList().stream().map(Node::parentHostname).map(Optional::get).collect(Collectors.toSet());
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
index 5c730912c49..ff2f0ffca96 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
@@ -888,7 +888,7 @@ public class ProvisioningTest {
// Application allocates two content nodes initially, with cluster type content
ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("music")).vespaVersion("1.2.3").build();
var initialNodes = tester.activate(application, tester.prepare(application, cluster,
- Capacity.from(new ClusterResources(2, 1, defaultResources), false, false)));
+ Capacity.from(new ClusterResources(2, 1, defaultResources))));
// Application is redeployed with cluster type combined
cluster = ClusterSpec.request(ClusterSpec.Type.combined, ClusterSpec.Id.from("music"))
@@ -896,7 +896,7 @@ public class ProvisioningTest {
.combinedId(Optional.of(ClusterSpec.Id.from("qrs")))
.build();
var newNodes = tester.activate(application, tester.prepare(application, cluster,
- Capacity.from(new ClusterResources(2, 1, defaultResources), false, false)));
+ Capacity.from(new ClusterResources(2, 1, defaultResources))));
assertEquals("Node allocation remains the same", initialNodes, newNodes);
assertEquals("Cluster type is updated",
@@ -906,7 +906,7 @@ public class ProvisioningTest {
// Application is redeployed with cluster type content again
cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("music")).vespaVersion("1.2.3").build();
newNodes = tester.activate(application, tester.prepare(application, cluster,
- Capacity.from(new ClusterResources(2, 1, defaultResources), false, false)));
+ Capacity.from(new ClusterResources(2, 1, defaultResources))));
assertEquals("Node allocation remains the same", initialNodes, newNodes);
assertEquals("Cluster type is updated",
Set.of(ClusterSpec.Type.content),