about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jon Marius Venstad <jonmv@users.noreply.github.com> 2023-10-18 14:30:06 +0200
committer GitHub <noreply@github.com> 2023-10-18 14:30:06 +0200
commit feb393297b6033df6c9d1ff31439dd2e3b21ff45 (patch)
tree dab6b85527654ca3a2be2171fd597d664ff71d00
parent 65ad9369517a766891e14b9fb61416f8b011e14d (diff)
parent 4fcd092c66963409e906f57b129fbfc76b63276c (diff)
Merge pull request #29000 from vespa-engine/jonmv/tenant-host-redeployer
Use exclusive host sharing, without exclusiveTo set, when pre-provisioning containers
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java 30
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java 16
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java 10
5 files changed, 28 insertions, 32 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
index 4fc20eca41e..567a5c03f43 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
@@ -494,7 +494,7 @@ public final class Node implements Nodelike {
public Node withExclusiveToApplicationId(ApplicationId exclusiveTo) {
return new Node(id, extraId, ipConfig, hostname, parentHostname, flavor, status, state, allocation, history,
- type, reports, modelName, reservedTo, Optional.ofNullable(exclusiveTo), provisionedForApplicationId.filter(__ -> exclusiveTo != null), hostTTL, hostEmptyAt,
+ type, reports, modelName, reservedTo, Optional.ofNullable(exclusiveTo), provisionedForApplicationId, hostTTL, hostEmptyAt,
exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount, wireguardPubKey);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java
index c661cc6ae49..f260832ef32 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java
@@ -7,6 +7,7 @@ import com.yahoo.concurrent.UncheckedTimeoutException;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ClusterMembership;
import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.ClusterSpec.Type;
import com.yahoo.config.provision.NodeAllocationException;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
@@ -204,30 +205,33 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer {
}
ClusterCapacity clusterCapacityDeficit = deficit.get();
- var clusterType = Optional.ofNullable(clusterCapacityDeficit.clusterType());
nodesPlusProvisioned.addAll(provisionHosts(clusterCapacityDeficit.count(),
toNodeResources(clusterCapacityDeficit),
- clusterType.map(ClusterSpec.Type::from),
+ Optional.ofNullable(clusterCapacityDeficit.clusterType()),
nodeList));
}
}
- private List<Node> provisionHosts(int count, NodeResources nodeResources, Optional<ClusterSpec.Type> clusterType, NodeList allNodes) {
+ private List<Node> provisionHosts(int count, NodeResources nodeResources, Optional<String> clusterType, NodeList allNodes) {
try {
if (throttler.throttle(allNodes, Agent.HostCapacityMaintainer)) {
throw new NodeAllocationException("Host provisioning is being throttled", true);
}
Version osVersion = nodeRepository().osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion);
List<Integer> provisionIndices = nodeRepository().database().readProvisionIndices(count);
+ HostSharing sharingMode = nodeRepository().exclusiveAllocation(asSpec(clusterType, 0)) ? HostSharing.exclusive : HostSharing.shared;
HostProvisionRequest request = new HostProvisionRequest(provisionIndices, NodeType.host, nodeResources,
ApplicationId.defaultId(), osVersion,
- HostSharing.shared, clusterType, Optional.empty(),
+ sharingMode, clusterType.map(ClusterSpec.Type::valueOf), Optional.empty(),
nodeRepository().zone().cloud().account(), false);
List<Node> hosts = new ArrayList<>();
hostProvisioner.provisionHosts(request,
resources -> true,
provisionedHosts -> {
- hosts.addAll(provisionedHosts.stream().map(host -> host.generateHost(Duration.ZERO)).toList());
+ hosts.addAll(provisionedHosts.stream()
+ .map(host -> host.generateHost(Duration.ZERO))
+ .map(host -> host.withExclusiveToApplicationId(null))
+ .toList());
nodeRepository().nodes().addNodes(hosts, Agent.HostCapacityMaintainer);
});
return hosts;
@@ -269,14 +273,7 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer {
// We'll allocate each ClusterCapacity as a unique cluster in a dummy application
ApplicationId applicationId = ApplicationId.defaultId();
- ClusterSpec.Id clusterId = ClusterSpec.Id.from(String.valueOf(clusterIndex));
- ClusterSpec.Type type = clusterCapacity.clusterType() != null
- ? ClusterSpec.Type.from(clusterCapacity.clusterType())
- : ClusterSpec.Type.content;
- ClusterSpec clusterSpec = ClusterSpec.request(type, clusterId)
- // build() requires a version, even though it is not (should not be) used
- .vespaVersion(Vtag.currentVersion)
- .build();
+ ClusterSpec clusterSpec = asSpec(Optional.ofNullable(clusterCapacity.clusterType()), clusterIndex);
NodeSpec nodeSpec = NodeSpec.from(clusterCapacity.count(), 1, nodeResources, false, true,
nodeRepository().zone().cloud().account(), Duration.ZERO);
var allocationContext = IP.Allocation.Context.from(nodeRepository().zone().cloud().name(),
@@ -304,6 +301,13 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer {
.toList();
}
+ private static ClusterSpec asSpec(Optional<String> clusterType, int index) {
+ return ClusterSpec.request(clusterType.map(ClusterSpec.Type::from).orElse(ClusterSpec.Type.content),
+ ClusterSpec.Id.from(String.valueOf(index)))
+ .vespaVersion(Vtag.currentVersion) // Needed, but should not be used here.
+ .build();
+ }
+
private static NodeResources toNodeResources(ClusterCapacity clusterCapacity) {
return new NodeResources(clusterCapacity.vcpu(),
clusterCapacity.memoryGb(),
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
index d70490c8e9a..26c99501d04 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
@@ -106,24 +106,10 @@ public class Nodes {
public NodeList list(Node.State... inState) {
NodeList allNodes = NodeList.copyOf(db.readNodes());
NodeList nodes = inState.length == 0 ? allNodes : allNodes.state(Set.of(inState));
- nodes = NodeList.copyOf(nodes.stream().map(node -> specifyFully(node, allNodes)).toList());
+ nodes = NodeList.copyOf(nodes.stream().toList());
return nodes;
}
- // Repair underspecified node resources. TODO: Remove this after June 2023
- private Node specifyFully(Node node, NodeList allNodes) {
- if (node.resources().isUnspecified()) return node;
-
- if (node.resources().bandwidthGbpsIsUnspecified())
- node = node.with(new Flavor(node.resources().withBandwidthGbps(0.3)), Agent.system, clock.instant());
- if ( node.resources().architecture() == NodeResources.Architecture.any) {
- Optional<Node> parent = allNodes.parentOf(node);
- if (parent.isPresent())
- node = node.with(new Flavor(node.resources().with(parent.get().resources().architecture())), Agent.system, clock.instant());
- }
- return node;
- }
-
/** Returns a locked list of all nodes in this repository */
public LockedNodeList list(Mutex lock) {
return new LockedNodeList(list().asList(), lock);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java
index 96338378892..df0f457b215 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java
@@ -29,7 +29,7 @@ public class CapacityCheckerTest {
var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure();
assertTrue(failurePath.isPresent());
assertTrue(tester.nodeRepository.nodes().list().nodeType(NodeType.host).asList().containsAll(failurePath.get().hostsCausingFailure));
- assertEquals(4, failurePath.get().hostsCausingFailure.size());
+ assertEquals(5, failurePath.get().hostsCausingFailure.size());
}
@Test
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java
index f960b122d24..f1d11da6b58 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java
@@ -282,7 +282,11 @@ public class HostCapacityMaintainerTest {
tester = new DynamicProvisioningTester();
NodeResources resources1 = new NodeResources(24, 64, 100, 10);
setPreprovisionCapacityFlag(tester,
- new ClusterCapacity(2, resources1.vcpu(), resources1.memoryGb(), resources1.diskGb(),
+ new ClusterCapacity(1, resources1.vcpu(), resources1.memoryGb(), resources1.diskGb(),
+ resources1.bandwidthGbps(), resources1.diskSpeed().name(),
+ resources1.storageType().name(), resources1.architecture().name(),
+ "container"),
+ new ClusterCapacity(1, resources1.vcpu(), resources1.memoryGb(), resources1.diskGb(),
resources1.bandwidthGbps(), resources1.diskSpeed().name(),
resources1.storageType().name(), resources1.architecture().name(),
null));
@@ -291,12 +295,14 @@ public class HostCapacityMaintainerTest {
// Hosts are provisioned
assertEquals(2, tester.provisionedHostsMatching(resources1));
assertEquals(0, tester.hostProvisioner.deprovisionedHosts());
+ assertEquals(Optional.empty(), tester.nodeRepository.nodes().node("host100").flatMap(Node::exclusiveToApplicationId));
+ assertEquals(Optional.empty(), tester.nodeRepository.nodes().node("host101").flatMap(Node::exclusiveToApplicationId));
// Next maintenance run does nothing
tester.assertNodesUnchanged();
// One host is allocated exclusively to some other application
- tester.nodeRepository.nodes().write(tester.nodeRepository.nodes().list().node("host100").get()
+ tester.nodeRepository.nodes().write(tester.nodeRepository.nodes().node("host100").get()
.withExclusiveToApplicationId(ApplicationId.from("t", "a", "i")),
() -> { });