diff options
author | Harald Musum <musum@yahooinc.com> | 2023-07-06 23:09:30 +0200 |
---|---|---|
committer | Harald Musum <musum@yahooinc.com> | 2023-07-06 23:09:30 +0200 |
commit | c2094a29c42b9f6623097517802d7961cdeea252 (patch) | |
tree | 3a69e05bf36d810d44af6b9dc4cf0c9f6a70f8c9 /node-repository/src | |
parent | 19de0f1b4e105616a28110f12ea097a8d4a2c9bf (diff) |
Add cluster type to ClusterCapacity and use it when provisioning hosts in HostCapacityMaintainer
Diffstat (limited to 'node-repository/src')
2 files changed, 67 insertions, 18 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java index d70ee825860..2a0b4f02b20 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java @@ -199,17 +199,22 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { throw new IllegalStateException("Have provisioned " + numProvisions + " times but there's still deficit: aborting"); } - nodesPlusProvisioned.addAll(provisionHosts(deficit.get().count(), toNodeResources(deficit.get()))); + ClusterCapacity clusterCapacityDeficit = deficit.get(); + var clusterType = Optional.ofNullable(clusterCapacityDeficit.clusterType()); + nodesPlusProvisioned.addAll(provisionHosts(clusterCapacityDeficit.count(), + toNodeResources(clusterCapacityDeficit), + clusterType.map(ClusterSpec.Type::from))); } } - private List<Node> provisionHosts(int count, NodeResources nodeResources) { + private List<Node> provisionHosts(int count, NodeResources nodeResources, Optional<ClusterSpec.Type> clusterType) { try { Version osVersion = nodeRepository().osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion); List<Integer> provisionIndices = nodeRepository().database().readProvisionIndices(count); List<Node> hosts = new ArrayList<>(); - HostProvisionRequest request = new HostProvisionRequest(provisionIndices, NodeType.host, nodeResources, ApplicationId.defaultId(), osVersion, - HostSharing.shared, Optional.empty(), Optional.empty(), + HostProvisionRequest request = new HostProvisionRequest(provisionIndices, NodeType.host, nodeResources, + ApplicationId.defaultId(), osVersion, + HostSharing.shared, clusterType, Optional.empty(), nodeRepository().zone().cloud().account(), false); 
hostProvisioner.provisionHosts(request, provisionedHosts -> { @@ -256,7 +261,10 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { // We'll allocate each ClusterCapacity as a unique cluster in a dummy application ApplicationId applicationId = ApplicationId.defaultId(); ClusterSpec.Id clusterId = ClusterSpec.Id.from(String.valueOf(clusterIndex)); - ClusterSpec clusterSpec = ClusterSpec.request(ClusterSpec.Type.content, clusterId) + ClusterSpec.Type type = clusterCapacity.clusterType() != null + ? ClusterSpec.Type.from(clusterCapacity.clusterType()) + : ClusterSpec.Type.content; + ClusterSpec clusterSpec = ClusterSpec.request(type, clusterId) // build() requires a version, even though it is not (should not be) used .vespaVersion(Vtag.currentVersion) .build(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java index 0478e5d1486..452e4c135c0 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java @@ -66,6 +66,7 @@ import java.util.function.Supplier; import java.util.stream.Stream; import static com.yahoo.config.provision.NodeResources.Architecture.arm64; +import static com.yahoo.config.provision.NodeResources.DiskSpeed; import static com.yahoo.config.provision.NodeResources.DiskSpeed.fast; import static com.yahoo.config.provision.NodeResources.StorageType.remote; import static com.yahoo.vespa.hosted.provision.testutils.MockHostProvisioner.Behaviour; @@ -96,7 +97,7 @@ public class HostCapacityMaintainerTest { @Test public void does_not_deprovision_when_preprovisioning_enabled() { tester = new DynamicProvisioningTester().addInitialNodes(); - setPreprovisionCapacityFlag(tester, new ClusterCapacity(1, 
1.0, 3.0, 2.0, 1.0, "fast", "remote", "x86_64")); + setPreprovisionCapacityFlag(tester, new ClusterCapacity(1, 1.0, 3.0, 2.0, 1.0, "fast", "remote", "x86_64", null)); Optional<Node> failedHost = node("host2"); assertTrue(failedHost.isPresent()); @@ -109,8 +110,8 @@ public class HostCapacityMaintainerTest { public void provision_deficit_and_deprovision_excess() { tester = new DynamicProvisioningTester().addInitialNodes(); setPreprovisionCapacityFlag(tester, - new ClusterCapacity(2, 48.0, 128.0, 1000.0, 10.0, "fast", "remote", "x86_64"), - new ClusterCapacity(1, 16.0, 24.0, 100.0, 1.0, "fast", "remote", "x86_64")); + new ClusterCapacity(2, 48.0, 128.0, 1000.0, 10.0, "fast", "remote", "x86_64", null), + new ClusterCapacity(1, 16.0, 24.0, 100.0, 1.0, "fast", "remote", "x86_64", null)); assertEquals(0, tester.hostProvisioner.provisionedHosts().size()); assertEquals(9, tester.nodeRepository.nodes().list().size()); @@ -146,7 +147,7 @@ public class HostCapacityMaintainerTest { tester = new DynamicProvisioningTester().addInitialNodes(); // Makes provisioned hosts 48-128-1000-10 tester.hostProvisioner.setHostFlavor("host4"); - var clusterCapacity = new ClusterCapacity(2, 1.0, 30.0, 20.0, 3.0, "fast", "remote", "x86_64"); + var clusterCapacity = new ClusterCapacity(2, 1.0, 30.0, 20.0, 3.0, "fast", "remote", "x86_64", null); setPreprovisionCapacityFlag(tester, clusterCapacity); assertEquals(0, tester.hostProvisioner.provisionedHosts().size()); @@ -179,7 +180,7 @@ public class HostCapacityMaintainerTest { setPreprovisionCapacityFlag(tester, clusterCapacity, - new ClusterCapacity(2, 24.0, 64.0, 100.0, 1.0, "fast", "remote", "x86_64")); + new ClusterCapacity(2, 24.0, 64.0, 100.0, 1.0, "fast", "remote", "x86_64", null)); tester.maintain(); @@ -193,7 +194,7 @@ public class HostCapacityMaintainerTest { // If the preprovision capacity is reduced, we should see shared hosts deprovisioned. 
setPreprovisionCapacityFlag(tester, - new ClusterCapacity(1, 1.0, 30.0, 20.0, 3.0, "fast", "remote", "x86_64")); + new ClusterCapacity(1, 1.0, 30.0, 20.0, 3.0, "fast", "remote", "x86_64", null)); tester.maintain(); @@ -211,8 +212,8 @@ public class HostCapacityMaintainerTest { // If a host with another architecture is added to preprovision capacity, a shared host should be added. setPreprovisionCapacityFlag(tester, - new ClusterCapacity(1, 2.0, 30.0, 20.0, 3.0, "fast", "remote", "x86_64"), - new ClusterCapacity(1, 2.0, 30.0, 20.0, 3.0, "fast", "remote", "arm64")); + new ClusterCapacity(1, 2.0, 30.0, 20.0, 3.0, "fast", "remote", "x86_64", null), + new ClusterCapacity(1, 2.0, 30.0, 20.0, 3.0, "fast", "remote", "arm64", null)); tester.hostProvisioner.setHostFlavor("arm64"); tester.maintain(); @@ -221,6 +222,36 @@ public class HostCapacityMaintainerTest { assertEquals(1, tester.provisionedHostsMatching(new NodeResources(2, 30, 20, 3, fast, remote, arm64))); } + @Test + public void preprovision_with_shared_host_no_resources_specified() { + tester = new DynamicProvisioningTester(); // No nodes initially + // Makes provisioned hosts 2-30-20-3-arm64 + tester.hostProvisioner.setHostFlavor("arm64"); + var clusterCapacity = new ClusterCapacity(1, 0.0, 0.0, 0.0, 0.0, null, null, "arm64", null); + setPreprovisionCapacityFlag(tester, clusterCapacity); + + assertEquals(0, tester.hostProvisioner.provisionedHosts().size()); + assertEquals(0, tester.nodeRepository.nodes().list().size()); + + // The first cluster will be allocated to host3 and a new host host100. + // host100 will be a large shared host specified above. + tester.maintain(); + verifyFirstMaintainArm64(tester); + + // Second maintain should be a no-op, otherwise we did wrong in the first maintain. + tester.maintain(); + verifyFirstMaintainArm64(tester); + + // Add a second cluster for cluster type admin. 
Need new hosts + setPreprovisionCapacityFlag(tester, clusterCapacity, new ClusterCapacity(2, 0.0, 0.0, 0.0, 0.0, null, null, "arm64", "admin")); + + tester.maintain(); + System.out.println(tester.hostProvisioner.provisionedHosts()); + assertEquals("2 provisioned hosts", + 2, tester.hostProvisioner.provisionedHosts().size()); + assertEquals(2, tester.provisionedHostsMatching(new NodeResources(2, 30, 20, 30, DiskSpeed.any, remote, arm64))); + } + private void verifyFirstMaintain(DynamicProvisioningTester tester) { assertEquals(tester.hostProvisioner.provisionedHosts().toString(), 1, tester.hostProvisioner.provisionedHosts().size()); assertEquals(1, tester.provisionedHostsMatching(new NodeResources(48, 128, 1000, 10))); @@ -231,6 +262,13 @@ public class HostCapacityMaintainerTest { assertTrue("New 48-128-1000-10 host added", node("host100").isPresent()); } + private void verifyFirstMaintainArm64(DynamicProvisioningTester tester) { + assertEquals(tester.hostProvisioner.provisionedHosts().toString(), 1, tester.hostProvisioner.provisionedHosts().size()); + assertEquals(1, tester.provisionedHostsMatching(new NodeResources(2, 30, 20, 30))); + assertEquals(1, tester.nodeRepository.nodes().list().not().state(State.deprovisioned).size()); // 2 removed, 1 added + assertTrue("New 2-30-20-30 host added", node("host100").isPresent()); + } + @Test public void does_not_remove_if_host_provisioner_failed() { tester = new DynamicProvisioningTester(); @@ -248,7 +286,8 @@ public class HostCapacityMaintainerTest { setPreprovisionCapacityFlag(tester, new ClusterCapacity(2, resources1.vcpu(), resources1.memoryGb(), resources1.diskGb(), resources1.bandwidthGbps(), resources1.diskSpeed().name(), - resources1.storageType().name(), resources1.architecture().name())); + resources1.storageType().name(), resources1.architecture().name(), + null)); tester.maintain(); // Hosts are provisioned @@ -266,7 +305,7 @@ public class HostCapacityMaintainerTest { tester.assertNodesUnchanged(); // Must be 
able to allocate 2 nodes with "no resource requirement" - setPreprovisionCapacityFlag(tester, new ClusterCapacity(2, 0.0, 0.0, 0.0, 0.0, null, null, null)); + setPreprovisionCapacityFlag(tester, new ClusterCapacity(2, 0.0, 0.0, 0.0, 0.0, null, null, null, null)); // Next maintenance run does nothing tester.assertNodesUnchanged(); @@ -289,7 +328,7 @@ public class HostCapacityMaintainerTest { tester.assertNodesUnchanged(); // Increasing the capacity provisions additional hosts - setPreprovisionCapacityFlag(tester, new ClusterCapacity(3, 0.0, 0.0, 0.0, 0.0, null, null, null)); + setPreprovisionCapacityFlag(tester, new ClusterCapacity(3, 0.0, 0.0, 0.0, 0.0, null, null, null, null)); assertEquals(0, tester.provisionedHostsMatching(sharedHostNodeResources)); assertTrue(node("host102").isEmpty()); tester.maintain(); @@ -308,7 +347,8 @@ public class HostCapacityMaintainerTest { resources1.bandwidthGbps() - applicationNodeResources.bandwidthGbps(), resources1.diskSpeed().name(), resources1.storageType().name(), - resources1.architecture().name())); + resources1.architecture().name(), + null)); tester.assertNodesUnchanged(); // But requiring a bit more in the cluster => provisioning of 2 shared hosts. @@ -320,7 +360,8 @@ public class HostCapacityMaintainerTest { resources1.bandwidthGbps(), resources1.diskSpeed().name(), resources1.storageType().name(), - resources1.architecture().name())); + resources1.architecture().name(), + null)); assertEquals(1, tester.provisionedHostsMatching(sharedHostNodeResources)); assertTrue(node("host102").isPresent()); |