diff options
4 files changed, 108 insertions, 27 deletions
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/custom/ClusterCapacity.java b/flags/src/main/java/com/yahoo/vespa/flags/custom/ClusterCapacity.java index dcef85f9a0d..06c015e5206 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/custom/ClusterCapacity.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/custom/ClusterCapacity.java @@ -12,6 +12,7 @@ import java.util.OptionalDouble; import static com.yahoo.vespa.flags.custom.Validation.requireNonNegative; import static com.yahoo.vespa.flags.custom.Validation.validArchitectures; +import static com.yahoo.vespa.flags.custom.Validation.validClusterTypes; import static com.yahoo.vespa.flags.custom.Validation.validDiskSpeeds; import static com.yahoo.vespa.flags.custom.Validation.validStorageTypes; import static com.yahoo.vespa.flags.custom.Validation.validateEnum; @@ -31,6 +32,7 @@ public class ClusterCapacity { private final String diskSpeed; private final String storageType; private final String architecture; + private final String clusterType; @JsonCreator public ClusterCapacity(@JsonProperty("count") int count, @@ -40,7 +42,8 @@ public class ClusterCapacity { @JsonProperty("bandwidthGbps") Double bandwidthGbps, @JsonProperty("diskSpeed") String diskSpeed, @JsonProperty("storageType") String storageType, - @JsonProperty("architecture") String architecture) { + @JsonProperty("architecture") String architecture, + @JsonProperty("clusterType") String clusterType) { this.count = (int) requireNonNegative("count", count); this.vcpu = vcpu == null ? OptionalDouble.empty() : OptionalDouble.of(requireNonNegative("vcpu", vcpu)); this.memoryGb = memoryGb == null ? OptionalDouble.empty() : OptionalDouble.of(requireNonNegative("memoryGb", memoryGb)); @@ -49,12 +52,13 @@ public class ClusterCapacity { this.diskSpeed = validateEnum("diskSpeed", validDiskSpeeds, diskSpeed == null ? "fast" : diskSpeed); this.storageType = validateEnum("storageType", validStorageTypes, storageType == null ? "any" : storageType); this.architecture = validateEnum("architecture", validArchitectures, architecture == null ? "x86_64" : architecture); + this.clusterType = clusterType == null ? null : validateEnum("clusterType", validClusterTypes, clusterType); } /** Returns a new ClusterCapacity equal to {@code this}, but with the given count. */ public ClusterCapacity withCount(int count) { return new ClusterCapacity(count, vcpuOrNull(), memoryGbOrNull(), diskGbOrNull(), bandwidthGbpsOrNull(), - diskSpeed, storageType, architecture); + diskSpeed, storageType, architecture, clusterType); } @JsonGetter("count") public int count() { return count; } @@ -73,6 +77,7 @@ public class ClusterCapacity { @JsonGetter("diskSpeed") public String diskSpeed() { return diskSpeed; } @JsonGetter("storageType") public String storageType() { return storageType; } @JsonGetter("architecture") public String architecture() { return architecture; } + @JsonGetter("clusterType") public String clusterType() { return clusterType; } @JsonIgnore public Double vcpu() { return vcpu.orElse(0.0); } @JsonIgnore public Double memoryGb() { return memoryGb.orElse(0.0); } @@ -90,6 +95,7 @@ public class ClusterCapacity { ", diskSpeed=" + diskSpeed + ", storageType=" + storageType + ", architecture=" + architecture + + ", clusterType=" + clusterType + '}'; } @@ -105,12 +111,13 @@ public class ClusterCapacity { bandwidthGbps.equals(that.bandwidthGbps) && diskSpeed.equals(that.diskSpeed) && storageType.equals(that.storageType) && - architecture.equals(that.architecture); + architecture.equals(that.architecture) && + clusterType.equals(that.clusterType); } @Override public int hashCode() { - return Objects.hash(count, vcpu, memoryGb, diskGb, bandwidthGbps, diskSpeed, storageType, architecture); + return Objects.hash(count, vcpu, memoryGb, diskGb, bandwidthGbps, diskSpeed, storageType, architecture, clusterType); } } diff --git a/flags/src/test/java/com/yahoo/vespa/flags/custom/ClusterCapacityTest.java b/flags/src/test/java/com/yahoo/vespa/flags/custom/ClusterCapacityTest.java index 23ab3a48ffa..6322ad1a2e1 100644 --- a/flags/src/test/java/com/yahoo/vespa/flags/custom/ClusterCapacityTest.java +++ b/flags/src/test/java/com/yahoo/vespa/flags/custom/ClusterCapacityTest.java @@ -12,10 +12,12 @@ public class ClusterCapacityTest { @Test void serialization() throws IOException { - ClusterCapacity clusterCapacity = new ClusterCapacity(7, 1.2, 3.4, 5.6, null, "fast", "local", "x86_64"); + ClusterCapacity clusterCapacity = new ClusterCapacity(7, 1.2, 3.4, 5.6, null, "fast", "local", "x86_64", null); ObjectMapper mapper = new ObjectMapper(); String json = mapper.writeValueAsString(clusterCapacity); - assertEquals("{\"count\":7,\"vcpu\":1.2,\"memoryGb\":3.4,\"diskGb\":5.6,\"diskSpeed\":\"fast\",\"storageType\":\"local\",\"architecture\":\"x86_64\"}", json); + assertEquals(""" + {"count":7,"vcpu":1.2,"memoryGb":3.4,"diskGb":5.6,"diskSpeed":"fast","storageType":"local","architecture":"x86_64"}""", + json); ClusterCapacity deserialized = mapper.readValue(json, ClusterCapacity.class); assertEquals(7, deserialized.count()); @@ -30,10 +32,12 @@ public class ClusterCapacityTest { @Test void serialization2() throws IOException { - ClusterCapacity clusterCapacity = new ClusterCapacity(7, 1.2, 3.4, 5.6, 2.3, "any", "remote", "arm64"); + ClusterCapacity clusterCapacity = new ClusterCapacity(7, 1.2, 3.4, 5.6, 2.3, "any", "remote", "arm64", null); ObjectMapper mapper = new ObjectMapper(); String json = mapper.writeValueAsString(clusterCapacity); - assertEquals("{\"count\":7,\"vcpu\":1.2,\"memoryGb\":3.4,\"diskGb\":5.6,\"bandwidthGbps\":2.3,\"diskSpeed\":\"any\",\"storageType\":\"remote\",\"architecture\":\"arm64\"}", json); + assertEquals(""" + {"count":7,"vcpu":1.2,"memoryGb":3.4,"diskGb":5.6,"bandwidthGbps":2.3,"diskSpeed":"any","storageType":"remote","architecture":"arm64"}""", + json); ClusterCapacity deserialized = mapper.readValue(json, ClusterCapacity.class); assertEquals(7, deserialized.count()); @@ -47,8 +51,29 @@ public class ClusterCapacityTest { } @Test + void serialization3() throws IOException { + ClusterCapacity clusterCapacity = new ClusterCapacity(7, 1.2, 3.4, 5.6, 2.3, "any", "remote", "arm64", "admin"); + ObjectMapper mapper = new ObjectMapper(); + String json = mapper.writeValueAsString(clusterCapacity); + assertEquals(""" + {"count":7,"vcpu":1.2,"memoryGb":3.4,"diskGb":5.6,"bandwidthGbps":2.3,"diskSpeed":"any","storageType":"remote","architecture":"arm64","clusterType":"admin"}""", + json); + + ClusterCapacity deserialized = mapper.readValue(json, ClusterCapacity.class); + assertEquals(7, deserialized.count()); + assertEquals(1.2, deserialized.vcpu(), 0.0001); + assertEquals(3.4, deserialized.memoryGb(), 0.0001); + assertEquals(5.6, deserialized.diskGb(), 0.0001); + assertEquals(2.3, deserialized.bandwidthGbps(), 0.0001); + assertEquals("any", deserialized.diskSpeed()); + assertEquals("remote", deserialized.storageType()); + assertEquals("arm64", deserialized.architecture()); + assertEquals("admin", deserialized.clusterType()); + } + + @Test void serializationWithNoNodeResources() throws IOException { - ClusterCapacity clusterCapacity = new ClusterCapacity(7, null, null, null, null, null, null, null); + ClusterCapacity clusterCapacity = new ClusterCapacity(7, null, null, null, null, null, null, null, null); ObjectMapper mapper = new ObjectMapper(); String json = mapper.writeValueAsString(clusterCapacity); assertEquals("{\"count\":7,\"diskSpeed\":\"fast\",\"storageType\":\"any\",\"architecture\":\"x86_64\"}", json); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java index d70ee825860..2a0b4f02b20 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java @@ -199,17 +199,22 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { throw new IllegalStateException("Have provisioned " + numProvisions + " times but there's still deficit: aborting"); } - nodesPlusProvisioned.addAll(provisionHosts(deficit.get().count(), toNodeResources(deficit.get()))); + ClusterCapacity clusterCapacityDeficit = deficit.get(); + var clusterType = Optional.ofNullable(clusterCapacityDeficit.clusterType()); + nodesPlusProvisioned.addAll(provisionHosts(clusterCapacityDeficit.count(), + toNodeResources(clusterCapacityDeficit), + clusterType.map(ClusterSpec.Type::from))); } } - private List<Node> provisionHosts(int count, NodeResources nodeResources) { + private List<Node> provisionHosts(int count, NodeResources nodeResources, Optional<ClusterSpec.Type> clusterType) { try { Version osVersion = nodeRepository().osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion); List<Integer> provisionIndices = nodeRepository().database().readProvisionIndices(count); List<Node> hosts = new ArrayList<>(); - HostProvisionRequest request = new HostProvisionRequest(provisionIndices, NodeType.host, nodeResources, ApplicationId.defaultId(), osVersion, - HostSharing.shared, Optional.empty(), Optional.empty(), + HostProvisionRequest request = new HostProvisionRequest(provisionIndices, NodeType.host, nodeResources, + ApplicationId.defaultId(), osVersion, + HostSharing.shared, clusterType, Optional.empty(), nodeRepository().zone().cloud().account(), false); hostProvisioner.provisionHosts(request, provisionedHosts -> { @@ -256,7 +261,10 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { // We'll allocate each ClusterCapacity as a unique cluster in a dummy application ApplicationId applicationId = ApplicationId.defaultId(); ClusterSpec.Id clusterId = ClusterSpec.Id.from(String.valueOf(clusterIndex)); - ClusterSpec clusterSpec = ClusterSpec.request(ClusterSpec.Type.content, clusterId) + ClusterSpec.Type type = clusterCapacity.clusterType() != null + ? ClusterSpec.Type.from(clusterCapacity.clusterType()) + : ClusterSpec.Type.content; + ClusterSpec clusterSpec = ClusterSpec.request(type, clusterId) // build() requires a version, even though it is not (should not be) used .vespaVersion(Vtag.currentVersion) .build(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java index 0478e5d1486..452e4c135c0 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java @@ -66,6 +66,7 @@ import java.util.function.Supplier; import java.util.stream.Stream; import static com.yahoo.config.provision.NodeResources.Architecture.arm64; +import static com.yahoo.config.provision.NodeResources.DiskSpeed; import static com.yahoo.config.provision.NodeResources.DiskSpeed.fast; import static com.yahoo.config.provision.NodeResources.StorageType.remote; import static com.yahoo.vespa.hosted.provision.testutils.MockHostProvisioner.Behaviour; @@ -96,7 +97,7 @@ public class HostCapacityMaintainerTest { @Test public void does_not_deprovision_when_preprovisioning_enabled() { tester = new DynamicProvisioningTester().addInitialNodes(); - setPreprovisionCapacityFlag(tester, new ClusterCapacity(1, 1.0, 3.0, 2.0, 1.0, "fast", "remote", "x86_64")); + setPreprovisionCapacityFlag(tester, new ClusterCapacity(1, 1.0, 3.0, 2.0, 1.0, "fast", "remote", "x86_64", null)); Optional<Node> failedHost = node("host2"); assertTrue(failedHost.isPresent()); @@ -109,8 +110,8 @@ public class HostCapacityMaintainerTest { public void provision_deficit_and_deprovision_excess() { tester = new DynamicProvisioningTester().addInitialNodes(); setPreprovisionCapacityFlag(tester, - new ClusterCapacity(2, 48.0, 128.0, 1000.0, 10.0, "fast", "remote", "x86_64"), - new ClusterCapacity(1, 16.0, 24.0, 100.0, 1.0, "fast", "remote", "x86_64")); + new ClusterCapacity(2, 48.0, 128.0, 1000.0, 10.0, "fast", "remote", "x86_64", null), + new ClusterCapacity(1, 16.0, 24.0, 100.0, 1.0, "fast", "remote", "x86_64", null)); assertEquals(0, tester.hostProvisioner.provisionedHosts().size()); assertEquals(9, tester.nodeRepository.nodes().list().size()); @@ -146,7 +147,7 @@ public class HostCapacityMaintainerTest { tester = new DynamicProvisioningTester().addInitialNodes(); // Makes provisioned hosts 48-128-1000-10 tester.hostProvisioner.setHostFlavor("host4"); - var clusterCapacity = new ClusterCapacity(2, 1.0, 30.0, 20.0, 3.0, "fast", "remote", "x86_64"); + var clusterCapacity = new ClusterCapacity(2, 1.0, 30.0, 20.0, 3.0, "fast", "remote", "x86_64", null); setPreprovisionCapacityFlag(tester, clusterCapacity); assertEquals(0, tester.hostProvisioner.provisionedHosts().size()); @@ -179,7 +180,7 @@ public class HostCapacityMaintainerTest { setPreprovisionCapacityFlag(tester, clusterCapacity, - new ClusterCapacity(2, 24.0, 64.0, 100.0, 1.0, "fast", "remote", "x86_64")); + new ClusterCapacity(2, 24.0, 64.0, 100.0, 1.0, "fast", "remote", "x86_64", null)); tester.maintain(); @@ -193,7 +194,7 @@ public class HostCapacityMaintainerTest { // If the preprovision capacity is reduced, we should see shared hosts deprovisioned. setPreprovisionCapacityFlag(tester, - new ClusterCapacity(1, 1.0, 30.0, 20.0, 3.0, "fast", "remote", "x86_64")); + new ClusterCapacity(1, 1.0, 30.0, 20.0, 3.0, "fast", "remote", "x86_64", null)); tester.maintain(); @@ -211,8 +212,8 @@ public class HostCapacityMaintainerTest { // If a host with another architecture is added to preprovision capacity, a shared host should be added. setPreprovisionCapacityFlag(tester, - new ClusterCapacity(1, 2.0, 30.0, 20.0, 3.0, "fast", "remote", "x86_64"), - new ClusterCapacity(1, 2.0, 30.0, 20.0, 3.0, "fast", "remote", "arm64")); + new ClusterCapacity(1, 2.0, 30.0, 20.0, 3.0, "fast", "remote", "x86_64", null), + new ClusterCapacity(1, 2.0, 30.0, 20.0, 3.0, "fast", "remote", "arm64", null)); tester.hostProvisioner.setHostFlavor("arm64"); tester.maintain(); @@ -221,6 +222,36 @@ public class HostCapacityMaintainerTest { assertEquals(1, tester.provisionedHostsMatching(new NodeResources(2, 30, 20, 3, fast, remote, arm64))); } + @Test + public void preprovision_with_shared_host_no_resources_specified() { + tester = new DynamicProvisioningTester(); // No nodes initially + // Makes provisioned hosts 2-30-20-3-arm64 + tester.hostProvisioner.setHostFlavor("arm64"); + var clusterCapacity = new ClusterCapacity(1, 0.0, 0.0, 0.0, 0.0, null, null, "arm64", null); + setPreprovisionCapacityFlag(tester, clusterCapacity); + + assertEquals(0, tester.hostProvisioner.provisionedHosts().size()); + assertEquals(0, tester.nodeRepository.nodes().list().size()); + + // The first cluster will be allocated to host3 and a new host host100. + // host100 will be a large shared host specified above. + tester.maintain(); + verifyFirstMaintainArm64(tester); + + // Second maintain should be a no-op, otherwise we did wrong in the first maintain. + tester.maintain(); + verifyFirstMaintainArm64(tester); + + // Add a second cluster for cluster type admin. Need new hosts + setPreprovisionCapacityFlag(tester, clusterCapacity, new ClusterCapacity(2, 0.0, 0.0, 0.0, 0.0, null, null, "arm64", "admin")); + + tester.maintain(); + System.out.println(tester.hostProvisioner.provisionedHosts()); + assertEquals("2 provisioned hosts", + 2, tester.hostProvisioner.provisionedHosts().size()); + assertEquals(2, tester.provisionedHostsMatching(new NodeResources(2, 30, 20, 30, DiskSpeed.any, remote, arm64))); + } + private void verifyFirstMaintain(DynamicProvisioningTester tester) { assertEquals(tester.hostProvisioner.provisionedHosts().toString(), 1, tester.hostProvisioner.provisionedHosts().size()); assertEquals(1, tester.provisionedHostsMatching(new NodeResources(48, 128, 1000, 10))); @@ -231,6 +262,13 @@ public class HostCapacityMaintainerTest { assertTrue("New 48-128-1000-10 host added", node("host100").isPresent()); } + private void verifyFirstMaintainArm64(DynamicProvisioningTester tester) { + assertEquals(tester.hostProvisioner.provisionedHosts().toString(), 1, tester.hostProvisioner.provisionedHosts().size()); + assertEquals(1, tester.provisionedHostsMatching(new NodeResources(2, 30, 20, 30))); + assertEquals(1, tester.nodeRepository.nodes().list().not().state(State.deprovisioned).size()); // 2 removed, 1 added + assertTrue("New 2-30-20-30 host added", node("host100").isPresent()); + } + @Test public void does_not_remove_if_host_provisioner_failed() { tester = new DynamicProvisioningTester(); @@ -248,7 +286,8 @@ public class HostCapacityMaintainerTest { setPreprovisionCapacityFlag(tester, new ClusterCapacity(2, resources1.vcpu(), resources1.memoryGb(), resources1.diskGb(), resources1.bandwidthGbps(), resources1.diskSpeed().name(), - resources1.storageType().name(), resources1.architecture().name())); + resources1.storageType().name(), resources1.architecture().name(), + null)); tester.maintain(); // Hosts are provisioned @@ -266,7 +305,7 @@ public class HostCapacityMaintainerTest { tester.assertNodesUnchanged(); // Must be able to allocate 2 nodes with "no resource requirement" - setPreprovisionCapacityFlag(tester, new ClusterCapacity(2, 0.0, 0.0, 0.0, 0.0, null, null, null)); + setPreprovisionCapacityFlag(tester, new ClusterCapacity(2, 0.0, 0.0, 0.0, 0.0, null, null, null, null)); // Next maintenance run does nothing tester.assertNodesUnchanged(); @@ -289,7 +328,7 @@ public class HostCapacityMaintainerTest { tester.assertNodesUnchanged(); // Increasing the capacity provisions additional hosts - setPreprovisionCapacityFlag(tester, new ClusterCapacity(3, 0.0, 0.0, 0.0, 0.0, null, null, null)); + setPreprovisionCapacityFlag(tester, new ClusterCapacity(3, 0.0, 0.0, 0.0, 0.0, null, null, null, null)); assertEquals(0, tester.provisionedHostsMatching(sharedHostNodeResources)); assertTrue(node("host102").isEmpty()); tester.maintain(); @@ -308,7 +347,8 @@ public class HostCapacityMaintainerTest { resources1.bandwidthGbps() - applicationNodeResources.bandwidthGbps(), resources1.diskSpeed().name(), resources1.storageType().name(), - resources1.architecture().name())); + resources1.architecture().name(), + null)); tester.assertNodesUnchanged(); // But requiring a bit more in the cluster => provisioning of 2 shared hosts. @@ -320,7 +360,8 @@ public class HostCapacityMaintainerTest { resources1.bandwidthGbps(), resources1.diskSpeed().name(), resources1.storageType().name(), - resources1.architecture().name())); + resources1.architecture().name(), + null)); assertEquals(1, tester.provisionedHostsMatching(sharedHostNodeResources)); assertTrue(node("host102").isPresent()); |