diff options
5 files changed, 47 insertions, 29 deletions
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index d44226f89eb..bd6d772c215 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -15,6 +15,8 @@ import java.util.function.Predicate; import static com.yahoo.vespa.flags.FetchVector.Dimension.APPLICATION_ID; import static com.yahoo.vespa.flags.FetchVector.Dimension.CLOUD_ACCOUNT; +import static com.yahoo.vespa.flags.FetchVector.Dimension.CLUSTER_ID; +import static com.yahoo.vespa.flags.FetchVector.Dimension.CLUSTER_TYPE; import static com.yahoo.vespa.flags.FetchVector.Dimension.CONSOLE_USER_EMAIL; import static com.yahoo.vespa.flags.FetchVector.Dimension.HOSTNAME; import static com.yahoo.vespa.flags.FetchVector.Dimension.NODE_TYPE; @@ -375,6 +377,13 @@ public class Flags { "Whether to write application data (active session id, last deployed session id etc. ) as json", "Takes effect immediately"); + public static final UnboundIntFlag MIN_EXCLUSIVE_ADVERTISED_MEMORY_GB = defineIntFlag( + "min-exclusive-advertised-memory-gb", 4, + List.of("freva"), "2023-09-08", "2023-11-01", + "Minimum amount of advertised memory for exclusive nodes", + "Takes effect immediately", + APPLICATION_ID, CLUSTER_ID, CLUSTER_TYPE); + public static final UnboundBooleanFlag ASSIGN_RANDOMIZED_ID = defineFeatureFlag( "assign-randomized-id", false, List.of("mortent"), "2023-08-31", "2024-02-01", diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java index 47c388f97a8..06ab9eb1a10 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeResourceLimits.java @@ -7,6 +7,9 @@ import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.Zone; +import com.yahoo.vespa.flags.FetchVector; +import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.flags.IntFlag; import com.yahoo.vespa.hosted.provision.NodeRepository; import java.util.Locale; @@ -20,19 +23,22 @@ import java.util.Locale; public class NodeResourceLimits { private final NodeRepository nodeRepository; + private final IntFlag minExclusiveAdvertisedMemoryGbFlag; public NodeResourceLimits(NodeRepository nodeRepository) { this.nodeRepository = nodeRepository; + this.minExclusiveAdvertisedMemoryGbFlag = Flags.MIN_EXCLUSIVE_ADVERTISED_MEMORY_GB.bindTo(nodeRepository.flagSource()); } /** Validates the resources applications ask for (which are in "advertised" resource space) */ public void ensureWithinAdvertisedLimits(String type, NodeResources requested, ApplicationId applicationId, ClusterSpec cluster) { - if (! requested.vcpuIsUnspecified() && requested.vcpu() < minAdvertisedVcpu(applicationId, cluster)) - illegal(type, "vcpu", "", cluster, requested.vcpu(), minAdvertisedVcpu(applicationId, cluster)); - if (! requested.memoryGbIsUnspecified() && requested.memoryGb() < minAdvertisedMemoryGb(cluster)) - illegal(type, "memoryGb", "Gb", cluster, requested.memoryGb(), minAdvertisedMemoryGb(cluster)); - if (! requested.diskGbIsUnspecified() && requested.diskGb() < minAdvertisedDiskGb(requested, cluster.isExclusive())) - illegal(type, "diskGb", "Gb", cluster, requested.diskGb(), minAdvertisedDiskGb(requested, cluster.isExclusive())); + boolean exclusive = nodeRepository.exclusiveAllocation(cluster); + if (! requested.vcpuIsUnspecified() && requested.vcpu() < minAdvertisedVcpu(applicationId, cluster, exclusive)) + illegal(type, "vcpu", "", cluster, requested.vcpu(), minAdvertisedVcpu(applicationId, cluster, exclusive)); + if (! requested.memoryGbIsUnspecified() && requested.memoryGb() < minAdvertisedMemoryGb(applicationId, cluster, exclusive)) + illegal(type, "memoryGb", "Gb", cluster, requested.memoryGb(), minAdvertisedMemoryGb(applicationId, cluster, exclusive)); + if (! requested.diskGbIsUnspecified() && requested.diskGb() < minAdvertisedDiskGb(requested, exclusive)) + illegal(type, "diskGb", "Gb", cluster, requested.diskGb(), minAdvertisedDiskGb(requested, exclusive)); } // TODO: Remove this when we are ready to fail, not just warn on this. */ @@ -64,23 +70,28 @@ public class NodeResourceLimits { if (followRecommendations) // TODO: Do unconditionally when we enforce this limit requested = requested.withDiskGb(Math.max(minAdvertisedDiskGb(requested, cluster), requested.diskGb())); - return requested.withVcpu(Math.max(minAdvertisedVcpu(applicationId, cluster), requested.vcpu())) - .withMemoryGb(Math.max(minAdvertisedMemoryGb(cluster), requested.memoryGb())) + return requested.withVcpu(Math.max(minAdvertisedVcpu(applicationId, cluster, exclusive), requested.vcpu())) + .withMemoryGb(Math.max(minAdvertisedMemoryGb(applicationId, cluster, exclusive), requested.memoryGb())) .withDiskGb(Math.max(minAdvertisedDiskGb(requested, exclusive), requested.diskGb())); } - private double minAdvertisedVcpu(ApplicationId applicationId, ClusterSpec cluster) { + private double minAdvertisedVcpu(ApplicationId applicationId, ClusterSpec cluster, boolean exclusive) { if (cluster.type() == ClusterSpec.Type.admin) return 0.1; if (zone().environment().isProduction() && ! zone().system().isCd() && - nodeRepository.exclusiveAllocation(cluster) && ! applicationId.instance().isTester()) return 2; + exclusive && ! applicationId.instance().isTester()) return 2; if (zone().environment().isProduction() && cluster.type().isContent()) return 1.0; - if (zone().environment() == Environment.dev && ! nodeRepository.exclusiveAllocation(cluster)) return 0.1; + if (zone().environment() == Environment.dev && ! exclusive) return 0.1; return 0.5; } - private double minAdvertisedMemoryGb(ClusterSpec cluster) { + private double minAdvertisedMemoryGb(ApplicationId applicationId, ClusterSpec cluster, boolean exclusive) { if (cluster.type() == ClusterSpec.Type.admin) return 1; - return 4; + if (!exclusive) return 4; + return minExclusiveAdvertisedMemoryGbFlag + .with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()) + .with(FetchVector.Dimension.CLUSTER_ID, cluster.id().value()) + .with(FetchVector.Dimension.CLUSTER_TYPE, cluster.type().name()) + .value(); } private double minAdvertisedDiskGb(NodeResources requested, boolean exclusive) { @@ -105,7 +116,7 @@ public class NodeResourceLimits { } private double minRealVcpu(ApplicationId applicationId, ClusterSpec cluster) { - return minAdvertisedVcpu(applicationId, cluster); + return minAdvertisedVcpu(applicationId, cluster, nodeRepository.exclusiveAllocation(cluster)); } private static double minRealMemoryGb(ClusterSpec cluster) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 4e19d04ffac..52d4c85bcaf 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -13,7 +13,6 @@ import com.yahoo.config.provision.NodeResources.DiskSpeed; import com.yahoo.config.provision.NodeResources.StorageType; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.Zone; -import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.provisioning.CapacityPolicies; import com.yahoo.vespa.hosted.provision.provisioning.DynamicProvisioningTester; import org.junit.Test; @@ -25,7 +24,6 @@ import java.util.Optional; import static com.yahoo.config.provision.NodeResources.DiskSpeed.fast; import static com.yahoo.config.provision.NodeResources.DiskSpeed.slow; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; /** @@ -899,7 +897,7 @@ public class AutoscalingTest { @Test public void test_changing_exclusivity() { - var min = new ClusterResources( 2, 1, new NodeResources( 3, 4, 100, 1)); + var min = new ClusterResources( 2, 1, new NodeResources( 3, 8, 100, 1)); var max = new ClusterResources(20, 1, new NodeResources(100, 1000, 1000, 1)); var fixture = DynamicProvisioningTester.fixture() .awsProdSetup(true) diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTest.java index 38b8836188b..54703b40781 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTest.java @@ -321,8 +321,8 @@ public class DynamicProvisioningTest { @Test public void migrates_nodes_on_host_flavor_flag_change() { InMemoryFlagSource flagSource = new InMemoryFlagSource(); - List<Flavor> flavors = List.of(new Flavor("x86", new NodeResources(2, 4, 50, 0.1, fast, local, Architecture.x86_64)), - new Flavor("arm", new NodeResources(2, 4, 50, 0.1, fast, local, Architecture.arm64))); + List<Flavor> flavors = List.of(new Flavor("x86", new NodeResources(2, 8, 50, 0.1, fast, local, Architecture.x86_64)), + new Flavor("arm", new NodeResources(2, 8, 50, 0.1, fast, local, Architecture.arm64))); MockHostProvisioner hostProvisioner = new MockHostProvisioner(flavors); ProvisioningTester tester = new ProvisioningTester.Builder() .dynamicProvisioning(true, false) @@ -335,7 +335,7 @@ public class DynamicProvisioningTest { ApplicationId app = ProvisioningTester.applicationId("a1"); ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.content, new ClusterSpec.Id("cluster1")).vespaVersion("8").build(); - Capacity capacity = Capacity.from(new ClusterResources(4, 2, new NodeResources(2, 4, 50, 0.1, DiskSpeed.any, StorageType.any, Architecture.any))); + Capacity capacity = Capacity.from(new ClusterResources(4, 2, new NodeResources(2, 8, 50, 0.1, DiskSpeed.any, StorageType.any, Architecture.any))); hostProvisioner.setHostFlavor("x86", ClusterSpec.Type.content); tester.activate(app, cluster, capacity); @@ -391,10 +391,10 @@ public class DynamicProvisioningTest { } // Initial deployment - tester.activate(app1, cluster1, Capacity.from(resources(4, 2, 2, 5, 20), + tester.activate(app1, cluster1, Capacity.from(resources(4, 2, 2, 8, 20), resources(6, 3, 4, 20, 40))); tester.assertNodes("Initial allocation at first actual flavor above min (except for disk)", - 4, 2, 2, 20, 20, + 4, 2, 2, 20, 24, app1, cluster1); @@ -413,7 +413,7 @@ public class DynamicProvisioningTest { app1, cluster1); // Widening window does not change allocation - tester.activate(app1, cluster1, Capacity.from(resources(2, 1, 2, 5, 15), + tester.activate(app1, cluster1, Capacity.from(resources(2, 1, 2, 8, 15), resources(8, 4, 4, 20, 30))); tester.assertNodes("No change", 6, 2, 2, 20, 25, @@ -421,7 +421,7 @@ public class DynamicProvisioningTest { // Force 1 more groups: Reducing to 2 nodes per group to preserve node count is rejected // since it will reduce total group memory from 60 to 40. - tester.activate(app1, cluster1, Capacity.from(resources(6, 3, 2, 5, 10), + tester.activate(app1, cluster1, Capacity.from(resources(6, 3, 2, 8, 10), resources(9, 3, 5, 20, 15))); tester.assertNodes("Group size is preserved", 9, 3, 2, 20, 15, diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java index 6ec189d98c3..d64006a6e64 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java @@ -361,7 +361,7 @@ public class VirtualNodeProvisioningTest { @Test public void application_deployment_with_exclusive_app_first() { NodeResources hostResources = new NodeResources(10, 40, 1000, 10); - NodeResources nodeResources = new NodeResources(2, 4, 100, 1); + NodeResources nodeResources = new NodeResources(2, 8, 100, 1); ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).build(); tester.makeReadyHosts(4, hostResources).activateTenantHosts(); ApplicationId application1 = ProvisioningTester.applicationId("app1"); @@ -380,7 +380,7 @@ public class VirtualNodeProvisioningTest { @Test public void application_deployment_with_exclusive_app_last() { NodeResources hostResources = new NodeResources(10, 40, 1000, 10); - NodeResources nodeResources = new NodeResources(2, 4, 100, 1); + NodeResources nodeResources = new NodeResources(2, 8, 100, 1); ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).build(); tester.makeReadyHosts(4, hostResources).activateTenantHosts(); ApplicationId application1 = ProvisioningTester.applicationId("app1"); @@ -399,7 +399,7 @@ public class VirtualNodeProvisioningTest { @Test public void application_deployment_change_to_exclusive_and_back() { NodeResources hostResources = new NodeResources(10, 40, 1000, 10); - NodeResources nodeResources = new NodeResources(2, 4, 100, 1); + NodeResources nodeResources = new NodeResources(2, 8, 100, 1); ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).build(); tester.makeReadyHosts(4, hostResources).activateTenantHosts(); @@ -426,7 +426,7 @@ public class VirtualNodeProvisioningTest { ApplicationId application2 = ApplicationId.from("tenant2", "app2", "default"); ApplicationId application3 = ApplicationId.from("tenant1", "app3", "default"); NodeResources hostResources = new NodeResources(10, 40, 1000, 10); - NodeResources nodeResources = new NodeResources(2, 4, 100, 1); + NodeResources nodeResources = new NodeResources(2, 8, 100, 1); ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).build(); tester.makeReadyHosts(4, hostResources).activateTenantHosts(); @@ -441,7 +441,7 @@ public class VirtualNodeProvisioningTest { catch (Exception e) { assertEquals("No room for 3 nodes as 2 of 4 hosts are exclusive", "Could not satisfy request for 3 nodes with " + - "[vcpu: 2.0, memory: 4.0 Gb, disk: 100.0 Gb, bandwidth: 1.0 Gbps, architecture: any] " + + "[vcpu: 2.0, memory: 8.0 Gb, disk: 100.0 Gb, bandwidth: 1.0 Gbps, architecture: any] " + "in tenant2.app2 container cluster 'my-container' 6.39: " + "Not enough suitable nodes available due to host exclusivity constraints", Exceptions.toMessageString(e)); |