diff options
author | Valerij Fredriksen <freva@users.noreply.github.com> | 2023-01-25 17:38:51 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-25 17:38:51 +0100 |
commit | a0deac28d2261734f6c3be07c4a4041b0f8d52af (patch) | |
tree | e2949cbf5fb684e86ed8feea8e3fe203f63a6a31 /config-model/src/main/java | |
parent | bcf49ef8c4efe4e9ff47fc456937560371c7fce1 (diff) | |
parent | 7ae15f46b9a27d2689d47da92bed7079418c7fa6 (diff) |
Merge pull request #25730 from vespa-engine/bratseth/resource-reduction-rebased
Bratseth/resource reduction rebased
Diffstat (limited to 'config-model/src/main/java')
4 files changed, 93 insertions, 94 deletions
diff --git a/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java b/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java index 16de9d3405f..9e48510e704 100644 --- a/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java +++ b/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java @@ -38,7 +38,9 @@ import java.util.stream.IntStream; */ public class InMemoryProvisioner implements HostProvisioner { - public static final NodeResources defaultResources = new NodeResources(1, 3, 50, 1); + public static final NodeResources defaultHostResources = new NodeResources(1, 3, 50, 1); + + private final NodeResources defaultNodeResources; /** * If this is true an exception is thrown when all nodes are used. @@ -74,32 +76,37 @@ public class InMemoryProvisioner implements HostProvisioner { /** Creates this with a number of nodes with resources 1, 3, 9, 1 */ public InMemoryProvisioner(int nodeCount, boolean sharedHosts) { - this(nodeCount, defaultResources, sharedHosts); + this(nodeCount, defaultHostResources, sharedHosts); } /** Creates this with a number of nodes with given resources */ public InMemoryProvisioner(int nodeCount, NodeResources resources, boolean sharedHosts) { - this(Map.of(resources, createHostInstances(nodeCount)), true, false, false, sharedHosts, 0); + this(Map.of(resources, createHostInstances(nodeCount)), true, false, false, sharedHosts, NodeResources.unspecified(), 0); + } + + /** Creates this with a number of nodes with given resources */ + public InMemoryProvisioner(int nodeCount, NodeResources resources, boolean sharedHosts, NodeResources defaultResources) { + this(Map.of(resources, createHostInstances(nodeCount)), true, false, false, sharedHosts, defaultResources, 0); } /** Creates this with a set of host names of the flavor 'default' */ public InMemoryProvisioner(boolean failOnOutOfCapacity, boolean sharedHosts, String... hosts) { - this(Map.of(defaultResources, toHostInstances(hosts)), failOnOutOfCapacity, false, false, sharedHosts, 0); + this(Map.of(defaultHostResources, toHostInstances(hosts)), failOnOutOfCapacity, false, false, sharedHosts, defaultHostResources, 0); } /** Creates this with a set of host names of the flavor 'default' */ public InMemoryProvisioner(boolean failOnOutOfCapacity, boolean sharedHosts, List<String> hosts) { - this(Map.of(defaultResources, toHostInstances(hosts.toArray(new String[0]))), failOnOutOfCapacity, false, false, sharedHosts, 0); + this(Map.of(defaultHostResources, toHostInstances(hosts.toArray(new String[0]))), failOnOutOfCapacity, false, false, sharedHosts, defaultHostResources, 0); } /** Creates this with a set of hosts of the flavor 'default' */ public InMemoryProvisioner(Hosts hosts, boolean failOnOutOfCapacity, boolean sharedHosts, String ... retiredHostNames) { - this(Map.of(defaultResources, hosts.asCollection()), failOnOutOfCapacity, false, false, sharedHosts, 0, retiredHostNames); + this(Map.of(defaultHostResources, hosts.asCollection()), failOnOutOfCapacity, false, false, sharedHosts, defaultHostResources, 0, retiredHostNames); } /** Creates this with a set of hosts of the flavor 'default' */ public InMemoryProvisioner(Hosts hosts, boolean failOnOutOfCapacity, boolean sharedHosts, int startIndexForClusters, String ... retiredHostNames) { - this(Map.of(defaultResources, hosts.asCollection()), failOnOutOfCapacity, false, false, sharedHosts, startIndexForClusters, retiredHostNames); + this(Map.of(defaultHostResources, hosts.asCollection()), failOnOutOfCapacity, false, false, sharedHosts, defaultHostResources, startIndexForClusters, retiredHostNames); } public InMemoryProvisioner(Map<NodeResources, Collection<Host>> hosts, @@ -107,8 +114,10 @@ public class InMemoryProvisioner implements HostProvisioner { boolean useMaxResources, boolean alwaysReturnOneNode, boolean sharedHosts, + NodeResources defaultResources, int startIndexForClusters, String ... retiredHostNames) { + this.defaultNodeResources = defaultResources; this.failOnOutOfCapacity = failOnOutOfCapacity; this.useMaxResources = useMaxResources; this.alwaysReturnOneNode = alwaysReturnOneNode; @@ -139,9 +148,9 @@ public class InMemoryProvisioner implements HostProvisioner { @Override public HostSpec allocateHost(String alias) { - List<Host> defaultHosts = freeNodes.get(defaultResources); + List<Host> defaultHosts = freeNodes.get(defaultHostResources); if (defaultHosts.isEmpty()) throw new IllegalArgumentException("No more hosts with default resources available"); - Host newHost = freeNodes.removeValue(defaultResources, 0); + Host newHost = freeNodes.removeValue(defaultHostResources, 0); return new HostSpec(newHost.hostname(), List.of(alias), Optional.empty()); } @@ -170,7 +179,7 @@ public class InMemoryProvisioner implements HostProvisioner { int nodes = failOnOutOfCapacity || required ? requested.nodes() - : Math.min(requested.nodes(), freeNodes.get(defaultResources).size() + totalAllocatedTo(cluster)); + : Math.min(requested.nodes(), freeNodes.get(defaultHostResources).size() + totalAllocatedTo(cluster)); if (alwaysReturnOneNode) nodes = 1; @@ -220,8 +229,15 @@ public class InMemoryProvisioner implements HostProvisioner { host.dockerImageRepo()); } - private List<HostSpec> allocateHostGroup(ClusterSpec clusterGroup, NodeResources requestedResources, + // Minimal capacity policies + private NodeResources decideResources(NodeResources requestedResources) { + if (requestedResources.isUnspecified()) return defaultNodeResources; + return requestedResources; + } + + private List<HostSpec> allocateHostGroup(ClusterSpec clusterGroup, NodeResources requestedResourcesOrUnspecified, int nodesInGroup, int startIndex, boolean canFail) { + var requestedResources = decideResources(requestedResourcesOrUnspecified); List<HostSpec> allocation = allocations.getOrDefault(clusterGroup, new ArrayList<>()); allocations.put(clusterGroup, allocation); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java index 7b380cd5acf..c7a363010b7 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java @@ -13,7 +13,7 @@ import com.yahoo.vespa.model.VespaModel; import com.yahoo.vespa.model.application.validation.change.CertificateRemovalChangeValidator; import com.yahoo.vespa.model.application.validation.change.ChangeValidator; import com.yahoo.vespa.model.application.validation.change.CloudAccountChangeValidator; -import com.yahoo.vespa.model.application.validation.change.ClusterSizeReductionValidator; +import com.yahoo.vespa.model.application.validation.change.ResourcesReductionValidator; import com.yahoo.vespa.model.application.validation.change.ConfigValueChangeValidator; import com.yahoo.vespa.model.application.validation.change.ContainerRestartValidator; import com.yahoo.vespa.model.application.validation.change.ContentClusterRemovalValidator; @@ -23,7 +23,6 @@ import com.yahoo.vespa.model.application.validation.change.IndexedSearchClusterC import com.yahoo.vespa.model.application.validation.change.IndexingModeChangeValidator; import com.yahoo.vespa.model.application.validation.change.NodeResourceChangeValidator; import com.yahoo.vespa.model.application.validation.change.RedundancyIncreaseValidator; -import com.yahoo.vespa.model.application.validation.change.ResourcesReductionValidator; import com.yahoo.vespa.model.application.validation.change.StartupCommandChangeValidator; import com.yahoo.vespa.model.application.validation.change.StreamingSearchClusterChangeValidator; import com.yahoo.vespa.model.application.validation.first.RedundancyValidator; @@ -116,7 +115,7 @@ public class Validation { new StartupCommandChangeValidator(), new ContentTypeRemovalValidator(), new ContentClusterRemovalValidator(), - new ClusterSizeReductionValidator(), + new ResourcesReductionValidator(), new ResourcesReductionValidator(), new ContainerRestartValidator(), new NodeResourceChangeValidator(), diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ClusterSizeReductionValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ClusterSizeReductionValidator.java deleted file mode 100644 index 353be99cfa9..00000000000 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ClusterSizeReductionValidator.java +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.model.application.validation.change; - -import com.yahoo.config.model.api.ConfigChangeAction; -import com.yahoo.config.model.deploy.DeployState; -import com.yahoo.config.provision.Capacity; -import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.vespa.model.VespaModel; -import com.yahoo.config.application.api.ValidationId; - -import java.util.List; - -/** - * Checks that no cluster sizes are reduced too much in one go. - * - * @author bratseth - */ -public class ClusterSizeReductionValidator implements ChangeValidator { - - @Override - public List<ConfigChangeAction> validate(VespaModel current, VespaModel next, DeployState deployState) { - for (var clusterId : current.allClusters()) { - Capacity currentCapacity = current.provisioned().all().get(clusterId); - Capacity nextCapacity = next.provisioned().all().get(clusterId); - if (currentCapacity == null || nextCapacity == null) continue; - validate(currentCapacity, nextCapacity, clusterId, deployState); - } - return List.of(); - } - - private void validate(Capacity current, Capacity next, ClusterSpec.Id clusterId, DeployState deployState) { - int currentSize = current.minResources().nodes(); - int nextSize = next.minResources().nodes(); - // don't allow more than 50% reduction, but always allow to reduce size with 1 - if ( nextSize < currentSize * 0.5 && nextSize != currentSize - 1) - deployState.validationOverrides().invalid(ValidationId.clusterSizeReduction, - "Size reduction in '" + clusterId.value() + "' is too large: " + - "New min size must be at least 50% of the current min size. " + - "Current size: " + currentSize + ", new size: " + nextSize, - deployState.now()); - } - -} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java index b27266221d2..cbfbf2236d4 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java @@ -4,63 +4,90 @@ package com.yahoo.vespa.model.application.validation.change; import com.yahoo.config.application.api.ValidationId; import com.yahoo.config.model.api.ConfigChangeAction; import com.yahoo.config.model.deploy.DeployState; -import com.yahoo.config.provision.Capacity; +import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.model.VespaModel; import java.util.List; -import java.util.Locale; -import java.util.Optional; -import java.util.stream.Stream; /** - * Checks that no nodes resources are reduced too much in one go. + * Checks that no cluster sizes are reduced too much in one go. * - * @author freva + * @author bratseth */ public class ResourcesReductionValidator implements ChangeValidator { @Override public List<ConfigChangeAction> validate(VespaModel current, VespaModel next, DeployState deployState) { for (var clusterId : current.allClusters()) { - Capacity currentCapacity = current.provisioned().all().get(clusterId); - Capacity nextCapacity = next.provisioned().all().get(clusterId); - if (currentCapacity == null || nextCapacity == null) continue; - validate(currentCapacity, nextCapacity, clusterId, deployState); + if (next.allClusters().contains(clusterId)) + validate(clusterId, current, next, deployState); } - return List.of(); } - private void validate(Capacity current, Capacity next, ClusterSpec.Id clusterId, DeployState deployState) { - if (current.minResources().nodeResources().isUnspecified()) return; - if (next.minResources().nodeResources().isUnspecified()) return; - - List<String> illegalChanges = Stream.of( - validateResource("vCPU", - current.minResources().nodeResources().vcpu(), - next.minResources().nodeResources().vcpu()), - validateResource("memory GB", - current.minResources().nodeResources().memoryGb(), - next.minResources().nodeResources().memoryGb()), - validateResource("disk GB", - current.minResources().nodeResources().diskGb(), - next.minResources().nodeResources().diskGb())) - .flatMap(Optional::stream) - .toList(); - if (illegalChanges.isEmpty()) return; + private void validate(ClusterSpec.Id clusterId, + VespaModel currentModel, + VespaModel nextModel, + DeployState deployState) { + ClusterResources current = clusterResources(clusterId, currentModel); + ClusterResources next = clusterResources(clusterId, nextModel); + if (current == null || next == null) return; // No request recording - test + if (current.nodeResources().isUnspecified() || next.nodeResources().isUnspecified()) { + // Self-hosted - unspecified resources; compare node count + int currentNodes = current.nodes(); + int nextNodes = next.nodes(); + if (nextNodes < 0.5 * currentNodes && nextNodes != currentNodes - 1) { + deployState.validationOverrides().invalid(ValidationId.resourcesReduction, + "Size reduction in '" + clusterId.value() + "' is too large: " + + "To guard against mistakes, the new max nodes must be at least 50% of the current nodes. " + + "Current nodes: " + currentNodes + ", new nodes: " + nextNodes, + deployState.now()); + } + } + else { + NodeResources currentResources = current.totalResources(); + NodeResources nextResources = next.totalResources(); + if (nextResources.vcpu() < 0.5 * currentResources.vcpu() || + nextResources.memoryGb() < 0.5 * currentResources.memoryGb() || + nextResources.diskGb() < 0.5 * currentResources.diskGb()) + deployState.validationOverrides().invalid(ValidationId.resourcesReduction, + "Resource reduction in '" + clusterId.value() + "' is too large: " + + "To guard against mistakes, the new max resources must be at least 50% of the current " + + "max resources in all dimensions. " + + "Current: " + currentResources.withBandwidthGbps(0) + // (don't output bandwidth here) + ", new: " + nextResources.withBandwidthGbps(0), + deployState.now()); + } - deployState.validationOverrides().invalid(ValidationId.resourcesReduction, - "Resource reduction in '" + clusterId.value() + "' is too large. " + - String.join(" ", illegalChanges) + - " New min resources must be at least 50% of the current min resources", - deployState.now()); } - private static Optional<String> validateResource(String resourceName, double currentValue, double nextValue) { - // don't allow more than 50% reduction, but always allow to reduce by 1 - if (nextValue >= currentValue * 0.5 || nextValue >= currentValue - 1) return Optional.empty(); - return Optional.of(String.format(Locale.ENGLISH ,"Current %s: %.2f, new: %.2f.", resourceName, currentValue, nextValue)); + /** + * If the given requested cluster resources does not specify node resources, return them with + * the current node resources of the cluster, as that is what unspecified resources actually resolved to. + * This will always yield specified node resources on hosted instances and never on self-hosted instances. + */ + private ClusterResources clusterResources(ClusterSpec.Id id, VespaModel model) { + if ( ! model.provisioned().all().containsKey(id)) return null; + + ClusterResources resources = model.provisioned().all().get(id).maxResources(); + if ( ! resources.nodeResources().isUnspecified()) return resources; + + var containerCluster = model.getContainerClusters().get(id.value()); + if (containerCluster != null) { + if ( ! containerCluster.getContainers().isEmpty()) + return resources.with(containerCluster.getContainers().get(0).getHostResource().advertisedResources()); + } + + var contentCluster = model.getContentClusters().get(id.value()); + if (contentCluster != null) { + var searchCluster = contentCluster.getSearch(); + if ( ! searchCluster.getSearchNodes().isEmpty()) + return resources.with(searchCluster.getSearchNodes().get(0).getHostResource().advertisedResources()); + } + + return resources; // only expected for admin clusters } } |