diff options
Diffstat (limited to 'config-model/src/main/java/com/yahoo/vespa/model/application')
7 files changed, 98 insertions, 29 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java index 40c9a03b126..02a6b243054 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java @@ -132,7 +132,7 @@ public class ConstantTensorJsonValidator { private void consumeTopObject() throws IOException { for (var cur = parser.nextToken(); cur != JsonToken.END_OBJECT; cur = parser.nextToken()) { assertCurrentTokenIs(JsonToken.FIELD_NAME); - String fieldName = parser.getCurrentName(); + String fieldName = parser.currentName(); switch (fieldName) { case FIELD_TYPE -> consumeTypeField(); case FIELD_VALUES -> consumeValuesField(); @@ -189,7 +189,7 @@ public class ConstantTensorJsonValidator { } for (var cur = parser.nextToken(); cur != JsonToken.END_OBJECT; cur = parser.nextToken()) { assertCurrentTokenIs(JsonToken.FIELD_NAME); - validateNumeric(parser.getCurrentName(), parser.nextToken()); + validateNumeric(parser.currentName(), parser.nextToken()); } } @@ -199,7 +199,7 @@ public class ConstantTensorJsonValidator { boolean seenValue = false; for (int i = 0; i < 2; i++) { assertNextTokenIs(JsonToken.FIELD_NAME); - String fieldName = parser.getCurrentName(); + String fieldName = parser.currentName(); switch (fieldName) { case FIELD_ADDRESS -> { validateTensorAddress(new HashSet<>(tensorDimensions.keySet())); @@ -228,13 +228,13 @@ public class ConstantTensorJsonValidator { // Iterate within the address key, value pairs while ((parser.nextToken() != JsonToken.END_OBJECT)) { assertCurrentTokenIs(JsonToken.FIELD_NAME); - String dimensionName = parser.getCurrentName(); + String dimensionName = parser.currentName(); TensorType.Dimension dimension = tensorDimensions.get(dimensionName); if (dimension == null) { - throw new InvalidConstantTensorException(parser, String.format("Tensor dimension '%s' does not exist", parser.getCurrentName())); + throw new InvalidConstantTensorException(parser, String.format("Tensor dimension '%s' does not exist", dimensionName)); } if (!cellDimensions.contains(dimensionName)) { - throw new InvalidConstantTensorException(parser, String.format("Duplicate tensor dimension '%s'", parser.getCurrentName())); + throw new InvalidConstantTensorException(parser, String.format("Duplicate tensor dimension '%s'", dimensionName)); } cellDimensions.remove(dimensionName); validateLabel(dimension); @@ -300,7 +300,7 @@ public class ConstantTensorJsonValidator { } private void assertCurrentTokenIs(JsonToken wantedToken) { - assertTokenIs(parser.getCurrentToken(), wantedToken); + assertTokenIs(parser.currentToken(), wantedToken); } private void assertNextTokenIs(JsonToken wantedToken) throws IOException { @@ -316,11 +316,11 @@ public class ConstantTensorJsonValidator { static class InvalidConstantTensorException extends IllegalArgumentException { InvalidConstantTensorException(JsonParser parser, String message) { - super(message + " " + parser.getCurrentLocation().toString()); + super(message + " " + parser.currentLocation().toString()); } InvalidConstantTensorException(JsonParser parser, Exception base) { - super("Failed to parse JSON stream " + parser.getCurrentLocation().toString(), base); + super("Failed to parse JSON stream " + parser.currentLocation().toString(), base); } InvalidConstantTensorException(IOException base) { @@ -412,7 +412,7 @@ public class ConstantTensorJsonValidator { boolean seenValues = false; for (int i = 0; i < 2; i++) { assertNextTokenIs(JsonToken.FIELD_NAME); - String fieldName = parser.getCurrentName(); + String fieldName = parser.currentName(); switch (fieldName) { case FIELD_ADDRESS -> { validateTensorAddress(new HashSet<>(mappedDims)); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java index 9cf5fe84c21..4900b56801c 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java @@ -23,21 +23,22 @@ public class JvmHeapSizeValidator implements Validator { context.model().getContainerClusters().forEach((clusterId, appCluster) -> { var mp = appCluster.getMemoryPercentage().orElse(null); if (mp == null) return; - if (mp.availableMemoryGb().isEmpty()) { + if (mp.asAbsoluteGb().isEmpty()) { context.deployState().getDeployLogger().log(Level.FINE, "Host resources unknown or percentage overridden with 'allocated-memory'"); return; } long jvmModelCost = appCluster.onnxModelCostCalculator().aggregatedModelCostInBytes(); if (jvmModelCost > 0) { - double availableMemoryGb = mp.availableMemoryGb().getAsDouble(); + double availableMemoryGb = mp.asAbsoluteGb().getAsDouble(); + int percentageOfTotal = mp.ofContainerTotal().getAsInt(); double modelCostGb = jvmModelCost / (1024D * 1024 * 1024); context.deployState().getDeployLogger().log(Level.FINE, () -> Text.format("JVM: %d%% (limit: %d%%), %.2fGB (limit: %.2fGB), ONNX: %.2fGB", - mp.percentage(), percentLimit, availableMemoryGb, gbLimit, modelCostGb)); - if (mp.percentage() < percentLimit) { + percentageOfTotal, percentLimit, availableMemoryGb, gbLimit, modelCostGb)); + if (percentageOfTotal < percentLimit) { context.illegal(Text.format("Allocated percentage of memory of JVM in cluster '%s' is too low (%d%% < %d%%). " + "Estimated cost of ONNX models is %.2fGB. Either use a node flavor with more memory or use less expensive models. " + "You may override this validation by specifying 'allocated-memory' (https://docs.vespa.ai/en/performance/container-tuning.html#jvm-heap-size).", - clusterId, mp.percentage(), percentLimit, modelCostGb)); + clusterId, percentageOfTotal, percentLimit, modelCostGb)); } if (availableMemoryGb < gbLimit) { context.illegal( diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java index 4d9386b5f19..ea579aaf5d1 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java @@ -2,12 +2,13 @@ package com.yahoo.vespa.model.application.validation; import com.yahoo.config.provision.Capacity; +import com.yahoo.config.provision.CapacityPolicies; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.Exclusivity; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.QuotaExceededException; import com.yahoo.config.provision.SystemName; -import com.yahoo.config.provision.Zone; import com.yahoo.vespa.model.VespaModel; import com.yahoo.vespa.model.application.validation.Validation.Context; @@ -31,25 +32,35 @@ public class QuotaValidator implements Validator { @Override public void validate(Context context) { + var zone = context.deployState().zone(); + var exclusivity = new Exclusivity(zone, context.deployState().featureFlags().sharedHosts()); + var capacityPolicies = new CapacityPolicies(zone, exclusivity, context.model().applicationPackage().getApplicationId(), + context.deployState().featureFlags().adminClusterArchitecture()); var quota = context.deployState().getProperties().quota(); quota.maxClusterSize().ifPresent(maxClusterSize -> validateMaxClusterSize(maxClusterSize, context.model())); - quota.budgetAsDecimal().ifPresent(budget -> validateBudget(budget, context.model(), context.deployState().getProperties().zone())); + quota.budgetAsDecimal().ifPresent(budget -> validateBudget(budget, context, capacityPolicies)); } - private void validateBudget(BigDecimal budget, VespaModel model, Zone zone) { - var maxSpend = model.allClusters().stream() - .filter(id -> !adminClusterIds(model).contains(id)) - .map(id -> model.provisioned().all().getOrDefault(id, zeroCapacity)) - .mapToDouble(c -> c.maxResources().cost()) // TODO: This may be unspecified -> 0 - .sum(); + private void validateBudget(BigDecimal budget, Context context, + CapacityPolicies capacityPolicies) { + var zone = context.deployState().getProperties().zone(); + var application = context.model().applicationPackage().getApplicationId(); + + var maxSpend = 0.0; + for (var id : context.model().allClusters()) { + if (adminClusterIds(context.model()).contains(id)) continue; + var cluster = context.model().provisioned().clusters().get(id); + var capacity = context.model().provisioned().capacities().getOrDefault(id, zeroCapacity); + maxSpend += capacityPolicies.applyOn(capacity, cluster.isExclusive()).maxResources().cost(); + } - var actualSpend = model.allocatedHosts().getHosts().stream() + var actualSpend = context.model().allocatedHosts().getHosts().stream() .filter(hostSpec -> hostSpec.membership().get().cluster().type() != ClusterSpec.Type.admin) .mapToDouble(hostSpec -> hostSpec.advertisedResources().cost()) .sum(); if (Math.abs(actualSpend) < 0.01) { - log.warning("Deploying application " + model.applicationPackage().getApplicationId() + " with zero budget use. This is suspicious, but not blocked"); + log.warning("Deploying application " + application + " with zero budget use. This is suspicious, but not blocked"); return; } @@ -69,7 +80,7 @@ public class QuotaValidator implements Validator { /** Check that all clusters in the application do not exceed the quota max cluster size. */ private void validateMaxClusterSize(int maxClusterSize, VespaModel model) { - var invalidClusters = model.provisioned().all().entrySet().stream() + var invalidClusters = model.provisioned().capacities().entrySet().stream() .filter(entry -> entry.getValue() != null) .filter(entry -> { var cluster = entry.getValue(); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java index ed0804f7420..7f624032627 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java @@ -19,6 +19,7 @@ import com.yahoo.vespa.model.application.validation.change.IndexingModeChangeVal import com.yahoo.vespa.model.application.validation.change.NodeResourceChangeValidator; import com.yahoo.vespa.model.application.validation.change.RedundancyIncreaseValidator; import com.yahoo.vespa.model.application.validation.change.ResourcesReductionValidator; +import com.yahoo.vespa.model.application.validation.change.RestartOnDeployForLocalLLMValidator; import com.yahoo.vespa.model.application.validation.change.RestartOnDeployForOnnxModelChangesValidator; import com.yahoo.vespa.model.application.validation.change.StartupCommandChangeValidator; import com.yahoo.vespa.model.application.validation.change.StreamingSearchClusterChangeValidator; @@ -129,6 +130,7 @@ public class Validation { new CertificateRemovalChangeValidator().validate(execution); new RedundancyValidator().validate(execution); new RestartOnDeployForOnnxModelChangesValidator().validate(execution); + new RestartOnDeployForLocalLLMValidator().validate(execution); } public interface Context { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java index 5d7a8779005..42410dc3acf 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java @@ -60,9 +60,9 @@ public class ResourcesReductionValidator implements ChangeValidator { * This will always yield specified node resources on hosted instances and never on self-hosted instances. */ private ClusterResources clusterResources(ClusterSpec.Id id, VespaModel model) { - if ( ! model.provisioned().all().containsKey(id)) return null; + if ( ! model.provisioned().capacities().containsKey(id)) return null; - ClusterResources resources = model.provisioned().all().get(id).maxResources(); + ClusterResources resources = model.provisioned().capacities().get(id).maxResources(); if ( ! resources.nodeResources().isUnspecified()) return resources; var containerCluster = model.getContainerClusters().get(id.value()); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java new file mode 100644 index 00000000000..ccfc611c3dc --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java @@ -0,0 +1,55 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.model.application.validation.change; + +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.vespa.model.VespaModel; +import com.yahoo.vespa.model.application.validation.Validation.ChangeContext; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; + +import java.util.HashSet; +import java.util.Set; +import java.util.logging.Logger; + +import static java.util.logging.Level.INFO; +import static java.util.stream.Collectors.toUnmodifiableSet; + +/** + * If using local LLMs, this validator will make sure that restartOnDeploy is set for + * configs for this cluster. + * + * @author lesters + */ +public class RestartOnDeployForLocalLLMValidator implements ChangeValidator { + + public static final String LOCAL_LLM_COMPONENT = ai.vespa.llm.clients.LocalLLM.class.getName(); + + private static final Logger log = Logger.getLogger(RestartOnDeployForLocalLLMValidator.class.getName()); + + @Override + public void validate(ChangeContext context) { + var previousClustersWithLocalLLM = findClustersWithLocalLLMs(context.previousModel()); + var nextClustersWithLocalLLM = findClustersWithLocalLLMs(context.model()); + + // Only restart services if we use a local LLM in both the next and previous generation + for (var clusterId : intersect(previousClustersWithLocalLLM, nextClustersWithLocalLLM)) { + String message = "Need to restart services in %s due to use of local LLM".formatted(clusterId); + context.require(new VespaRestartAction(clusterId, message)); + log.log(INFO, message); + } + } + + private Set<ClusterSpec.Id> findClustersWithLocalLLMs(VespaModel model) { + return model.getContainerClusters().values().stream() + .filter(cluster -> cluster.getAllComponents().stream() + .anyMatch(component -> component.getClassId().getName().equals(LOCAL_LLM_COMPONENT))) + .map(ApplicationContainerCluster::id) + .collect(toUnmodifiableSet()); + } + + private Set<ClusterSpec.Id> intersect(Set<ClusterSpec.Id> a, Set<ClusterSpec.Id> b) { + Set<ClusterSpec.Id> result = new HashSet<>(a); + result.retainAll(b); + return result; + } + +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java index 008a3fc5547..e57110e44e5 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java @@ -115,7 +115,7 @@ public class RestartOnDeployForOnnxModelChangesValidator implements ChangeValida double memoryUsedByModels = currentModelCostInGb + nextModelCostInGb; double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb - memoryUsedByModels); - var availableMemoryPercentage = cluster.availableMemoryPercentage(); + var availableMemoryPercentage = cluster.heapSizePercentageOfAvailable(); int memoryPercentage = (int) (availableMemory / totalMemory * availableMemoryPercentage); var prefix = "Validating Onnx models memory usage for %s".formatted(cluster); |