aboutsummaryrefslogtreecommitdiffstats
path: root/config-model/src/main/java/com/yahoo/vespa/model/application
diff options
context:
space:
mode:
Diffstat (limited to 'config-model/src/main/java/com/yahoo/vespa/model/application')
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java | 20
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java | 11
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java | 33
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java | 2
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java | 4
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java | 55
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java | 2
7 files changed, 98 insertions, 29 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java
index 40c9a03b126..02a6b243054 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java
@@ -132,7 +132,7 @@ public class ConstantTensorJsonValidator {
private void consumeTopObject() throws IOException {
for (var cur = parser.nextToken(); cur != JsonToken.END_OBJECT; cur = parser.nextToken()) {
assertCurrentTokenIs(JsonToken.FIELD_NAME);
- String fieldName = parser.getCurrentName();
+ String fieldName = parser.currentName();
switch (fieldName) {
case FIELD_TYPE -> consumeTypeField();
case FIELD_VALUES -> consumeValuesField();
@@ -189,7 +189,7 @@ public class ConstantTensorJsonValidator {
}
for (var cur = parser.nextToken(); cur != JsonToken.END_OBJECT; cur = parser.nextToken()) {
assertCurrentTokenIs(JsonToken.FIELD_NAME);
- validateNumeric(parser.getCurrentName(), parser.nextToken());
+ validateNumeric(parser.currentName(), parser.nextToken());
}
}
@@ -199,7 +199,7 @@ public class ConstantTensorJsonValidator {
boolean seenValue = false;
for (int i = 0; i < 2; i++) {
assertNextTokenIs(JsonToken.FIELD_NAME);
- String fieldName = parser.getCurrentName();
+ String fieldName = parser.currentName();
switch (fieldName) {
case FIELD_ADDRESS -> {
validateTensorAddress(new HashSet<>(tensorDimensions.keySet()));
@@ -228,13 +228,13 @@ public class ConstantTensorJsonValidator {
// Iterate within the address key, value pairs
while ((parser.nextToken() != JsonToken.END_OBJECT)) {
assertCurrentTokenIs(JsonToken.FIELD_NAME);
- String dimensionName = parser.getCurrentName();
+ String dimensionName = parser.currentName();
TensorType.Dimension dimension = tensorDimensions.get(dimensionName);
if (dimension == null) {
- throw new InvalidConstantTensorException(parser, String.format("Tensor dimension '%s' does not exist", parser.getCurrentName()));
+ throw new InvalidConstantTensorException(parser, String.format("Tensor dimension '%s' does not exist", dimensionName));
}
if (!cellDimensions.contains(dimensionName)) {
- throw new InvalidConstantTensorException(parser, String.format("Duplicate tensor dimension '%s'", parser.getCurrentName()));
+ throw new InvalidConstantTensorException(parser, String.format("Duplicate tensor dimension '%s'", dimensionName));
}
cellDimensions.remove(dimensionName);
validateLabel(dimension);
@@ -300,7 +300,7 @@ public class ConstantTensorJsonValidator {
}
private void assertCurrentTokenIs(JsonToken wantedToken) {
- assertTokenIs(parser.getCurrentToken(), wantedToken);
+ assertTokenIs(parser.currentToken(), wantedToken);
}
private void assertNextTokenIs(JsonToken wantedToken) throws IOException {
@@ -316,11 +316,11 @@ public class ConstantTensorJsonValidator {
static class InvalidConstantTensorException extends IllegalArgumentException {
InvalidConstantTensorException(JsonParser parser, String message) {
- super(message + " " + parser.getCurrentLocation().toString());
+ super(message + " " + parser.currentLocation().toString());
}
InvalidConstantTensorException(JsonParser parser, Exception base) {
- super("Failed to parse JSON stream " + parser.getCurrentLocation().toString(), base);
+ super("Failed to parse JSON stream " + parser.currentLocation().toString(), base);
}
InvalidConstantTensorException(IOException base) {
@@ -412,7 +412,7 @@ public class ConstantTensorJsonValidator {
boolean seenValues = false;
for (int i = 0; i < 2; i++) {
assertNextTokenIs(JsonToken.FIELD_NAME);
- String fieldName = parser.getCurrentName();
+ String fieldName = parser.currentName();
switch (fieldName) {
case FIELD_ADDRESS -> {
validateTensorAddress(new HashSet<>(mappedDims));
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java
index 9cf5fe84c21..4900b56801c 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java
@@ -23,21 +23,22 @@ public class JvmHeapSizeValidator implements Validator {
context.model().getContainerClusters().forEach((clusterId, appCluster) -> {
var mp = appCluster.getMemoryPercentage().orElse(null);
if (mp == null) return;
- if (mp.availableMemoryGb().isEmpty()) {
+ if (mp.asAbsoluteGb().isEmpty()) {
context.deployState().getDeployLogger().log(Level.FINE, "Host resources unknown or percentage overridden with 'allocated-memory'");
return;
}
long jvmModelCost = appCluster.onnxModelCostCalculator().aggregatedModelCostInBytes();
if (jvmModelCost > 0) {
- double availableMemoryGb = mp.availableMemoryGb().getAsDouble();
+ double availableMemoryGb = mp.asAbsoluteGb().getAsDouble();
+ int percentageOfTotal = mp.ofContainerTotal().getAsInt();
double modelCostGb = jvmModelCost / (1024D * 1024 * 1024);
context.deployState().getDeployLogger().log(Level.FINE, () -> Text.format("JVM: %d%% (limit: %d%%), %.2fGB (limit: %.2fGB), ONNX: %.2fGB",
- mp.percentage(), percentLimit, availableMemoryGb, gbLimit, modelCostGb));
- if (mp.percentage() < percentLimit) {
+ percentageOfTotal, percentLimit, availableMemoryGb, gbLimit, modelCostGb));
+ if (percentageOfTotal < percentLimit) {
context.illegal(Text.format("Allocated percentage of memory of JVM in cluster '%s' is too low (%d%% < %d%%). " +
"Estimated cost of ONNX models is %.2fGB. Either use a node flavor with more memory or use less expensive models. " +
"You may override this validation by specifying 'allocated-memory' (https://docs.vespa.ai/en/performance/container-tuning.html#jvm-heap-size).",
- clusterId, mp.percentage(), percentLimit, modelCostGb));
+ clusterId, percentageOfTotal, percentLimit, modelCostGb));
}
if (availableMemoryGb < gbLimit) {
context.illegal(
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java
index 4d9386b5f19..ea579aaf5d1 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java
@@ -2,12 +2,13 @@
package com.yahoo.vespa.model.application.validation;
import com.yahoo.config.provision.Capacity;
+import com.yahoo.config.provision.CapacityPolicies;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.Exclusivity;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.QuotaExceededException;
import com.yahoo.config.provision.SystemName;
-import com.yahoo.config.provision.Zone;
import com.yahoo.vespa.model.VespaModel;
import com.yahoo.vespa.model.application.validation.Validation.Context;
@@ -31,25 +32,35 @@ public class QuotaValidator implements Validator {
@Override
public void validate(Context context) {
+ var zone = context.deployState().zone();
+ var exclusivity = new Exclusivity(zone, context.deployState().featureFlags().sharedHosts());
+ var capacityPolicies = new CapacityPolicies(zone, exclusivity, context.model().applicationPackage().getApplicationId(),
+ context.deployState().featureFlags().adminClusterArchitecture());
var quota = context.deployState().getProperties().quota();
quota.maxClusterSize().ifPresent(maxClusterSize -> validateMaxClusterSize(maxClusterSize, context.model()));
- quota.budgetAsDecimal().ifPresent(budget -> validateBudget(budget, context.model(), context.deployState().getProperties().zone()));
+ quota.budgetAsDecimal().ifPresent(budget -> validateBudget(budget, context, capacityPolicies));
}
- private void validateBudget(BigDecimal budget, VespaModel model, Zone zone) {
- var maxSpend = model.allClusters().stream()
- .filter(id -> !adminClusterIds(model).contains(id))
- .map(id -> model.provisioned().all().getOrDefault(id, zeroCapacity))
- .mapToDouble(c -> c.maxResources().cost()) // TODO: This may be unspecified -> 0
- .sum();
+ private void validateBudget(BigDecimal budget, Context context,
+ CapacityPolicies capacityPolicies) {
+ var zone = context.deployState().getProperties().zone();
+ var application = context.model().applicationPackage().getApplicationId();
+
+ var maxSpend = 0.0;
+ for (var id : context.model().allClusters()) {
+ if (adminClusterIds(context.model()).contains(id)) continue;
+ var cluster = context.model().provisioned().clusters().get(id);
+ var capacity = context.model().provisioned().capacities().getOrDefault(id, zeroCapacity);
+ maxSpend += capacityPolicies.applyOn(capacity, cluster.isExclusive()).maxResources().cost();
+ }
- var actualSpend = model.allocatedHosts().getHosts().stream()
+ var actualSpend = context.model().allocatedHosts().getHosts().stream()
.filter(hostSpec -> hostSpec.membership().get().cluster().type() != ClusterSpec.Type.admin)
.mapToDouble(hostSpec -> hostSpec.advertisedResources().cost())
.sum();
if (Math.abs(actualSpend) < 0.01) {
- log.warning("Deploying application " + model.applicationPackage().getApplicationId() + " with zero budget use. This is suspicious, but not blocked");
+ log.warning("Deploying application " + application + " with zero budget use. This is suspicious, but not blocked");
return;
}
@@ -69,7 +80,7 @@ public class QuotaValidator implements Validator {
/** Check that all clusters in the application do not exceed the quota max cluster size. */
private void validateMaxClusterSize(int maxClusterSize, VespaModel model) {
- var invalidClusters = model.provisioned().all().entrySet().stream()
+ var invalidClusters = model.provisioned().capacities().entrySet().stream()
.filter(entry -> entry.getValue() != null)
.filter(entry -> {
var cluster = entry.getValue();
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java
index ed0804f7420..7f624032627 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java
@@ -19,6 +19,7 @@ import com.yahoo.vespa.model.application.validation.change.IndexingModeChangeVal
import com.yahoo.vespa.model.application.validation.change.NodeResourceChangeValidator;
import com.yahoo.vespa.model.application.validation.change.RedundancyIncreaseValidator;
import com.yahoo.vespa.model.application.validation.change.ResourcesReductionValidator;
+import com.yahoo.vespa.model.application.validation.change.RestartOnDeployForLocalLLMValidator;
import com.yahoo.vespa.model.application.validation.change.RestartOnDeployForOnnxModelChangesValidator;
import com.yahoo.vespa.model.application.validation.change.StartupCommandChangeValidator;
import com.yahoo.vespa.model.application.validation.change.StreamingSearchClusterChangeValidator;
@@ -129,6 +130,7 @@ public class Validation {
new CertificateRemovalChangeValidator().validate(execution);
new RedundancyValidator().validate(execution);
new RestartOnDeployForOnnxModelChangesValidator().validate(execution);
+ new RestartOnDeployForLocalLLMValidator().validate(execution);
}
public interface Context {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java
index 5d7a8779005..42410dc3acf 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java
@@ -60,9 +60,9 @@ public class ResourcesReductionValidator implements ChangeValidator {
* This will always yield specified node resources on hosted instances and never on self-hosted instances.
*/
private ClusterResources clusterResources(ClusterSpec.Id id, VespaModel model) {
- if ( ! model.provisioned().all().containsKey(id)) return null;
+ if ( ! model.provisioned().capacities().containsKey(id)) return null;
- ClusterResources resources = model.provisioned().all().get(id).maxResources();
+ ClusterResources resources = model.provisioned().capacities().get(id).maxResources();
if ( ! resources.nodeResources().isUnspecified()) return resources;
var containerCluster = model.getContainerClusters().get(id.value());
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java
new file mode 100644
index 00000000000..ccfc611c3dc
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java
@@ -0,0 +1,55 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.model.application.validation.change;
+
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.vespa.model.VespaModel;
+import com.yahoo.vespa.model.application.validation.Validation.ChangeContext;
+import com.yahoo.vespa.model.container.ApplicationContainerCluster;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.logging.Logger;
+
+import static java.util.logging.Level.INFO;
+import static java.util.stream.Collectors.toUnmodifiableSet;
+
+/**
+ * Requires a restart of the services in every container cluster that has a local LLM
+ * component in both the previous and the next model.
+ *
+ * @author lesters
+ */
+public class RestartOnDeployForLocalLLMValidator implements ChangeValidator {
+
+ public static final String LOCAL_LLM_COMPONENT = ai.vespa.llm.clients.LocalLLM.class.getName();
+
+ private static final Logger log = Logger.getLogger(RestartOnDeployForLocalLLMValidator.class.getName());
+
+ @Override
+ public void validate(ChangeContext context) {
+ var previousClustersWithLocalLLM = findClustersWithLocalLLMs(context.previousModel());
+ var nextClustersWithLocalLLM = findClustersWithLocalLLMs(context.model());
+
+ // Only restart services if we use a local LLM in both the next and previous generation
+ for (var clusterId : intersect(previousClustersWithLocalLLM, nextClustersWithLocalLLM)) {
+ String message = "Need to restart services in %s due to use of local LLM".formatted(clusterId);
+ context.require(new VespaRestartAction(clusterId, message));
+ log.log(INFO, message);
+ }
+ }
+
+ private Set<ClusterSpec.Id> findClustersWithLocalLLMs(VespaModel model) {
+ return model.getContainerClusters().values().stream()
+ .filter(cluster -> cluster.getAllComponents().stream()
+ .anyMatch(component -> component.getClassId().getName().equals(LOCAL_LLM_COMPONENT)))
+ .map(ApplicationContainerCluster::id)
+ .collect(toUnmodifiableSet());
+ }
+
+ private Set<ClusterSpec.Id> intersect(Set<ClusterSpec.Id> a, Set<ClusterSpec.Id> b) {
+ Set<ClusterSpec.Id> result = new HashSet<>(a);
+ result.retainAll(b);
+ return result;
+ }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java
index 008a3fc5547..e57110e44e5 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java
@@ -115,7 +115,7 @@ public class RestartOnDeployForOnnxModelChangesValidator implements ChangeValida
double memoryUsedByModels = currentModelCostInGb + nextModelCostInGb;
double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb - memoryUsedByModels);
- var availableMemoryPercentage = cluster.availableMemoryPercentage();
+ var availableMemoryPercentage = cluster.heapSizePercentageOfAvailable();
int memoryPercentage = (int) (availableMemory / totalMemory * availableMemoryPercentage);
var prefix = "Validating Onnx models memory usage for %s".formatted(cluster);