aboutsummaryrefslogtreecommitdiffstats
path: root/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change
diff options
context:
space:
mode:
Diffstat (limited to 'config-model/src/main/java/com/yahoo/vespa/model/application/validation/change')
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ConfigValueChangeValidator.java1
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/NodeResourceChangeValidator.java2
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java6
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java55
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java4
5 files changed, 61 insertions, 7 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ConfigValueChangeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ConfigValueChangeValidator.java
index bfd100f40c9..3722dd84e29 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ConfigValueChangeValidator.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ConfigValueChangeValidator.java
@@ -15,7 +15,6 @@ import com.yahoo.vespa.model.utils.internal.ReflectionUtil;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
-import java.util.function.Consumer;
import java.util.logging.Level;
import java.util.stream.Stream;
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/NodeResourceChangeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/NodeResourceChangeValidator.java
index 0d4776ad00a..ce24d11121c 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/NodeResourceChangeValidator.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/NodeResourceChangeValidator.java
@@ -36,7 +36,7 @@ public class NodeResourceChangeValidator implements ChangeValidator {
}
private boolean changeRequiresRestart(NodeResources currentResources, NodeResources nextResources) {
- return currentResources.memoryGb() != nextResources.memoryGb();
+ return currentResources.memoryGiB() != nextResources.memoryGiB();
}
private Optional<NodeResources> resourcesOf(ClusterSpec.Id clusterId, VespaModel model) {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java
index 5d7a8779005..a1f8a97d47f 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java
@@ -42,7 +42,7 @@ public class ResourcesReductionValidator implements ChangeValidator {
NodeResources currentResources = current.totalResources();
NodeResources nextResources = next.totalResources();
if (nextResources.vcpu() < 0.5 * currentResources.vcpu() ||
- nextResources.memoryGb() < 0.5 * currentResources.memoryGb() ||
+ nextResources.memoryGiB() < 0.5 * currentResources.memoryGiB() ||
nextResources.diskGb() < 0.5 * currentResources.diskGb())
context.invalid(ValidationId.resourcesReduction,
"Resource reduction in '" + clusterId.value() + "' is too large: " +
@@ -60,9 +60,9 @@ public class ResourcesReductionValidator implements ChangeValidator {
* This will always yield specified node resources on hosted instances and never on self-hosted instances.
*/
private ClusterResources clusterResources(ClusterSpec.Id id, VespaModel model) {
- if ( ! model.provisioned().all().containsKey(id)) return null;
+ if ( ! model.provisioned().capacities().containsKey(id)) return null;
- ClusterResources resources = model.provisioned().all().get(id).maxResources();
+ ClusterResources resources = model.provisioned().capacities().get(id).maxResources();
if ( ! resources.nodeResources().isUnspecified()) return resources;
var containerCluster = model.getContainerClusters().get(id.value());
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java
new file mode 100644
index 00000000000..ccfc611c3dc
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java
@@ -0,0 +1,55 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.model.application.validation.change;
+
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.vespa.model.VespaModel;
+import com.yahoo.vespa.model.application.validation.Validation.ChangeContext;
+import com.yahoo.vespa.model.container.ApplicationContainerCluster;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.logging.Logger;
+
+import static java.util.logging.Level.INFO;
+import static java.util.stream.Collectors.toUnmodifiableSet;
+
+/**
+ * If using local LLMs, this validator will make sure that restartOnDeploy is set for
+ * configs for this cluster.
+ *
+ * @author lesters
+ */
+public class RestartOnDeployForLocalLLMValidator implements ChangeValidator {
+
+ public static final String LOCAL_LLM_COMPONENT = ai.vespa.llm.clients.LocalLLM.class.getName();
+
+ private static final Logger log = Logger.getLogger(RestartOnDeployForLocalLLMValidator.class.getName());
+
+ @Override
+ public void validate(ChangeContext context) {
+ var previousClustersWithLocalLLM = findClustersWithLocalLLMs(context.previousModel());
+ var nextClustersWithLocalLLM = findClustersWithLocalLLMs(context.model());
+
+ // Only restart services if we use a local LLM in both the next and previous generation
+ for (var clusterId : intersect(previousClustersWithLocalLLM, nextClustersWithLocalLLM)) {
+ String message = "Need to restart services in %s due to use of local LLM".formatted(clusterId);
+ context.require(new VespaRestartAction(clusterId, message));
+ log.log(INFO, message);
+ }
+ }
+
+ private Set<ClusterSpec.Id> findClustersWithLocalLLMs(VespaModel model) {
+ return model.getContainerClusters().values().stream()
+ .filter(cluster -> cluster.getAllComponents().stream()
+ .anyMatch(component -> component.getClassId().getName().equals(LOCAL_LLM_COMPONENT)))
+ .map(ApplicationContainerCluster::id)
+ .collect(toUnmodifiableSet());
+ }
+
+ private Set<ClusterSpec.Id> intersect(Set<ClusterSpec.Id> a, Set<ClusterSpec.Id> b) {
+ Set<ClusterSpec.Id> result = new HashSet<>(a);
+ result.retainAll(b);
+ return result;
+ }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java
index 008a3fc5547..c68e13102d7 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java
@@ -111,11 +111,11 @@ public class RestartOnDeployForOnnxModelChangesValidator implements ChangeValida
double currentModelCostInGb = onnxModelCostInGb(clusterInCurrentModel);
double nextModelCostInGb = onnxModelCostInGb(cluster);
- double totalMemory = containers.stream().mapToDouble(c -> c.getHostResource().realResources().memoryGb()).min().orElseThrow();
+ double totalMemory = containers.stream().mapToDouble(c -> c.getHostResource().realResources().memoryGiB()).min().orElseThrow();
double memoryUsedByModels = currentModelCostInGb + nextModelCostInGb;
double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb - memoryUsedByModels);
- var availableMemoryPercentage = cluster.availableMemoryPercentage();
+ var availableMemoryPercentage = cluster.heapSizePercentageOfAvailable();
int memoryPercentage = (int) (availableMemory / totalMemory * availableMemoryPercentage);
var prefix = "Validating Onnx models memory usage for %s".formatted(cluster);