diff options
Diffstat (limited to 'config-model/src/main/java/com/yahoo/vespa/model/application/validation/change')
5 files changed, 61 insertions, 7 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ConfigValueChangeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ConfigValueChangeValidator.java index bfd100f40c9..3722dd84e29 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ConfigValueChangeValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ConfigValueChangeValidator.java @@ -15,7 +15,6 @@ import com.yahoo.vespa.model.utils.internal.ReflectionUtil; import java.util.Arrays; import java.util.List; import java.util.Optional; -import java.util.function.Consumer; import java.util.logging.Level; import java.util.stream.Stream; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/NodeResourceChangeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/NodeResourceChangeValidator.java index 0d4776ad00a..ce24d11121c 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/NodeResourceChangeValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/NodeResourceChangeValidator.java @@ -36,7 +36,7 @@ public class NodeResourceChangeValidator implements ChangeValidator { } private boolean changeRequiresRestart(NodeResources currentResources, NodeResources nextResources) { - return currentResources.memoryGb() != nextResources.memoryGb(); + return currentResources.memoryGiB() != nextResources.memoryGiB(); } private Optional<NodeResources> resourcesOf(ClusterSpec.Id clusterId, VespaModel model) { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java index 5d7a8779005..a1f8a97d47f 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/ResourcesReductionValidator.java @@ -42,7 +42,7 @@ public class ResourcesReductionValidator implements ChangeValidator { NodeResources currentResources = current.totalResources(); NodeResources nextResources = next.totalResources(); if (nextResources.vcpu() < 0.5 * currentResources.vcpu() || - nextResources.memoryGb() < 0.5 * currentResources.memoryGb() || + nextResources.memoryGiB() < 0.5 * currentResources.memoryGiB() || nextResources.diskGb() < 0.5 * currentResources.diskGb()) context.invalid(ValidationId.resourcesReduction, "Resource reduction in '" + clusterId.value() + "' is too large: " + @@ -60,9 +60,9 @@ public class ResourcesReductionValidator implements ChangeValidator { * This will always yield specified node resources on hosted instances and never on self-hosted instances. */ private ClusterResources clusterResources(ClusterSpec.Id id, VespaModel model) { - if ( ! model.provisioned().all().containsKey(id)) return null; + if ( ! model.provisioned().capacities().containsKey(id)) return null; - ClusterResources resources = model.provisioned().all().get(id).maxResources(); + ClusterResources resources = model.provisioned().capacities().get(id).maxResources(); if ( ! resources.nodeResources().isUnspecified()) return resources; var containerCluster = model.getContainerClusters().get(id.value()); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java new file mode 100644 index 00000000000..ccfc611c3dc --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForLocalLLMValidator.java @@ -0,0 +1,55 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.model.application.validation.change; + +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.vespa.model.VespaModel; +import com.yahoo.vespa.model.application.validation.Validation.ChangeContext; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; + +import java.util.HashSet; +import java.util.Set; +import java.util.logging.Logger; + +import static java.util.logging.Level.INFO; +import static java.util.stream.Collectors.toUnmodifiableSet; + +/** + * If using local LLMs, this validator will make sure that restartOnDeploy is set for + * configs for this cluster. + * + * @author lesters + */ +public class RestartOnDeployForLocalLLMValidator implements ChangeValidator { + + public static final String LOCAL_LLM_COMPONENT = ai.vespa.llm.clients.LocalLLM.class.getName(); + + private static final Logger log = Logger.getLogger(RestartOnDeployForLocalLLMValidator.class.getName()); + + @Override + public void validate(ChangeContext context) { + var previousClustersWithLocalLLM = findClustersWithLocalLLMs(context.previousModel()); + var nextClustersWithLocalLLM = findClustersWithLocalLLMs(context.model()); + + // Only restart services if we use a local LLM in both the next and previous generation + for (var clusterId : intersect(previousClustersWithLocalLLM, nextClustersWithLocalLLM)) { + String message = "Need to restart services in %s due to use of local LLM".formatted(clusterId); + context.require(new VespaRestartAction(clusterId, message)); + log.log(INFO, message); + } + } + + private Set<ClusterSpec.Id> findClustersWithLocalLLMs(VespaModel model) { + return model.getContainerClusters().values().stream() + .filter(cluster -> cluster.getAllComponents().stream() + .anyMatch(component -> component.getClassId().getName().equals(LOCAL_LLM_COMPONENT))) + .map(ApplicationContainerCluster::id) + .collect(toUnmodifiableSet()); + } + + private Set<ClusterSpec.Id> intersect(Set<ClusterSpec.Id> a, Set<ClusterSpec.Id> b) { + Set<ClusterSpec.Id> result = new HashSet<>(a); + result.retainAll(b); + return result; + } + +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java index 008a3fc5547..c68e13102d7 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java @@ -111,11 +111,11 @@ public class RestartOnDeployForOnnxModelChangesValidator implements ChangeValida double currentModelCostInGb = onnxModelCostInGb(clusterInCurrentModel); double nextModelCostInGb = onnxModelCostInGb(cluster); - double totalMemory = containers.stream().mapToDouble(c -> c.getHostResource().realResources().memoryGb()).min().orElseThrow(); + double totalMemory = containers.stream().mapToDouble(c -> c.getHostResource().realResources().memoryGiB()).min().orElseThrow(); double memoryUsedByModels = currentModelCostInGb + nextModelCostInGb; double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb - memoryUsedByModels); - var availableMemoryPercentage = cluster.availableMemoryPercentage(); + var availableMemoryPercentage = cluster.heapSizePercentageOfAvailable(); int memoryPercentage = (int) (availableMemory / totalMemory * availableMemoryPercentage); var prefix = "Validating Onnx models memory usage for %s".formatted(cluster); |