diff options
Diffstat (limited to 'config-model')
3 files changed, 47 insertions, 12 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java index 60f325cbe43..05ebaf9ef2d 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java @@ -5,7 +5,9 @@ package com.yahoo.vespa.model.application.validation; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.text.Text; import com.yahoo.vespa.model.VespaModel; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; +import java.util.function.BiConsumer; import java.util.logging.Level; /** @@ -15,6 +17,9 @@ import java.util.logging.Level; */ public class JvmHeapSizeValidator extends Validator { + public static final int percentLimit = 15; + public static final double gbLimit = 0.6; + @Override public void validate(VespaModel model, DeployState ds) { if (!ds.featureFlags().dynamicHeapSize()) return; @@ -29,8 +34,6 @@ public class JvmHeapSizeValidator extends Validator { } long jvmModelCost = appCluster.onnxModelCostCalculator().aggregatedModelCostInBytes(); if (jvmModelCost > 0) { - int percentLimit = 15; - double gbLimit = 0.6; double availableMemoryGb = mp.availableMemoryGb().getAsDouble(); double modelCostGb = jvmModelCost / (1024D * 1024 * 1024); ds.getDeployLogger().log(Level.FINE, () -> Text.format("JVM: %d%% (limit: %d%%), %.2fGB (limit: %.2fGB), ONNX: %.2fGB", diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java index 355bce24c0f..15e9e526db2 100644 --- 
a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.model.application.validation.change; +import com.yahoo.config.application.api.DeployLogger; import com.yahoo.config.model.api.ConfigChangeAction; import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.config.model.deploy.DeployState; @@ -15,8 +16,11 @@ import java.util.Optional; import java.util.Set; import java.util.logging.Logger; +import static com.yahoo.vespa.model.application.validation.JvmHeapSizeValidator.gbLimit; +import static com.yahoo.vespa.model.application.validation.JvmHeapSizeValidator.percentLimit; import static java.util.logging.Level.FINE; import static com.yahoo.config.model.api.OnnxModelCost.ModelInfo; +import static java.util.logging.Level.INFO; /** * If Onnx models change in a way that requires restart of containers in @@ -36,7 +40,6 @@ public class RestartOnDeployForOnnxModelChangesValidator implements ChangeValida // Compare onnx models used by each cluster and set restart on deploy for cluster if estimated cost, // model hash or model options have changed - // TODO: Skip if container has enough memory to handle reload of onnx model (2 models in memory at the same time) for (var cluster : nextModel.getContainerClusters().values()) { var clusterInCurrentModel = currentModel.getContainerClusters().get(cluster.getName()); if (clusterInCurrentModel == null) continue; @@ -44,6 +47,9 @@ public class RestartOnDeployForOnnxModelChangesValidator implements ChangeValida var currentModels = clusterInCurrentModel.onnxModelCostCalculator().models(); var nextModels = cluster.onnxModelCostCalculator().models(); + if 
(enoughMemoryToAvoidRestart(clusterInCurrentModel, cluster, deployState.getDeployLogger())) + continue; + log.log(FINE, "Validating " + cluster + ", current models=" + currentModels + ", next models=" + nextModels); actions.addAll(validateModelChanges(cluster, currentModels, nextModels)); actions.addAll(validateSetOfModels(cluster, currentModels, nextModels)); @@ -58,7 +64,6 @@ public class RestartOnDeployForOnnxModelChangesValidator implements ChangeValida for (var nextModelInfo : nextModels.values()) { if (! currentModels.containsKey(nextModelInfo.modelId())) continue; - log.log(FINE, "Checking if " + nextModelInfo + " has changed"); modelChanged(nextModelInfo, currentModels.get(nextModelInfo.modelId())).ifPresent(change -> { String message = "Onnx model '%s' has changed (%s), need to restart services in %s" .formatted(nextModelInfo.modelId(), change, cluster); @@ -84,6 +89,7 @@ public class RestartOnDeployForOnnxModelChangesValidator implements ChangeValida } private Optional<String> modelChanged(OnnxModelCost.ModelInfo a, OnnxModelCost.ModelInfo b) { + log.log(FINE, "Checking if model has changed (%s) -> (%s)".formatted(a, b)); if (a.estimatedCost() != b.estimatedCost()) return Optional.of("estimated cost"); if (a.hash() != b.hash()) return Optional.of("model hash"); if (a.onnxModelOptions().isPresent() && b.onnxModelOptions().isEmpty()) return Optional.of("model option(s)"); @@ -94,16 +100,38 @@ public class RestartOnDeployForOnnxModelChangesValidator implements ChangeValida } private static void setRestartOnDeployAndAddRestartAction(List<ConfigChangeAction> actions, ApplicationContainerCluster cluster, String message) { + log.log(INFO, message); cluster.onnxModelCostCalculator().setRestartOnDeploy(); actions.add(new VespaRestartAction(cluster.id(), message)); } - private static boolean enoughMemoryToAvoidRestart(ApplicationContainerCluster cluster) { - // Node memory is known so convert available memory percentage to node memory percentage + private static 
boolean enoughMemoryToAvoidRestart(ApplicationContainerCluster clusterInCurrentModel, + ApplicationContainerCluster cluster, + DeployLogger deployLogger) { + double currentModelCostInGb = onnxModelCostInGb(clusterInCurrentModel); + double nextModelCostInGb = onnxModelCostInGb(cluster); + double totalMemory = cluster.getContainers().get(0).getHostResource().realResources().memoryGb(); - double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb); - double costInGb = (double) cluster.onnxModelCostCalculator().aggregatedModelCostInBytes() / 1024 / 1024 / 1024; - return ( 2 * costInGb < availableMemory); + double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb - currentModelCostInGb - nextModelCostInGb); /* during a reload both the current and the next models are held in memory */ + if (availableMemory <= 0.0) + return false; + + var availableMemoryPercentage = cluster.availableMemoryPercentage(); + int memoryPercentage = (int) (availableMemory / totalMemory * availableMemoryPercentage); + + if (memoryPercentage < percentLimit || availableMemory < gbLimit) { + deployLogger.log(INFO, "Validating %s, not enough memory (%s) to avoid restart (models require %s), consider a flavor with more memory to avoid this" + .formatted(cluster, availableMemory, currentModelCostInGb + nextModelCostInGb)); + return false; + } + + log.log(FINE, "Validating " + cluster + ", enough memory (%s) to avoid restart (models require %s)" + .formatted(availableMemory, currentModelCostInGb + nextModelCostInGb)); + return true; + } + + private static double onnxModelCostInGb(ApplicationContainerCluster clusterInCurrentModel) { + return (double) clusterInCurrentModel.onnxModelCostCalculator().aggregatedModelCostInBytes() / 1024 / 1024 / 1024; } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java index 20b5c687257..8c4adfb96cb 100644 --- 
a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java @@ -210,9 +210,7 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat if (memoryPercentage != null) return Optional.of(JvmMemoryPercentage.of(memoryPercentage)); if (isHostedVespa()) { - int availableMemoryPercentage = getHostClusterId().isPresent() ? - heapSizePercentageOfTotalAvailableMemoryWhenCombinedCluster : - heapSizePercentageOfAvailableMemory; + int availableMemoryPercentage = availableMemoryPercentage(); if (getContainers().isEmpty()) return Optional.of(JvmMemoryPercentage.of(availableMemoryPercentage)); // Node memory is not known // Node memory is known so convert available memory percentage to node memory percentage @@ -229,6 +227,12 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat return Optional.empty(); } + public int availableMemoryPercentage() { + return getHostClusterId().isPresent() ? + heapSizePercentageOfTotalAvailableMemoryWhenCombinedCluster : + heapSizePercentageOfAvailableMemory; + } + /** Create list of endpoints, these will be consumed later by LbServicesProducer */ private void createEndpoints(DeployState deployState) { if (!configureEndpoints(deployState)) return; |