aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Harald Musum <musum@yahooinc.com> 2023-12-12 20:47:15 +0100
committer GitHub <noreply@github.com> 2023-12-12 20:47:15 +0100
commit 2aeb805cc79819145e5cc5df0c2ff182507e9ec0 (patch)
tree c5c071714bbdaeffa0ef3eaffeaacde4c96605ae
parent 92a214eb3addf236446cec2ea52d2d2aeefa4140 (diff)
parent 9ec9eef82b773e52ff1bbfa81b42517d5f00bc6c (diff)
Merge pull request #29618 from vespa-engine/hmusum/avoid-restarting-if-enough-memory
Avoid restarting if we have enough memory to handle Onnx model changes
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java 5
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java 42
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java 10
3 files changed, 45 insertions, 12 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java
index 60f325cbe43..e9038ff2b0f 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java
@@ -15,6 +15,9 @@ import java.util.logging.Level;
*/
public class JvmHeapSizeValidator extends Validator {
+ public static final int percentLimit = 15;
+ public static final double gbLimit = 0.6;
+
@Override
public void validate(VespaModel model, DeployState ds) {
if (!ds.featureFlags().dynamicHeapSize()) return;
@@ -29,8 +32,6 @@ public class JvmHeapSizeValidator extends Validator {
}
long jvmModelCost = appCluster.onnxModelCostCalculator().aggregatedModelCostInBytes();
if (jvmModelCost > 0) {
- int percentLimit = 15;
- double gbLimit = 0.6;
double availableMemoryGb = mp.availableMemoryGb().getAsDouble();
double modelCostGb = jvmModelCost / (1024D * 1024 * 1024);
ds.getDeployLogger().log(Level.FINE, () -> Text.format("JVM: %d%% (limit: %d%%), %.2fGB (limit: %.2fGB), ONNX: %.2fGB",
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java
index 355bce24c0f..15e9e526db2 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java
@@ -1,6 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.model.application.validation.change;
+import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.config.model.api.ConfigChangeAction;
import com.yahoo.config.model.api.OnnxModelCost;
import com.yahoo.config.model.deploy.DeployState;
@@ -15,8 +16,11 @@ import java.util.Optional;
import java.util.Set;
import java.util.logging.Logger;
+import static com.yahoo.vespa.model.application.validation.JvmHeapSizeValidator.gbLimit;
+import static com.yahoo.vespa.model.application.validation.JvmHeapSizeValidator.percentLimit;
import static java.util.logging.Level.FINE;
import static com.yahoo.config.model.api.OnnxModelCost.ModelInfo;
+import static java.util.logging.Level.INFO;
/**
* If Onnx models change in a way that requires restart of containers in
@@ -36,7 +40,6 @@ public class RestartOnDeployForOnnxModelChangesValidator implements ChangeValida
// Compare onnx models used by each cluster and set restart on deploy for cluster if estimated cost,
// model hash or model options have changed
- // TODO: Skip if container has enough memory to handle reload of onnx model (2 models in memory at the same time)
for (var cluster : nextModel.getContainerClusters().values()) {
var clusterInCurrentModel = currentModel.getContainerClusters().get(cluster.getName());
if (clusterInCurrentModel == null) continue;
@@ -44,6 +47,9 @@ public class RestartOnDeployForOnnxModelChangesValidator implements ChangeValida
var currentModels = clusterInCurrentModel.onnxModelCostCalculator().models();
var nextModels = cluster.onnxModelCostCalculator().models();
+ if (enoughMemoryToAvoidRestart(clusterInCurrentModel, cluster, deployState.getDeployLogger()))
+ continue;
+
log.log(FINE, "Validating " + cluster + ", current models=" + currentModels + ", next models=" + nextModels);
actions.addAll(validateModelChanges(cluster, currentModels, nextModels));
actions.addAll(validateSetOfModels(cluster, currentModels, nextModels));
@@ -58,7 +64,6 @@ public class RestartOnDeployForOnnxModelChangesValidator implements ChangeValida
for (var nextModelInfo : nextModels.values()) {
if (! currentModels.containsKey(nextModelInfo.modelId())) continue;
- log.log(FINE, "Checking if " + nextModelInfo + " has changed");
modelChanged(nextModelInfo, currentModels.get(nextModelInfo.modelId())).ifPresent(change -> {
String message = "Onnx model '%s' has changed (%s), need to restart services in %s"
.formatted(nextModelInfo.modelId(), change, cluster);
@@ -84,6 +89,7 @@ public class RestartOnDeployForOnnxModelChangesValidator implements ChangeValida
}
private Optional<String> modelChanged(OnnxModelCost.ModelInfo a, OnnxModelCost.ModelInfo b) {
+ log.log(FINE, "Checking if model has changed (%s) -> (%s)".formatted(a, b));
if (a.estimatedCost() != b.estimatedCost()) return Optional.of("estimated cost");
if (a.hash() != b.hash()) return Optional.of("model hash");
if (a.onnxModelOptions().isPresent() && b.onnxModelOptions().isEmpty()) return Optional.of("model option(s)");
@@ -94,16 +100,38 @@ public class RestartOnDeployForOnnxModelChangesValidator implements ChangeValida
}
private static void setRestartOnDeployAndAddRestartAction(List<ConfigChangeAction> actions, ApplicationContainerCluster cluster, String message) {
+ log.log(INFO, message);
cluster.onnxModelCostCalculator().setRestartOnDeploy();
actions.add(new VespaRestartAction(cluster.id(), message));
}
- private static boolean enoughMemoryToAvoidRestart(ApplicationContainerCluster cluster) {
- // Node memory is known so convert available memory percentage to node memory percentage
+ private static boolean enoughMemoryToAvoidRestart(ApplicationContainerCluster clusterInCurrentModel,
+ ApplicationContainerCluster cluster,
+ DeployLogger deployLogger) {
+ double currentModelCostInGb = onnxModelCostInGb(clusterInCurrentModel);
+ double nextModelCostInGb = onnxModelCostInGb(cluster);
+ // Reloading keeps the current and the next models in memory at the same time, so both costs are reserved below
double totalMemory = cluster.getContainers().get(0).getHostResource().realResources().memoryGb();
- double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb);
- double costInGb = (double) cluster.onnxModelCostCalculator().aggregatedModelCostInBytes() / 1024 / 1024 / 1024;
- return ( 2 * costInGb < availableMemory);
+ double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb - currentModelCostInGb - nextModelCostInGb);
+ if (availableMemory <= 0.0)
+ return false;
+
+ var availableMemoryPercentage = cluster.availableMemoryPercentage();
+ int memoryPercentage = (int) (availableMemory / totalMemory * availableMemoryPercentage);
+
+ if (memoryPercentage < percentLimit || availableMemory < gbLimit) {
+ deployLogger.log(INFO, "Validating %s, not enough memory (%s) to avoid restart (models require %s), consider a flavor with more memory to avoid this"
+ .formatted(cluster, availableMemory, currentModelCostInGb + nextModelCostInGb));
+ return false;
+ }
+
+ log.log(FINE, "Validating " + cluster + ", enough memory (%s) to avoid restart (models require %s)"
+ .formatted(availableMemory, currentModelCostInGb + nextModelCostInGb));
+ return true;
+ }
+
+ private static double onnxModelCostInGb(ApplicationContainerCluster cluster) {
+ return cluster.onnxModelCostCalculator().aggregatedModelCostInBytes() / (1024D * 1024 * 1024);
}
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
index 20b5c687257..8c4adfb96cb 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
@@ -210,9 +210,7 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
if (memoryPercentage != null) return Optional.of(JvmMemoryPercentage.of(memoryPercentage));
if (isHostedVespa()) {
- int availableMemoryPercentage = getHostClusterId().isPresent() ?
- heapSizePercentageOfTotalAvailableMemoryWhenCombinedCluster :
- heapSizePercentageOfAvailableMemory;
+ int availableMemoryPercentage = availableMemoryPercentage();
if (getContainers().isEmpty()) return Optional.of(JvmMemoryPercentage.of(availableMemoryPercentage)); // Node memory is not known
// Node memory is known so convert available memory percentage to node memory percentage
@@ -229,6 +227,12 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
return Optional.empty();
}
+ public int availableMemoryPercentage() {
+ if (getHostClusterId().isPresent())
+ return heapSizePercentageOfTotalAvailableMemoryWhenCombinedCluster;
+ return heapSizePercentageOfAvailableMemory;
+ }
+
/** Create list of endpoints, these will be consumed later by LbServicesProducer */
private void createEndpoints(DeployState deployState) {
if (!configureEndpoints(deployState)) return;