aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjørn Christian Seime <bjorncs@yahooinc.com>2023-09-22 15:16:37 +0200
committerBjørn Christian Seime <bjorncs@yahooinc.com>2023-09-22 15:31:29 +0200
commitd83fb9612cfef846273739c50fb6fdbd9c95de3a (patch)
tree8c4d31c394b514663e323047ed60a7241af5a608
parenteff9fc1b006dc526caa8499473337548305d3bf4 (diff)
Add validator validating heap size calculation
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java51
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java1
-rw-r--r--config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java126
3 files changed, 178 insertions, 0 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java
new file mode 100644
index 00000000000..2c5e0db14b9
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java
@@ -0,0 +1,51 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.vespa.model.application.validation;
+
+import com.yahoo.config.model.deploy.DeployState;
+import com.yahoo.vespa.model.VespaModel;
+
+import java.util.Locale;
+import java.util.logging.Level;
+
+/**
+ * Validates that the JVM of a container cluster is allocated enough memory when the cluster has
+ * ONNX models with a non-zero estimated cost.
+ *
+ * <p>A cluster is rejected when its JVM memory percentage is below 10%, or when the memory
+ * allocated to the JVM is below 0.4 GB. The validation is only active when the dynamic heap size
+ * feature flag is set and the application is a hosted tenant application.</p>
+ *
+ * @author bjorncs
+ */
+public class JvmHeapSizeValidator extends Validator {
+
+    /** Lowest accepted JVM memory percentage for a cluster with ONNX model cost. */
+    private static final int MIN_JVM_MEMORY_PERCENTAGE = 10;
+
+    /** Lowest accepted JVM memory, in GB, for a cluster with ONNX model cost. */
+    private static final double MIN_JVM_MEMORY_GB = 0.4;
+
+    private static final double BYTES_PER_GB = 1024D * 1024 * 1024;
+
+    /** Tail shared by every rejection message; its only format argument is the model cost in GB. */
+    private static final String MODEL_COST_ADVICE =
+            "Estimated cost of ONNX models is %.2fGB. Either use a node flavor with more memory or use less expensive models. " +
+            "You may override this validation by specifying 'allocated-memory' (https://docs.vespa.ai/en/performance/container-tuning.html#jvm-heap-size).";
+
+    /**
+     * Checks every container cluster of the model and fails if a cluster with ONNX model cost has
+     * too little memory allocated to its JVM.
+     *
+     * @param model the model whose container clusters are checked
+     * @param ds the deploy state supplying the feature flags and the deploy logger
+     * @throws IllegalArgumentException if a cluster with ONNX model cost has a JVM memory percentage
+     *         below 10% or less than 0.4 GB of JVM memory
+     */
+    @Override
+    public void validate(VespaModel model, DeployState ds) {
+        if (!ds.featureFlags().dynamicHeapSize()) return;
+        if (!ds.isHostedTenantApplication(model.getAdmin().getApplicationType())) return;
+
+        model.getContainerClusters().forEach((clusterId, appCluster) -> {
+            var mp = appCluster.getMemoryPercentage().orElse(null);
+            if (mp == null) return;
+            if (mp.availableMemoryGb().isEmpty()) {
+                // Nothing to compare against; skip the cluster rather than reject it.
+                ds.getDeployLogger().log(Level.FINE, "Host resources unknown or percentage overridden with 'allocated-memory'");
+                return;
+            }
+            long jvmModelCost = appCluster.onnxModelCost().aggregatedModelCostInBytes();
+            if (jvmModelCost <= 0) return; // Limits only apply to clusters with ONNX model cost
+
+            if (mp.percentage() < MIN_JVM_MEMORY_PERCENTAGE) {
+                throw new IllegalArgumentException(rejectionMessage(
+                        jvmModelCost,
+                        "Allocated percentage of memory of JVM in cluster '%s' is too low (%d%% < %d%%). ",
+                        clusterId, mp.percentage(), MIN_JVM_MEMORY_PERCENTAGE));
+            }
+            double availableMemoryGb = mp.availableMemoryGb().getAsDouble();
+            if (availableMemoryGb < MIN_JVM_MEMORY_GB) {
+                throw new IllegalArgumentException(rejectionMessage(
+                        jvmModelCost,
+                        "Allocated memory to JVM in cluster '%s' is too low (%.2fGB < %.2fGB). ",
+                        clusterId, availableMemoryGb, MIN_JVM_MEMORY_GB));
+            }
+        });
+    }
+
+    /**
+     * Builds a rejection message: the formatted problem description followed by the shared advice.
+     *
+     * <p>Formats with {@link Locale#ROOT} so that numbers are rendered identically (e.g. "0.30GB")
+     * regardless of the default locale of the JVM.</p>
+     *
+     * @param modelCostBytes estimated ONNX model cost in bytes, reported in GB in the message
+     * @param problemFormat format string describing the violated limit
+     * @param problemArgs arguments for {@code problemFormat}
+     * @return the complete message
+     */
+    private static String rejectionMessage(long modelCostBytes, String problemFormat, Object... problemArgs) {
+        return String.format(Locale.ROOT, problemFormat, problemArgs)
+                + String.format(Locale.ROOT, MODEL_COST_ADVICE, modelCostBytes / BYTES_PER_GB);
+    }
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java
index 53a553ee624..b9ecf7c2d22 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java
@@ -87,6 +87,7 @@ public class Validation {
new AccessControlFilterExcludeValidator().validate(model, deployState);
new CloudUserFilterValidator().validate(model, deployState);
new CloudHttpConnectorValidator().validate(model, deployState);
+ new JvmHeapSizeValidator().validate(model, deployState);
additionalValidators.forEach(v -> v.validate(model, deployState));
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java
new file mode 100644
index 00000000000..086f2fe778f
--- /dev/null
+++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java
@@ -0,0 +1,126 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.vespa.model.application.validation;
+
+import com.yahoo.config.ModelReference;
+import com.yahoo.config.application.api.ApplicationFile;
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.config.model.NullConfigModelRegistry;
+import com.yahoo.config.model.api.OnnxModelCost;
+import com.yahoo.config.model.deploy.DeployState;
+import com.yahoo.config.model.deploy.TestProperties;
+import com.yahoo.config.model.provision.InMemoryProvisioner;
+import com.yahoo.config.model.test.MockApplicationPackage;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.vespa.model.VespaModel;
+import org.junit.jupiter.api.Test;
+import org.xml.sax.SAXException;
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicLong;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Tests for {@link JvmHeapSizeValidator}, run against models built from in-memory application
+ * packages with a dummy ONNX model cost calculator.
+ *
+ * @author bjorncs
+ */
+class JvmHeapSizeValidatorTest {
+
+    /** 8 GB nodes and a 7 GB model: the validator must reject on the percentage limit. */
+    @Test
+    void fails_on_too_low_jvm_percentage() throws IOException, SAXException {
+        var deployState = createDeployState(8, 7L * 1024 * 1024 * 1024);
+        var model = new VespaModel(new NullConfigModelRegistry(), deployState);
+        var e = assertThrows(IllegalArgumentException.class, () -> new JvmHeapSizeValidator().validate(model, deployState));
+        String expectedMessage = "Allocated percentage of memory of JVM in cluster 'container' is too low (3% < 10%). Estimated cost of ONNX models is 7.00GB";
+        assertTrue(e.getMessage().contains(expectedMessage), e.getMessage());
+    }
+
+    /** 2 GB nodes and a 1 GB model: the validator must reject on the absolute memory limit. */
+    @Test
+    void fails_on_too_low_heap_size() throws IOException, SAXException {
+        var deployState = createDeployState(2, 1024L * 1024 * 1024);
+        var model = new VespaModel(new NullConfigModelRegistry(), deployState);
+        var e = assertThrows(IllegalArgumentException.class, () -> new JvmHeapSizeValidator().validate(model, deployState));
+        String expectedMessage = "Allocated memory to JVM in cluster 'container' is too low (0.30GB < 0.40GB). Estimated cost of ONNX models is 1.00GB.";
+        assertTrue(e.getMessage().contains(expectedMessage), e.getMessage());
+    }
+
+    /** 8 GB nodes and a 1 GB model: the validator must accept. */
+    @Test
+    void accepts_adequate_heap_size() throws IOException, SAXException {
+        var deployState = createDeployState(8, 1024L * 1024 * 1024);
+        var model = new VespaModel(new NullConfigModelRegistry(), deployState);
+        assertDoesNotThrow(() -> new JvmHeapSizeValidator().validate(model, deployState));
+    }
+
+    /** An explicit 'allocated-memory' setting must make the validator accept, even on 2 GB nodes. */
+    @Test
+    void accepts_services_with_explicit_jvm_size() throws IOException, SAXException {
+        String servicesXml =
+                """
+                <?xml version="1.0" encoding="utf-8" ?>
+                <services version='1.0'>
+                <container version='1.0'>
+                <nodes count="2">
+                <jvm allocated-memory='5%'/>
+                <resources vcpu="4" memory="2Gb" disk="125Gb"/>
+                </nodes>
+                <component id="hf-embedder" type="hugging-face-embedder">
+                <transformer-model url="https://my/url/model.onnx"/>
+                <tokenizer-model path="app/tokenizer.json"/>
+                </component>
+                </container>
+                </services>""";
+        var deployState = createDeployState(servicesXml, 2, 1024L * 1024 * 1024);
+        var model = new VespaModel(new NullConfigModelRegistry(), deployState);
+        assertDoesNotThrow(() -> new JvmHeapSizeValidator().validate(model, deployState));
+    }
+
+    /**
+     * Builds a deploy state with hosted Vespa and dynamic heap size enabled.
+     *
+     * @param servicesXml the services.xml content of the application package
+     * @param nodeGb second argument of the provisioned {@link NodeResources} (presumably memory in GB)
+     * @param modelCostBytes cost that the dummy calculator adds for each registered model reference
+     */
+    private static DeployState createDeployState(String servicesXml, double nodeGb, long modelCostBytes) {
+        return new DeployState.Builder()
+                .applicationPackage(
+                        new MockApplicationPackage.Builder()
+                                .withServices(servicesXml)
+                                .build())
+                .modelHostProvisioner(new InMemoryProvisioner(5, new NodeResources(4, nodeGb, 125, 0.3), true))
+                .properties(new TestProperties().setHostedVespa(true).setDynamicHeapSize(true))
+                .onnxModelCost(new ModelCostDummy(modelCostBytes))
+                .build();
+    }
+
+    /**
+     * Builds a deploy state for a single container cluster with a hugging-face embedder, requesting
+     * {@code nodeGb} of memory per node.
+     */
+    private static DeployState createDeployState(double nodeGb, long modelCostBytes) {
+        // Locale.ROOT keeps the decimal separator a '.' in the generated XML, whatever the default
+        // locale of the JVM running the test is.
+        String servicesXml = String.format(java.util.Locale.ROOT,
+                """
+                <?xml version="1.0" encoding="utf-8" ?>
+                <services version='1.0'>
+                <container version='1.0'>
+                <nodes count="2">
+                <resources vcpu="4" memory="%fGb" disk="125Gb"/>
+                </nodes>
+                <component id="hf-embedder" type="hugging-face-embedder">
+                <transformer-model url="https://my/url/model.onnx"/>
+                <tokenizer-model path="app/tokenizer.json"/>
+                </component>
+                </container>
+                </services>""", nodeGb);
+        return createDeployState(servicesXml, nodeGb, modelCostBytes);
+    }
+
+    /**
+     * Model cost stub acting as both the cost factory and its calculator: every registered model
+     * reference adds a fixed cost to the running total, while registered application files add nothing.
+     */
+    private static class ModelCostDummy implements OnnxModelCost, OnnxModelCost.Calculator {
+        final AtomicLong totalCost = new AtomicLong();
+        final long modelCost;
+
+        ModelCostDummy(long modelCost) { this.modelCost = modelCost; }
+
+        // The same instance is handed out as calculator, so the total accumulates across calls.
+        @Override public Calculator newCalculator(DeployLogger logger) { return this; }
+        @Override public long aggregatedModelCostInBytes() { return totalCost.get(); }
+        @Override public void registerModel(ApplicationFile path) {}
+
+        @Override
+        public void registerModel(ModelReference ref) {
+            // Only the transformer model URL from the test services.xml is expected here.
+            assertEquals("https://my/url/model.onnx", ref.url().orElseThrow().value().toString());
+            totalCost.addAndGet(modelCost);
+        }
+    }
+
+}
\ No newline at end of file