Add validator validating heap size calculation

author: Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-09-22 15:16:37 +0200
committer: Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-09-22 15:31:29 +0200
commit: d83fb9612cfef846273739c50fb6fdbd9c95de3a (patch)
tree: 8c4d31c394b514663e323047ed60a7241af5a608
parent: eff9fc1b006dc526caa8499473337548305d3bf4 (diff)
3 files changed, 178 insertions, 0 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java
new file mode 100644
index 00000000000..2c5e0db14b9
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java
@@ -0,0 +1,70 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.vespa.model.application.validation;
+
+import com.yahoo.config.model.deploy.DeployState;
+import com.yahoo.vespa.model.VespaModel;
+
+import java.util.Locale;
+import java.util.logging.Level;
+
+/**
+ * Validates the JVM memory allocation of container clusters with a non-zero estimated ONNX model cost.
+ *
+ * <p>Runs only for hosted tenant applications with the dynamic heap size feature flag set. A cluster is
+ * rejected when its memory percentage, or the value of {@code availableMemoryGb()}, is below a fixed minimum.</p>
+ *
+ * @author bjorncs
+ */
+public class JvmHeapSizeValidator extends Validator {
+
+    /** Lowest accepted memory percentage for a cluster with ONNX models. */
+    private static final int MIN_HEAP_PERCENTAGE = 10;
+
+    /** Lowest accepted available memory, in GB, for a cluster with ONNX models. */
+    private static final double MIN_AVAILABLE_MEMORY_GB = 0.4;
+
+    private static final double BYTES_PER_GB = 1024D * 1024 * 1024;
+
+    /** Tail shared by both failure messages; its only format argument is the estimated model cost in GB. */
+    private static final String ADVICE =
+            "Estimated cost of ONNX models is %.2fGB. Either use a node flavor with more memory or use less expensive models. " +
+            "You may override this validation by specifying 'allocated-memory' (https://docs.vespa.ai/en/performance/container-tuning.html#jvm-heap-size).";
+
+    /**
+     * Checks every container cluster of the model; clusters without a memory percentage, without a known
+     * available memory, or without any ONNX model cost are skipped.
+     *
+     * @throws IllegalArgumentException if a cluster is below one of the limits
+     */
+    @Override
+    public void validate(VespaModel model, DeployState ds) {
+        if (!ds.featureFlags().dynamicHeapSize()) return;
+        if (!ds.isHostedTenantApplication(model.getAdmin().getApplicationType())) return;
+
+        model.getContainerClusters().forEach((clusterId, appCluster) -> {
+            var mp = appCluster.getMemoryPercentage().orElse(null);
+            if (mp == null) return;
+            if (mp.availableMemoryGb().isEmpty()) {
+                ds.getDeployLogger().log(Level.FINE, "Host resources unknown or percentage overridden with 'allocated-memory'");
+                return;
+            }
+            long jvmModelCost = appCluster.onnxModelCost().aggregatedModelCostInBytes();
+            if (jvmModelCost <= 0) return;
+            double modelCostGb = jvmModelCost / BYTES_PER_GB;
+
+            // Locale.ROOT keeps the decimal separator a '.' regardless of the default locale of the JVM.
+            if (mp.percentage() < MIN_HEAP_PERCENTAGE) {
+                throw new IllegalArgumentException(String.format(Locale.ROOT,
+                        "Allocated percentage of memory of JVM in cluster '%s' is too low (%d%% < %d%%). " + ADVICE,
+                        clusterId, mp.percentage(), MIN_HEAP_PERCENTAGE, modelCostGb));
+            }
+            double availableMemoryGb = mp.availableMemoryGb().getAsDouble();
+            if (availableMemoryGb < MIN_AVAILABLE_MEMORY_GB) {
+                throw new IllegalArgumentException(String.format(Locale.ROOT,
+                        "Allocated memory to JVM in cluster '%s' is too low (%.2fGB < %.2fGB). " + ADVICE,
+                        clusterId, availableMemoryGb, MIN_AVAILABLE_MEMORY_GB, modelCostGb));
+            }
+        });
+    }
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java
index 53a553ee624..b9ecf7c2d22 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java
@@ -87,6 +87,7 @@ public class Validation {
         new AccessControlFilterExcludeValidator().validate(model, deployState);
         new CloudUserFilterValidator().validate(model, deployState);
         new CloudHttpConnectorValidator().validate(model, deployState);
+        new JvmHeapSizeValidator().validate(model, deployState);
 
         additionalValidators.forEach(v -> v.validate(model, deployState));
 
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java
new file mode 100644
index 00000000000..086f2fe778f
--- /dev/null
+++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java
@@ -0,0 +1,133 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.vespa.model.application.validation;
+
+import com.yahoo.config.ModelReference;
+import com.yahoo.config.application.api.ApplicationFile;
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.config.model.NullConfigModelRegistry;
+import com.yahoo.config.model.api.OnnxModelCost;
+import com.yahoo.config.model.deploy.DeployState;
+import com.yahoo.config.model.deploy.TestProperties;
+import com.yahoo.config.model.provision.InMemoryProvisioner;
+import com.yahoo.config.model.test.MockApplicationPackage;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.vespa.model.VespaModel;
+import org.junit.jupiter.api.Test;
+import org.xml.sax.SAXException;
+
+import java.io.IOException;
+import java.util.Locale;
+import java.util.concurrent.atomic.AtomicLong;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Tests {@link JvmHeapSizeValidator} against hosted deploy states with a dummy ONNX model cost.
+ *
+ * @author bjorncs
+ */
+class JvmHeapSizeValidatorTest {
+
+    @Test
+    void fails_on_too_low_jvm_percentage() throws IOException, SAXException {
+        var deployState = createDeployState(8, 7L * 1024 * 1024 * 1024);
+        var model = new VespaModel(new NullConfigModelRegistry(), deployState);
+        var e = assertThrows(IllegalArgumentException.class, () -> new JvmHeapSizeValidator().validate(model, deployState));
+        String expectedMessage = "Allocated percentage of memory of JVM in cluster 'container' is too low (3% < 10%). Estimated cost of ONNX models is 7.00GB";
+        assertTrue(e.getMessage().contains(expectedMessage), e.getMessage());
+    }
+
+    @Test
+    void fails_on_too_low_heap_size() throws IOException, SAXException {
+        var deployState = createDeployState(2, 1024L * 1024 * 1024);
+        var model = new VespaModel(new NullConfigModelRegistry(), deployState);
+        var e = assertThrows(IllegalArgumentException.class, () -> new JvmHeapSizeValidator().validate(model, deployState));
+        String expectedMessage = "Allocated memory to JVM in cluster 'container' is too low (0.30GB < 0.40GB). Estimated cost of ONNX models is 1.00GB.";
+        assertTrue(e.getMessage().contains(expectedMessage), e.getMessage());
+    }
+
+    @Test
+    void accepts_adequate_heap_size() throws IOException, SAXException {
+        var deployState = createDeployState(8, 1024L * 1024 * 1024);
+        var model = new VespaModel(new NullConfigModelRegistry(), deployState);
+        assertDoesNotThrow(() -> new JvmHeapSizeValidator().validate(model, deployState));
+    }
+
+    @Test
+    void accepts_services_with_explicit_jvm_size() throws IOException, SAXException {
+        String servicesXml =
+                """
+                <?xml version="1.0" encoding="utf-8" ?>
+                <services version='1.0'>
+                    <container version='1.0'>
+                        <nodes count="2">
+                            <jvm allocated-memory='5%'/>
+                            <resources vcpu="4" memory="2Gb" disk="125Gb"/>
+                        </nodes>
+                        <component id="hf-embedder" type="hugging-face-embedder">
+                            <transformer-model url="https://my/url/model.onnx"/>
+                            <tokenizer-model path="app/tokenizer.json"/>
+                        </component>
+                    </container>
+                </services>""";
+        var deployState = createDeployState(servicesXml, 2, 1024L * 1024 * 1024);
+        var model = new VespaModel(new NullConfigModelRegistry(), deployState);
+        assertDoesNotThrow(() -> new JvmHeapSizeValidator().validate(model, deployState));
+    }
+
+    /** Builds a hosted deploy state with dynamic heap size enabled, provisioning nodes with {@code nodeGb} GB memory. */
+    private static DeployState createDeployState(String servicesXml, double nodeGb, long modelCostBytes) {
+        return new DeployState.Builder()
+                .applicationPackage(
+                        new MockApplicationPackage.Builder()
+                                .withServices(servicesXml)
+                                .build())
+                .modelHostProvisioner(new InMemoryProvisioner(5, new NodeResources(4, nodeGb, 125, 0.3), true))
+                .properties(new TestProperties().setHostedVespa(true).setDynamicHeapSize(true))
+                .onnxModelCost(new ModelCostDummy(modelCostBytes))
+                .build();
+    }
+
+    /** Builds a deploy state from a default services.xml that requests {@code nodeGb} GB memory per node. */
+    private static DeployState createDeployState(double nodeGb, long modelCostBytes) {
+        // Locale.ROOT keeps the decimal separator a '.' so the memory attribute does not vary with the default locale.
+        String servicesXml = String.format(Locale.ROOT,
+                """
+                <?xml version="1.0" encoding="utf-8" ?>
+                <services version='1.0'>
+                    <container version='1.0'>
+                        <nodes count="2">
+                            <resources vcpu="4" memory="%fGb" disk="125Gb"/>
+                        </nodes>
+                        <component id="hf-embedder" type="hugging-face-embedder">
+                            <transformer-model url="https://my/url/model.onnx"/>
+                            <tokenizer-model path="app/tokenizer.json"/>
+                        </component>
+                    </container>
+                </services>""", nodeGb);
+        return createDeployState(servicesXml, nodeGb, modelCostBytes);
+    }
+
+    /** Cost calculator adding a fixed cost per registered model reference; application files add no cost. */
+    private static class ModelCostDummy implements OnnxModelCost, OnnxModelCost.Calculator {
+        final AtomicLong totalCost = new AtomicLong();
+        final long modelCost;
+
+        ModelCostDummy(long modelCost) { this.modelCost = modelCost; }
+
+        @Override public Calculator newCalculator(DeployLogger logger) { return this; }
+        @Override public long aggregatedModelCostInBytes() { return totalCost.get(); }
+        @Override public void registerModel(ApplicationFile path) {}
+
+        @Override
+        public void registerModel(ModelReference ref) {
+            assertEquals("https://my/url/model.onnx", ref.url().orElseThrow().value().toString());
+            totalCost.addAndGet(modelCost);
+        }
+    }
+
+}
+\ No newline at end of file
author	Bjørn Christian Seime <bjorncs@yahooinc.com>	2023-09-22 15:16:37 +0200
committer	Bjørn Christian Seime <bjorncs@yahooinc.com>	2023-09-22 15:31:29 +0200
commit	d83fb9612cfef846273739c50fb6fdbd9c95de3a (patch)
tree	8c4d31c394b514663e323047ed60a7241af5a608
parent	eff9fc1b006dc526caa8499473337548305d3bf4 (diff)