diff options
author | Bjørn Christian Seime <bjorncs@yahooinc.com> | 2023-09-22 15:16:37 +0200 |
---|---|---|
committer | Bjørn Christian Seime <bjorncs@yahooinc.com> | 2023-09-22 15:31:29 +0200 |
commit | d83fb9612cfef846273739c50fb6fdbd9c95de3a (patch) | |
tree | 8c4d31c394b514663e323047ed60a7241af5a608 | |
parent | eff9fc1b006dc526caa8499473337548305d3bf4 (diff) |
Add validator validating heap size calculation
3 files changed, 178 insertions, 0 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java new file mode 100644 index 00000000000..2c5e0db14b9 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java @@ -0,0 +1,51 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.model.application.validation; + +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.vespa.model.VespaModel; + +import java.util.logging.Level; + +/** + * Validates that the container node flavour has enough resources to run configured ONNX models. + * + * @author bjorncs + */ +public class JvmHeapSizeValidator extends Validator { + + @Override + public void validate(VespaModel model, DeployState ds) { + if (!ds.featureFlags().dynamicHeapSize()) return; + if (!ds.isHostedTenantApplication(model.getAdmin().getApplicationType())) return; + + model.getContainerClusters().forEach((clusterId, appCluster) -> { + var mp = appCluster.getMemoryPercentage().orElse(null); + if (mp == null) return; + if (mp.availableMemoryGb().isEmpty()) { + ds.getDeployLogger().log(Level.FINE, "Host resources unknown or percentage overridden with 'allocated-memory'"); + return; + } + long jvmModelCost = appCluster.onnxModelCost().aggregatedModelCostInBytes(); + if (jvmModelCost > 0) { + int percentLimit = 10; + if (mp.percentage() < percentLimit) { + throw new IllegalArgumentException( + ("Allocated percentage of memory of JVM in cluster '%s' is too low (%d%% < %d%%). " + + "Estimated cost of ONNX models is %.2fGB. Either use a node flavor with more memory or use less expensive models. " + + "You may override this validation by specifying 'allocated-memory' (https://docs.vespa.ai/en/performance/container-tuning.html#jvm-heap-size).") + .formatted(clusterId, mp.percentage(), percentLimit, jvmModelCost / (1024D * 1024 * 1024))); + } + double gbLimit = 0.4; + double availableMemoryGb = mp.availableMemoryGb().getAsDouble(); + if (availableMemoryGb < gbLimit) { + throw new IllegalArgumentException( + ("Allocated memory to JVM in cluster '%s' is too low (%.2fGB < %.2fGB). " + + "Estimated cost of ONNX models is %.2fGB. Either use a node flavor with more memory or use less expensive models. " + + "You may override this validation by specifying 'allocated-memory' (https://docs.vespa.ai/en/performance/container-tuning.html#jvm-heap-size).") + .formatted(clusterId, availableMemoryGb, gbLimit, jvmModelCost / (1024D * 1024 * 1024))); + } + } + }); + } +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java index 53a553ee624..b9ecf7c2d22 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java @@ -87,6 +87,7 @@ public class Validation { new AccessControlFilterExcludeValidator().validate(model, deployState); new CloudUserFilterValidator().validate(model, deployState); new CloudHttpConnectorValidator().validate(model, deployState); + new JvmHeapSizeValidator().validate(model, deployState); additionalValidators.forEach(v -> v.validate(model, deployState)); diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java new file mode 100644 index 00000000000..086f2fe778f --- /dev/null +++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java @@ -0,0 +1,126 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.model.application.validation; + +import com.yahoo.config.ModelReference; +import com.yahoo.config.application.api.ApplicationFile; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.model.NullConfigModelRegistry; +import com.yahoo.config.model.api.OnnxModelCost; +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.config.model.deploy.TestProperties; +import com.yahoo.config.model.provision.InMemoryProvisioner; +import com.yahoo.config.model.test.MockApplicationPackage; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.vespa.model.VespaModel; +import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; + +import java.io.IOException; +import java.util.concurrent.atomic.AtomicLong; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * @author bjorncs + */ +class JvmHeapSizeValidatorTest { + + @Test + void fails_on_too_low_jvm_percentage() throws IOException, SAXException { + var deployState = createDeployState(8, 7L * 1024 * 1024 * 1024); + var model = new VespaModel(new NullConfigModelRegistry(), deployState); + var e = assertThrows(IllegalArgumentException.class, () -> new JvmHeapSizeValidator().validate(model, deployState)); + String expectedMessage = "Allocated percentage of memory of JVM in cluster 'container' is too low (3% < 10%). Estimated cost of ONNX models is 7.00GB"; + assertTrue(e.getMessage().contains(expectedMessage), e.getMessage()); + } + + @Test + void fails_on_too_low_heap_size() throws IOException, SAXException { + var deployState = createDeployState(2, 1024L * 1024 * 1024); + var model = new VespaModel(new NullConfigModelRegistry(), deployState); + var e = assertThrows(IllegalArgumentException.class, () -> new JvmHeapSizeValidator().validate(model, deployState)); + String expectedMessage = "Allocated memory to JVM in cluster 'container' is too low (0.30GB < 0.40GB). Estimated cost of ONNX models is 1.00GB."; + assertTrue(e.getMessage().contains(expectedMessage), e.getMessage()); + } + + @Test + void accepts_adequate_heap_size() throws IOException, SAXException { + var deployState = createDeployState(8, 1024L * 1024 * 1024); + var model = new VespaModel(new NullConfigModelRegistry(), deployState); + assertDoesNotThrow(() -> new JvmHeapSizeValidator().validate(model, deployState)); + } + + @Test + void accepts_services_with_explicit_jvm_size() throws IOException, SAXException { + String servicesXml = + """ + <?xml version="1.0" encoding="utf-8" ?> + <services version='1.0'> + <container version='1.0'> + <nodes count="2"> + <jvm allocated-memory='5%'/> + <resources vcpu="4" memory="2Gb" disk="125Gb"/> + </nodes> + <component id="hf-embedder" type="hugging-face-embedder"> + <transformer-model url="https://my/url/model.onnx"/> + <tokenizer-model path="app/tokenizer.json"/> + </component> + </container> + </services>"""; + var deployState = createDeployState(servicesXml, 2, 1024L * 1024 * 1024); + var model = new VespaModel(new NullConfigModelRegistry(), deployState); + assertDoesNotThrow(() -> new JvmHeapSizeValidator().validate(model, deployState)); + } + + private static DeployState createDeployState(String servicesXml, double nodeGb, long modelCostBytes) { + return new DeployState.Builder() + .applicationPackage( + new MockApplicationPackage.Builder() + .withServices(servicesXml) + .build()) + .modelHostProvisioner(new InMemoryProvisioner(5, new NodeResources(4, nodeGb, 125, 0.3), true)) + .properties(new TestProperties().setHostedVespa(true).setDynamicHeapSize(true)) + .onnxModelCost(new ModelCostDummy(modelCostBytes)) + .build(); + } + + private static DeployState createDeployState(double nodeGb, long modelCostBytes) { + String servicesXml = + """ + <?xml version="1.0" encoding="utf-8" ?> + <services version='1.0'> + <container version='1.0'> + <nodes count="2"> + <resources vcpu="4" memory="%fGb" disk="125Gb"/> + </nodes> + <component id="hf-embedder" type="hugging-face-embedder"> + <transformer-model url="https://my/url/model.onnx"/> + <tokenizer-model path="app/tokenizer.json"/> + </component> + </container> + </services>""".formatted(nodeGb); + return createDeployState(servicesXml, nodeGb, modelCostBytes); + } + + private static class ModelCostDummy implements OnnxModelCost, OnnxModelCost.Calculator { + final AtomicLong totalCost = new AtomicLong(); + final long modelCost; + + ModelCostDummy(long modelCost) { this.modelCost = modelCost; } + + @Override public Calculator newCalculator(DeployLogger logger) { return this; } + @Override public long aggregatedModelCostInBytes() { return totalCost.get(); } + @Override public void registerModel(ApplicationFile path) {} + + @Override + public void registerModel(ModelReference ref) { + assertEquals("https://my/url/model.onnx", ref.url().orElseThrow().value().toString()); + totalCost.addAndGet(modelCost); + } + } + +}
\ No newline at end of file |