diff options
Diffstat (limited to 'config-model')
2 files changed, 107 insertions, 32 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java index 373bfe24984..b809f36a436 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidator.java @@ -6,6 +6,7 @@ import com.yahoo.config.model.api.ConfigChangeAction; import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.vespa.model.Host; import com.yahoo.vespa.model.application.validation.Validation.ChangeContext; +import com.yahoo.vespa.model.container.ApplicationContainer; import com.yahoo.vespa.model.container.ApplicationContainerCluster; import java.util.ArrayList; @@ -34,7 +35,8 @@ public class RestartOnDeployForOnnxModelChangesValidator implements ChangeValida @Override public void validate(ChangeContext context) { - if ( ! context.deployState().featureFlags().restartOnDeployWhenOnnxModelChanges()) return; + if ( ! context.deployState().featureFlags().restartOnDeployWhenOnnxModelChanges() + || ! context.deployState().isHosted()) return; // Compare onnx models used by each cluster and set restart on deploy for cluster if estimated cost, // model hash or model options have changed @@ -104,10 +106,13 @@ public class RestartOnDeployForOnnxModelChangesValidator implements ChangeValida private static boolean enoughMemoryToAvoidRestart(ApplicationContainerCluster clusterInCurrentModel, ApplicationContainerCluster cluster, DeployLogger deployLogger) { + List<ApplicationContainer> containers = cluster.getContainers(); + if (containers.isEmpty()) return true; + double currentModelCostInGb = onnxModelCostInGb(clusterInCurrentModel); double nextModelCostInGb = onnxModelCostInGb(cluster); - double totalMemory = cluster.getContainers().get(0).getHostResource().realResources().memoryGb(); + double totalMemory = containers.stream().mapToDouble(c -> c.getHostResource().realResources().memoryGb()).min().orElseThrow(); double memoryUsedByModels = currentModelCostInGb + nextModelCostInGb; double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb - memoryUsedByModels); diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidatorTest.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidatorTest.java index 67e8a4d512e..92c7fffd72f 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidatorTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/RestartOnDeployForOnnxModelChangesValidatorTest.java @@ -2,11 +2,16 @@ package com.yahoo.vespa.model.application.validation.change; import com.yahoo.config.application.api.ApplicationFile; +import com.yahoo.config.model.api.ApplicationClusterEndpoint; import com.yahoo.config.model.api.ConfigChangeAction; +import com.yahoo.config.model.api.ContainerEndpoint; import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.config.model.api.OnnxModelOptions; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.config.model.deploy.TestProperties; +import com.yahoo.config.model.provision.InMemoryProvisioner; +import com.yahoo.config.model.test.MockApplicationPackage; +import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.model.VespaModel; import com.yahoo.vespa.model.application.validation.ValidationTester; import com.yahoo.vespa.model.test.utils.VespaModelCreatorWithMockPkg; @@ -16,6 +21,7 @@ import java.net.URI; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -25,6 +31,12 @@ import static org.junit.jupiter.api.Assertions.assertTrue; */ public class RestartOnDeployForOnnxModelChangesValidatorTest { + // Must be so large that changing model set or options requires restart (due to using more memory than available), + // but not so large that deployment will not work at all with one model + private static final long defaultCost = 723456789; + private static final long defaultHash = 0; + + @Test void validate_no_changes() { VespaModel current = createModel(); @@ -35,48 +47,64 @@ public class RestartOnDeployForOnnxModelChangesValidatorTest { @Test void validate_changed_estimated_cost() { - VespaModel current = createModel(); - VespaModel next = createModel(onnxModelCost(123, 0)); + VespaModel current = createModel(onnxModelCost(70000000, defaultHash)); + VespaModel next = createModel(onnxModelCost(723456789, defaultHash)); List<ConfigChangeAction> result = validateModel(current, next); assertEquals(1, result.size()); assertTrue(result.get(0).validationId().isEmpty()); - assertEquals("Onnx model 'https://my/url/e5-base-v2.onnx' has changed (estimated cost), need to restart services in container cluster 'cluster1'", result.get(0).getMessage()); - assertStartsWith("Onnx model 'https://my/url/e5-base-v2.onnx' has changed (estimated cost)", result); + assertEquals("Onnx model 'https://data.vespa.oath.cloud/onnx_models/e5-base-v2/model.onnx' has changed (estimated cost), need to restart services in container cluster 'cluster1'", result.get(0).getMessage()); + } + + @Test + void validate_changed_estimated_cost_non_hosted() { + boolean hosted = false; + VespaModel current = createModel(onnxModelCost(70000000, defaultHash), hosted); + VespaModel next = createModel(onnxModelCost(723456789, defaultHash), hosted); + List<ConfigChangeAction> result = validateModel(current, next, hosted); + assertEquals(0, result.size()); } @Test void validate_changed_hash() { VespaModel current = createModel(); - VespaModel next = createModel(onnxModelCost(0, 123)); + VespaModel next = createModel(onnxModelCost(defaultCost, 123)); List<ConfigChangeAction> result = validateModel(current, next); assertEquals(1, result.size()); - assertStartsWith("Onnx model 'https://my/url/e5-base-v2.onnx' has changed (model hash)", result); + assertStartsWith("Onnx model 'https://data.vespa.oath.cloud/onnx_models/e5-base-v2/model.onnx' has changed (model hash)", result); } @Test void validate_changed_option() { VespaModel current = createModel(); - VespaModel next = createModel(onnxModelCost(0, 0), "sequential"); + VespaModel next = createModel(onnxModelCost(), true, "sequential"); List<ConfigChangeAction> result = validateModel(current, next); assertEquals(1, result.size()); - assertStartsWith("Onnx model 'https://my/url/e5-base-v2.onnx' has changed (model option(s))", result); + assertStartsWith("Onnx model 'https://data.vespa.oath.cloud/onnx_models/e5-base-v2/model.onnx' has changed (model option(s))", result); } @Test void validate_changed_model_set() { VespaModel current = createModel(); - VespaModel next = createModel(onnxModelCost(0, 0), "parallel", "e5-small-v2"); + VespaModel next = createModel(onnxModelCost(), true, "parallel", "e5-small-v2"); List<ConfigChangeAction> result = validateModel(current, next); assertEquals(1, result.size()); - assertStartsWith("Onnx model set has changed from [https://my/url/e5-base-v2.onnx] to [https://my/url/e5-small-v2.onnx", result); + assertStartsWith("Onnx model set has changed from [https://data.vespa.oath.cloud/onnx_models/e5-base-v2/model.onnx] " + + "to [https://data.vespa.oath.cloud/onnx_models/e5-small-v2/model.onnx", + result); } private static List<ConfigChangeAction> validateModel(VespaModel current, VespaModel next) { - return ValidationTester.validateChanges(new RestartOnDeployForOnnxModelChangesValidator(), next, deployStateBuilder().previousModel(current).build()); + return validateModel(current, next, true); + } + + private static List<ConfigChangeAction> validateModel(VespaModel current, VespaModel next, boolean hosted) { + return ValidationTester.validateChanges(new RestartOnDeployForOnnxModelChangesValidator(), + next, + deployStateBuilder(hosted).previousModel(current).build()); } private static OnnxModelCost onnxModelCost() { - return onnxModelCost(0, 0); + return onnxModelCost(defaultCost, defaultHash); } private static OnnxModelCost onnxModelCost(long estimatedCost, long hash) { @@ -111,25 +139,65 @@ public class RestartOnDeployForOnnxModelChangesValidatorTest { } private static VespaModel createModel() { - return createModel(onnxModelCost()); + return createModel(onnxModelCost(), true); } private static VespaModel createModel(OnnxModelCost onnxModelCost) { - return createModel(onnxModelCost, "parallel"); + return createModel(onnxModelCost, true, "parallel"); + } + + private static VespaModel createModel(OnnxModelCost onnxModelCost, boolean hosted) { + return createModel(onnxModelCost, hosted, "parallel"); + } + + private static VespaModel createModel(OnnxModelCost onnxModelCost, boolean hosted, String executionMode) { + return createModel(onnxModelCost, hosted, executionMode, "e5-base-v2"); } - private static VespaModel createModel(OnnxModelCost onnxModelCost, String executionMode) { - return createModel(onnxModelCost, executionMode, "e5-base-v2"); + private static VespaModel createModel(OnnxModelCost onnxModelCost, boolean hosted, String executionMode, String modelId) { + DeployState.Builder builder = deployStateBuilder(hosted) + .onnxModelCost(onnxModelCost); + return hosted ? hostedModel(builder, executionMode, modelId) : nonHostedModel(builder, executionMode, modelId); } - private static VespaModel createModel(OnnxModelCost onnxModelCost, String executionMode, String modelId) { - DeployState.Builder builder = deployStateBuilder(); - builder.onnxModelCost(onnxModelCost); - return createModel(builder, executionMode, modelId); + private static VespaModel hostedModel(DeployState.Builder builder, String executionMode, String modelId) { + var servicesXml = """ + <services version='1.0'> + <container id='cluster1' version='1.0'> + <component id="hf-embedder" type="hugging-face-embedder"> + <transformer-model model-id="%s" url="https://my/url/%s.onnx"/> + <tokenizer-model model-id="e5-base-v2-vocab" path="app/tokenizer.json"/> + <onnx-execution-mode>%s</onnx-execution-mode> + </component> + <nodes count='1'> + <resources vcpu='1' memory='2Gb' disk='25Gb'/> + </nodes> + </container> + </services> + """.formatted(modelId, modelId, executionMode); + + var deploymentXml = """ + <deployment version='1.0' empty-host-ttl='1d'> + <instance id='default'> + <prod> + <region>us-east-1</region> + <region empty-host-ttl='0m'>us-north-1</region> + <region>us-west-1</region> + </prod> + </instance> + </deployment> + """; + + var applicationPackage = new MockApplicationPackage.Builder() + .withServices(servicesXml) + .withDeploymentSpec(deploymentXml) + .build(); + + return new VespaModelCreatorWithMockPkg(applicationPackage).create(builder); } - private static VespaModel createModel(DeployState.Builder builder, String executionMode, String modelId) { - String xml = """ + private static VespaModel nonHostedModel(DeployState.Builder builder, String executionMode, String modelId) { + var xml = """ <services version='1.0'> <container id='cluster1' version='1.0'> <http> @@ -141,20 +209,22 @@ public class RestartOnDeployForOnnxModelChangesValidatorTest { <onnx-execution-mode>%s</onnx-execution-mode> </component> </container> - <container id='cluster2' version='1.0'> - <http> - <server id='server1' port='8081'/> - </http> - </container> </services> """.formatted(modelId, modelId, executionMode); return new VespaModelCreatorWithMockPkg(null, xml).create(builder); } - private static DeployState.Builder deployStateBuilder() { - return new DeployState.Builder() - .properties((new TestProperties()).setRestartOnDeployForOnnxModelChanges(true)); + + private static DeployState.Builder deployStateBuilder(boolean hosted) { + var builder = new DeployState.Builder() + .properties((new TestProperties()) + .setRestartOnDeployForOnnxModelChanges(true) + .setHostedVespa(hosted)); + if (hosted) + builder.endpoints(Set.of(new ContainerEndpoint("cluster1", ApplicationClusterEndpoint.Scope.zone, List.of("tc.example.com")))) + .modelHostProvisioner(new InMemoryProvisioner(5, new NodeResources(1, 2, 25, 0.3), true)); + return builder; } private static void assertStartsWith(String expected, List<ConfigChangeAction> result) { |