diff options
author | Bjørn Christian Seime <bjorncs@yahooinc.com> | 2023-09-22 15:15:59 +0200 |
---|---|---|
committer | Bjørn Christian Seime <bjorncs@yahooinc.com> | 2023-09-22 15:31:29 +0200 |
commit | 5df57a04fff32458dd9aa5642d4b7f5f7919f614 (patch) | |
tree | 07bd3f5f5aeddfb5070d614f0cbd03471146447b /config-model | |
parent | 0d8c6fc3b5b9735918e20e0518ad680dc6cc82b4 (diff) |
Aggregate Onnx model cost for JVM heap size calculation
Diffstat (limited to 'config-model')
8 files changed, 51 insertions, 13 deletions
diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java b/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java index 4df7a76031a..5ab258ecce8 100644 --- a/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java +++ b/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java @@ -18,6 +18,7 @@ import com.yahoo.config.model.api.EndpointCertificateSecrets; import com.yahoo.config.model.api.HostProvisioner; import com.yahoo.config.model.api.Model; import com.yahoo.config.model.api.ModelContext; +import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.config.model.api.Provisioned; import com.yahoo.config.model.api.Reindexing; import com.yahoo.config.model.api.ValidationParameters; @@ -90,6 +91,7 @@ public class DeployState implements ConfigDefinitionStore { private final Provisioned provisioned; private final Reindexing reindexing; private final ExecutorService executor; + private final OnnxModelCost onnxModelCost; public static DeployState createTestState() { return new Builder().build(); @@ -124,7 +126,8 @@ public class DeployState implements ConfigDefinitionStore { boolean accessLoggingEnabledByDefault, Optional<DockerImage> wantedDockerImageRepo, Reindexing reindexing, - Optional<ValidationOverrides> validationOverrides) { + Optional<ValidationOverrides> validationOverrides, + OnnxModelCost onnxModelCost) { this.logger = deployLogger; this.fileRegistry = fileRegistry; this.executor = executor; @@ -152,6 +155,7 @@ public class DeployState implements ConfigDefinitionStore { this.now = now; this.wantedDockerImageRepo = wantedDockerImageRepo; this.reindexing = reindexing; + this.onnxModelCost = onnxModelCost; } public static HostProvisioner getDefaultModelHostProvisioner(ApplicationPackage applicationPackage) { @@ -305,6 +309,8 @@ public class DeployState implements ConfigDefinitionStore { public Optional<Reindexing> reindexing() { return Optional.ofNullable(reindexing); } + public OnnxModelCost onnxModelCost() { return onnxModelCost; } + public boolean isHostedTenantApplication(ApplicationType type) { boolean isTesterApplication = getProperties().applicationId().instance().isTester(); return isHosted() && type == ApplicationType.DEFAULT && !isTesterApplication; @@ -333,6 +339,7 @@ public class DeployState implements ConfigDefinitionStore { private QueryProfiles queryProfiles = null; private Reindexing reindexing = null; private Optional<ValidationOverrides> validationOverrides = Optional.empty(); + private OnnxModelCost onnxModelCost = OnnxModelCost.disabled(); public Builder() {} @@ -450,6 +457,8 @@ public class DeployState implements ConfigDefinitionStore { return this; } + public Builder onnxModelCost(OnnxModelCost instance) { this.onnxModelCost = instance; return this; } + public DeployState build() { return build(new ValidationParameters()); } @@ -482,7 +491,8 @@ public class DeployState implements ConfigDefinitionStore { accessLoggingEnabledByDefault, wantedDockerImageRepo, reindexing, - validationOverrides); + validationOverrides, + onnxModelCost); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java b/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java index 28ff8dff620..727a18aee2c 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java @@ -21,6 +21,7 @@ import com.yahoo.config.model.api.Model; import com.yahoo.config.model.api.ModelContext; import com.yahoo.config.model.api.ModelCreateResult; import com.yahoo.config.model.api.ModelFactory; +import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.config.model.api.ValidationParameters; import com.yahoo.config.model.application.provider.ApplicationPackageXmlFilesValidator; import com.yahoo.config.model.builder.xml.ConfigModelBuilder; @@ -197,7 +198,8 @@ public class VespaModelFactory implements ModelFactory { .zone(zone) .now(clock.instant()) .wantedNodeVespaVersion(modelContext.wantedNodeVespaVersion()) - .wantedDockerImageRepo(modelContext.wantedDockerImageRepo()); + .wantedDockerImageRepo(modelContext.wantedDockerImageRepo()) + .onnxModelCost(modelContext.properties().hostedVespa() ? new DefaultOnnxModelCost() : OnnxModelCost.disabled()); modelContext.previousModel().ifPresent(builder::previousModel); modelContext.reindexing().ifPresent(builder::reindexing); return builder.build(validationParameters); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java index 7501f6162c7..ed89b810301 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java @@ -7,11 +7,12 @@ import com.yahoo.config.model.producer.AnyConfigProducer; import com.yahoo.config.model.producer.TreeConfigProducer; import com.yahoo.osgi.provider.model.ComponentModel; import com.yahoo.text.XML; -import com.yahoo.vespa.model.container.component.HuggingFaceEmbedder; -import com.yahoo.vespa.model.container.component.HuggingFaceTokenizer; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; import com.yahoo.vespa.model.container.component.BertEmbedder; import com.yahoo.vespa.model.container.component.ColBertEmbedder; import com.yahoo.vespa.model.container.component.Component; +import com.yahoo.vespa.model.container.component.HuggingFaceEmbedder; +import com.yahoo.vespa.model.container.component.HuggingFaceTokenizer; import com.yahoo.vespa.model.container.xml.BundleInstantiationSpecificationBuilder; import org.w3c.dom.Element; @@ -35,19 +36,20 @@ public class DomComponentBuilder extends VespaDomBuilder.DomConfigProducerBuilde @Override protected Component<? super Component<?, ?>, ?> doBuild(DeployState deployState, TreeConfigProducer<AnyConfigProducer> ancestor, Element spec) { - var component = buildComponent(spec, deployState); + var component = buildComponent(spec, deployState, ancestor); addChildren(deployState, ancestor, spec, component); return component; } - private Component<? super Component<?, ?>, ?> buildComponent(Element spec, DeployState state) { + private Component<? super Component<?, ?>, ?> buildComponent( + Element spec, DeployState state, TreeConfigProducer<AnyConfigProducer> ancestor) { if (spec.hasAttribute("type")) { var type = spec.getAttribute("type"); return switch (type) { - case "hugging-face-embedder" -> new HuggingFaceEmbedder(spec, state); + case "hugging-face-embedder" -> new HuggingFaceEmbedder((ApplicationContainerCluster)ancestor, spec, state); case "hugging-face-tokenizer" -> new HuggingFaceTokenizer(spec, state); - case "bert-embedder" -> new BertEmbedder(spec, state); case "colbert-embedder" -> new ColBertEmbedder(spec, state); + case "bert-embedder" -> new BertEmbedder((ApplicationContainerCluster)ancestor, spec, state); default -> throw new IllegalArgumentException("Unknown component type '%s'".formatted(type)); }; } else { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java index b9021912244..49ad67f633f 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java @@ -8,10 +8,12 @@ import com.yahoo.component.ComponentId; import com.yahoo.component.ComponentSpecification; import com.yahoo.config.FileReference; import com.yahoo.config.application.api.ComponentInfo; +import com.yahoo.config.application.api.DeployLogger; import com.yahoo.config.model.api.ApplicationClusterEndpoint; import com.yahoo.config.model.api.ApplicationClusterInfo; import com.yahoo.config.model.api.ContainerEndpoint; import com.yahoo.config.model.api.Model; +import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.config.model.producer.TreeConfigProducer; import com.yahoo.config.provision.AllocatedHosts; @@ -47,6 +49,7 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.logging.Level; import java.util.stream.Collectors; import static com.yahoo.vespa.model.container.docproc.DocprocChains.DOCUMENT_TYPE_MANAGER_CLASS; @@ -82,6 +85,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat private final Set<FileReference> applicationBundles = new LinkedHashSet<>(); private final Set<String> previousHosts; + private final OnnxModelCost.Calculator onnxModelCost; + private final DeployLogger logger; private ContainerModelEvaluation modelEvaluation; @@ -125,6 +130,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat heapSizePercentageOfAvailableMemory = deployState.featureFlags().heapSizePercentage() > 0 ? Math.min(99, deployState.featureFlags().heapSizePercentage()) : defaultHeapSizePercentageOfAvailableMemory; + onnxModelCost = deployState.onnxModelCost().newCalculator(deployState.getDeployLogger()); + logger = deployState.getDeployLogger(); } @Override @@ -193,8 +200,12 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat // Node memory is known so convert available memory percentage to node memory percentage double totalMemory = getContainers().get(0).getHostResource().realResources().memoryGb(); - double availableMemory = totalMemory - Host.memoryOverheadGb; - return Optional.of((int) (availableMemory / totalMemory * availableMemoryPercentage)); + double jvmHeapDeductionGb = onnxModelCost.aggregatedModelCostInBytes() / (1024D * 1024 * 1024); + double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb - jvmHeapDeductionGb); + int memoryPercentage = (int) (availableMemory / totalMemory * availableMemoryPercentage); + logger.log(Level.FINE, () -> "memoryPercentage=%d, availableMemory=%f, totalMemory=%f, availableMemoryPercentage=%d, jvmHeapDeductionGb=%f" + .formatted(memoryPercentage, availableMemory, totalMemory, availableMemoryPercentage, jvmHeapDeductionGb)); + return Optional.of(memoryPercentage); } return Optional.empty(); } @@ -373,6 +384,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat @Override public String name() { return getName(); } + public OnnxModelCost.Calculator onnxModelCost() { return onnxModelCost; } + public static class MbusParams { // the amount of the maxpendingbytes to process concurrently, typically 0.2 (20%) final Double maxConcurrentFactor; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java index 205848e1b67..76bb1a9e02a 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java @@ -5,6 +5,7 @@ package com.yahoo.vespa.model.container.component; import com.yahoo.config.ModelReference; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.embedding.BertBaseEmbedderConfig; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; import com.yahoo.vespa.model.container.xml.ModelIdResolver; import org.w3c.dom.Element; @@ -33,7 +34,7 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf private final Integer onnxGpuDevice; - public BertEmbedder(Element xml, DeployState state) { + public BertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) { super("ai.vespa.embedding.BertBaseEmbedder", INTEGRATION_BUNDLE_NAME, xml); model = ModelIdResolver.resolveToModelReference(getChild(xml, "transformer-model"), state); vocab = ModelIdResolver.resolveToModelReference(getChild(xml, "tokenizer-vocab"), state); @@ -49,6 +50,7 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null); onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); + cluster.onnxModelCost().registerModel(model); } @Override diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java index f4017339699..41b80bf1cb2 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java @@ -5,6 +5,7 @@ package com.yahoo.vespa.model.container.component; import com.yahoo.config.ModelReference; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; import com.yahoo.vespa.model.container.xml.ModelIdResolver; import org.w3c.dom.Element; @@ -33,7 +34,7 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm private final Integer onnxGpuDevice; private final String poolingStrategy; - public HuggingFaceEmbedder(Element xml, DeployState state) { + public HuggingFaceEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) { super("ai.vespa.embedding.huggingface.HuggingFaceEmbedder", INTEGRATION_BUNDLE_NAME, xml); var transformerModelElem = getOptionalChild(xml, "transformer-model").orElseThrow(); model = ModelIdResolver.resolveToModelReference(transformerModelElem, state); @@ -51,6 +52,7 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); poolingStrategy = getChildValue(xml, "pooling-strategy").orElse(null); + cluster.onnxModelCost().registerModel(model); } private static ModelReference resolveDefaultVocab(Element model, DeployState state) { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java index f0296d49472..3261d454b4f 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.model.container.search; +import com.yahoo.config.application.api.ApplicationPackage; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.container.QrSearchersConfig; import com.yahoo.prelude.semantics.SemanticRulesConfig; @@ -56,12 +57,14 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains> private QueryProfiles queryProfiles; private SemanticRules semanticRules; private PageTemplates pageTemplates; + private ApplicationPackage app; public ContainerSearch(DeployState deployState, ApplicationContainerCluster cluster, SearchChains chains) { super(chains); this.globalPhase = deployState.featureFlags().enableGlobalPhase(); this.useReconfigurableDispatcher = deployState.featureFlags().useReconfigurableDispatcher(); this.schemasWithGlobalPhase = getSchemasWithGlobalPhase(deployState); + this.app = deployState.getApplicationPackage(); this.owningCluster = cluster; owningCluster.addComponent(Component.fromClassAndBundle(CompiledQueryProfileRegistry.class, SEARCH_AND_DOCPROC_BUNDLE)); @@ -96,6 +99,9 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains> if ( ! schemasWithGlobalPhase.contains(documentDb.getSchemaName())) continue; var factory = new RankProfilesEvaluatorComponent(documentDb); if ( ! owningCluster.getComponentsMap().containsKey(factory.getComponentId())) { + var onnxModels = documentDb.getDerivedConfiguration().getRankProfileList().getOnnxModels(); + onnxModels.asMap().forEach( + (__, model) -> owningCluster.onnxModelCost().registerModel(app.getFile(model.getFilePath()))); owningCluster.addComponent(factory); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index 35b0213bf59..d9c4dea478c 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -778,6 +778,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { !container.getHostResource().realResources().gpuResources().isZero()); onnxModel.setGpuDevice(gpuDevice, hasGpu); } + cluster.onnxModelCost().registerModel(context.getApplicationPackage().getFile(onnxModel.getFilePath())); } cluster.setModelEvaluation(new ContainerModelEvaluation(cluster, profiles, models)); |