summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- config-model-api/src/main/java/com/yahoo/config/model/api/OnnxModelCost.java | 3
-rw-r--r-- config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java | 14
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java | 4
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java | 14
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java | 17
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java | 4
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java | 4
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java | 6
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java | 1
9 files changed, 52 insertions, 15 deletions
diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/OnnxModelCost.java b/config-model-api/src/main/java/com/yahoo/config/model/api/OnnxModelCost.java
index 8c7f0db3bec..422ceba8074 100644
--- a/config-model-api/src/main/java/com/yahoo/config/model/api/OnnxModelCost.java
+++ b/config-model-api/src/main/java/com/yahoo/config/model/api/OnnxModelCost.java
@@ -5,7 +5,6 @@ package com.yahoo.config.model.api;
import com.yahoo.config.ModelReference;
import com.yahoo.config.application.api.ApplicationFile;
import com.yahoo.config.application.api.DeployLogger;
-import com.yahoo.config.provision.ApplicationId;
/**
* @author bjorncs
@@ -20,7 +19,7 @@ public interface OnnxModelCost {
void registerModel(ModelReference ref);
}
- static OnnxModelCost testInstance() {
+ static OnnxModelCost disabled() {
return (__) -> new Calculator() {
@Override public long aggregatedModelCostInBytes() { return 0; }
@Override public void registerModel(ApplicationFile path) {}
diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java b/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java
index 4df7a76031a..5ab258ecce8 100644
--- a/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java
+++ b/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java
@@ -18,6 +18,7 @@ import com.yahoo.config.model.api.EndpointCertificateSecrets;
import com.yahoo.config.model.api.HostProvisioner;
import com.yahoo.config.model.api.Model;
import com.yahoo.config.model.api.ModelContext;
+import com.yahoo.config.model.api.OnnxModelCost;
import com.yahoo.config.model.api.Provisioned;
import com.yahoo.config.model.api.Reindexing;
import com.yahoo.config.model.api.ValidationParameters;
@@ -90,6 +91,7 @@ public class DeployState implements ConfigDefinitionStore {
private final Provisioned provisioned;
private final Reindexing reindexing;
private final ExecutorService executor;
+ private final OnnxModelCost onnxModelCost;
public static DeployState createTestState() {
return new Builder().build();
@@ -124,7 +126,8 @@ public class DeployState implements ConfigDefinitionStore {
boolean accessLoggingEnabledByDefault,
Optional<DockerImage> wantedDockerImageRepo,
Reindexing reindexing,
- Optional<ValidationOverrides> validationOverrides) {
+ Optional<ValidationOverrides> validationOverrides,
+ OnnxModelCost onnxModelCost) {
this.logger = deployLogger;
this.fileRegistry = fileRegistry;
this.executor = executor;
@@ -152,6 +155,7 @@ public class DeployState implements ConfigDefinitionStore {
this.now = now;
this.wantedDockerImageRepo = wantedDockerImageRepo;
this.reindexing = reindexing;
+ this.onnxModelCost = onnxModelCost;
}
public static HostProvisioner getDefaultModelHostProvisioner(ApplicationPackage applicationPackage) {
@@ -305,6 +309,8 @@ public class DeployState implements ConfigDefinitionStore {
public Optional<Reindexing> reindexing() { return Optional.ofNullable(reindexing); }
+ public OnnxModelCost onnxModelCost() { return onnxModelCost; }
+
public boolean isHostedTenantApplication(ApplicationType type) {
boolean isTesterApplication = getProperties().applicationId().instance().isTester();
return isHosted() && type == ApplicationType.DEFAULT && !isTesterApplication;
@@ -333,6 +339,7 @@ public class DeployState implements ConfigDefinitionStore {
private QueryProfiles queryProfiles = null;
private Reindexing reindexing = null;
private Optional<ValidationOverrides> validationOverrides = Optional.empty();
+ private OnnxModelCost onnxModelCost = OnnxModelCost.disabled();
public Builder() {}
@@ -450,6 +457,8 @@ public class DeployState implements ConfigDefinitionStore {
return this;
}
+ public Builder onnxModelCost(OnnxModelCost instance) { this.onnxModelCost = instance; return this; }
+
public DeployState build() {
return build(new ValidationParameters());
}
@@ -482,7 +491,8 @@ public class DeployState implements ConfigDefinitionStore {
accessLoggingEnabledByDefault,
wantedDockerImageRepo,
reindexing,
- validationOverrides);
+ validationOverrides,
+ onnxModelCost);
}
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java b/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java
index 28ff8dff620..727a18aee2c 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java
@@ -21,6 +21,7 @@ import com.yahoo.config.model.api.Model;
import com.yahoo.config.model.api.ModelContext;
import com.yahoo.config.model.api.ModelCreateResult;
import com.yahoo.config.model.api.ModelFactory;
+import com.yahoo.config.model.api.OnnxModelCost;
import com.yahoo.config.model.api.ValidationParameters;
import com.yahoo.config.model.application.provider.ApplicationPackageXmlFilesValidator;
import com.yahoo.config.model.builder.xml.ConfigModelBuilder;
@@ -197,7 +198,8 @@ public class VespaModelFactory implements ModelFactory {
.zone(zone)
.now(clock.instant())
.wantedNodeVespaVersion(modelContext.wantedNodeVespaVersion())
- .wantedDockerImageRepo(modelContext.wantedDockerImageRepo());
+ .wantedDockerImageRepo(modelContext.wantedDockerImageRepo())
+ .onnxModelCost(modelContext.properties().hostedVespa() ? new DefaultOnnxModelCost() : OnnxModelCost.disabled());
modelContext.previousModel().ifPresent(builder::previousModel);
modelContext.reindexing().ifPresent(builder::reindexing);
return builder.build(validationParameters);
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java
index 7501f6162c7..ed89b810301 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java
@@ -7,11 +7,12 @@ import com.yahoo.config.model.producer.AnyConfigProducer;
import com.yahoo.config.model.producer.TreeConfigProducer;
import com.yahoo.osgi.provider.model.ComponentModel;
import com.yahoo.text.XML;
-import com.yahoo.vespa.model.container.component.HuggingFaceEmbedder;
-import com.yahoo.vespa.model.container.component.HuggingFaceTokenizer;
+import com.yahoo.vespa.model.container.ApplicationContainerCluster;
import com.yahoo.vespa.model.container.component.BertEmbedder;
import com.yahoo.vespa.model.container.component.ColBertEmbedder;
import com.yahoo.vespa.model.container.component.Component;
+import com.yahoo.vespa.model.container.component.HuggingFaceEmbedder;
+import com.yahoo.vespa.model.container.component.HuggingFaceTokenizer;
import com.yahoo.vespa.model.container.xml.BundleInstantiationSpecificationBuilder;
import org.w3c.dom.Element;
@@ -35,19 +36,20 @@ public class DomComponentBuilder extends VespaDomBuilder.DomConfigProducerBuilde
@Override
protected Component<? super Component<?, ?>, ?> doBuild(DeployState deployState, TreeConfigProducer<AnyConfigProducer> ancestor, Element spec) {
- var component = buildComponent(spec, deployState);
+ var component = buildComponent(spec, deployState, ancestor);
addChildren(deployState, ancestor, spec, component);
return component;
}
- private Component<? super Component<?, ?>, ?> buildComponent(Element spec, DeployState state) {
+ private Component<? super Component<?, ?>, ?> buildComponent(
+ Element spec, DeployState state, TreeConfigProducer<AnyConfigProducer> ancestor) {
if (spec.hasAttribute("type")) {
var type = spec.getAttribute("type");
return switch (type) {
- case "hugging-face-embedder" -> new HuggingFaceEmbedder(spec, state);
+ case "hugging-face-embedder" -> new HuggingFaceEmbedder((ApplicationContainerCluster)ancestor, spec, state);
case "hugging-face-tokenizer" -> new HuggingFaceTokenizer(spec, state);
- case "bert-embedder" -> new BertEmbedder(spec, state);
case "colbert-embedder" -> new ColBertEmbedder(spec, state);
+ case "bert-embedder" -> new BertEmbedder((ApplicationContainerCluster)ancestor, spec, state);
default -> throw new IllegalArgumentException("Unknown component type '%s'".formatted(type));
};
} else {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
index b9021912244..49ad67f633f 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
@@ -8,10 +8,12 @@ import com.yahoo.component.ComponentId;
import com.yahoo.component.ComponentSpecification;
import com.yahoo.config.FileReference;
import com.yahoo.config.application.api.ComponentInfo;
+import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.config.model.api.ApplicationClusterEndpoint;
import com.yahoo.config.model.api.ApplicationClusterInfo;
import com.yahoo.config.model.api.ContainerEndpoint;
import com.yahoo.config.model.api.Model;
+import com.yahoo.config.model.api.OnnxModelCost;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.config.model.producer.TreeConfigProducer;
import com.yahoo.config.provision.AllocatedHosts;
@@ -47,6 +49,7 @@ import java.util.LinkedHashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
+import java.util.logging.Level;
import java.util.stream.Collectors;
import static com.yahoo.vespa.model.container.docproc.DocprocChains.DOCUMENT_TYPE_MANAGER_CLASS;
@@ -82,6 +85,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
private final Set<FileReference> applicationBundles = new LinkedHashSet<>();
private final Set<String> previousHosts;
+ private final OnnxModelCost.Calculator onnxModelCost;
+ private final DeployLogger logger;
private ContainerModelEvaluation modelEvaluation;
@@ -125,6 +130,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
heapSizePercentageOfAvailableMemory = deployState.featureFlags().heapSizePercentage() > 0
? Math.min(99, deployState.featureFlags().heapSizePercentage())
: defaultHeapSizePercentageOfAvailableMemory;
+ onnxModelCost = deployState.onnxModelCost().newCalculator(deployState.getDeployLogger());
+ logger = deployState.getDeployLogger();
}
@Override
@@ -193,8 +200,12 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
// Node memory is known so convert available memory percentage to node memory percentage
double totalMemory = getContainers().get(0).getHostResource().realResources().memoryGb();
- double availableMemory = totalMemory - Host.memoryOverheadGb;
- return Optional.of((int) (availableMemory / totalMemory * availableMemoryPercentage));
+ double jvmHeapDeductionGb = onnxModelCost.aggregatedModelCostInBytes() / (1024D * 1024 * 1024);
+ double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb - jvmHeapDeductionGb);
+ int memoryPercentage = (int) (availableMemory / totalMemory * availableMemoryPercentage);
+ logger.log(Level.FINE, () -> "memoryPercentage=%d, availableMemory=%f, totalMemory=%f, availableMemoryPercentage=%d, jvmHeapDeductionGb=%f"
+ .formatted(memoryPercentage, availableMemory, totalMemory, availableMemoryPercentage, jvmHeapDeductionGb));
+ return Optional.of(memoryPercentage);
}
return Optional.empty();
}
@@ -373,6 +384,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
@Override
public String name() { return getName(); }
+ public OnnxModelCost.Calculator onnxModelCost() { return onnxModelCost; }
+
public static class MbusParams {
// the amount of the maxpendingbytes to process concurrently, typically 0.2 (20%)
final Double maxConcurrentFactor;
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java
index 205848e1b67..76bb1a9e02a 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java
@@ -5,6 +5,7 @@ package com.yahoo.vespa.model.container.component;
import com.yahoo.config.ModelReference;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.embedding.BertBaseEmbedderConfig;
+import com.yahoo.vespa.model.container.ApplicationContainerCluster;
import com.yahoo.vespa.model.container.xml.ModelIdResolver;
import org.w3c.dom.Element;
@@ -33,7 +34,7 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf
private final Integer onnxGpuDevice;
- public BertEmbedder(Element xml, DeployState state) {
+ public BertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) {
super("ai.vespa.embedding.BertBaseEmbedder", INTEGRATION_BUNDLE_NAME, xml);
model = ModelIdResolver.resolveToModelReference(getChild(xml, "transformer-model"), state);
vocab = ModelIdResolver.resolveToModelReference(getChild(xml, "tokenizer-vocab"), state);
@@ -49,6 +50,7 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf
onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null);
onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null);
onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null);
+ cluster.onnxModelCost().registerModel(model);
}
@Override
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java
index f4017339699..41b80bf1cb2 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java
@@ -5,6 +5,7 @@ package com.yahoo.vespa.model.container.component;
import com.yahoo.config.ModelReference;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig;
+import com.yahoo.vespa.model.container.ApplicationContainerCluster;
import com.yahoo.vespa.model.container.xml.ModelIdResolver;
import org.w3c.dom.Element;
@@ -33,7 +34,7 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm
private final Integer onnxGpuDevice;
private final String poolingStrategy;
- public HuggingFaceEmbedder(Element xml, DeployState state) {
+ public HuggingFaceEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) {
super("ai.vespa.embedding.huggingface.HuggingFaceEmbedder", INTEGRATION_BUNDLE_NAME, xml);
var transformerModelElem = getOptionalChild(xml, "transformer-model").orElseThrow();
model = ModelIdResolver.resolveToModelReference(transformerModelElem, state);
@@ -51,6 +52,7 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm
onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null);
onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null);
poolingStrategy = getChildValue(xml, "pooling-strategy").orElse(null);
+ cluster.onnxModelCost().registerModel(model);
}
private static ModelReference resolveDefaultVocab(Element model, DeployState state) {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java
index f0296d49472..3261d454b4f 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.model.container.search;
+import com.yahoo.config.application.api.ApplicationPackage;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.container.QrSearchersConfig;
import com.yahoo.prelude.semantics.SemanticRulesConfig;
@@ -56,12 +57,14 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains>
private QueryProfiles queryProfiles;
private SemanticRules semanticRules;
private PageTemplates pageTemplates;
+ private ApplicationPackage app;
public ContainerSearch(DeployState deployState, ApplicationContainerCluster cluster, SearchChains chains) {
super(chains);
this.globalPhase = deployState.featureFlags().enableGlobalPhase();
this.useReconfigurableDispatcher = deployState.featureFlags().useReconfigurableDispatcher();
this.schemasWithGlobalPhase = getSchemasWithGlobalPhase(deployState);
+ this.app = deployState.getApplicationPackage();
this.owningCluster = cluster;
owningCluster.addComponent(Component.fromClassAndBundle(CompiledQueryProfileRegistry.class, SEARCH_AND_DOCPROC_BUNDLE));
@@ -96,6 +99,9 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains>
if ( ! schemasWithGlobalPhase.contains(documentDb.getSchemaName())) continue;
var factory = new RankProfilesEvaluatorComponent(documentDb);
if ( ! owningCluster.getComponentsMap().containsKey(factory.getComponentId())) {
+ var onnxModels = documentDb.getDerivedConfiguration().getRankProfileList().getOnnxModels();
+ onnxModels.asMap().forEach(
+ (__, model) -> owningCluster.onnxModelCost().registerModel(app.getFile(model.getFilePath())));
owningCluster.addComponent(factory);
}
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
index 35b0213bf59..d9c4dea478c 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
@@ -778,6 +778,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
!container.getHostResource().realResources().gpuResources().isZero());
onnxModel.setGpuDevice(gpuDevice, hasGpu);
}
+ cluster.onnxModelCost().registerModel(context.getApplicationPackage().getFile(onnxModel.getFilePath()));
}
cluster.setModelEvaluation(new ContainerModelEvaluation(cluster, profiles, models));