summary | refs | log | tree | commit | diff | stats
path: root/config-model
diff options
context:
space:
mode:
Diffstat (limited to 'config-model')
-rw-r--r-- config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java 8
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/OnnxModel.java 32
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java 18
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java 38
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java 30
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java 33
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java 7
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java 4
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java 2
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/content/storagecluster/FileStorProducer.java 3
-rw-r--r-- config-model/src/test/derived/globalphase_onnx_inside/rank-profiles.cfg 2
-rw-r--r-- config-model/src/test/derived/tensor/rank-profiles.cfg 21
-rw-r--r-- config-model/src/test/derived/tensor/tensor.sd 10
-rw-r--r-- config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java 11
-rw-r--r-- config-model/src/test/java/com/yahoo/vespa/model/content/StorageClusterTest.java 18
15 files changed, 149 insertions, 88 deletions
diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
index 1bda8a509f1..41df042284e 100644
--- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
+++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
@@ -88,6 +88,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea
private int contentLayerMetadataFeatureLevel = 0;
private boolean dynamicHeapSize = false;
private long mergingMaxMemoryUsagePerNode = -1;
+ private boolean usePerDocumentThrottledDeleteBucket = false;
@Override public ModelContext.FeatureFlags featureFlags() { return this; }
@Override public boolean multitenant() { return multitenant; }
@@ -128,7 +129,6 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea
@Override public int maxCompactBuffers() { return maxCompactBuffers; }
@Override public boolean useV8GeoPositions() { return useV8GeoPositions; }
@Override public List<String> environmentVariables() { return environmentVariables; }
- @Override public Architecture adminClusterArchitecture() { return adminClusterNodeResourcesArchitecture; }
@Override public boolean sharedStringRepoNoReclaim() { return sharedStringRepoNoReclaim; }
@Override public boolean loadCodeAsHugePages() { return loadCodeAsHugePages; }
@Override public int mbusNetworkThreads() { return mbus_network_threads; }
@@ -148,6 +148,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea
@Override public int contentLayerMetadataFeatureLevel() { return contentLayerMetadataFeatureLevel; }
@Override public boolean dynamicHeapSize() { return dynamicHeapSize; }
@Override public long mergingMaxMemoryUsagePerNode() { return mergingMaxMemoryUsagePerNode; }
+ @Override public boolean usePerDocumentThrottledDeleteBucket() { return usePerDocumentThrottledDeleteBucket; }
public TestProperties sharedStringRepoNoReclaim(boolean sharedStringRepoNoReclaim) {
this.sharedStringRepoNoReclaim = sharedStringRepoNoReclaim;
@@ -390,6 +391,11 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea
return this;
}
+ public TestProperties setUsePerDocumentThrottledDeleteBucket(boolean enableThrottling) {
+ this.usePerDocumentThrottledDeleteBucket = enableThrottling;
+ return this;
+ }
+
public static class Spec implements ConfigServerSpec {
private final String hostName;
diff --git a/config-model/src/main/java/com/yahoo/schema/OnnxModel.java b/config-model/src/main/java/com/yahoo/schema/OnnxModel.java
index f3f09150c1d..9456baafd57 100644
--- a/config-model/src/main/java/com/yahoo/schema/OnnxModel.java
+++ b/config-model/src/main/java/com/yahoo/schema/OnnxModel.java
@@ -1,6 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema;
+import com.yahoo.config.model.api.OnnxModelOptions;
import com.yahoo.searchlib.rankingexpression.Reference;
import com.yahoo.tensor.TensorType;
import com.yahoo.vespa.model.ml.OnnxModelInfo;
@@ -27,10 +28,7 @@ public class OnnxModel extends DistributableResource implements Cloneable {
private final Set<String> initializers = new HashSet<>();
// Runtime options
- private String statelessExecutionMode = null;
- private Integer statelessInterOpThreads = null;
- private Integer statelessIntraOpThreads = null;
- private GpuDevice gpuDevice = null;
+ private OnnxModelOptions onnxModelOptions = OnnxModelOptions.empty();
public OnnxModel(String name) {
super(name);
@@ -133,50 +131,46 @@ public class OnnxModel extends DistributableResource implements Cloneable {
public void setStatelessExecutionMode(String executionMode) {
if ("parallel".equalsIgnoreCase(executionMode)) {
- this.statelessExecutionMode = "parallel";
+ onnxModelOptions = onnxModelOptions.withExecutionMode("parallel");
} else if ("sequential".equalsIgnoreCase(executionMode)) {
- this.statelessExecutionMode = "sequential";
+ onnxModelOptions = onnxModelOptions.withExecutionMode("sequential");
}
}
public Optional<String> getStatelessExecutionMode() {
- return Optional.ofNullable(statelessExecutionMode);
+ return onnxModelOptions.executionMode();
}
public void setStatelessInterOpThreads(int interOpThreads) {
if (interOpThreads >= 0) {
- this.statelessInterOpThreads = interOpThreads;
+ onnxModelOptions = onnxModelOptions.withInterOpThreads(interOpThreads);
}
}
public Optional<Integer> getStatelessInterOpThreads() {
- return Optional.ofNullable(statelessInterOpThreads);
+ return onnxModelOptions.interOpThreads();
}
public void setStatelessIntraOpThreads(int intraOpThreads) {
if (intraOpThreads >= 0) {
- this.statelessIntraOpThreads = intraOpThreads;
+ onnxModelOptions = onnxModelOptions.withIntraOpThreads(intraOpThreads);
}
}
public Optional<Integer> getStatelessIntraOpThreads() {
- return Optional.ofNullable(statelessIntraOpThreads);
+ return onnxModelOptions.intraOpThreads();
}
public void setGpuDevice(int deviceNumber, boolean required) {
if (deviceNumber >= 0) {
- this.gpuDevice = new GpuDevice(deviceNumber, required);
+ onnxModelOptions = onnxModelOptions.withGpuDevice(new OnnxModelOptions.GpuDevice(deviceNumber, required));
}
}
- public Optional<GpuDevice> getGpuDevice() {
- return Optional.ofNullable(gpuDevice);
+ public Optional<OnnxModelOptions.GpuDevice> getGpuDevice() {
+ return onnxModelOptions.gpuDevice();
}
- public record GpuDevice(int deviceNumber, boolean required) {
- public GpuDevice {
- if (deviceNumber < 0) throw new IllegalArgumentException("deviceNumber cannot be negative, got " + deviceNumber);
- }
- }
+ public OnnxModelOptions onnxModelOptions() { return onnxModelOptions; }
}
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java
index 87b79ddcdc3..388d2627224 100644
--- a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java
+++ b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java
@@ -171,7 +171,7 @@ public class RawRankProfile implements RankProfilesConfig.Producer {
private final OptionalDouble approximateThreshold;
private final OptionalDouble targetHitsMaxAdjustmentFactor;
private final double rankScoreDropLimit;
- private final boolean enableNestedMultivalueGrouping;
+ private final boolean alwaysMarkPhraseExpensive;
/**
* The rank type definitions used to derive settings for the native rank features
@@ -186,6 +186,7 @@ public class RawRankProfile implements RankProfilesConfig.Producer {
private RankingExpression secondPhaseRanking;
private RankingExpression globalPhaseRanking;
private final int globalPhaseRerankCount;
+ private final SerializationContext functionSerializationContext;
/**
* Creates a raw rank profile from the given rank profile
@@ -212,7 +213,7 @@ public class RawRankProfile implements RankProfilesConfig.Producer {
minHitsPerThread = compiled.getMinHitsPerThread();
numSearchPartitions = compiled.getNumSearchPartitions();
termwiseLimit = compiled.getTermwiseLimit().orElse(deployProperties.featureFlags().defaultTermwiseLimit());
- enableNestedMultivalueGrouping = deployProperties.featureFlags().enableNestedMultivalueGrouping();
+ alwaysMarkPhraseExpensive = deployProperties.featureFlags().alwaysMarkPhraseExpensive();
postFilterThreshold = compiled.getPostFilterThreshold();
approximateThreshold = compiled.getApproximateThreshold();
targetHitsMaxAdjustmentFactor = compiled.getTargetHitsMaxAdjustmentFactor();
@@ -225,7 +226,7 @@ public class RawRankProfile implements RankProfilesConfig.Producer {
List<ExpressionFunction> functionExpressions = functions.values().stream().map(RankProfile.RankingExpressionFunction::function).toList();
Map<String, String> functionProperties = new LinkedHashMap<>();
var typeContext = compiled.typeContext(queryProfiles);
- SerializationContext functionSerializationContext = new SerializationContext(functionExpressions, Map.of(), typeContext);
+ this.functionSerializationContext = new SerializationContext(functionExpressions, Map.of(), typeContext);
if (firstPhaseRanking != null) {
functionProperties.putAll(firstPhaseRanking.getRankProperties(functionSerializationContext));
}
@@ -265,8 +266,6 @@ public class RawRankProfile implements RankProfilesConfig.Producer {
private void derivePropertiesAndFeaturesFromFunctions(Map<String, RankProfile.RankingExpressionFunction> functions,
Map<String, String> functionProperties,
SerializationContext functionContext) {
- if (functions.isEmpty()) return;
-
replaceFunctionFeatures(summaryFeatures, functionContext);
replaceFunctionFeatures(matchFeatures, functionContext);
@@ -465,8 +464,8 @@ public class RawRankProfile implements RankProfilesConfig.Producer {
if (termwiseLimit < 1.0) {
properties.add(new Pair<>("vespa.matching.termwise_limit", termwiseLimit + ""));
}
- if (enableNestedMultivalueGrouping) {
- properties.add(new Pair<>("vespa.temporary.enable_nested_multivalue_grouping", String.valueOf(enableNestedMultivalueGrouping)));
+ if (alwaysMarkPhraseExpensive) {
+ properties.add(new Pair<>("vespa.matching.always_mark_phrase_expensive", String.valueOf(alwaysMarkPhraseExpensive)));
}
if (postFilterThreshold.isPresent()) {
properties.add(new Pair<>("vespa.matching.global_filter.upper_limit", String.valueOf(postFilterThreshold.getAsDouble())));
@@ -556,11 +555,12 @@ public class RawRankProfile implements RankProfilesConfig.Producer {
if ("".equals(name))
name = phase;
+ String expressionAsString = expression.getRoot().toString(functionSerializationContext).toString();
if (expression.getRoot() instanceof ReferenceNode) {
- properties.add(new Pair<>("vespa.rank." + phase, expression.getRoot().toString()));
+ properties.add(new Pair<>("vespa.rank." + phase, expressionAsString));
} else {
properties.add(new Pair<>("vespa.rank." + phase, wrapInRankingExpression(name)));
- properties.add(new Pair<>(RankingExpression.propertyName(name), expression.getRoot().toString()));
+ properties.add(new Pair<>(RankingExpression.propertyName(name), expressionAsString));
}
return properties;
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java
index a644382625b..67fb720b8c0 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java
@@ -1,13 +1,15 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
package com.yahoo.vespa.model.container.component;
import com.yahoo.config.ModelReference;
+import com.yahoo.config.model.api.OnnxModelOptions;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.embedding.BertBaseEmbedderConfig;
import com.yahoo.vespa.model.container.ApplicationContainerCluster;
import org.w3c.dom.Element;
+import static com.yahoo.embedding.BertBaseEmbedderConfig.OnnxExecutionMode;
+import static com.yahoo.embedding.BertBaseEmbedderConfig.PoolingStrategy;
import static com.yahoo.text.XML.getChildValue;
import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME;
@@ -16,6 +18,7 @@ import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATI
*/
public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConfig.Producer {
+ private final OnnxModelOptions onnxModelOptions;
private final ModelReference modelRef;
private final ModelReference vocabRef;
private final Integer maxTokens;
@@ -23,18 +26,18 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf
private final String transformerAttentionMask;
private final String transformerTokenTypeIds;
private final String transformerOutput;
- private final Integer tranformerStartSequenceToken;
+ private final Integer transformerStartSequenceToken;
private final Integer transformerEndSequenceToken;
private final String poolingStrategy;
- private final String onnxExecutionMode;
- private final Integer onnxInteropThreads;
- private final Integer onnxIntraopThreads;
- private final Integer onnxGpuDevice;
-
public BertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) {
super("ai.vespa.embedding.BertBaseEmbedder", INTEGRATION_BUNDLE_NAME, xml);
var model = Model.fromXml(state, xml, "transformer-model").orElseThrow();
+ this.onnxModelOptions = new OnnxModelOptions(
+ getChildValue(xml, "onnx-execution-mode"),
+ getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt),
+ getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt),
+ getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).map(OnnxModelOptions.GpuDevice::new));
modelRef = model.modelReference();
vocabRef = Model.fromXml(state, xml, "tokenizer-vocab").orElseThrow().modelReference();
maxTokens = getChildValue(xml, "max-tokens").map(Integer::parseInt).orElse(null);
@@ -42,14 +45,10 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf
transformerAttentionMask = getChildValue(xml, "transformer-attention-mask").orElse(null);
transformerTokenTypeIds = getChildValue(xml, "transformer-token-type-ids").orElse(null);
transformerOutput = getChildValue(xml, "transformer-output").orElse(null);
- tranformerStartSequenceToken = getChildValue(xml, "transformer-start-sequence-token").map(Integer::parseInt).orElse(null);
+ transformerStartSequenceToken = getChildValue(xml, "transformer-start-sequence-token").map(Integer::parseInt).orElse(null);
transformerEndSequenceToken = getChildValue(xml, "transformer-end-sequence-token").map(Integer::parseInt).orElse(null);
poolingStrategy = getChildValue(xml, "pooling-strategy").orElse(null);
- onnxExecutionMode = getChildValue(xml, "onnx-execution-mode").orElse(null);
- onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null);
- onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null);
- onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null);
- model.registerOnnxModelCost(cluster);
+ model.registerOnnxModelCost(cluster, onnxModelOptions);
}
@Override
@@ -60,12 +59,13 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf
if (transformerAttentionMask != null) b.transformerAttentionMask(transformerAttentionMask);
if (transformerTokenTypeIds != null) b.transformerTokenTypeIds(transformerTokenTypeIds);
if (transformerOutput != null) b.transformerOutput(transformerOutput);
- if (tranformerStartSequenceToken != null) b.transformerStartSequenceToken(tranformerStartSequenceToken);
+ if (transformerStartSequenceToken != null) b.transformerStartSequenceToken(transformerStartSequenceToken);
if (transformerEndSequenceToken != null) b.transformerEndSequenceToken(transformerEndSequenceToken);
- if (poolingStrategy != null) b.poolingStrategy(BertBaseEmbedderConfig.PoolingStrategy.Enum.valueOf(poolingStrategy));
- if (onnxExecutionMode != null) b.onnxExecutionMode(BertBaseEmbedderConfig.OnnxExecutionMode.Enum.valueOf(onnxExecutionMode));
- if (onnxInteropThreads != null) b.onnxInterOpThreads(onnxInteropThreads);
- if (onnxIntraopThreads != null) b.onnxIntraOpThreads(onnxIntraopThreads);
- if (onnxGpuDevice != null) b.onnxGpuDevice(onnxGpuDevice);
+ if (poolingStrategy != null) b.poolingStrategy(PoolingStrategy.Enum.valueOf(poolingStrategy));
+ onnxModelOptions.executionMode().ifPresent(value -> b.onnxExecutionMode(OnnxExecutionMode.Enum.valueOf(value)));
+ onnxModelOptions.interOpThreads().ifPresent(b::onnxInterOpThreads);
+ onnxModelOptions.intraOpThreads().ifPresent(b::onnxIntraOpThreads);
+ onnxModelOptions.gpuDevice().ifPresent(value -> b.onnxGpuDevice(value.deviceNumber()));
}
+
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java
index ed56579988d..d22e6afc3d1 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java
@@ -1,13 +1,14 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
package com.yahoo.vespa.model.container.component;
import com.yahoo.config.ModelReference;
+import com.yahoo.config.model.api.OnnxModelOptions;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.embedding.ColBertEmbedderConfig;
import com.yahoo.vespa.model.container.ApplicationContainerCluster;
import org.w3c.dom.Element;
+import static com.yahoo.embedding.ColBertEmbedderConfig.TransformerExecutionMode;
import static com.yahoo.text.XML.getChildValue;
import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME;
@@ -16,6 +17,8 @@ import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATI
* @author bergum
*/
public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderConfig.Producer {
+
+ private final OnnxModelOptions onnxModelOptions;
private final ModelReference modelRef;
private final ModelReference vocabRef;
@@ -31,14 +34,15 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo
private final String transformerAttentionMask;
private final String transformerOutput;
- private final String onnxExecutionMode;
- private final Integer onnxInteropThreads;
- private final Integer onnxIntraopThreads;
- private final Integer onnxGpuDevice;
public ColBertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) {
super("ai.vespa.embedding.ColBertEmbedder", INTEGRATION_BUNDLE_NAME, xml);
var model = Model.fromXml(state, xml, "transformer-model").orElseThrow();
+ this.onnxModelOptions = new OnnxModelOptions(
+ getChildValue(xml, "onnx-execution-mode"),
+ getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt),
+ getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt),
+ getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).map(OnnxModelOptions.GpuDevice::new));
modelRef = model.modelReference();
vocabRef = Model.fromXml(state, xml, "tokenizer-model")
.map(Model::modelReference)
@@ -52,11 +56,7 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo
transformerInputIds = getChildValue(xml, "transformer-input-ids").orElse(null);
transformerAttentionMask = getChildValue(xml, "transformer-attention-mask").orElse(null);
transformerOutput = getChildValue(xml, "transformer-output").orElse(null);
- onnxExecutionMode = getChildValue(xml, "onnx-execution-mode").orElse(null);
- onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null);
- onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null);
- onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null);
- model.registerOnnxModelCost(cluster);
+ model.registerOnnxModelCost(cluster, onnxModelOptions);
}
private static ModelReference resolveDefaultVocab(Model model, DeployState state) {
@@ -79,10 +79,10 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo
if (transformerStartSequenceToken != null) b.transformerStartSequenceToken(transformerStartSequenceToken);
if (transformerEndSequenceToken != null) b.transformerEndSequenceToken(transformerEndSequenceToken);
if (transformerMaskToken != null) b.transformerMaskToken(transformerMaskToken);
- if (onnxExecutionMode != null) b.transformerExecutionMode(
- ColBertEmbedderConfig.TransformerExecutionMode.Enum.valueOf(onnxExecutionMode));
- if (onnxInteropThreads != null) b.transformerInterOpThreads(onnxInteropThreads);
- if (onnxIntraopThreads != null) b.transformerIntraOpThreads(onnxIntraopThreads);
- if (onnxGpuDevice != null) b.transformerGpuDevice(onnxGpuDevice);
+ onnxModelOptions.executionMode().ifPresent(value -> b.transformerExecutionMode(TransformerExecutionMode.Enum.valueOf(value)));
+ onnxModelOptions.interOpThreads().ifPresent(b::transformerInterOpThreads);
+ onnxModelOptions.intraOpThreads().ifPresent(b::transformerIntraOpThreads);
+ onnxModelOptions.gpuDevice().ifPresent(value -> b.transformerGpuDevice(value.deviceNumber()));
}
+
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java
index 31b86142445..d98c72ab3a4 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java
@@ -1,13 +1,15 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
package com.yahoo.vespa.model.container.component;
import com.yahoo.config.ModelReference;
+import com.yahoo.config.model.api.OnnxModelOptions;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig;
import com.yahoo.vespa.model.container.ApplicationContainerCluster;
import org.w3c.dom.Element;
+import static com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig.PoolingStrategy;
+import static com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig.TransformerExecutionMode;
import static com.yahoo.text.XML.getChildValue;
import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME;
@@ -16,6 +18,8 @@ import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATI
* @author bjorncs
*/
public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEmbedderConfig.Producer {
+
+ private final OnnxModelOptions onnxModelOptions;
private final ModelReference modelRef;
private final ModelReference vocabRef;
private final Integer maxTokens;
@@ -24,15 +28,16 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm
private final String transformerTokenTypeIds;
private final String transformerOutput;
private final Boolean normalize;
- private final String onnxExecutionMode;
- private final Integer onnxInteropThreads;
- private final Integer onnxIntraopThreads;
- private final Integer onnxGpuDevice;
private final String poolingStrategy;
public HuggingFaceEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) {
super("ai.vespa.embedding.huggingface.HuggingFaceEmbedder", INTEGRATION_BUNDLE_NAME, xml);
var model = Model.fromXml(state, xml, "transformer-model").orElseThrow();
+ this.onnxModelOptions = new OnnxModelOptions(
+ getChildValue(xml, "onnx-execution-mode"),
+ getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt),
+ getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt),
+ getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).map(OnnxModelOptions.GpuDevice::new));
modelRef = model.modelReference();
vocabRef = Model.fromXml(state, xml, "tokenizer-model")
.map(Model::modelReference)
@@ -43,12 +48,8 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm
transformerTokenTypeIds = getChildValue(xml, "transformer-token-type-ids").orElse(null);
transformerOutput = getChildValue(xml, "transformer-output").orElse(null);
normalize = getChildValue(xml, "normalize").map(Boolean::parseBoolean).orElse(null);
- onnxExecutionMode = getChildValue(xml, "onnx-execution-mode").orElse(null);
- onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null);
- onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null);
- onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null);
poolingStrategy = getChildValue(xml, "pooling-strategy").orElse(null);
- model.registerOnnxModelCost(cluster);
+ model.registerOnnxModelCost(cluster, onnxModelOptions);
}
private static ModelReference resolveDefaultVocab(Model model, DeployState state) {
@@ -68,11 +69,11 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm
if (transformerTokenTypeIds != null) b.transformerTokenTypeIds(transformerTokenTypeIds);
if (transformerOutput != null) b.transformerOutput(transformerOutput);
if (normalize != null) b.normalize(normalize);
- if (onnxExecutionMode != null) b.transformerExecutionMode(
- HuggingFaceEmbedderConfig.TransformerExecutionMode.Enum.valueOf(onnxExecutionMode));
- if (onnxInteropThreads != null) b.transformerInterOpThreads(onnxInteropThreads);
- if (onnxIntraopThreads != null) b.transformerIntraOpThreads(onnxIntraopThreads);
- if (onnxGpuDevice != null) b.transformerGpuDevice(onnxGpuDevice);
- if (poolingStrategy != null) b.poolingStrategy(HuggingFaceEmbedderConfig.PoolingStrategy.Enum.valueOf(poolingStrategy));
+ if (poolingStrategy != null) b.poolingStrategy(PoolingStrategy.Enum.valueOf(poolingStrategy));
+ onnxModelOptions.executionMode().ifPresent(value -> b.transformerExecutionMode(TransformerExecutionMode.Enum.valueOf(value)));
+ onnxModelOptions.interOpThreads().ifPresent(b::transformerInterOpThreads);
+ onnxModelOptions.intraOpThreads().ifPresent(b::transformerIntraOpThreads);
+ onnxModelOptions.gpuDevice().ifPresent(value -> b.transformerGpuDevice(value.deviceNumber()));
}
+
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java
index c5daf23d6f8..0d350242fd0 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java
@@ -4,6 +4,7 @@ package com.yahoo.vespa.model.container.component;
import com.yahoo.config.ModelReference;
import com.yahoo.config.application.api.ApplicationFile;
+import com.yahoo.config.model.api.OnnxModelOptions;
import com.yahoo.config.model.builder.xml.XmlHelper;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.path.Path;
@@ -54,10 +55,10 @@ class Model {
return new Model(ds, model.getTagName(), modelId, url, path);
}
- void registerOnnxModelCost(ApplicationContainerCluster c) {
+ void registerOnnxModelCost(ApplicationContainerCluster c, OnnxModelOptions onnxModelOptions) {
var resolvedUrl = resolvedUrl().orElse(null);
- if (file != null) c.onnxModelCost().registerModel(file);
- else if (resolvedUrl != null) c.onnxModelCost().registerModel(resolvedUrl);
+ if (file != null) c.onnxModelCost().registerModel(file, onnxModelOptions);
+ else if (resolvedUrl != null) c.onnxModelCost().registerModel(resolvedUrl, onnxModelOptions);
}
String name() { return paramName; }
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java
index d86d117f1d2..31468c05b99 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java
@@ -52,11 +52,11 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains>
private final List<SearchCluster> searchClusters = new LinkedList<>();
private final Collection<String> schemasWithGlobalPhase;
private final boolean globalPhase;
+ private final ApplicationPackage app;
private QueryProfiles queryProfiles;
private SemanticRules semanticRules;
private PageTemplates pageTemplates;
- private ApplicationPackage app;
public ContainerSearch(DeployState deployState, ApplicationContainerCluster cluster, SearchChains chains) {
super(chains);
@@ -102,7 +102,7 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains>
if ( ! owningCluster.getComponentsMap().containsKey(factory.getComponentId())) {
var onnxModels = documentDb.getDerivedConfiguration().getRankProfileList().getOnnxModels();
onnxModels.asMap().forEach(
- (__, model) -> owningCluster.onnxModelCost().registerModel(app.getFile(model.getFilePath())));
+ (__, model) -> owningCluster.onnxModelCost().registerModel(app.getFile(model.getFilePath()), model.onnxModelOptions()));
owningCluster.addComponent(factory);
}
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
index 18020f5df5d..5ffd34c6557 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
@@ -800,7 +800,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
!container.getHostResource().realResources().gpuResources().isZero());
onnxModel.setGpuDevice(gpuDevice, hasGpu);
}
- cluster.onnxModelCost().registerModel(context.getApplicationPackage().getFile(onnxModel.getFilePath()));
+ cluster.onnxModelCost().registerModel(context.getApplicationPackage().getFile(onnxModel.getFilePath()), onnxModel.onnxModelOptions());
}
cluster.setModelEvaluation(new ContainerModelEvaluation(cluster, profiles, models));
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/storagecluster/FileStorProducer.java b/config-model/src/main/java/com/yahoo/vespa/model/content/storagecluster/FileStorProducer.java
index c8f5be71f3c..18b9129cead 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/storagecluster/FileStorProducer.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/storagecluster/FileStorProducer.java
@@ -47,6 +47,7 @@ public class FileStorProducer implements StorFilestorConfig.Producer {
private final int responseNumThreads;
private final StorFilestorConfig.Response_sequencer_type.Enum responseSequencerType;
private final boolean useAsyncMessageHandlingOnSchedule;
+ private final boolean usePerDocumentThrottledDeleteBucket;
private static StorFilestorConfig.Response_sequencer_type.Enum convertResponseSequencerType(String sequencerType) {
try {
@@ -62,6 +63,7 @@ public class FileStorProducer implements StorFilestorConfig.Producer {
this.responseNumThreads = featureFlags.defaultNumResponseThreads();
this.responseSequencerType = convertResponseSequencerType(featureFlags.responseSequencerType());
this.useAsyncMessageHandlingOnSchedule = featureFlags.useAsyncMessageHandlingOnSchedule();
+ this.usePerDocumentThrottledDeleteBucket = featureFlags.usePerDocumentThrottledDeleteBucket();
}
@Override
@@ -73,6 +75,7 @@ public class FileStorProducer implements StorFilestorConfig.Producer {
builder.num_response_threads(responseNumThreads);
builder.response_sequencer_type(responseSequencerType);
builder.use_async_message_handling_on_schedule(useAsyncMessageHandlingOnSchedule);
+ builder.use_per_document_throttled_delete_bucket(usePerDocumentThrottledDeleteBucket);
var throttleBuilder = new StorFilestorConfig.Async_operation_throttler.Builder();
builder.async_operation_throttler(throttleBuilder);
}
diff --git a/config-model/src/test/derived/globalphase_onnx_inside/rank-profiles.cfg b/config-model/src/test/derived/globalphase_onnx_inside/rank-profiles.cfg
index 4600884b615..b62d7fd239c 100644
--- a/config-model/src/test/derived/globalphase_onnx_inside/rank-profiles.cfg
+++ b/config-model/src/test/derived/globalphase_onnx_inside/rank-profiles.cfg
@@ -17,6 +17,8 @@ rankprofile[].fef.property[].value "true"
rankprofile[].fef.property[].name "vespa.type.attribute.aa"
rankprofile[].fef.property[].value "tensor(d1[3])"
rankprofile[].name "simple"
+rankprofile[].fef.property[].name "vespa.type.feature.attribute(aa)"
+rankprofile[].fef.property[].value "tensor(d1[3])"
rankprofile[].fef.property[].name "vespa.rank.firstphase"
rankprofile[].fef.property[].value "rankingExpression(firstphase)"
rankprofile[].fef.property[].name "rankingExpression(firstphase).rankingScript"
diff --git a/config-model/src/test/derived/tensor/rank-profiles.cfg b/config-model/src/test/derived/tensor/rank-profiles.cfg
index cd8375cb68d..a72e9dc27cd 100644
--- a/config-model/src/test/derived/tensor/rank-profiles.cfg
+++ b/config-model/src/test/derived/tensor/rank-profiles.cfg
@@ -47,7 +47,7 @@ rankprofile[].name "profile2"
rankprofile[].fef.property[].name "vespa.rank.firstphase"
rankprofile[].fef.property[].value "rankingExpression(firstphase)"
rankprofile[].fef.property[].name "rankingExpression(firstphase).rankingScript"
-rankprofile[].fef.property[].value "reduce(reduce(join(attribute(f4), tensor(x[10],y[10],z[3])((x==y)*(y==z)), f(a,b)(a * b)), sum, x), sum)"
+rankprofile[].fef.property[].value "reduce(reduce(join(attribute(f4), tensor(x[10],y[10],z[3])(((x == y) * (y == z))), f(a,b)(a * b)), sum, x), sum)"
rankprofile[].fef.property[].name "vespa.type.attribute.f7"
rankprofile[].fef.property[].value "tensor<int8>(p{},x[5])"
rankprofile[].fef.property[].name "vespa.type.attribute.f2"
@@ -206,3 +206,22 @@ rankprofile[].fef.property[].name "vespa.type.query.para"
rankprofile[].fef.property[].value "tensor<float>(p{})"
rankprofile[].fef.property[].name "vespa.type.query.qvec"
rankprofile[].fef.property[].value "tensor<float>(x[40])"
+rankprofile[].name "with-just-unpack"
+rankprofile[].fef.property[].name "vespa.rank.firstphase"
+rankprofile[].fef.property[].value "rankingExpression(firstphase)"
+rankprofile[].fef.property[].name "rankingExpression(firstphase).rankingScript"
+rankprofile[].fef.property[].value "reduce(query(para) * map_subspaces(attribute(f7), f(denseSubspaceInput)(tensor(x[40])((bit(denseSubspaceInput{x:(x / 8)},(x % 8)))))) * query(qvec), sum)"
+rankprofile[].fef.property[].name "vespa.type.attribute.f7"
+rankprofile[].fef.property[].value "tensor<int8>(p{},x[5])"
+rankprofile[].fef.property[].name "vespa.type.attribute.f2"
+rankprofile[].fef.property[].value "tensor<float>(x[2],y[1])"
+rankprofile[].fef.property[].name "vespa.type.attribute.f3"
+rankprofile[].fef.property[].value "tensor(x{})"
+rankprofile[].fef.property[].name "vespa.type.attribute.f4"
+rankprofile[].fef.property[].value "tensor(x[10],y[10])"
+rankprofile[].fef.property[].name "vespa.type.attribute.f5"
+rankprofile[].fef.property[].value "tensor<float>(x[10])"
+rankprofile[].fef.property[].name "vespa.type.query.para"
+rankprofile[].fef.property[].value "tensor(p{})"
+rankprofile[].fef.property[].name "vespa.type.query.qvec"
+rankprofile[].fef.property[].value "tensor(x[40])"
diff --git a/config-model/src/test/derived/tensor/tensor.sd b/config-model/src/test/derived/tensor/tensor.sd
index 3a5fda3ac5d..a0f5cd92c56 100644
--- a/config-model/src/test/derived/tensor/tensor.sd
+++ b/config-model/src/test/derived/tensor/tensor.sd
@@ -135,4 +135,14 @@ schema tensor {
}
}
+ rank-profile with-just-unpack {
+ inputs {
+ query(para) tensor<double>(p{})
+ query(qvec) tensor<double>(x[40])
+ }
+ first-phase {
+ expression: sum(query(para)*unpack_bits(attribute(f7), double, little)*query(qvec))
+ }
+ }
+
}
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java
index 8531aff3b1a..9cadf5cffd8 100644
--- a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java
+++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java
@@ -1,14 +1,13 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
package com.yahoo.vespa.model.application.validation;
import com.yahoo.config.application.api.ApplicationFile;
import com.yahoo.config.application.api.ApplicationPackage;
-import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.config.model.NullConfigModelRegistry;
import com.yahoo.config.model.api.ApplicationClusterEndpoint;
import com.yahoo.config.model.api.ContainerEndpoint;
import com.yahoo.config.model.api.OnnxModelCost;
+import com.yahoo.config.model.api.OnnxModelOptions;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.config.model.deploy.TestProperties;
import com.yahoo.config.model.provision.InMemoryProvisioner;
@@ -123,12 +122,20 @@ class JvmHeapSizeValidatorTest {
@Override public Calculator newCalculator(ApplicationPackage appPkg, ApplicationId applicationId) { return this; }
@Override public long aggregatedModelCostInBytes() { return totalCost.get(); }
@Override public void registerModel(ApplicationFile path) {}
+ @Override public void registerModel(ApplicationFile path, OnnxModelOptions onnxModelOptions) {}
@Override
public void registerModel(URI uri) {
assertEquals("https://my/url/model.onnx", uri.toString());
totalCost.addAndGet(modelCost);
}
+
+ @Override
+ public void registerModel(URI uri, OnnxModelOptions onnxModelOptions) {
+ assertEquals("https://my/url/model.onnx", uri.toString());
+ totalCost.addAndGet(modelCost);
+ }
+
}
}
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/StorageClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/StorageClusterTest.java
index e7b2c549fa5..bdd61d93136 100644
--- a/config-model/src/test/java/com/yahoo/vespa/model/content/StorageClusterTest.java
+++ b/config-model/src/test/java/com/yahoo/vespa/model/content/StorageClusterTest.java
@@ -355,6 +355,24 @@ public class StorageClusterTest {
assertTrue(config.async_operation_throttler().throttle_individual_merge_feed_ops());
}
+ private void verifyUsePerDocumentThrottledDeleteBucket(boolean expected, Boolean enabled) {
+ var props = new TestProperties();
+ if (enabled != null) {
+ props.setUsePerDocumentThrottledDeleteBucket(enabled);
+ }
+ var config = filestorConfigFromProducer(simpleCluster(props));
+ assertEquals(expected, config.use_per_document_throttled_delete_bucket());
+ }
+
+ @Test
+ void delete_bucket_throttling_is_controlled_by_feature_flag() {
+ // TODO update default once rolled out and tested
+ verifyUsePerDocumentThrottledDeleteBucket(false, null);
+
+ verifyUsePerDocumentThrottledDeleteBucket(false, false);
+ verifyUsePerDocumentThrottledDeleteBucket(true, true);
+ }
+
@Test
void testCapacity() {
String xml = joinLines(