diff options
Diffstat (limited to 'config-model/src/main/java')
10 files changed, 90 insertions, 85 deletions
diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java index 1bda8a509f1..41df042284e 100644 --- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java +++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java @@ -88,6 +88,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea private int contentLayerMetadataFeatureLevel = 0; private boolean dynamicHeapSize = false; private long mergingMaxMemoryUsagePerNode = -1; + private boolean usePerDocumentThrottledDeleteBucket = false; @Override public ModelContext.FeatureFlags featureFlags() { return this; } @Override public boolean multitenant() { return multitenant; } @@ -128,7 +129,6 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea @Override public int maxCompactBuffers() { return maxCompactBuffers; } @Override public boolean useV8GeoPositions() { return useV8GeoPositions; } @Override public List<String> environmentVariables() { return environmentVariables; } - @Override public Architecture adminClusterArchitecture() { return adminClusterNodeResourcesArchitecture; } @Override public boolean sharedStringRepoNoReclaim() { return sharedStringRepoNoReclaim; } @Override public boolean loadCodeAsHugePages() { return loadCodeAsHugePages; } @Override public int mbusNetworkThreads() { return mbus_network_threads; } @@ -148,6 +148,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea @Override public int contentLayerMetadataFeatureLevel() { return contentLayerMetadataFeatureLevel; } @Override public boolean dynamicHeapSize() { return dynamicHeapSize; } @Override public long mergingMaxMemoryUsagePerNode() { return mergingMaxMemoryUsagePerNode; } + @Override public boolean usePerDocumentThrottledDeleteBucket() { return usePerDocumentThrottledDeleteBucket; } public TestProperties sharedStringRepoNoReclaim(boolean sharedStringRepoNoReclaim) { this.sharedStringRepoNoReclaim = sharedStringRepoNoReclaim; @@ -390,6 +391,11 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea return this; } + public TestProperties setUsePerDocumentThrottledDeleteBucket(boolean enableThrottling) { + this.usePerDocumentThrottledDeleteBucket = enableThrottling; + return this; + } + public static class Spec implements ConfigServerSpec { private final String hostName; diff --git a/config-model/src/main/java/com/yahoo/schema/OnnxModel.java b/config-model/src/main/java/com/yahoo/schema/OnnxModel.java index f3f09150c1d..9456baafd57 100644 --- a/config-model/src/main/java/com/yahoo/schema/OnnxModel.java +++ b/config-model/src/main/java/com/yahoo/schema/OnnxModel.java @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.schema; +import com.yahoo.config.model.api.OnnxModelOptions; import com.yahoo.searchlib.rankingexpression.Reference; import com.yahoo.tensor.TensorType; import com.yahoo.vespa.model.ml.OnnxModelInfo; @@ -27,10 +28,7 @@ public class OnnxModel extends DistributableResource implements Cloneable { private final Set<String> initializers = new HashSet<>(); // Runtime options - private String statelessExecutionMode = null; - private Integer statelessInterOpThreads = null; - private Integer statelessIntraOpThreads = null; - private GpuDevice gpuDevice = null; + private OnnxModelOptions onnxModelOptions = OnnxModelOptions.empty(); public OnnxModel(String name) { super(name); @@ -133,50 +131,46 @@ public class OnnxModel extends DistributableResource implements Cloneable { public void setStatelessExecutionMode(String executionMode) { if ("parallel".equalsIgnoreCase(executionMode)) { - this.statelessExecutionMode = "parallel"; + onnxModelOptions = onnxModelOptions.withExecutionMode("parallel"); } else if ("sequential".equalsIgnoreCase(executionMode)) { - this.statelessExecutionMode = "sequential"; + onnxModelOptions = onnxModelOptions.withExecutionMode("sequential"); } } public Optional<String> getStatelessExecutionMode() { - return Optional.ofNullable(statelessExecutionMode); + return onnxModelOptions.executionMode(); } public void setStatelessInterOpThreads(int interOpThreads) { if (interOpThreads >= 0) { - this.statelessInterOpThreads = interOpThreads; + onnxModelOptions = onnxModelOptions.withInterOpThreads(interOpThreads); } } public Optional<Integer> getStatelessInterOpThreads() { - return Optional.ofNullable(statelessInterOpThreads); + return onnxModelOptions.interOpThreads(); } public void setStatelessIntraOpThreads(int intraOpThreads) { if (intraOpThreads >= 0) { - this.statelessIntraOpThreads = intraOpThreads; + onnxModelOptions = onnxModelOptions.withIntraOpThreads(intraOpThreads); } } public Optional<Integer> getStatelessIntraOpThreads() { - return Optional.ofNullable(statelessIntraOpThreads); + return onnxModelOptions.intraOpThreads(); } public void setGpuDevice(int deviceNumber, boolean required) { if (deviceNumber >= 0) { - this.gpuDevice = new GpuDevice(deviceNumber, required); + onnxModelOptions = onnxModelOptions.withGpuDevice(new OnnxModelOptions.GpuDevice(deviceNumber, required)); } } - public Optional<GpuDevice> getGpuDevice() { - return Optional.ofNullable(gpuDevice); + public Optional<OnnxModelOptions.GpuDevice> getGpuDevice() { + return onnxModelOptions.gpuDevice(); } - public record GpuDevice(int deviceNumber, boolean required) { - public GpuDevice { - if (deviceNumber < 0) throw new IllegalArgumentException("deviceNumber cannot be negative, got " + deviceNumber); - } - } + public OnnxModelOptions onnxModelOptions() { return onnxModelOptions; } } diff --git a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java index 87b79ddcdc3..388d2627224 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java @@ -171,7 +171,7 @@ public class RawRankProfile implements RankProfilesConfig.Producer { private final OptionalDouble approximateThreshold; private final OptionalDouble targetHitsMaxAdjustmentFactor; private final double rankScoreDropLimit; - private final boolean enableNestedMultivalueGrouping; + private final boolean alwaysMarkPhraseExpensive; /** * The rank type definitions used to derive settings for the native rank features @@ -186,6 +186,7 @@ public class RawRankProfile implements RankProfilesConfig.Producer { private RankingExpression secondPhaseRanking; private RankingExpression globalPhaseRanking; private final int globalPhaseRerankCount; + private final SerializationContext functionSerializationContext; /** * Creates a raw rank profile from the given rank profile @@ -212,7 +213,7 @@ public class RawRankProfile implements RankProfilesConfig.Producer { minHitsPerThread = compiled.getMinHitsPerThread(); numSearchPartitions = compiled.getNumSearchPartitions(); termwiseLimit = compiled.getTermwiseLimit().orElse(deployProperties.featureFlags().defaultTermwiseLimit()); - enableNestedMultivalueGrouping = deployProperties.featureFlags().enableNestedMultivalueGrouping(); + alwaysMarkPhraseExpensive = deployProperties.featureFlags().alwaysMarkPhraseExpensive(); postFilterThreshold = compiled.getPostFilterThreshold(); approximateThreshold = compiled.getApproximateThreshold(); targetHitsMaxAdjustmentFactor = compiled.getTargetHitsMaxAdjustmentFactor(); @@ -225,7 +226,7 @@ public class RawRankProfile implements RankProfilesConfig.Producer { List<ExpressionFunction> functionExpressions = functions.values().stream().map(RankProfile.RankingExpressionFunction::function).toList(); Map<String, String> functionProperties = new LinkedHashMap<>(); var typeContext = compiled.typeContext(queryProfiles); - SerializationContext functionSerializationContext = new SerializationContext(functionExpressions, Map.of(), typeContext); + this.functionSerializationContext = new SerializationContext(functionExpressions, Map.of(), typeContext); if (firstPhaseRanking != null) { functionProperties.putAll(firstPhaseRanking.getRankProperties(functionSerializationContext)); } @@ -265,8 +266,6 @@ public class RawRankProfile implements RankProfilesConfig.Producer { private void derivePropertiesAndFeaturesFromFunctions(Map<String, RankProfile.RankingExpressionFunction> functions, Map<String, String> functionProperties, SerializationContext functionContext) { - if (functions.isEmpty()) return; - replaceFunctionFeatures(summaryFeatures, functionContext); replaceFunctionFeatures(matchFeatures, functionContext); @@ -465,8 +464,8 @@ public class RawRankProfile implements RankProfilesConfig.Producer { if (termwiseLimit < 1.0) { properties.add(new Pair<>("vespa.matching.termwise_limit", termwiseLimit + "")); } - if (enableNestedMultivalueGrouping) { - properties.add(new Pair<>("vespa.temporary.enable_nested_multivalue_grouping", String.valueOf(enableNestedMultivalueGrouping))); + if (alwaysMarkPhraseExpensive) { + properties.add(new Pair<>("vespa.matching.always_mark_phrase_expensive", String.valueOf(alwaysMarkPhraseExpensive))); } if (postFilterThreshold.isPresent()) { properties.add(new Pair<>("vespa.matching.global_filter.upper_limit", String.valueOf(postFilterThreshold.getAsDouble()))); @@ -556,11 +555,12 @@ public class RawRankProfile implements RankProfilesConfig.Producer { if ("".equals(name)) name = phase; + String expressionAsString = expression.getRoot().toString(functionSerializationContext).toString(); if (expression.getRoot() instanceof ReferenceNode) { - properties.add(new Pair<>("vespa.rank." + phase, expression.getRoot().toString())); + properties.add(new Pair<>("vespa.rank." + phase, expressionAsString)); } else { properties.add(new Pair<>("vespa.rank." + phase, wrapInRankingExpression(name))); - properties.add(new Pair<>(RankingExpression.propertyName(name), expression.getRoot().toString())); + properties.add(new Pair<>(RankingExpression.propertyName(name), expressionAsString)); } return properties; } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java index a644382625b..67fb720b8c0 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java @@ -1,13 +1,15 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - package com.yahoo.vespa.model.container.component; import com.yahoo.config.ModelReference; +import com.yahoo.config.model.api.OnnxModelOptions; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.embedding.BertBaseEmbedderConfig; import com.yahoo.vespa.model.container.ApplicationContainerCluster; import org.w3c.dom.Element; +import static com.yahoo.embedding.BertBaseEmbedderConfig.OnnxExecutionMode; +import static com.yahoo.embedding.BertBaseEmbedderConfig.PoolingStrategy; import static com.yahoo.text.XML.getChildValue; import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME; @@ -16,6 +18,7 @@ import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATI */ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConfig.Producer { + private final OnnxModelOptions onnxModelOptions; private final ModelReference modelRef; private final ModelReference vocabRef; private final Integer maxTokens; @@ -23,18 +26,18 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf private final String transformerAttentionMask; private final String transformerTokenTypeIds; private final String transformerOutput; - private final Integer tranformerStartSequenceToken; + private final Integer transformerStartSequenceToken; private final Integer transformerEndSequenceToken; private final String poolingStrategy; - private final String onnxExecutionMode; - private final Integer onnxInteropThreads; - private final Integer onnxIntraopThreads; - private final Integer onnxGpuDevice; - public BertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) { super("ai.vespa.embedding.BertBaseEmbedder", INTEGRATION_BUNDLE_NAME, xml); var model = Model.fromXml(state, xml, "transformer-model").orElseThrow(); + this.onnxModelOptions = new OnnxModelOptions( + getChildValue(xml, "onnx-execution-mode"), + getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt), + getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt), + getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).map(OnnxModelOptions.GpuDevice::new)); modelRef = model.modelReference(); vocabRef = Model.fromXml(state, xml, "tokenizer-vocab").orElseThrow().modelReference(); maxTokens = getChildValue(xml, "max-tokens").map(Integer::parseInt).orElse(null); @@ -42,14 +45,10 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf transformerAttentionMask = getChildValue(xml, "transformer-attention-mask").orElse(null); transformerTokenTypeIds = getChildValue(xml, "transformer-token-type-ids").orElse(null); transformerOutput = getChildValue(xml, "transformer-output").orElse(null); - tranformerStartSequenceToken = getChildValue(xml, "transformer-start-sequence-token").map(Integer::parseInt).orElse(null); + transformerStartSequenceToken = getChildValue(xml, "transformer-start-sequence-token").map(Integer::parseInt).orElse(null); transformerEndSequenceToken = getChildValue(xml, "transformer-end-sequence-token").map(Integer::parseInt).orElse(null); poolingStrategy = getChildValue(xml, "pooling-strategy").orElse(null); - onnxExecutionMode = getChildValue(xml, "onnx-execution-mode").orElse(null); - onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null); - onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); - onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); - model.registerOnnxModelCost(cluster); + model.registerOnnxModelCost(cluster, onnxModelOptions); } @Override @@ -60,12 +59,13 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf if (transformerAttentionMask != null) b.transformerAttentionMask(transformerAttentionMask); if (transformerTokenTypeIds != null) b.transformerTokenTypeIds(transformerTokenTypeIds); if (transformerOutput != null) b.transformerOutput(transformerOutput); - if (tranformerStartSequenceToken != null) b.transformerStartSequenceToken(tranformerStartSequenceToken); + if (transformerStartSequenceToken != null) b.transformerStartSequenceToken(transformerStartSequenceToken); if (transformerEndSequenceToken != null) b.transformerEndSequenceToken(transformerEndSequenceToken); - if (poolingStrategy != null) b.poolingStrategy(BertBaseEmbedderConfig.PoolingStrategy.Enum.valueOf(poolingStrategy)); - if (onnxExecutionMode != null) b.onnxExecutionMode(BertBaseEmbedderConfig.OnnxExecutionMode.Enum.valueOf(onnxExecutionMode)); - if (onnxInteropThreads != null) b.onnxInterOpThreads(onnxInteropThreads); - if (onnxIntraopThreads != null) b.onnxIntraOpThreads(onnxIntraopThreads); - if (onnxGpuDevice != null) b.onnxGpuDevice(onnxGpuDevice); + if (poolingStrategy != null) b.poolingStrategy(PoolingStrategy.Enum.valueOf(poolingStrategy)); + onnxModelOptions.executionMode().ifPresent(value -> b.onnxExecutionMode(OnnxExecutionMode.Enum.valueOf(value))); + onnxModelOptions.interOpThreads().ifPresent(b::onnxInterOpThreads); + onnxModelOptions.intraOpThreads().ifPresent(b::onnxIntraOpThreads); + onnxModelOptions.gpuDevice().ifPresent(value -> b.onnxGpuDevice(value.deviceNumber())); } + } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java index ed56579988d..d22e6afc3d1 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java @@ -1,13 +1,14 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - package com.yahoo.vespa.model.container.component; import com.yahoo.config.ModelReference; +import com.yahoo.config.model.api.OnnxModelOptions; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.embedding.ColBertEmbedderConfig; import com.yahoo.vespa.model.container.ApplicationContainerCluster; import org.w3c.dom.Element; +import static com.yahoo.embedding.ColBertEmbedderConfig.TransformerExecutionMode; import static com.yahoo.text.XML.getChildValue; import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME; @@ -16,6 +17,8 @@ import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATI * @author bergum */ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderConfig.Producer { + + private final OnnxModelOptions onnxModelOptions; private final ModelReference modelRef; private final ModelReference vocabRef; @@ -31,14 +34,15 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo private final String transformerAttentionMask; private final String transformerOutput; - private final String onnxExecutionMode; - private final Integer onnxInteropThreads; - private final Integer onnxIntraopThreads; - private final Integer onnxGpuDevice; public ColBertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) { super("ai.vespa.embedding.ColBertEmbedder", INTEGRATION_BUNDLE_NAME, xml); var model = Model.fromXml(state, xml, "transformer-model").orElseThrow(); + this.onnxModelOptions = new OnnxModelOptions( + getChildValue(xml, "onnx-execution-mode"), + getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt), + getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt), + getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).map(OnnxModelOptions.GpuDevice::new)); modelRef = model.modelReference(); vocabRef = Model.fromXml(state, xml, "tokenizer-model") .map(Model::modelReference) @@ -52,11 +56,7 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo transformerInputIds = getChildValue(xml, "transformer-input-ids").orElse(null); transformerAttentionMask = getChildValue(xml, "transformer-attention-mask").orElse(null); transformerOutput = getChildValue(xml, "transformer-output").orElse(null); - onnxExecutionMode = getChildValue(xml, "onnx-execution-mode").orElse(null); - onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null); - onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); - onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); - model.registerOnnxModelCost(cluster); + model.registerOnnxModelCost(cluster, onnxModelOptions); } private static ModelReference resolveDefaultVocab(Model model, DeployState state) { @@ -79,10 +79,10 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo if (transformerStartSequenceToken != null) b.transformerStartSequenceToken(transformerStartSequenceToken); if (transformerEndSequenceToken != null) b.transformerEndSequenceToken(transformerEndSequenceToken); if (transformerMaskToken != null) b.transformerMaskToken(transformerMaskToken); - if (onnxExecutionMode != null) b.transformerExecutionMode( - ColBertEmbedderConfig.TransformerExecutionMode.Enum.valueOf(onnxExecutionMode)); - if (onnxInteropThreads != null) b.transformerInterOpThreads(onnxInteropThreads); - if (onnxIntraopThreads != null) b.transformerIntraOpThreads(onnxIntraopThreads); - if (onnxGpuDevice != null) b.transformerGpuDevice(onnxGpuDevice); + onnxModelOptions.executionMode().ifPresent(value -> b.transformerExecutionMode(TransformerExecutionMode.Enum.valueOf(value))); + onnxModelOptions.interOpThreads().ifPresent(b::transformerInterOpThreads); + onnxModelOptions.intraOpThreads().ifPresent(b::transformerIntraOpThreads); + onnxModelOptions.gpuDevice().ifPresent(value -> b.transformerGpuDevice(value.deviceNumber())); } + } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java index 31b86142445..d98c72ab3a4 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java @@ -1,13 +1,15 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - package com.yahoo.vespa.model.container.component; import com.yahoo.config.ModelReference; +import com.yahoo.config.model.api.OnnxModelOptions; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig; import com.yahoo.vespa.model.container.ApplicationContainerCluster; import org.w3c.dom.Element; +import static com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig.PoolingStrategy; +import static com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig.TransformerExecutionMode; import static com.yahoo.text.XML.getChildValue; import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME; @@ -16,6 +18,8 @@ import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATI * @author bjorncs */ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEmbedderConfig.Producer { + + private final OnnxModelOptions onnxModelOptions; private final ModelReference modelRef; private final ModelReference vocabRef; private final Integer maxTokens; @@ -24,15 +28,16 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm private final String transformerTokenTypeIds; private final String transformerOutput; private final Boolean normalize; - private final String onnxExecutionMode; - private final Integer onnxInteropThreads; - private final Integer onnxIntraopThreads; - private final Integer onnxGpuDevice; private final String poolingStrategy; public HuggingFaceEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) { super("ai.vespa.embedding.huggingface.HuggingFaceEmbedder", INTEGRATION_BUNDLE_NAME, xml); var model = Model.fromXml(state, xml, "transformer-model").orElseThrow(); + this.onnxModelOptions = new OnnxModelOptions( + getChildValue(xml, "onnx-execution-mode"), + getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt), + getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt), + getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).map(OnnxModelOptions.GpuDevice::new)); modelRef = model.modelReference(); vocabRef = Model.fromXml(state, xml, "tokenizer-model") .map(Model::modelReference) @@ -43,12 +48,8 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm transformerTokenTypeIds = getChildValue(xml, "transformer-token-type-ids").orElse(null); transformerOutput = getChildValue(xml, "transformer-output").orElse(null); normalize = getChildValue(xml, "normalize").map(Boolean::parseBoolean).orElse(null); - onnxExecutionMode = getChildValue(xml, "onnx-execution-mode").orElse(null); - onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null); - onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); - onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); poolingStrategy = getChildValue(xml, "pooling-strategy").orElse(null); - model.registerOnnxModelCost(cluster); + model.registerOnnxModelCost(cluster, onnxModelOptions); } private static ModelReference resolveDefaultVocab(Model model, DeployState state) { @@ -68,11 +69,11 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm if (transformerTokenTypeIds != null) b.transformerTokenTypeIds(transformerTokenTypeIds); if (transformerOutput != null) b.transformerOutput(transformerOutput); if (normalize != null) b.normalize(normalize); - if (onnxExecutionMode != null) b.transformerExecutionMode( - HuggingFaceEmbedderConfig.TransformerExecutionMode.Enum.valueOf(onnxExecutionMode)); - if (onnxInteropThreads != null) b.transformerInterOpThreads(onnxInteropThreads); - if (onnxIntraopThreads != null) b.transformerIntraOpThreads(onnxIntraopThreads); - if (onnxGpuDevice != null) b.transformerGpuDevice(onnxGpuDevice); - if (poolingStrategy != null) b.poolingStrategy(HuggingFaceEmbedderConfig.PoolingStrategy.Enum.valueOf(poolingStrategy)); + if (poolingStrategy != null) b.poolingStrategy(PoolingStrategy.Enum.valueOf(poolingStrategy)); + onnxModelOptions.executionMode().ifPresent(value -> b.transformerExecutionMode(TransformerExecutionMode.Enum.valueOf(value))); + onnxModelOptions.interOpThreads().ifPresent(b::transformerInterOpThreads); + onnxModelOptions.intraOpThreads().ifPresent(b::transformerIntraOpThreads); + onnxModelOptions.gpuDevice().ifPresent(value -> b.transformerGpuDevice(value.deviceNumber())); } + } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java index c5daf23d6f8..0d350242fd0 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.model.container.component; import com.yahoo.config.ModelReference; import com.yahoo.config.application.api.ApplicationFile; +import com.yahoo.config.model.api.OnnxModelOptions; import com.yahoo.config.model.builder.xml.XmlHelper; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.path.Path; @@ -54,10 +55,10 @@ class Model { return new Model(ds, model.getTagName(), modelId, url, path); } - void registerOnnxModelCost(ApplicationContainerCluster c) { + void registerOnnxModelCost(ApplicationContainerCluster c, OnnxModelOptions onnxModelOptions) { var resolvedUrl = resolvedUrl().orElse(null); - if (file != null) c.onnxModelCost().registerModel(file); - else if (resolvedUrl != null) c.onnxModelCost().registerModel(resolvedUrl); + if (file != null) c.onnxModelCost().registerModel(file, onnxModelOptions); + else if (resolvedUrl != null) c.onnxModelCost().registerModel(resolvedUrl, onnxModelOptions); } String name() { return paramName; } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java index d86d117f1d2..31468c05b99 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java @@ -52,11 +52,11 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains> private final List<SearchCluster> searchClusters = new LinkedList<>(); private final Collection<String> schemasWithGlobalPhase; private final boolean globalPhase; + private final ApplicationPackage app; private QueryProfiles queryProfiles; private SemanticRules semanticRules; private PageTemplates pageTemplates; - private ApplicationPackage app; public ContainerSearch(DeployState deployState, ApplicationContainerCluster cluster, SearchChains chains) { super(chains); @@ -102,7 +102,7 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains> if ( ! owningCluster.getComponentsMap().containsKey(factory.getComponentId())) { var onnxModels = documentDb.getDerivedConfiguration().getRankProfileList().getOnnxModels(); onnxModels.asMap().forEach( - (__, model) -> owningCluster.onnxModelCost().registerModel(app.getFile(model.getFilePath()))); + (__, model) -> owningCluster.onnxModelCost().registerModel(app.getFile(model.getFilePath()), model.onnxModelOptions())); owningCluster.addComponent(factory); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index 18020f5df5d..5ffd34c6557 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -800,7 +800,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { !container.getHostResource().realResources().gpuResources().isZero()); onnxModel.setGpuDevice(gpuDevice, hasGpu); } - cluster.onnxModelCost().registerModel(context.getApplicationPackage().getFile(onnxModel.getFilePath())); + cluster.onnxModelCost().registerModel(context.getApplicationPackage().getFile(onnxModel.getFilePath()), onnxModel.onnxModelOptions()); } cluster.setModelEvaluation(new ContainerModelEvaluation(cluster, profiles, models)); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/storagecluster/FileStorProducer.java b/config-model/src/main/java/com/yahoo/vespa/model/content/storagecluster/FileStorProducer.java index c8f5be71f3c..18b9129cead 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/storagecluster/FileStorProducer.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/storagecluster/FileStorProducer.java @@ -47,6 +47,7 @@ public class FileStorProducer implements StorFilestorConfig.Producer { private final int responseNumThreads; private final StorFilestorConfig.Response_sequencer_type.Enum responseSequencerType; private final boolean useAsyncMessageHandlingOnSchedule; + private final boolean usePerDocumentThrottledDeleteBucket; private static StorFilestorConfig.Response_sequencer_type.Enum convertResponseSequencerType(String sequencerType) { try { @@ -62,6 +63,7 @@ public class FileStorProducer implements StorFilestorConfig.Producer { this.responseNumThreads = featureFlags.defaultNumResponseThreads(); this.responseSequencerType = convertResponseSequencerType(featureFlags.responseSequencerType()); this.useAsyncMessageHandlingOnSchedule = featureFlags.useAsyncMessageHandlingOnSchedule(); + this.usePerDocumentThrottledDeleteBucket = featureFlags.usePerDocumentThrottledDeleteBucket(); } @Override @@ -73,6 +75,7 @@ public class FileStorProducer implements StorFilestorConfig.Producer { builder.num_response_threads(responseNumThreads); builder.response_sequencer_type(responseSequencerType); builder.use_async_message_handling_on_schedule(useAsyncMessageHandlingOnSchedule); + builder.use_per_document_throttled_delete_bucket(usePerDocumentThrottledDeleteBucket); var throttleBuilder = new StorFilestorConfig.Async_operation_throttler.Builder(); builder.async_operation_throttler(throttleBuilder); } |