aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainer.java27
-rw-r--r--config-model/src/test/derived/globalphase_onnx_inside/onnx-models.cfg1
-rw-r--r--config-model/src/test/derived/globalphase_token_functions/onnx-models.cfg1
-rw-r--r--config-model/src/test/derived/vector_constant/onnx-models.cfg1
-rw-r--r--model-evaluation/src/main/java/ai/vespa/models/evaluation/ModelsEvaluator.java2
-rw-r--r--model-integration/pom.xml6
-rw-r--r--model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java1
-rw-r--r--model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java2
-rw-r--r--model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluatorOptions.java17
-rw-r--r--model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxRuntime.java26
-rw-r--r--model-integration/src/main/resources/configdefinitions/embedding.bert-base-embedder.def2
-rw-r--r--model-integration/src/main/resources/configdefinitions/embedding.huggingface.hugging-face-embedder.def4
-rw-r--r--searchcore/src/vespa/searchcore/config/onnx-models.def3
13 files changed, 17 insertions, 76 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainer.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainer.java
index 9e21fd2d23a..f901bf3c826 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainer.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainer.java
@@ -9,7 +9,6 @@ import com.yahoo.config.model.producer.TreeConfigProducer;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.search.config.QrStartConfig;
-import com.yahoo.vespa.config.search.core.OnnxModelsConfig;
import com.yahoo.vespa.model.container.component.SimpleComponent;
import java.time.Duration;
import java.util.Optional;
@@ -21,7 +20,6 @@ import java.util.Optional;
*/
public final class ApplicationContainer extends Container implements
QrStartConfig.Producer,
- OnnxModelsConfig.Producer,
ZookeeperServerConfig.Producer {
private final boolean isHostedVespa;
@@ -44,15 +42,12 @@ public final class ApplicationContainer extends Container implements
@Override
public void getConfig(QrStartConfig.Builder builder) {
- realResources().ifPresent(r -> builder.jvm.availableProcessors(Math.max(2, (int) Math.ceil(r.vcpu()))));
- }
-
- @Override
- public void getConfig(OnnxModelsConfig.Builder builder) {
- realResources().ifPresent(r -> {
- int count = r.gpuResources().count();
- if (count >= 0) builder.gpu.count(count);
- });
+ if (getHostResource() != null) {
+ NodeResources nodeResources = getHostResource().realResources();
+ if ( ! nodeResources.isUnspecified()) {
+ builder.jvm.availableProcessors(Math.max(2, (int)Math.ceil(nodeResources.vcpu())));
+ }
+ }
}
@Override
@@ -89,14 +84,4 @@ public final class ApplicationContainer extends Container implements
@Override public Optional<String> getPreShutdownCommand() { return Optional.of(prepareStopCommand(Duration.ofMinutes(6))); }
- private Optional<NodeResources> realResources() {
- if (getHostResource() != null) {
- NodeResources nodeResources = getHostResource().realResources();
- if ( ! nodeResources.isUnspecified()) {
- return Optional.of(nodeResources);
- }
- }
- return Optional.empty();
- }
-
}
diff --git a/config-model/src/test/derived/globalphase_onnx_inside/onnx-models.cfg b/config-model/src/test/derived/globalphase_onnx_inside/onnx-models.cfg
index 99f65336794..d63e85e2f19 100644
--- a/config-model/src/test/derived/globalphase_onnx_inside/onnx-models.cfg
+++ b/config-model/src/test/derived/globalphase_onnx_inside/onnx-models.cfg
@@ -1,4 +1,3 @@
-gpu.count -1
model[].name "direct"
model[].fileref "files/ax_plus_b.onnx"
model[].input[].name "vector_B"
diff --git a/config-model/src/test/derived/globalphase_token_functions/onnx-models.cfg b/config-model/src/test/derived/globalphase_token_functions/onnx-models.cfg
index cea4c065014..6283159c324 100644
--- a/config-model/src/test/derived/globalphase_token_functions/onnx-models.cfg
+++ b/config-model/src/test/derived/globalphase_token_functions/onnx-models.cfg
@@ -1,4 +1,3 @@
-gpu.count -1
model[].name "my_ranking_model"
model[].fileref "files/ranking_model.onnx"
model[].input[].name "input_ids"
diff --git a/config-model/src/test/derived/vector_constant/onnx-models.cfg b/config-model/src/test/derived/vector_constant/onnx-models.cfg
index 1dcaf0e1bd6..4c52b72b519 100644
--- a/config-model/src/test/derived/vector_constant/onnx-models.cfg
+++ b/config-model/src/test/derived/vector_constant/onnx-models.cfg
@@ -1,4 +1,3 @@
-gpu.count -1
model[].name "inside"
model[].fileref "ax_plus_b.onnx"
model[].input[].name "vector_B"
diff --git a/model-evaluation/src/main/java/ai/vespa/models/evaluation/ModelsEvaluator.java b/model-evaluation/src/main/java/ai/vespa/models/evaluation/ModelsEvaluator.java
index 303d2acd79e..fd5306f9add 100644
--- a/model-evaluation/src/main/java/ai/vespa/models/evaluation/ModelsEvaluator.java
+++ b/model-evaluation/src/main/java/ai/vespa/models/evaluation/ModelsEvaluator.java
@@ -41,7 +41,7 @@ public class ModelsEvaluator extends AbstractComponent {
RankingExpressionsConfig expressionsConfig,
OnnxModelsConfig onnxModelsConfig,
FileAcquirer fileAcquirer) {
- this(config, constantsConfig, expressionsConfig, onnxModelsConfig, fileAcquirer, new OnnxRuntime(onnxModelsConfig));
+ this(config, constantsConfig, expressionsConfig, onnxModelsConfig, fileAcquirer, new OnnxRuntime());
}
public ModelsEvaluator(RankProfilesConfigImporter importer,
diff --git a/model-integration/pom.xml b/model-integration/pom.xml
index d5d7ae534a4..c27ed9d2c31 100644
--- a/model-integration/pom.xml
+++ b/model-integration/pom.xml
@@ -40,12 +40,6 @@
</dependency>
<dependency>
<groupId>com.yahoo.vespa</groupId>
- <artifactId>searchcore</artifactId>
- <version>${project.version}</version>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>com.yahoo.vespa</groupId>
<artifactId>searchlib</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
diff --git a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java
index b172ef7beee..8e5211ccff1 100644
--- a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java
+++ b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java
@@ -58,7 +58,6 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder {
OnnxEvaluatorOptions options = new OnnxEvaluatorOptions();
options.setExecutionMode(config.onnxExecutionMode().toString());
options.setThreads(config.onnxInterOpThreads(), config.onnxIntraOpThreads());
- if (config.onnxGpuDevice() >= 0) options.setGpuDevice(config.onnxGpuDevice());
tokenizer = new WordPieceEmbedder.Builder(config.tokenizerVocab().toString()).build();
this.evaluator = onnx.evaluatorOf(config.transformerModel().toString(), options);
diff --git a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
index cc13254385b..21dd326689c 100644
--- a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
+++ b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
@@ -40,7 +40,7 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder {
tokenizer = new HuggingFaceTokenizer(Paths.get(config.tokenizerPath().toString()));
var onnxOpts = new OnnxEvaluatorOptions();
if (config.transformerGpuDevice() >= 0)
- onnxOpts.setGpuDevice(config.transformerGpuDevice());
+ onnxOpts.setGpuDevice(config.transformerGpuDevice(), config.transformerGpuRequired());
onnxOpts.setExecutionMode(config.transformerExecutionMode().toString());
onnxOpts.setThreads(config.transformerInterOpThreads(), config.transformerIntraOpThreads());
evaluator = onnx.evaluatorOf(config.transformerModel().toString(), onnxOpts);
diff --git a/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluatorOptions.java b/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluatorOptions.java
index 6048be8aca9..4a35f4275fa 100644
--- a/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluatorOptions.java
+++ b/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluatorOptions.java
@@ -17,7 +17,7 @@ import static ai.onnxruntime.OrtSession.SessionOptions.ExecutionMode.SEQUENTIAL;
*/
public class OnnxEvaluatorOptions {
- private OrtSession.SessionOptions.OptLevel optimizationLevel;
+ private final OrtSession.SessionOptions.OptLevel optimizationLevel;
private OrtSession.SessionOptions.ExecutionMode executionMode;
private int interOpThreads;
private int intraOpThreads;
@@ -86,8 +86,6 @@ public class OnnxEvaluatorOptions {
this.gpuDeviceRequired = required;
}
- public void setGpuDevice(int deviceNumber) { gpuDeviceNumber = deviceNumber; }
-
public boolean requestingGpu() {
return gpuDeviceNumber > -1;
}
@@ -96,19 +94,6 @@ public class OnnxEvaluatorOptions {
return gpuDeviceRequired;
}
- public int gpuDeviceNumber() { return gpuDeviceNumber; }
-
- public OnnxEvaluatorOptions copy() {
- var copy = new OnnxEvaluatorOptions();
- copy.gpuDeviceNumber = gpuDeviceNumber;
- copy.gpuDeviceRequired = gpuDeviceRequired;
- copy.executionMode = executionMode;
- copy.interOpThreads = interOpThreads;
- copy.intraOpThreads = intraOpThreads;
- copy.optimizationLevel = optimizationLevel;
- return copy;
- }
-
@Override
public boolean equals(Object o) {
if (this == o) return true;
diff --git a/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxRuntime.java b/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxRuntime.java
index ab44a2ae33f..ece1db55c1e 100644
--- a/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxRuntime.java
+++ b/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxRuntime.java
@@ -10,7 +10,6 @@ import com.yahoo.component.annotation.Inject;
import com.yahoo.jdisc.ResourceReference;
import com.yahoo.jdisc.refcount.DebugReferencesWithStack;
import com.yahoo.jdisc.refcount.References;
-import com.yahoo.vespa.config.search.core.OnnxModelsConfig;
import net.jpountz.xxhash.XXHashFactory;
import java.io.IOException;
@@ -53,24 +52,17 @@ public class OnnxRuntime extends AbstractComponent {
private final Object monitor = new Object();
private final Map<OrtSessionId, SharedOrtSession> sessions = new HashMap<>();
private final OrtSessionFactory factory;
- private final int gpusAvailable;
- // For test use only
- public OnnxRuntime() { this(defaultFactory, new OnnxModelsConfig.Builder().build()); }
+ @Inject public OnnxRuntime() { this(defaultFactory); }
- @Inject public OnnxRuntime(OnnxModelsConfig cfg) { this(defaultFactory, cfg); }
-
- OnnxRuntime(OrtSessionFactory factory, OnnxModelsConfig cfg) {
- this.factory = factory;
- this.gpusAvailable = cfg.gpu().count();
- }
+ OnnxRuntime(OrtSessionFactory factory) { this.factory = factory; }
public OnnxEvaluator evaluatorOf(byte[] model) {
return new OnnxEvaluator(model, null, this);
}
public OnnxEvaluator evaluatorOf(byte[] model, OnnxEvaluatorOptions options) {
- return new OnnxEvaluator(model, overrideOptions(options), this);
+ return new OnnxEvaluator(model, options, this);
}
public OnnxEvaluator evaluatorOf(String modelPath) {
@@ -78,7 +70,7 @@ public class OnnxRuntime extends AbstractComponent {
}
public OnnxEvaluator evaluatorOf(String modelPath, OnnxEvaluatorOptions options) {
- return new OnnxEvaluator(modelPath, overrideOptions(options), this);
+ return new OnnxEvaluator(modelPath, options, this);
}
public static OrtEnvironment ortEnvironment() {
@@ -175,16 +167,6 @@ public class OnnxRuntime extends AbstractComponent {
}
}
- private OnnxEvaluatorOptions overrideOptions(OnnxEvaluatorOptions opts) {
- // Set GPU device required if GPU requested and GPUs are available on system
- if (gpusAvailable > 0 && opts.requestingGpu() && !opts.gpuDeviceRequired()) {
- var copy = opts.copy();
- copy.setGpuDevice(opts.gpuDeviceNumber(), true);
- return copy;
- }
- return opts;
- }
-
int sessionsCached() { synchronized(monitor) { return sessions.size(); } }
static class ReferencedOrtSession implements AutoCloseable {
diff --git a/model-integration/src/main/resources/configdefinitions/embedding.bert-base-embedder.def b/model-integration/src/main/resources/configdefinitions/embedding.bert-base-embedder.def
index e37a33d3b81..ef42d81e1fe 100644
--- a/model-integration/src/main/resources/configdefinitions/embedding.bert-base-embedder.def
+++ b/model-integration/src/main/resources/configdefinitions/embedding.bert-base-embedder.def
@@ -28,4 +28,4 @@ transformerOutput string default=output_0
onnxExecutionMode enum { parallel, sequential } default=sequential
onnxInterOpThreads int default=1
onnxIntraOpThreads int default=-4 # n=number of threads -> n<0: CPUs/(-n), n==0: CPUs, n>0: n
-onnxGpuDevice int default=-1
+
diff --git a/model-integration/src/main/resources/configdefinitions/embedding.huggingface.hugging-face-embedder.def b/model-integration/src/main/resources/configdefinitions/embedding.huggingface.hugging-face-embedder.def
index 584f23046ba..adc8f653168 100644
--- a/model-integration/src/main/resources/configdefinitions/embedding.huggingface.hugging-face-embedder.def
+++ b/model-integration/src/main/resources/configdefinitions/embedding.huggingface.hugging-face-embedder.def
@@ -17,6 +17,9 @@ transformerAttentionMask string default=attention_mask
# Output name
transformerOutput string default=last_hidden_state
+# GPU configuration
+transformerGpuDevice int default=-1
+transformerGpuRequired bool default=false
# Normalize tensors from tokenizer
normalize bool default=false
@@ -25,4 +28,3 @@ normalize bool default=false
transformerExecutionMode enum { parallel, sequential } default=sequential
transformerInterOpThreads int default=1
transformerIntraOpThreads int default=-4
-transformerGpuDevice int default=-1
diff --git a/searchcore/src/vespa/searchcore/config/onnx-models.def b/searchcore/src/vespa/searchcore/config/onnx-models.def
index 67a83e2afb7..b8f5d319075 100644
--- a/searchcore/src/vespa/searchcore/config/onnx-models.def
+++ b/searchcore/src/vespa/searchcore/config/onnx-models.def
@@ -1,9 +1,6 @@
# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
namespace=vespa.config.search.core
-# Number of GPUs available for ONNX evaluation, or -1 if unknown.
-gpu.count int default=-1
-
model[].name string
model[].fileref file
model[].input[].name string