summary | refs | log | tree | commit | diff | stats
path: root/model-integration
diff options
context:
space:
mode:
author: Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-05-05 17:10:13 +0200
committer: Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-05-05 17:10:13 +0200
commit: aa81b9f71c38bd5802dde682df531a0b270979b0 (patch)
tree: b1b97765cf17a632a355e95a96af6e3b3b68e595 /model-integration
parent: 710c04989b94d51a772c635fffcc93b8b8a52895 (diff)
Make thread pool size configurable
Diffstat (limited to 'model-integration')
-rw-r--r-- model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java | 9
-rw-r--r-- model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java | 2
-rw-r--r-- model-integration/src/main/java/ai/vespa/llm/generation/Generator.java | 11
-rw-r--r-- model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluatorOptions.java | 14
-rw-r--r-- model-integration/src/main/resources/configdefinitions/embedding.huggingface.hugging-face-embedder.def | 5
5 files changed, 24 insertions, 17 deletions
diff --git a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java
index bf56d233f89..8e5211ccff1 100644
--- a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java
+++ b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java
@@ -57,8 +57,7 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder {
OnnxEvaluatorOptions options = new OnnxEvaluatorOptions();
options.setExecutionMode(config.onnxExecutionMode().toString());
- options.setInterOpThreads(modifyThreadCount(config.onnxInterOpThreads()));
- options.setIntraOpThreads(modifyThreadCount(config.onnxIntraOpThreads()));
+ options.setThreads(config.onnxInterOpThreads(), config.onnxIntraOpThreads());
tokenizer = new WordPieceEmbedder.Builder(config.tokenizerVocab().toString()).build();
this.evaluator = onnx.evaluatorOf(config.transformerModel().toString(), options);
@@ -170,10 +169,4 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder {
return d.map((x) -> 0); // Assume only one token type
}
- private int modifyThreadCount(int numThreads) {
- if (numThreads >= 0)
- return numThreads;
- return Math.max(1, (int) Math.ceil(((double) Runtime.getRuntime().availableProcessors()) / (-1 * numThreads)));
- }
-
}
diff --git a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
index 7715ae2c896..21dd326689c 100644
--- a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
+++ b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
@@ -41,6 +41,8 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder {
var onnxOpts = new OnnxEvaluatorOptions();
if (config.transformerGpuDevice() >= 0)
onnxOpts.setGpuDevice(config.transformerGpuDevice(), config.transformerGpuRequired());
+ onnxOpts.setExecutionMode(config.transformerExecutionMode().toString());
+ onnxOpts.setThreads(config.transformerInterOpThreads(), config.transformerIntraOpThreads());
evaluator = onnx.evaluatorOf(config.transformerModel().toString(), onnxOpts);
validateModel();
}
diff --git a/model-integration/src/main/java/ai/vespa/llm/generation/Generator.java b/model-integration/src/main/java/ai/vespa/llm/generation/Generator.java
index f20925b86ee..64dafee646f 100644
--- a/model-integration/src/main/java/ai/vespa/llm/generation/Generator.java
+++ b/model-integration/src/main/java/ai/vespa/llm/generation/Generator.java
@@ -62,8 +62,7 @@ public class Generator extends AbstractComponent {
OnnxEvaluatorOptions encoderOptions = new OnnxEvaluatorOptions();
encoderOptions.setExecutionMode(config.encoderOnnxExecutionMode().toString());
- encoderOptions.setInterOpThreads(modifyThreadCount(config.encoderOnnxInterOpThreads()));
- encoderOptions.setIntraOpThreads(modifyThreadCount(config.encoderOnnxIntraOpThreads()));
+ encoderOptions.setThreads(config.encoderOnnxInterOpThreads(), config.encoderOnnxIntraOpThreads());
encoder = onnx.evaluatorOf(config.encoderModel().toString(), encoderOptions);
@@ -75,8 +74,7 @@ public class Generator extends AbstractComponent {
OnnxEvaluatorOptions decoderOptions = new OnnxEvaluatorOptions();
decoderOptions.setExecutionMode(config.decoderOnnxExecutionMode().toString());
- decoderOptions.setInterOpThreads(modifyThreadCount(config.decoderOnnxInterOpThreads()));
- decoderOptions.setIntraOpThreads(modifyThreadCount(config.decoderOnnxIntraOpThreads()));
+ decoderOptions.setThreads(config.decoderOnnxInterOpThreads(), config.decoderOnnxIntraOpThreads());
decoder = onnx.evaluatorOf(config.decoderModel().toString(), decoderOptions);
@@ -224,9 +222,4 @@ public class Generator extends AbstractComponent {
}
}
- private int modifyThreadCount(int numThreads) {
- if (numThreads >= 0)
- return numThreads;
- return Math.max(1, (int) Math.ceil(((double) Runtime.getRuntime().availableProcessors()) / (-1 * numThreads)));
- }
}
diff --git a/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluatorOptions.java b/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluatorOptions.java
index a980ca984ec..4a35f4275fa 100644
--- a/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluatorOptions.java
+++ b/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluatorOptions.java
@@ -67,6 +67,20 @@ public class OnnxEvaluatorOptions {
}
}
+ /**
+ * Sets the number of threads for inter and intra op execution.
+ * A negative number is interpreted as an inverse scaling factor <code>threads=CPU/-n</code>
+ */
+ public void setThreads(int interOp, int intraOp) {
+ interOpThreads = calculateThreads(interOp);
+ intraOpThreads = calculateThreads(intraOp);
+ }
+
+ private static int calculateThreads(int t) {
+ if (t >= 0) return t;
+ return Math.max(1, (int) Math.ceil(-1d * Runtime.getRuntime().availableProcessors() / t));
+ }
+
public void setGpuDevice(int deviceNumber, boolean required) {
this.gpuDeviceNumber = deviceNumber;
this.gpuDeviceRequired = required;
diff --git a/model-integration/src/main/resources/configdefinitions/embedding.huggingface.hugging-face-embedder.def b/model-integration/src/main/resources/configdefinitions/embedding.huggingface.hugging-face-embedder.def
index 3eac14afc12..adc8f653168 100644
--- a/model-integration/src/main/resources/configdefinitions/embedding.huggingface.hugging-face-embedder.def
+++ b/model-integration/src/main/resources/configdefinitions/embedding.huggingface.hugging-face-embedder.def
@@ -23,3 +23,8 @@ transformerGpuRequired bool default=false
# Normalize tensors from tokenizer
normalize bool default=false
+
+# Settings for ONNX model evaluation
+transformerExecutionMode enum { parallel, sequential } default=sequential
+transformerInterOpThreads int default=1
+transformerIntraOpThreads int default=-4