diff options
author | Bjørn Christian Seime <bjorncs@yahooinc.com> | 2023-05-05 17:10:13 +0200 |
---|---|---|
committer | Bjørn Christian Seime <bjorncs@yahooinc.com> | 2023-05-05 17:10:13 +0200 |
commit | aa81b9f71c38bd5802dde682df531a0b270979b0 (patch) | |
tree | b1b97765cf17a632a355e95a96af6e3b3b68e595 /model-integration/src/main/java | |
parent | 710c04989b94d51a772c635fffcc93b8b8a52895 (diff) |
Make thread pool size configurable
Diffstat (limited to 'model-integration/src/main/java')
4 files changed, 19 insertions, 17 deletions
diff --git a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java index bf56d233f89..8e5211ccff1 100644 --- a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java +++ b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java @@ -57,8 +57,7 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder { OnnxEvaluatorOptions options = new OnnxEvaluatorOptions(); options.setExecutionMode(config.onnxExecutionMode().toString()); - options.setInterOpThreads(modifyThreadCount(config.onnxInterOpThreads())); - options.setIntraOpThreads(modifyThreadCount(config.onnxIntraOpThreads())); + options.setThreads(config.onnxInterOpThreads(), config.onnxIntraOpThreads()); tokenizer = new WordPieceEmbedder.Builder(config.tokenizerVocab().toString()).build(); this.evaluator = onnx.evaluatorOf(config.transformerModel().toString(), options); @@ -170,10 +169,4 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder { return d.map((x) -> 0); // Assume only one token type } - private int modifyThreadCount(int numThreads) { - if (numThreads >= 0) - return numThreads; - return Math.max(1, (int) Math.ceil(((double) Runtime.getRuntime().availableProcessors()) / (-1 * numThreads))); - } - } diff --git a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java index 7715ae2c896..21dd326689c 100644 --- a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java +++ b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java @@ -41,6 +41,8 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder { var onnxOpts = new OnnxEvaluatorOptions(); if (config.transformerGpuDevice() >= 0) onnxOpts.setGpuDevice(config.transformerGpuDevice(), config.transformerGpuRequired()); + onnxOpts.setExecutionMode(config.transformerExecutionMode().toString()); + onnxOpts.setThreads(config.transformerInterOpThreads(), config.transformerIntraOpThreads()); evaluator = onnx.evaluatorOf(config.transformerModel().toString(), onnxOpts); validateModel(); } diff --git a/model-integration/src/main/java/ai/vespa/llm/generation/Generator.java b/model-integration/src/main/java/ai/vespa/llm/generation/Generator.java index f20925b86ee..64dafee646f 100644 --- a/model-integration/src/main/java/ai/vespa/llm/generation/Generator.java +++ b/model-integration/src/main/java/ai/vespa/llm/generation/Generator.java @@ -62,8 +62,7 @@ public class Generator extends AbstractComponent { OnnxEvaluatorOptions encoderOptions = new OnnxEvaluatorOptions(); encoderOptions.setExecutionMode(config.encoderOnnxExecutionMode().toString()); - encoderOptions.setInterOpThreads(modifyThreadCount(config.encoderOnnxInterOpThreads())); - encoderOptions.setIntraOpThreads(modifyThreadCount(config.encoderOnnxIntraOpThreads())); + encoderOptions.setThreads(config.encoderOnnxInterOpThreads(), config.encoderOnnxIntraOpThreads()); encoder = onnx.evaluatorOf(config.encoderModel().toString(), encoderOptions); @@ -75,8 +74,7 @@ public class Generator extends AbstractComponent { OnnxEvaluatorOptions decoderOptions = new OnnxEvaluatorOptions(); decoderOptions.setExecutionMode(config.decoderOnnxExecutionMode().toString()); - decoderOptions.setInterOpThreads(modifyThreadCount(config.decoderOnnxInterOpThreads())); - decoderOptions.setIntraOpThreads(modifyThreadCount(config.decoderOnnxIntraOpThreads())); + decoderOptions.setThreads(config.decoderOnnxInterOpThreads(), config.decoderOnnxIntraOpThreads()); decoder = onnx.evaluatorOf(config.decoderModel().toString(), decoderOptions); @@ -224,9 +222,4 @@ public class Generator extends AbstractComponent { } } - private int modifyThreadCount(int numThreads) { - if (numThreads >= 0) - return numThreads; - return Math.max(1, (int) Math.ceil(((double) Runtime.getRuntime().availableProcessors()) / (-1 * numThreads))); - } } diff --git a/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluatorOptions.java b/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluatorOptions.java index a980ca984ec..4a35f4275fa 100644 --- a/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluatorOptions.java +++ b/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluatorOptions.java @@ -67,6 +67,20 @@ public class OnnxEvaluatorOptions { } } + /** + * Sets the number of threads for inter and intra op execution. + * A negative number is interpreted as an inverse scaling factor <code>threads=CPU/-n</code> + */ + public void setThreads(int interOp, int intraOp) { + interOpThreads = calculateThreads(interOp); + intraOpThreads = calculateThreads(intraOp); + } + + private static int calculateThreads(int t) { + if (t >= 0) return t; + return Math.max(1, (int) Math.ceil(-1d * Runtime.getRuntime().availableProcessors() / t)); + } + public void setGpuDevice(int deviceNumber, boolean required) { this.gpuDeviceNumber = deviceNumber; this.gpuDeviceRequired = required; |