diff options
author | Harald Musum <musum@yahooinc.com> | 2024-04-22 10:01:22 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-22 10:01:22 +0200 |
commit | 67b83227941ad3327207b5dbbdc9ebbf72f684f6 (patch) | |
tree | b0c03219f23462b21c63f2915faab0e7c5c754c8 | |
parent | ecb21ea7c3d2ce8713e2e94163233113cea2800c (diff) | |
parent | 0e37da4fcd053aea6739b97b77312e3928fe4e8a (diff) |
Merge pull request #30988 from vespa-engine/lesters/llm-minimum-threads
Set minimum number of threads to 1
-rw-r--r-- | model-integration/src/main/java/ai/vespa/llm/clients/LocalLLM.java | 3 |
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/model-integration/src/main/java/ai/vespa/llm/clients/LocalLLM.java b/model-integration/src/main/java/ai/vespa/llm/clients/LocalLLM.java
index fd1b8b700c8..aa7c071b93a 100644
--- a/model-integration/src/main/java/ai/vespa/llm/clients/LocalLLM.java
+++ b/model-integration/src/main/java/ai/vespa/llm/clients/LocalLLM.java
@@ -9,7 +9,6 @@ import com.yahoo.component.AbstractComponent;
 import com.yahoo.component.annotation.Inject;
 import de.kherud.llama.LlamaModel;
 import de.kherud.llama.ModelParameters;
-import de.kherud.llama.args.LogFormat;

 import java.util.ArrayList;
 import java.util.List;
@@ -43,7 +42,7 @@ public class LocalLLM extends AbstractComponent implements LanguageModel {
         maxTokens = config.maxTokens();

         // Only used if GPU is not used
-        var defaultThreadCount = Runtime.getRuntime().availableProcessors() - 2;
+        var defaultThreadCount = Math.max(Runtime.getRuntime().availableProcessors() - 2, 1);

         var modelFile = config.model().toFile().getAbsolutePath();
         var modelParams = new ModelParameters()