author     Lester Solbakken <lester.solbakken@gmail.com>   2024-04-11 14:04:32 +0200
committer  Lester Solbakken <lester.solbakken@gmail.com>   2024-04-11 14:04:32 +0200
commit     a2b8ee9591ab36ccbe64c2dc31bfd84fa4caffb3 (patch)
tree       269a9f0525cc2f027f18a49cd45faafd32f1a901 /container-search/src/main
parent     20a9ae9b98f15cdbc24253aa6e9aa585b2759a3a (diff)
Use 'model' config type for LLM models
Diffstat (limited to 'container-search/src/main')
-rw-r--r--  container-search/src/main/java/ai/vespa/llm/clients/LocalLLM.java          | 12
-rwxr-xr-x  container-search/src/main/resources/configdefinitions/llm-local-client.def |  7
2 files changed, 3 insertions, 16 deletions
diff --git a/container-search/src/main/java/ai/vespa/llm/clients/LocalLLM.java b/container-search/src/main/java/ai/vespa/llm/clients/LocalLLM.java
index 3b99e5f0a09..1e204d29a19 100644
--- a/container-search/src/main/java/ai/vespa/llm/clients/LocalLLM.java
+++ b/container-search/src/main/java/ai/vespa/llm/clients/LocalLLM.java
@@ -44,7 +44,7 @@ public class LocalLLM extends AbstractComponent implements LanguageModel {
 
         // Only used if GPU is not used
         var defaultThreadCount = Runtime.getRuntime().availableProcessors() - 2;
-        var modelFile = selectModelFile(config);
+        var modelFile = config.model().toFile().getAbsolutePath();
         var modelParams = new ModelParameters()
                 .setModelFilePath(modelFile)
                 .setContinuousBatching(true)
@@ -69,16 +69,6 @@ public class LocalLLM extends AbstractComponent implements LanguageModel {
                 new ThreadPoolExecutor.AbortPolicy());
     }
 
-    private String selectModelFile(LlmLocalClientConfig config) {
-        if ( ! config.localLlmFile().isEmpty()) { // primarily for testing
-            return config.localLlmFile();
-        } else if (config.modelUrl().exists()) {
-            return config.modelUrl().getAbsolutePath();
-        }
-        throw new IllegalArgumentException("Local LLM model not set. " +
-                "Either set 'localLlmFile' or 'modelUrl' in 'llm-local-client' config.");
-    }
-
     @Override
     public void deconstruct() {
         logger.info("Closing LLM model...");
diff --git a/container-search/src/main/resources/configdefinitions/llm-local-client.def b/container-search/src/main/resources/configdefinitions/llm-local-client.def
index 08eab19f0f8..c06c24b33e5 100755
--- a/container-search/src/main/resources/configdefinitions/llm-local-client.def
+++ b/container-search/src/main/resources/configdefinitions/llm-local-client.def
@@ -1,11 +1,8 @@
 # Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 package=ai.vespa.llm.clients
 
-# Url to the model to use
-modelUrl url default=""
-
-# Local file path to the model to use - will have precedence over model_url if set - mostly for testing
-localLlmFile string default=""
+# The LLM model to use
+model model
 
 # Maximum number of requests to handle in parallel pr container node
 parallelRequests int default=10
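
Note on the change: Vespa's 'model' config type lets the config system resolve a model reference (typically given as a model-id, url, or path in services.xml) to a local file before the component is constructed, which is why the selectModelFile() fallback logic can be deleted outright. Below is a minimal sketch of the consumer side; LlmLocalClientConfig here is a hypothetical stand-in for the class Vespa generates from llm-local-client.def, and the assumption that model() returns a resolved java.nio.file.Path is inferred from the config.model().toFile() call in the diff above.

import java.nio.file.Path;

class ModelConfigSketch {

    // Hypothetical stand-in for the generated LlmLocalClientConfig class;
    // the real one is produced by Vespa's config generator from
    // llm-local-client.def. We assume the 'model' field resolves to a
    // java.nio.file.Path, matching config.model().toFile() in the diff.
    interface LlmLocalClientConfig {
        Path model();
        int parallelRequests();
    }

    // Equivalent of the new constructor line in LocalLLM: by the time the
    // component is constructed, the model has already been resolved to a
    // local file, so no url-vs-file precedence logic is needed here.
    static String modelFilePath(LlmLocalClientConfig config) {
        return config.model().toFile().getAbsolutePath();
    }
}

The practical effect is that a missing or unresolvable model should now surface as a config resolution error at deployment time, rather than as the IllegalArgumentException the deleted code threw at component construction.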