diff options
author | Lester Solbakken <lester.solbakken@gmail.com> | 2024-04-11 09:49:12 +0200 |
---|---|---|
committer | Lester Solbakken <lester.solbakken@gmail.com> | 2024-04-11 09:49:12 +0200 |
commit | fdb3ea0046eb95b8fe3956876199700cdc76629a (patch) | |
tree | bc731356e34bf6c41f24093c6544c103940600ec /container-search | |
parent | b321d23f99f7ee87dd19044de5951d250c29ec27 (diff) |
Throw exception on too many LLM requests
Diffstat (limited to 'container-search')
-rw-r--r-- | container-search/src/main/java/ai/vespa/llm/clients/LocalLLM.java | 4 | ||||
-rwxr-xr-x | container-search/src/main/java/ai/vespa/search/llm/LLMSearcher.java | 7 |
2 files changed, 7 insertions, 4 deletions
```diff
diff --git a/container-search/src/main/java/ai/vespa/llm/clients/LocalLLM.java b/container-search/src/main/java/ai/vespa/llm/clients/LocalLLM.java
index c550406ff92..3b99e5f0a09 100644
--- a/container-search/src/main/java/ai/vespa/llm/clients/LocalLLM.java
+++ b/container-search/src/main/java/ai/vespa/llm/clients/LocalLLM.java
@@ -127,9 +127,7 @@ public class LocalLLM extends AbstractComponent implements LanguageModel {
             int queueSize = executor.getQueue().size();
             String error = String.format("Rejected completion due to too many requests, " +
                     "%d active, %d in queue", activeCount, queueSize);
-            logger.info(error);
-            consumer.accept(Completion.from(error, Completion.FinishReason.error));
-            completionFuture.complete(Completion.FinishReason.error);
+            throw new RejectedExecutionException(error);
         }
         return completionFuture;
     }
diff --git a/container-search/src/main/java/ai/vespa/search/llm/LLMSearcher.java b/container-search/src/main/java/ai/vespa/search/llm/LLMSearcher.java
index 951b3ae3de9..f565315b775 100755
--- a/container-search/src/main/java/ai/vespa/search/llm/LLMSearcher.java
+++ b/container-search/src/main/java/ai/vespa/search/llm/LLMSearcher.java
@@ -20,6 +20,7 @@ import com.yahoo.search.result.HitGroup;
 import com.yahoo.search.searchchain.Execution;
 
 import java.util.List;
+import java.util.concurrent.RejectedExecutionException;
 import java.util.function.Function;
 import java.util.logging.Logger;
 import java.util.stream.Collectors;
@@ -83,7 +84,11 @@ public class LLMSearcher extends Searcher {
     protected Result complete(Query query, Prompt prompt) {
         var options = new InferenceParameters(getApiKeyHeader(query), s -> lookupProperty(s, query));
         var stream = lookupPropertyBool(STREAM_PROPERTY, query, this.stream);  // query value overwrites config
-        return stream ? completeAsync(query, prompt, options) : completeSync(query, prompt, options);
+        try {
+            return stream ? completeAsync(query, prompt, options) : completeSync(query, prompt, options);
+        } catch (RejectedExecutionException e) {
+            return new Result(query, new ErrorMessage(429, e.getMessage()));
+        }
     }
 
     private boolean shouldAddPrompt(Query query) {
```