summary | refs | log | tree | commit | diff | stats
path: root/container-search
diff options
context:
space:
mode:
author Lester Solbakken <lester.solbakken@gmail.com> 2024-04-11 09:49:12 +0200
committer Lester Solbakken <lester.solbakken@gmail.com> 2024-04-11 09:49:12 +0200
commit fdb3ea0046eb95b8fe3956876199700cdc76629a (patch)
tree bc731356e34bf6c41f24093c6544c103940600ec /container-search
parent b321d23f99f7ee87dd19044de5951d250c29ec27 (diff)
Throw exception on too many LLM requests
Diffstat (limited to 'container-search')
-rw-r--r-- container-search/src/main/java/ai/vespa/llm/clients/LocalLLM.java | 4
-rwxr-xr-x container-search/src/main/java/ai/vespa/search/llm/LLMSearcher.java | 7
2 files changed, 7 insertions, 4 deletions
diff --git a/container-search/src/main/java/ai/vespa/llm/clients/LocalLLM.java b/container-search/src/main/java/ai/vespa/llm/clients/LocalLLM.java
index c550406ff92..3b99e5f0a09 100644
--- a/container-search/src/main/java/ai/vespa/llm/clients/LocalLLM.java
+++ b/container-search/src/main/java/ai/vespa/llm/clients/LocalLLM.java
@@ -127,9 +127,7 @@ public class LocalLLM extends AbstractComponent implements LanguageModel {
int queueSize = executor.getQueue().size();
String error = String.format("Rejected completion due to too many requests, " +
"%d active, %d in queue", activeCount, queueSize);
- logger.info(error);
- consumer.accept(Completion.from(error, Completion.FinishReason.error));
- completionFuture.complete(Completion.FinishReason.error);
+ throw new RejectedExecutionException(error);
}
return completionFuture;
}
diff --git a/container-search/src/main/java/ai/vespa/search/llm/LLMSearcher.java b/container-search/src/main/java/ai/vespa/search/llm/LLMSearcher.java
index 951b3ae3de9..f565315b775 100755
--- a/container-search/src/main/java/ai/vespa/search/llm/LLMSearcher.java
+++ b/container-search/src/main/java/ai/vespa/search/llm/LLMSearcher.java
@@ -20,6 +20,7 @@ import com.yahoo.search.result.HitGroup;
import com.yahoo.search.searchchain.Execution;
import java.util.List;
+import java.util.concurrent.RejectedExecutionException;
import java.util.function.Function;
import java.util.logging.Logger;
import java.util.stream.Collectors;
@@ -83,7 +84,11 @@ public class LLMSearcher extends Searcher {
protected Result complete(Query query, Prompt prompt) {
var options = new InferenceParameters(getApiKeyHeader(query), s -> lookupProperty(s, query));
var stream = lookupPropertyBool(STREAM_PROPERTY, query, this.stream); // query value overwrites config
- return stream ? completeAsync(query, prompt, options) : completeSync(query, prompt, options);
+ try {
+ return stream ? completeAsync(query, prompt, options) : completeSync(query, prompt, options);
+ } catch (RejectedExecutionException e) {
+ return new Result(query, new ErrorMessage(429, e.getMessage()));
+ }
}
private boolean shouldAddPrompt(Query query) {