aboutsummaryrefslogtreecommitdiffstats
path: root/model-integration/src/main/resources/configdefinitions/llm-local-client.def
diff options
context:
space:
mode:
Diffstat (limited to 'model-integration/src/main/resources/configdefinitions/llm-local-client.def')
-rwxr-xr-xmodel-integration/src/main/resources/configdefinitions/llm-local-client.def7
1 file changed, 5 insertions, 2 deletions
diff --git a/model-integration/src/main/resources/configdefinitions/llm-local-client.def b/model-integration/src/main/resources/configdefinitions/llm-local-client.def
index 4823a53ec46..6b83ffd0751 100755
--- a/model-integration/src/main/resources/configdefinitions/llm-local-client.def
+++ b/model-integration/src/main/resources/configdefinitions/llm-local-client.def
@@ -8,7 +8,10 @@ model model
parallelRequests int default=1
# Additional number of requests to put in queue for processing before starting to reject new requests
-maxQueueSize int default=10
+maxQueueSize int default=100
+
+# Max number of milliseconds to wait in the queue before rejecting a request
+maxQueueWait int default=10000
# Use GPU
useGpu bool default=true
@@ -24,6 +27,6 @@ threads int default=-1
# Context is divided between parallel requests. So for 10 parallel requests, each "slot" gets 1/10 of the context
contextSize int default=4096
-# Maximum number of tokens to process in one request - overriden by inference parameters
+# Maximum number of tokens to process in one request - overridden by inference parameters
maxTokens int default=512