diff options
author | Jon Bratseth <bratseth@vespa.ai> | 2023-04-26 19:33:17 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@vespa.ai> | 2023-04-26 19:33:17 +0200 |
commit | 537c0c796cf6bf13773b3c5f4797647e2a9c22fe (patch) | |
tree | 976ab5804db8a71f0a675d9c23d8861b621da0e7 /config-model/src/test/cfg/application/embed_cloud_only | |
parent | 730a047dd5274eb2d547ae703c3306c19b6d5d6c (diff) |
Proper error when using only model-id self-hosted
Diffstat (limited to 'config-model/src/test/cfg/application/embed_cloud_only')
-rw-r--r-- | config-model/src/test/cfg/application/embed_cloud_only/configdefinitions/embedding.bert-base-embedder.def | 30 | ||||
-rw-r--r-- | config-model/src/test/cfg/application/embed_cloud_only/services.xml | 23 |
2 files changed, 53 insertions, 0 deletions
diff --git a/config-model/src/test/cfg/application/embed_cloud_only/configdefinitions/embedding.bert-base-embedder.def b/config-model/src/test/cfg/application/embed_cloud_only/configdefinitions/embedding.bert-base-embedder.def new file mode 100644 index 00000000000..144dfbd0001 --- /dev/null +++ b/config-model/src/test/cfg/application/embed_cloud_only/configdefinitions/embedding.bert-base-embedder.def @@ -0,0 +1,30 @@ +# Copy of this Vespa config stored here because Vespa config definitions are not +# available in unit tests, and are needed (by DomConfigPayloadBuilder.parseLeaf) +# Alternatively, we could make that not need it as it is not strictly necessary. + +namespace=embedding + +# Wordpiece tokenizer +tokenizerVocab model + +transformerModel model + +# Max length of token sequence model can handle +transformerMaxTokens int default=384 + +# Pooling strategy +poolingStrategy enum { cls, mean } default=mean + +# Input names +transformerInputIds string default=input_ids +transformerAttentionMask string default=attention_mask +transformerTokenTypeIds string default=token_type_ids + +# Output name +transformerOutput string default=output_0 + +# Settings for ONNX model evaluation +onnxExecutionMode enum { parallel, sequential } default=sequential +onnxInterOpThreads int default=1 +onnxIntraOpThreads int default=-4 # n=number of threads -> n<0: CPUs/(-n), n==0: CPUs, n>0: n + diff --git a/config-model/src/test/cfg/application/embed_cloud_only/services.xml b/config-model/src/test/cfg/application/embed_cloud_only/services.xml new file mode 100644 index 00000000000..57db4f5bfae --- /dev/null +++ b/config-model/src/test/cfg/application/embed_cloud_only/services.xml @@ -0,0 +1,23 @@ +<?xml version="1.0" encoding="utf-8" ?> +<!-- Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
--> +<services version="1.0"> + + <container version="1.0"> + + <component id="transformer" class="ai.vespa.embedding.BertBaseEmbedder" bundle="model-integration"> + <config name="embedding.bert-base-embedder"> + <!-- No fallback to url or path when deploying outside cloud --> + <transformerModel model-id="minilm-l6-v2"/> + <tokenizerVocab path="files/vocab.txt"/> + + <!-- tunable parameters: number of threads etc --> + <onnxIntraOpThreads>4</onnxIntraOpThreads> + </config> + </component> + + <nodes> + <node hostalias="node1" /> + </nodes> + </container> + +</services> |