diff options
author    Jo Kristian Bergum <bergum@yahooinc.com>    2023-12-15 08:42:01 +0100
committer Jo Kristian Bergum <bergum@yahooinc.com>    2023-12-15 08:42:01 +0100
commit    cbc0733c07b57d8563eea40897072cb35042b605 (patch)
tree      fa3b564329dc4a88b48c697692c7896d6d4b36b0 /configdefinitions
parent    8af800ba588f726184ffb8296463bb4b7fbea5a1 (diff)
Add a splade embedder implementation
Diffstat (limited to 'configdefinitions')
-rw-r--r--  configdefinitions/src/vespa/CMakeLists.txt       |  1 +
-rw-r--r--  configdefinitions/src/vespa/splade-embedder.def  | 29 +++++++++++++++++++++++++
2 files changed, 30 insertions(+), 0 deletions(-)
diff --git a/configdefinitions/src/vespa/CMakeLists.txt b/configdefinitions/src/vespa/CMakeLists.txt
index 5adfb31ac25..049475c9c1b 100644
--- a/configdefinitions/src/vespa/CMakeLists.txt
+++ b/configdefinitions/src/vespa/CMakeLists.txt
@@ -89,5 +89,6 @@ install_config_definition(hugging-face-embedder.def embedding.huggingface.hugging-face-embedder.def)
 install_config_definition(hugging-face-tokenizer.def language.huggingface.config.hugging-face-tokenizer.def)
 install_config_definition(bert-base-embedder.def embedding.bert-base-embedder.def)
 install_config_definition(col-bert-embedder.def embedding.col-bert-embedder.def)
+install_config_definition(splade-embedder.def embedding.splade-embedder.def)
 install_config_definition(cloud-data-plane-filter.def jdisc.http.filter.security.cloud.config.cloud-data-plane-filter.def)
 install_config_definition(cloud-token-data-plane-filter.def jdisc.http.filter.security.cloud.config.cloud-token-data-plane-filter.def)
diff --git a/configdefinitions/src/vespa/splade-embedder.def b/configdefinitions/src/vespa/splade-embedder.def
new file mode 100644
index 00000000000..601753f45bb
--- /dev/null
+++ b/configdefinitions/src/vespa/splade-embedder.def
@@ -0,0 +1,29 @@
+# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+namespace=embedding
+
+# Path to tokenizer.json
+tokenizerPath model
+
+# Path to model.onnx
+transformerModel model
+
+# Max length of token sequence model can handle
+transformerMaxTokens int default=512
+
+# Input names
+transformerInputIds string default=input_ids
+transformerAttentionMask string default=attention_mask
+transformerTokenTypeIds string default=token_type_ids
+
+# Output name
+transformerOutput string default=logits
+
+# score threshold to control sparseness
+termScoreThreshold double default=0.0
+
+# Settings for ONNX model evaluation
+transformerExecutionMode enum { parallel, sequential } default=sequential
+transformerInterOpThreads int default=1
+transformerIntraOpThreads int default=-4
+# GPU device id, -1 for CPU
+transformerGpuDevice int default=0