summary | refs | log | tree | commit | diff | stats
path: root/configdefinitions
diff options
context:
space:
mode:
author    Jo Kristian Bergum <bergum@yahooinc.com>    2023-12-15 08:42:01 +0100
committer Jo Kristian Bergum <bergum@yahooinc.com>    2023-12-15 08:42:01 +0100
commit  cbc0733c07b57d8563eea40897072cb35042b605 (patch)
tree    fa3b564329dc4a88b48c697692c7896d6d4b36b0 /configdefinitions
parent  8af800ba588f726184ffb8296463bb4b7fbea5a1 (diff)
Add a splade embedder implementation
Diffstat (limited to 'configdefinitions')
-rw-r--r--  configdefinitions/src/vespa/CMakeLists.txt       |  1
-rw-r--r--  configdefinitions/src/vespa/splade-embedder.def  | 29
2 files changed, 30 insertions, 0 deletions
diff --git a/configdefinitions/src/vespa/CMakeLists.txt b/configdefinitions/src/vespa/CMakeLists.txt
index 5adfb31ac25..049475c9c1b 100644
--- a/configdefinitions/src/vespa/CMakeLists.txt
+++ b/configdefinitions/src/vespa/CMakeLists.txt
@@ -89,5 +89,6 @@ install_config_definition(hugging-face-embedder.def embedding.huggingface.huggin
install_config_definition(hugging-face-tokenizer.def language.huggingface.config.hugging-face-tokenizer.def)
install_config_definition(bert-base-embedder.def embedding.bert-base-embedder.def)
install_config_definition(col-bert-embedder.def embedding.col-bert-embedder.def)
+install_config_definition(splade-embedder.def embedding.splade-embedder.def)
install_config_definition(cloud-data-plane-filter.def jdisc.http.filter.security.cloud.config.cloud-data-plane-filter.def)
install_config_definition(cloud-token-data-plane-filter.def jdisc.http.filter.security.cloud.config.cloud-token-data-plane-filter.def)
diff --git a/configdefinitions/src/vespa/splade-embedder.def b/configdefinitions/src/vespa/splade-embedder.def
new file mode 100644
index 00000000000..601753f45bb
--- /dev/null
+++ b/configdefinitions/src/vespa/splade-embedder.def
@@ -0,0 +1,29 @@
+# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+namespace=embedding
+
+# Path to tokenizer.json
+tokenizerPath model
+
+# Path to model.onnx
+transformerModel model
+
+# Max length of token sequence model can handle
+transformerMaxTokens int default=512
+
+# Input names
+transformerInputIds string default=input_ids
+transformerAttentionMask string default=attention_mask
+transformerTokenTypeIds string default=token_type_ids
+
+# Output name
+transformerOutput string default=logits
+
+# score threshold to control sparseness
+termScoreThreshold double default=0.0
+
+# Settings for ONNX model evaluation
+transformerExecutionMode enum { parallel, sequential } default=sequential
+transformerInterOpThreads int default=1
+transformerIntraOpThreads int default=-4
+# GPU device id, -1 for CPU
+transformerGpuDevice int default=0