summary | refs | log | tree | commit | diff | stats
path: root/model-integration/src/main/resources/configdefinitions/embedding.bert-base-embedder.def
diff options
context:
space:
mode:
author: Lester Solbakken <lesters@oath.com> 2022-04-05 13:07:27 +0200
committer: Lester Solbakken <lesters@oath.com> 2022-04-05 13:07:27 +0200
commit: 869b45ec6ea7b618c7d9515cb70cac71f3df3d2b (patch)
tree: 0ff576649c5a74c363c706b6f2c34b40961f6aa8 /model-integration/src/main/resources/configdefinitions/embedding.bert-base-embedder.def
parent: a0b02d8bd00bf8d7f3410cbf47ae432770546883 (diff)
Accept default values for URL types in config
Diffstat (limited to 'model-integration/src/main/resources/configdefinitions/embedding.bert-base-embedder.def')
-rw-r--r-- model-integration/src/main/resources/configdefinitions/embedding.bert-base-embedder.def 27
1 files changed, 27 insertions, 0 deletions
diff --git a/model-integration/src/main/resources/configdefinitions/embedding.bert-base-embedder.def b/model-integration/src/main/resources/configdefinitions/embedding.bert-base-embedder.def
new file mode 100644
index 00000000000..a37599de411
--- /dev/null
+++ b/model-integration/src/main/resources/configdefinitions/embedding.bert-base-embedder.def
@@ -0,0 +1,27 @@
+
+namespace=embedding
+
+# Transformer model settings
+transformerModelUrl url default=https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx
+
+# Maximum length of the token sequence the model can handle
+transformerMaxTokens int default=384
+
+# Pooling strategy
+poolingStrategy enum { cls, mean } default=mean
+
+# Input names
+transformerInputIds string default=input_ids
+transformerAttentionMask string default=attention_mask
+transformerTokenTypeIds string default=token_type_ids
+
+# Output name
+transformerOutput string default=output_0
+
+# Settings for ONNX model evaluation
+onnxExecutionMode enum { parallel, sequential } default=sequential
+onnxInterOpThreads int default=1
+onnxIntraOpThreads int default=-4 # n = number of threads: n<0 -> CPUs/(-n), n==0 -> CPUs, n>0 -> n
+
+# Settings for wordpiece tokenizer
+tokenizerVocabUrl url default=https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt