From 7593c064d3ecf3649cd27f5b9c820b5510f225ee Mon Sep 17 00:00:00 2001
From: Lester Solbakken
Date: Mon, 23 May 2022 10:55:21 +0200
Subject: Add services.xml syntax for embedders

---
 configdefinitions/src/vespa/embedding.bert-base-embedder.def | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'configdefinitions')

diff --git a/configdefinitions/src/vespa/embedding.bert-base-embedder.def b/configdefinitions/src/vespa/embedding.bert-base-embedder.def
index a37599de411..115e021972c 100644
--- a/configdefinitions/src/vespa/embedding.bert-base-embedder.def
+++ b/configdefinitions/src/vespa/embedding.bert-base-embedder.def
@@ -1,8 +1,13 @@
 namespace=embedding
 
+# Settings for wordpiece tokenizer
+tokenizerVocabUrl url
+tokenizerVocabPath path
+
 # Transformer model settings
-transformerModelUrl url default=https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx
+transformerModelUrl url
+transformerModelPath path
 
 # Max length of token sequence model can handle
 transformerMaxTokens int default=384
 
@@ -23,5 +28,3 @@ onnxExecutionMode enum { parallel, sequential } default=sequential
 onnxInterOpThreads int default=1
 onnxIntraOpThreads int default=-4 # n=number of threads -> n<0: CPUs/(-n), n==0: CPUs, n>0: n
 
-# Settings for wordpiece tokenizer
-tokenizerVocabUrl url default=https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt
-- 
cgit v1.2.3