summary | refs | log | tree | commit | diff | stats
path: root/model-integration
diff options
context:
space:
mode:
author: Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-05-11 15:41:00 +0200
committer: Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-05-11 16:41:54 +0200
commit: fe63824738fc1892221311e7ddd777efcb209f5b (patch)
tree: dc7d3ce16c4e56ab7cbbc941f2cb9f162d6dacb2 /model-integration
parent: ae700d12753e1a81de4def087d2f64607f0361df (diff)
Disable special tokens by default
Diffstat (limited to 'model-integration')
-rw-r--r-- model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java 1
1 file changed, 1 insertion, 0 deletions
diff --git a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
index 699dafdd94e..4ed1ae8dc0f 100644
--- a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
+++ b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
@@ -41,6 +41,7 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder {
outputName = config.transformerOutput();
normalize = config.normalize();
tokenizer = new HuggingFaceTokenizer.Builder()
+ .addSpecialTokens(true)
.addDefaultModel(Paths.get(config.tokenizerPath().toString()))
.build();
var onnxOpts = new OnnxEvaluatorOptions();