summary | refs | log | tree | commit | diff | stats
path: root/configdefinitions/src/vespa/hugging-face-tokenizer.def
diff options
context:
space:
mode:
author: Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-06-12 17:25:55 +0200
committer: GitHub <noreply@github.com> 2023-06-12 17:25:55 +0200
commit: fb5d1bf9f451fbeb4a40d7f73fa856ef81bd77ed (patch)
tree: 1f8ab291370e84407a827f9a80bcf943f522ca29 /configdefinitions/src/vespa/hugging-face-tokenizer.def
parent: 0647b650c3334ff86d50431e78549e25dc46caf9 (diff)
parent: 4f722322cc9f8df5146ffb27d74239b3b4f2d634 (diff)
Merge pull request #27387 from vespa-engine/bjorncs/hf (tag: v8.176.13)
Prefer truncation configuration from tokenizer model
Diffstat (limited to 'configdefinitions/src/vespa/hugging-face-tokenizer.def')
-rw-r--r-- configdefinitions/src/vespa/hugging-face-tokenizer.def | 13
1 files changed, 10 insertions, 3 deletions
diff --git a/configdefinitions/src/vespa/hugging-face-tokenizer.def b/configdefinitions/src/vespa/hugging-face-tokenizer.def
index bc0d5300de5..896a7b03234 100644
--- a/configdefinitions/src/vespa/hugging-face-tokenizer.def
+++ b/configdefinitions/src/vespa/hugging-face-tokenizer.def
@@ -8,7 +8,14 @@ model[].language string
# The path to the model relative to the application package root
model[].path model
+# Include special tokens in output
addSpecialTokens bool default=true
-maxLength int default=512
-truncation bool default=true
-padding bool default=false
+
+# Used for truncation/padding. Use -1 for model default.
+maxLength int default=-1
+
+# Truncation strategy. Use NOTSET for model default.
+truncation enum { ON, OFF, NOTSET } default=NOTSET
+
+# Padding strategy. Use NOTSET for model default.
+padding enum { ON, OFF, NOTSET } default=NOTSET