diff options
author | Bjørn Christian Seime <bjorncs@yahooinc.com> | 2023-06-12 17:25:55 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-12 17:25:55 +0200 |
commit | fb5d1bf9f451fbeb4a40d7f73fa856ef81bd77ed (patch) | |
tree | 1f8ab291370e84407a827f9a80bcf943f522ca29 /configdefinitions | |
parent | 0647b650c3334ff86d50431e78549e25dc46caf9 (diff) | |
parent | 4f722322cc9f8df5146ffb27d74239b3b4f2d634 (diff) |
Merge pull request #27387 from vespa-engine/bjorncs/hfv8.176.13
Prefer truncation configuration from tokenizer model
Diffstat (limited to 'configdefinitions')
-rw-r--r-- | configdefinitions/src/vespa/hugging-face-tokenizer.def | 13 |
1 files changed, 10 insertions, 3 deletions
```diff
diff --git a/configdefinitions/src/vespa/hugging-face-tokenizer.def b/configdefinitions/src/vespa/hugging-face-tokenizer.def
index bc0d5300de5..896a7b03234 100644
--- a/configdefinitions/src/vespa/hugging-face-tokenizer.def
+++ b/configdefinitions/src/vespa/hugging-face-tokenizer.def
@@ -8,7 +8,14 @@ model[].language string
 # The path to the model relative to the application package root
 model[].path model
 
+# Include special tokens in output
 addSpecialTokens bool default=true
-maxLength int default=512
-truncation bool default=true
-padding bool default=false
+
+# Used for truncation/padding. Use -1 for model default.
+maxLength int default=-1
+
+# Truncation strategy. Use NOTSET for model default.
+truncation enum { ON, OFF, NOTSET } default=NOTSET
+
+# Padding strategy. Use NOTSET for model default.
+padding enum { ON, OFF, NOTSET } default=NOTSET
```