diff options
Diffstat (limited to 'linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def')
-rw-r--r-- | linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def | 18 |
1 files changed, 18 insertions, 0 deletions
diff --git a/linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def b/linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def new file mode 100644 index 00000000000..b91c0c45dc4 --- /dev/null +++ b/linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def @@ -0,0 +1,18 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +# Configures com.yahoo.language.sentencepiece.SentencePieceEncoder + +namespace=language.sentencepiece + +# Whether consecutive unknown characters should be collapsed into one large unknown token (default) +# or be returned as single character tokens. +collapseUnknowns bool default=true + +# The scoring strategy to use when picking a segmentation. +scoring enum { highestScore, fewestSegments } default=fewestSegments + +# The language a model is for, one of the language tags in com.yahoo.language.Language. +# Use "unknown" for models to be used with any language. +model[].language string +# The path to the model relative to the application package root +model[].path path
\ No newline at end of file |