summaryrefslogtreecommitdiffstats
path: root/linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def
diff options
context:
space:
mode:
Diffstat (limited to 'linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def')
-rw-r--r--linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def18
1 files changed, 18 insertions, 0 deletions
diff --git a/linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def b/linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def
new file mode 100644
index 00000000000..b91c0c45dc4
--- /dev/null
+++ b/linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def
@@ -0,0 +1,18 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# Configures com.yahoo.language.sentencepiece.SentencePieceEncoder
+
+namespace=language.sentencepiece
+
+# Whether consecutive unknown characters should be collapsed into one large unknown token (default)
+# or be returned as single-character tokens.
+collapseUnknowns bool default=true
+
+# The scoring strategy to use when picking a segmentation.
+scoring enum { highestScore, fewestSegments } default=fewestSegments
+
+# The language a model is for, one of the language tags in com.yahoo.language.Language.
+# Use "unknown" for models to be used with any language.
+model[].language string
+# The path to the model relative to the application package root
+model[].path path \ No newline at end of file