aboutsummaryrefslogtreecommitdiffstats
path: root/linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def
blob: 16ada78688aefdd5572ed399f42451c0789b552a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

# Configures com.yahoo.language.sentencepiece.SentencePieceEmbedder

namespace=language.sentencepiece

# Whether consecutive unknown characters should be collapsed into one large unknown token (the default)
# or be returned as single-character tokens.
collapseUnknowns bool default=true

# The scoring strategy to use when picking a segmentation.
# fewestSegments is used when no value is configured.
scoring enum { highestScore, fewestSegments } default=fewestSegments

# Each model[] entry pairs a language with the model file to use for it.
# The language a model is for, one of the language tags in com.yahoo.language.Language.
# Use "unknown" for models to be used with any language.
model[].language string
# The path to the model, relative to the application package root.
model[].path path