aboutsummaryrefslogtreecommitdiffstats
path: root/linguistics-components/src/main/resources/configdefinitions/language.wordpiece.word-piece.def
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2021-12-17 12:41:17 +0100
committerJon Bratseth <bratseth@gmail.com>2021-12-17 12:41:17 +0100
commit601b117281b74a578126a0f3effead55bc79c680 (patch)
tree29619184a8459763cc024b23e74960e6c9ec7f81 /linguistics-components/src/main/resources/configdefinitions/language.wordpiece.word-piece.def
parent767cb63af0f530605180f5438767406e1db27520 (diff)
BERT -> WordPiece, make subword prefix configurable
Diffstat (limited to 'linguistics-components/src/main/resources/configdefinitions/language.wordpiece.word-piece.def')
-rw-r--r--linguistics-components/src/main/resources/configdefinitions/language.wordpiece.word-piece.def14
1 files changed, 14 insertions, 0 deletions
diff --git a/linguistics-components/src/main/resources/configdefinitions/language.wordpiece.word-piece.def b/linguistics-components/src/main/resources/configdefinitions/language.wordpiece.word-piece.def
new file mode 100644
index 00000000000..08592250eb5
--- /dev/null
+++ b/linguistics-components/src/main/resources/configdefinitions/language.wordpiece.word-piece.def
@@ -0,0 +1,14 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# Configures com.yahoo.language.wordpiece.WordPieceEmbedder
+
+namespace=language.wordpiece
+
+# The prefix to prepend to subword tokens
+subwordPrefix string default="##"
+
+# The language a model is for: one of the language tags in com.yahoo.language.Language.
+# Use "unknown" for a model to be used for any language (i.e., by default).
+model[].language string
+# The path to the model relative to the application package root
+model[].path path