summaryrefslogtreecommitdiffstats
path: root/linguistics-components/src/main/resources/configdefinitions
diff options
context:
space:
mode:
author Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-05-11 11:07:23 +0200
committer Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-05-11 16:41:35 +0200
commit ce7dd2c983a8840981786eef95a9cc4741487be7 (patch)
tree 8aa4140fb8fd7ec8a95d425adda4119d34cf46d3 /linguistics-components/src/main/resources/configdefinitions
parent 386e4198d8459803eec0ead6ad81a821737082a7 (diff)
Make HF tokenizer a separate embedder
Diffstat (limited to 'linguistics-components/src/main/resources/configdefinitions')
-rw-r--r-- linguistics-components/src/main/resources/configdefinitions/language.huggingface.hugging-face-tokenizer.def | 9
1 files changed, 9 insertions, 0 deletions
diff --git a/linguistics-components/src/main/resources/configdefinitions/language.huggingface.hugging-face-tokenizer.def b/linguistics-components/src/main/resources/configdefinitions/language.huggingface.hugging-face-tokenizer.def
new file mode 100644
index 00000000000..9d0ab65c28f
--- /dev/null
+++ b/linguistics-components/src/main/resources/configdefinitions/language.huggingface.hugging-face-tokenizer.def
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+namespace=language.huggingface
+
+# The language the model is for: one of the language tags in com.yahoo.language.Language.
+# Use "unknown" for a model that can be used with any language.
+model[].language string
+# The path to the model, relative to the application package root.
+model[].path path