From e030993d0c356ba6acd50c3e64da5a1f6e1538fd Mon Sep 17 00:00:00 2001 From: Bjørn Christian Seime Date: Fri, 12 May 2023 10:21:48 +0200 Subject: Revert "Revert "Bjorncs/huggingface tokenizer"" This reverts commit 2bb74878879b3acb1919fd658b8f2c476d8129d6. --- .../language.huggingface.hugging-face-tokenizer.def | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 linguistics-components/src/main/resources/configdefinitions/language.huggingface.hugging-face-tokenizer.def (limited to 'linguistics-components/src/main/resources/configdefinitions') diff --git a/linguistics-components/src/main/resources/configdefinitions/language.huggingface.hugging-face-tokenizer.def b/linguistics-components/src/main/resources/configdefinitions/language.huggingface.hugging-face-tokenizer.def new file mode 100644 index 00000000000..a3e54ea38da --- /dev/null +++ b/linguistics-components/src/main/resources/configdefinitions/language.huggingface.hugging-face-tokenizer.def @@ -0,0 +1,11 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +namespace=language.huggingface + +# The language a model is for, one of the language tags in com.yahoo.language.Language. +# Use "unknown" for models to be used with any language. +model[].language string +# The path to the model relative to the application package root +model[].path path + +addSpecialTokens bool default=true \ No newline at end of file -- cgit v1.2.3