diff options
author | Bjørn Christian Seime <bjorncs@yahooinc.com> | 2023-06-02 12:10:32 +0200 |
---|---|---|
committer | Bjørn Christian Seime <bjorncs@yahooinc.com> | 2023-06-02 12:10:32 +0200 |
commit | a67788f2b7786a2cfcb9244d1e72a7fb1815425b (patch) | |
tree | fa34be2f0f13ef4ea116dd12853c734de3bc2eca /linguistics-components | |
parent | e757e5ff2e6dadbe31389c7dfeb3f52827a1668b (diff) |
Introduce services.xml syntax for configuring HuggingFace embedders
Diffstat (limited to 'linguistics-components')
3 files changed, 7 insertions, 13 deletions
diff --git a/linguistics-components/pom.xml b/linguistics-components/pom.xml
index 5031ad73556..b3bc52c5e23 100644
--- a/linguistics-components/pom.xml
+++ b/linguistics-components/pom.xml
@@ -89,6 +89,12 @@
             <scope>provided</scope>
             <classifier>no_aop</classifier>
         </dependency>
+        <dependency>
+            <groupId>com.yahoo.vespa</groupId>
+            <artifactId>configdefinitions</artifactId>
+            <version>${project.version}</version>
+            <scope>compile</scope>
+        </dependency>
     </dependencies>
     <build>
         <plugins>
diff --git a/linguistics-components/src/main/java/com/yahoo/language/huggingface/HuggingFaceTokenizer.java b/linguistics-components/src/main/java/com/yahoo/language/huggingface/HuggingFaceTokenizer.java
index f9a37bc477b..2c66fc18c9b 100644
--- a/linguistics-components/src/main/java/com/yahoo/language/huggingface/HuggingFaceTokenizer.java
+++ b/linguistics-components/src/main/java/com/yahoo/language/huggingface/HuggingFaceTokenizer.java
@@ -6,6 +6,7 @@ import com.yahoo.api.annotations.Beta;
 import com.yahoo.component.AbstractComponent;
 import com.yahoo.component.annotation.Inject;
 import com.yahoo.language.Language;
+import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig;
 import com.yahoo.language.process.Embedder;
 import com.yahoo.language.process.Segmenter;
 import com.yahoo.language.tools.Embed;
diff --git a/linguistics-components/src/main/resources/configdefinitions/language.huggingface.hugging-face-tokenizer.def b/linguistics-components/src/main/resources/configdefinitions/language.huggingface.hugging-face-tokenizer.def
deleted file mode 100644
index 67b3b927f94..00000000000
--- a/linguistics-components/src/main/resources/configdefinitions/language.huggingface.hugging-face-tokenizer.def
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-namespace=language.huggingface
-
-# The language a model is for, one of the language tags in com.yahoo.language.Language.
-# Use "unknown" for models to be used with any language.
-model[].language string
-# The path to the model relative to the application package root
-model[].path model
-
-addSpecialTokens bool default=true
-maxLength int default=-1
-truncation bool default=false
\ No newline at end of file