summary | refs | log | tree | commit | diff | stats
path: root/config-model/src/main
diff options
context:
space:
mode:
Diffstat (limited to 'config-model/src/main')
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java 16
-rw-r--r-- config-model/src/main/resources/schema/common.rnc 8
2 files changed, 5 insertions, 19 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java
index e0572f8391e..0bf5491e872 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java
@@ -4,6 +4,8 @@ package com.yahoo.vespa.model.container.component;
import com.yahoo.config.ModelReference;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig;
+import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig.Padding;
+import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig.Truncation;
import com.yahoo.text.XML;
import com.yahoo.vespa.model.container.xml.ModelIdResolver;
import org.w3c.dom.Element;
@@ -11,7 +13,6 @@ import org.w3c.dom.Element;
import java.util.Map;
import java.util.TreeMap;
-import static com.yahoo.config.model.builder.xml.XmlHelper.getOptionalChildValue;
import static com.yahoo.vespa.model.container.ContainerModelEvaluation.LINGUISTICS_BUNDLE_NAME;
/**
@@ -20,10 +21,6 @@ import static com.yahoo.vespa.model.container.ContainerModelEvaluation.LINGUISTI
public class HuggingFaceTokenizer extends TypedComponent implements HuggingFaceTokenizerConfig.Producer {
private final Map<String, ModelReference> langToModel = new TreeMap<>();
- private final Boolean specialTokens;
- private final Integer maxLength;
- private final Boolean truncation;
- private final Boolean padding;
public HuggingFaceTokenizer(Element xml, DeployState state) {
super("com.yahoo.language.huggingface.HuggingFaceTokenizer", LINGUISTICS_BUNDLE_NAME, xml);
@@ -31,10 +28,6 @@ public class HuggingFaceTokenizer extends TypedComponent implements HuggingFaceT
var lang = element.hasAttribute("language") ? element.getAttribute("language") : "unknown";
langToModel.put(lang, ModelIdResolver.resolveToModelReference(element, state));
}
- specialTokens = getOptionalChildValue(xml, "special-tokens").map(Boolean::parseBoolean).orElse(null);
- maxLength = getOptionalChildValue(xml, "max-length").map(Integer::parseInt).orElse(null);
- truncation = getOptionalChildValue(xml, "truncation").map(Boolean::parseBoolean).orElse(null);
- padding = getOptionalChildValue(xml, "padding").map(Boolean::parseBoolean).orElse(null);
}
@Override
@@ -42,9 +35,6 @@ public class HuggingFaceTokenizer extends TypedComponent implements HuggingFaceT
langToModel.forEach((lang, vocab) -> {
builder.model.add(new HuggingFaceTokenizerConfig.Model.Builder().language(lang).path(vocab));
});
- if (specialTokens != null) builder.addSpecialTokens(specialTokens);
- if (maxLength != null) builder.maxLength(maxLength);
- if (truncation != null) builder.truncation(truncation);
- if (padding != null) builder.padding(padding);
+ builder.truncation(Truncation.Enum.OFF).padding(Padding.Enum.OFF).addSpecialTokens(false);
}
}
diff --git a/config-model/src/main/resources/schema/common.rnc b/config-model/src/main/resources/schema/common.rnc
index e130bed0297..ba7e2b6674e 100644
--- a/config-model/src/main/resources/schema/common.rnc
+++ b/config-model/src/main/resources/schema/common.rnc
@@ -88,7 +88,7 @@ HuggingFaceEmbedder =
attribute type { "hugging-face-embedder" } &
element transformer-model { ModelReference } &
element tokenizer-model { ModelReference }? &
- element max-tokens { xsd:nonNegativeInteger }? &
+ element max-tokens { xsd:positiveInteger }? &
element transformer-input-ids { xsd:string }? &
element transformer-attention-mask { xsd:string }? &
element transformer-token-type-ids { xsd:string }? &
@@ -99,11 +99,7 @@ HuggingFaceEmbedder =
HuggingFaceTokenizer =
attribute type { "hugging-face-tokenizer" } &
- element model { attribute language { xsd:string }? & ModelReference }+ &
- element special-tokens { xsd:boolean }? &
- element max-length { xsd:integer }? &
- element truncation { xsd:boolean }? &
- element padding { xsd:boolean }?
+ element model { attribute language { xsd:string }? & ModelReference }+
BertBaseEmbedder =
attribute type { "bert-embedder" } &