aboutsummaryrefslogtreecommitdiffstats
path: root/config-model/src/main/java/com/yahoo
diff options
context:
space:
mode:
author Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-06-12 17:25:55 +0200
committer GitHub <noreply@github.com> 2023-06-12 17:25:55 +0200
commit fb5d1bf9f451fbeb4a40d7f73fa856ef81bd77ed (patch)
tree 1f8ab291370e84407a827f9a80bcf943f522ca29 /config-model/src/main/java/com/yahoo
parent 0647b650c3334ff86d50431e78549e25dc46caf9 (diff)
parent 4f722322cc9f8df5146ffb27d74239b3b4f2d634 (diff)
Merge pull request #27387 from vespa-engine/bjorncs/hf v8.176.13
Prefer truncation configuration from tokenizer model
Diffstat (limited to 'config-model/src/main/java/com/yahoo')
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java 16
1 files changed, 3 insertions, 13 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java
index e0572f8391e..0bf5491e872 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java
@@ -4,6 +4,8 @@ package com.yahoo.vespa.model.container.component;
import com.yahoo.config.ModelReference;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig;
+import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig.Padding;
+import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig.Truncation;
import com.yahoo.text.XML;
import com.yahoo.vespa.model.container.xml.ModelIdResolver;
import org.w3c.dom.Element;
@@ -11,7 +13,6 @@ import org.w3c.dom.Element;
import java.util.Map;
import java.util.TreeMap;
-import static com.yahoo.config.model.builder.xml.XmlHelper.getOptionalChildValue;
import static com.yahoo.vespa.model.container.ContainerModelEvaluation.LINGUISTICS_BUNDLE_NAME;
/**
@@ -20,10 +21,6 @@ import static com.yahoo.vespa.model.container.ContainerModelEvaluation.LINGUISTI
public class HuggingFaceTokenizer extends TypedComponent implements HuggingFaceTokenizerConfig.Producer {
private final Map<String, ModelReference> langToModel = new TreeMap<>();
- private final Boolean specialTokens;
- private final Integer maxLength;
- private final Boolean truncation;
- private final Boolean padding;
public HuggingFaceTokenizer(Element xml, DeployState state) {
super("com.yahoo.language.huggingface.HuggingFaceTokenizer", LINGUISTICS_BUNDLE_NAME, xml);
@@ -31,10 +28,6 @@ public class HuggingFaceTokenizer extends TypedComponent implements HuggingFaceT
var lang = element.hasAttribute("language") ? element.getAttribute("language") : "unknown";
langToModel.put(lang, ModelIdResolver.resolveToModelReference(element, state));
}
- specialTokens = getOptionalChildValue(xml, "special-tokens").map(Boolean::parseBoolean).orElse(null);
- maxLength = getOptionalChildValue(xml, "max-length").map(Integer::parseInt).orElse(null);
- truncation = getOptionalChildValue(xml, "truncation").map(Boolean::parseBoolean).orElse(null);
- padding = getOptionalChildValue(xml, "padding").map(Boolean::parseBoolean).orElse(null);
}
@Override
@@ -42,9 +35,6 @@ public class HuggingFaceTokenizer extends TypedComponent implements HuggingFaceT
langToModel.forEach((lang, vocab) -> {
builder.model.add(new HuggingFaceTokenizerConfig.Model.Builder().language(lang).path(vocab));
});
- if (specialTokens != null) builder.addSpecialTokens(specialTokens);
- if (maxLength != null) builder.maxLength(maxLength);
- if (truncation != null) builder.truncation(truncation);
- if (padding != null) builder.padding(padding);
+ builder.truncation(Truncation.Enum.OFF).padding(Padding.Enum.OFF).addSpecialTokens(false);
}
}