about | summary | refs | log | tree | commit | diff | stats
path: root/config-model/src/main
diff options
context:
space:
mode:
author Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-06-08 14:23:16 +0200
committer Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-06-08 14:23:16 +0200
commit c0652d7794a90e0afb593fc1a3db17c99606a808 (patch)
tree 17887acf2818107bbeb7355f5ee463f5fb02873d /config-model/src/main
parent c3d8c532e0f5b1db896d8693409098e8c2980da1 (diff)
Disable padding and make it configurable
Diffstat (limited to 'config-model/src/main')
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java | 3
-rw-r--r-- config-model/src/main/resources/schema/common.rnc | 3
2 files changed, 5 insertions, 1 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java
index 966dbe8260a..e0572f8391e 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java
@@ -23,6 +23,7 @@ public class HuggingFaceTokenizer extends TypedComponent implements HuggingFaceT
private final Boolean specialTokens;
private final Integer maxLength;
private final Boolean truncation;
+ private final Boolean padding;
public HuggingFaceTokenizer(Element xml, DeployState state) {
super("com.yahoo.language.huggingface.HuggingFaceTokenizer", LINGUISTICS_BUNDLE_NAME, xml);
@@ -33,6 +34,7 @@ public class HuggingFaceTokenizer extends TypedComponent implements HuggingFaceT
specialTokens = getOptionalChildValue(xml, "special-tokens").map(Boolean::parseBoolean).orElse(null);
maxLength = getOptionalChildValue(xml, "max-length").map(Integer::parseInt).orElse(null);
truncation = getOptionalChildValue(xml, "truncation").map(Boolean::parseBoolean).orElse(null);
+ padding = getOptionalChildValue(xml, "padding").map(Boolean::parseBoolean).orElse(null);
}
@Override
@@ -43,5 +45,6 @@ public class HuggingFaceTokenizer extends TypedComponent implements HuggingFaceT
if (specialTokens != null) builder.addSpecialTokens(specialTokens);
if (maxLength != null) builder.maxLength(maxLength);
if (truncation != null) builder.truncation(truncation);
+ if (padding != null) builder.padding(padding);
}
}
diff --git a/config-model/src/main/resources/schema/common.rnc b/config-model/src/main/resources/schema/common.rnc
index 061e54740f1..e130bed0297 100644
--- a/config-model/src/main/resources/schema/common.rnc
+++ b/config-model/src/main/resources/schema/common.rnc
@@ -102,7 +102,8 @@ HuggingFaceTokenizer =
element model { attribute language { xsd:string }? & ModelReference }+ &
element special-tokens { xsd:boolean }? &
element max-length { xsd:integer }? &
- element truncation { xsd:boolean }?
+ element truncation { xsd:boolean }? &
+ element padding { xsd:boolean }?
BertBaseEmbedder =
attribute type { "bert-embedder" } &