aboutsummaryrefslogtreecommitdiffstats
path: root/config-model/src/test
diff options
context:
space:
mode:
author Jo Kristian Bergum <bergum@yahooinc.com> 2024-01-06 10:54:58 +0100
committer Jo Kristian Bergum <bergum@yahooinc.com> 2024-01-06 10:54:58 +0100
commit 18ae21bce56e018cef2c17d03e63617530af59ae (patch)
tree 3c1dcee63395fee2e476be9ce33e2437262b00d7 /config-model/src/test
parent e4da75db4556a3cd72b034c4406027f9bba73918 (diff)
handle multilingual models better
Diffstat (limited to 'config-model/src/test')
-rw-r--r-- config-model/src/test/cfg/application/embed/services.xml 2
-rw-r--r-- config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java 8
2 files changed, 9 insertions, 1 deletions
diff --git a/config-model/src/test/cfg/application/embed/services.xml b/config-model/src/test/cfg/application/embed/services.xml
index 59c29aefc6a..e92679e3c96 100644
--- a/config-model/src/test/cfg/application/embed/services.xml
+++ b/config-model/src/test/cfg/application/embed/services.xml
@@ -67,9 +67,9 @@
<transformer-start-sequence-token>101</transformer-start-sequence-token>
<transformer-end-sequence-token>102</transformer-end-sequence-token>
<transformer-mask-token>103</transformer-mask-token>
+ <transformer-pad-token>0</transformer-pad-token>
<transformer-input-ids>my_input_ids</transformer-input-ids>
<transformer-attention-mask>my_attention_mask</transformer-attention-mask>
- <transformer-token-type-ids>my_token_type_ids</transformer-token-type-ids>
<transformer-output>my_output</transformer-output>
<onnx-execution-mode>parallel</onnx-execution-mode>
<onnx-intraop-threads>10</onnx-intraop-threads>
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java
index 2532a5be863..4efffc8310a 100644
--- a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java
+++ b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java
@@ -130,6 +130,10 @@ public class EmbedderTestCase {
var tokenizerCfg = assertHuggingfaceTokenizerComponentPresent(cluster);
assertEquals("https://my/url/tokenizer.json", modelReference(tokenizerCfg.model().get(0), "path").url().orElseThrow().value());
assertEquals(-1, tokenizerCfg.maxLength());
+ assertEquals(1, embedderCfg.queryTokenId());
+ assertEquals(2, embedderCfg.documentTokenId());
+ assertEquals(0, embedderCfg.transformerPadToken());
+ assertEquals(103, embedderCfg.transformerMaskToken());
}
@Test
@@ -143,6 +147,10 @@ public class EmbedderTestCase {
var tokenizerCfg = assertHuggingfaceTokenizerComponentPresent(cluster);
assertEquals("https://data.vespa.oath.cloud/onnx_models/multilingual-e5-base/tokenizer.json", modelReference(tokenizerCfg.model().get(0), "path").url().orElseThrow().value());
assertEquals(-1, tokenizerCfg.maxLength());
+ assertEquals(1, embedderCfg.queryTokenId());
+ assertEquals(2, embedderCfg.documentTokenId());
+ assertEquals(0, embedderCfg.transformerPadToken());
+ assertEquals(103, embedderCfg.transformerMaskToken());
}
@Test