diff options
author | Jo Kristian Bergum <bergum@yahoo-inc.com> | 2024-01-10 13:28:47 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-10 13:28:47 +0100 |
commit | c4e33003e5ce3f385951c107714ede8556ef8083 (patch) | |
tree | cd4b84ac9e3840384ad0a7c2b6c85de0a01a7102 /config-model/src/test | |
parent | 949cede5ec0375c03dacdbb141f04e471aac8099 (diff) | |
parent | 2f3a69daf2f212aaa3ed29c89407d4af95b65138 (diff) |
Merge pull request #29826 from vespa-engine/jobergum/colbert-handle-multilingual-tokenizers
colbert handle multilingual tokenizers better
Diffstat (limited to 'config-model/src/test')
-rw-r--r-- | config-model/src/test/cfg/application/embed/services.xml | 2 | ||||
-rw-r--r-- | config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java | 8 |
2 files changed, 9 insertions, 1 deletion
diff --git a/config-model/src/test/cfg/application/embed/services.xml b/config-model/src/test/cfg/application/embed/services.xml
index 59c29aefc6a..e92679e3c96 100644
--- a/config-model/src/test/cfg/application/embed/services.xml
+++ b/config-model/src/test/cfg/application/embed/services.xml
@@ -67,9 +67,9 @@
  <transformer-start-sequence-token>101</transformer-start-sequence-token>
  <transformer-end-sequence-token>102</transformer-end-sequence-token>
  <transformer-mask-token>103</transformer-mask-token>
+ <transformer-pad-token>0</transformer-pad-token>
  <transformer-input-ids>my_input_ids</transformer-input-ids>
  <transformer-attention-mask>my_attention_mask</transformer-attention-mask>
- <transformer-token-type-ids>my_token_type_ids</transformer-token-type-ids>
  <transformer-output>my_output</transformer-output>
  <onnx-execution-mode>parallel</onnx-execution-mode>
  <onnx-intraop-threads>10</onnx-intraop-threads>
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java
index 2532a5be863..4efffc8310a 100644
--- a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java
+++ b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java
@@ -130,6 +130,10 @@ public class EmbedderTestCase {
  var tokenizerCfg = assertHuggingfaceTokenizerComponentPresent(cluster);
  assertEquals("https://my/url/tokenizer.json", modelReference(tokenizerCfg.model().get(0), "path").url().orElseThrow().value());
  assertEquals(-1, tokenizerCfg.maxLength());
+ assertEquals(1, embedderCfg.queryTokenId());
+ assertEquals(2, embedderCfg.documentTokenId());
+ assertEquals(0, embedderCfg.transformerPadToken());
+ assertEquals(103, embedderCfg.transformerMaskToken());
  }
 
  @Test
@@ -143,6 +147,10 @@ public class EmbedderTestCase {
  var tokenizerCfg = assertHuggingfaceTokenizerComponentPresent(cluster);
  assertEquals("https://data.vespa.oath.cloud/onnx_models/multilingual-e5-base/tokenizer.json", modelReference(tokenizerCfg.model().get(0), "path").url().orElseThrow().value());
  assertEquals(-1, tokenizerCfg.maxLength());
+ assertEquals(1, embedderCfg.queryTokenId());
+ assertEquals(2, embedderCfg.documentTokenId());
+ assertEquals(0, embedderCfg.transformerPadToken());
+ assertEquals(103, embedderCfg.transformerMaskToken());
  }
 
  @Test