diff options
Diffstat (limited to 'config-model/src/test')
7 files changed, 168 insertions, 165 deletions
diff --git a/config-model/src/test/cfg/application/embed/configdefinitions/embedding.bert-base-embedder.def b/config-model/src/test/cfg/application/embed/configdefinitions/embedding.bert-base-embedder.def deleted file mode 100644 index 144dfbd0001..00000000000 --- a/config-model/src/test/cfg/application/embed/configdefinitions/embedding.bert-base-embedder.def +++ /dev/null @@ -1,30 +0,0 @@ -# Copy of this Vespa config stored here because Vespa config definitions are not -# available in unit tests, and are needed (by DomConfigPayloadBuilder.parseLeaf) -# Alternatively, we could make that not need it as it is not strictly necessaery. - -namespace=embedding - -# Wordpiece tokenizer -tokenizerVocab model - -transformerModel model - -# Max length of token sequence model can handle -transformerMaxTokens int default=384 - -# Pooling strategy -poolingStrategy enum { cls, mean } default=mean - -# Input names -transformerInputIds string default=input_ids -transformerAttentionMask string default=attention_mask -transformerTokenTypeIds string default=token_type_ids - -# Output name -transformerOutput string default=output_0 - -# Settings for ONNX model evaluation -onnxExecutionMode enum { parallel, sequential } default=sequential -onnxInterOpThreads int default=1 -onnxIntraOpThreads int default=-4 # n=number of threads -> n<0: CPUs/(-n), n==0: CPUs, n>0: n - diff --git a/config-model/src/test/cfg/application/embed/configdefinitions/sentence-embedder.def b/config-model/src/test/cfg/application/embed/configdefinitions/sentence-embedder.def new file mode 100644 index 00000000000..87b80f1051a --- /dev/null +++ b/config-model/src/test/cfg/application/embed/configdefinitions/sentence-embedder.def @@ -0,0 +1,26 @@ +package=ai.vespa.example.paragraph + +# WordPiece tokenizer vocabulary +vocab model + +model model + +myValue string + +# Max length of token sequence model can handle +transforerMaxTokens int default=128 + +# Pooling strategy +poolingStrategy enum { cls, mean } default=mean + +# Input names +transformerInputIds string default=input_ids +transformerAttentionMask string default=attention_mask + +# Output name +transformerOutput string default=last_hidden_state + +# Settings for ONNX model evaluation +onnxExecutionMode enum { parallel, sequential } default=sequential +onnxInterOpThreads int default=1 +onnxIntraOpThreads int default=-4 diff --git a/config-model/src/test/cfg/application/embed/services.xml b/config-model/src/test/cfg/application/embed/services.xml index 99c89bc4324..6823ef900ae 100644 --- a/config-model/src/test/cfg/application/embed/services.xml +++ b/config-model/src/test/cfg/application/embed/services.xml @@ -16,6 +16,7 @@ <onnx-intraop-threads>10</onnx-intraop-threads> <onnx-interop-threads>8</onnx-interop-threads> <onnx-gpu-device>1</onnx-gpu-device> + <pooling-strategy>mean</pooling-strategy> </component> <component id="hf-tokenizer" type="hugging-face-tokenizer"> @@ -25,15 +26,24 @@ <truncation>true</truncation> </component> - <component id="transformer" class="ai.vespa.embedding.BertBaseEmbedder" bundle="model-integration"> - <config name="embedding.bert-base-embedder"> - <!-- model specifics --> - <transformerModel model-id="minilm-l6-v2" url="application-url"/> - <tokenizerVocab path="files/vocab.txt"/> + <component id="bert-embedder" type="bert-embedder"> + <!-- model specifics --> + <transformer-model model-id="minilm-l6-v2" url="application-url"/> + <tokenizer-vocab path="files/vocab.txt"/> + <max-tokens>512</max-tokens> + <transformer-input-ids>my_input_ids</transformer-input-ids> + <transformer-attention-mask>my_attention_mask</transformer-attention-mask> + <transformer-token-type-ids>my_token_type_ids</transformer-token-type-ids> + <transformer-output>my_output</transformer-output> + <transformer-start-sequence-token>101</transformer-start-sequence-token> + <transformer-end-sequence-token>102</transformer-end-sequence-token> - <!-- tunable parameters: number of threads etc --> - <onnxIntraOpThreads>4</onnxIntraOpThreads> - </config> + + <!-- tunable parameters: number of threads etc --> + <onnx-execution-mode>parallel</onnx-execution-mode> + <onnx-intraop-threads>4</onnx-intraop-threads> + <onnx-interop-threads>8</onnx-interop-threads> + <onnx-gpu-device>1</onnx-gpu-device> </component> <nodes> diff --git a/config-model/src/test/cfg/application/embed_cloud_only/configdefinitions/embedding.bert-base-embedder.def b/config-model/src/test/cfg/application/embed_cloud_only/configdefinitions/embedding.bert-base-embedder.def deleted file mode 100644 index 144dfbd0001..00000000000 --- a/config-model/src/test/cfg/application/embed_cloud_only/configdefinitions/embedding.bert-base-embedder.def +++ /dev/null @@ -1,30 +0,0 @@ -# Copy of this Vespa config stored here because Vespa config definitions are not -# available in unit tests, and are needed (by DomConfigPayloadBuilder.parseLeaf) -# Alternatively, we could make that not need it as it is not strictly necessaery. - -namespace=embedding - -# Wordpiece tokenizer -tokenizerVocab model - -transformerModel model - -# Max length of token sequence model can handle -transformerMaxTokens int default=384 - -# Pooling strategy -poolingStrategy enum { cls, mean } default=mean - -# Input names -transformerInputIds string default=input_ids -transformerAttentionMask string default=attention_mask -transformerTokenTypeIds string default=token_type_ids - -# Output name -transformerOutput string default=output_0 - -# Settings for ONNX model evaluation -onnxExecutionMode enum { parallel, sequential } default=sequential -onnxInterOpThreads int default=1 -onnxIntraOpThreads int default=-4 # n=number of threads -> n<0: CPUs/(-n), n==0: CPUs, n>0: n - diff --git a/config-model/src/test/cfg/application/embed_cloud_only/configdefinitions/sentence-embedder.def b/config-model/src/test/cfg/application/embed_cloud_only/configdefinitions/sentence-embedder.def new file mode 100644 index 00000000000..87b80f1051a --- /dev/null +++ b/config-model/src/test/cfg/application/embed_cloud_only/configdefinitions/sentence-embedder.def @@ -0,0 +1,26 @@ +package=ai.vespa.example.paragraph + +# WordPiece tokenizer vocabulary +vocab model + +model model + +myValue string + +# Max length of token sequence model can handle +transforerMaxTokens int default=128 + +# Pooling strategy +poolingStrategy enum { cls, mean } default=mean + +# Input names +transformerInputIds string default=input_ids +transformerAttentionMask string default=attention_mask + +# Output name +transformerOutput string default=last_hidden_state + +# Settings for ONNX model evaluation +onnxExecutionMode enum { parallel, sequential } default=sequential +onnxInterOpThreads int default=1 +onnxIntraOpThreads int default=-4 diff --git a/config-model/src/test/cfg/application/embed_cloud_only/services.xml b/config-model/src/test/cfg/application/embed_cloud_only/services.xml index 57db4f5bfae..e203ec56669 100644 --- a/config-model/src/test/cfg/application/embed_cloud_only/services.xml +++ b/config-model/src/test/cfg/application/embed_cloud_only/services.xml @@ -4,14 +4,11 @@ <container version="1.0"> - <component id="transformer" class="ai.vespa.embedding.BertBaseEmbedder" bundle="model-integration"> - <config name="embedding.bert-base-embedder"> - <!-- No fallback to url or path when deploying outside cloud --> - <transformerModel model-id="minilm-l6-v2"/> - <tokenizerVocab path="files/vocab.txt"/> - - <!-- tunable parameters: number of threads etc --> - <onnxIntraOpThreads>4</onnxIntraOpThreads> + <component id="transformer" class="ai.vespa.example.paragraph.ApplicationSpecificEmbedder" bundle="app"> + <config name='ai.vespa.example.paragraph.sentence-embedder'> + <model model-id="minilm-l6-v2"/> + <vocab path="files/vocab.txt"/> + <myValue>foo</myValue> </config> </component> diff --git a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java index 69981233c3f..2a82daef9e3 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java @@ -2,9 +2,13 @@ package com.yahoo.vespa.model.container.xml; import com.yahoo.component.ComponentId; +import com.yahoo.config.InnerNode; +import com.yahoo.config.ModelNode; +import com.yahoo.config.ModelReference; import com.yahoo.config.model.application.provider.FilesApplicationPackage; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.config.model.deploy.TestProperties; +import com.yahoo.embedding.BertBaseEmbedderConfig; import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig; import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig; import com.yahoo.path.Path; @@ -13,6 +17,7 @@ import com.yahoo.vespa.config.ConfigDefinitionKey; import com.yahoo.vespa.config.ConfigPayloadBuilder; import com.yahoo.vespa.model.VespaModel; import com.yahoo.vespa.model.container.ApplicationContainerCluster; +import com.yahoo.vespa.model.container.component.BertEmbedder; import com.yahoo.vespa.model.container.component.Component; import com.yahoo.vespa.model.container.component.HuggingFaceEmbedder; import com.yahoo.vespa.model.container.component.HuggingFaceTokenizer; @@ -35,55 +40,18 @@ import static org.junit.jupiter.api.Assertions.fail; public class EmbedderTestCase { - private static final String BUNDLED_EMBEDDER_CLASS = "ai.vespa.embedding.BertBaseEmbedder"; - private static final String BUNDLED_EMBEDDER_CONFIG = "embedding.bert-base-embedder"; - - @Test - void testBundledEmbedder_selfhosted() throws IOException, SAXException { - String input = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + - " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel id='my_model_id' url='my-model-url' />" + - " <tokenizerVocab id='my_vocab_id' url='my-vocab-url' />" + - " </config>" + - "</component>"; - String component = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + - " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel id='my_model_id' url='my-model-url' />" + - " <tokenizerVocab id='my_vocab_id' url='my-vocab-url' />" + - " </config>" + - "</component>"; - assertTransform(input, component, false); - } - - @Test - void testBundledEmbedder_hosted() throws IOException, SAXException { - String input = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + - " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel model-id='minilm-l6-v2' />" + - " <tokenizerVocab model-id='bert-base-uncased' path='ignored.txt'/>" + - " </config>" + - "</component>"; - String component = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + - " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel model-id='minilm-l6-v2' url='https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx' />" + - " <tokenizerVocab model-id='bert-base-uncased' url='https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt' />" + - " </config>" + - "</component>"; - assertTransform(input, component, true); - } - @Test void testApplicationComponentWithModelReference_hosted() throws IOException, SAXException { - String input = "<component id='test' class='ApplicationSpecificEmbedder' bundle='model-integration'>" + - " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel model-id='minilm-l6-v2' />" + - " <tokenizerVocab model-id='bert-base-uncased' />" + + String input = "<component id='test' class='ai.vespa.example.paragraph.ApplicationSpecificEmbedder' bundle='app'>" + + " <config name='ai.vespa.example.paragraph.sentence-embedder'>" + + " <model model-id='minilm-l6-v2' />" + + " <vocab model-id='bert-base-uncased' />" + " </config>" + "</component>"; - String component = "<component id='test' class='ApplicationSpecificEmbedder' bundle='model-integration'>" + - " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel model-id='minilm-l6-v2' url='https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx' />" + - " <tokenizerVocab model-id='bert-base-uncased' url='https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt' />" + + String component = "<component id='test' class='ai.vespa.example.paragraph.ApplicationSpecificEmbedder' bundle='app'>" + + " <config name='ai.vespa.example.paragraph.sentence-embedder'>" + + " <model model-id='minilm-l6-v2' url='https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx' />" + + " <vocab model-id='bert-base-uncased' url='https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt' />" + " </config>" + "</component>"; assertTransform(input, component, true); @@ -91,64 +59,65 @@ public class EmbedderTestCase { @Test void testUnknownModelId_hosted() throws IOException, SAXException { - String embedder = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "'>" + - " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel model-id='my_model_id' />" + - " <tokenizerVocab model-id='my_vocab_id' />" + + String embedder = "<component id='test' class='ai.vespa.example.paragraph.ApplicationSpecificEmbedder'>" + + " <config name='ai.vespa.example.paragraph.sentence-embedder'>" + + " <model model-id='my_model_id' />" + + " <vocab model-id='my_vocab_id' />" + " </config>" + "</component>"; assertTransformThrows(embedder, - "Unknown model id 'my_model_id' on 'transformerModel'", + "Unknown model id 'my_model_id' on 'model'", true); } @Test - void testApplicationPackageWithEmbedder_selfhosted() throws Exception { - Path applicationDir = Path.fromString("src/test/cfg/application/embed/"); - VespaModel model = loadModel(applicationDir, false); - ApplicationContainerCluster containerCluster = model.getContainerClusters().get("container"); + void huggingfaceEmbedder_selfhosted() throws Exception { + var model = loadModel(Path.fromString("src/test/cfg/application/embed/"), false); + var cluster = model.getContainerClusters().get("container"); + var embedderCfg = assertHuggingfaceEmbedderComponentPresent(cluster); + assertEquals("my_input_ids", embedderCfg.transformerInputIds()); + assertEquals("https://my/url/model.onnx", modelReference(embedderCfg, "transformerModel").url().orElseThrow().value()); + var tokenizerCfg = assertHuggingfaceTokenizerComponentPresent(cluster); + assertEquals("https://my/url/tokenizer.json", modelReference(tokenizerCfg.model().get(0), "path").url().orElseThrow().value()); + assertEquals(768, tokenizerCfg.maxLength()); + } - Component<?, ?> transformer = containerCluster.getComponentsMap().get(new ComponentId("transformer")); - ConfigPayloadBuilder config = transformer.getUserConfigs().get(new ConfigDefinitionKey("bert-base-embedder", "embedding")); - assertEquals("minilm-l6-v2 application-url \"\"", config.getObject("transformerModel").getValue()); - assertEquals("\"\" \"\" files/vocab.txt", config.getObject("tokenizerVocab").getValue()); - assertEquals("4", config.getObject("onnxIntraOpThreads").getValue()); - - { - var hfEmbedder = (HuggingFaceEmbedder)containerCluster.getComponentsMap().get(new ComponentId("hf-embedder")); - assertEquals("ai.vespa.embedding.huggingface.HuggingFaceEmbedder", hfEmbedder.getClassId().getName()); - var cfgBuilder = new HuggingFaceEmbedderConfig.Builder(); - hfEmbedder.getConfig(cfgBuilder); - var cfg = cfgBuilder.build(); - assertEquals("my_input_ids", cfg.transformerInputIds()); - } - { - var hfTokenizer = (HuggingFaceTokenizer)containerCluster.getComponentsMap().get(new ComponentId("hf-tokenizer")); - assertEquals("com.yahoo.language.huggingface.HuggingFaceTokenizer", hfTokenizer.getClassId().getName()); - var cfgBuilder = new HuggingFaceTokenizerConfig.Builder(); - hfTokenizer.getConfig(cfgBuilder); - var cfg = cfgBuilder.build(); - assertEquals(768, cfg.maxLength()); - } + @Test + void huggingfaceEmbedder_hosted() throws Exception { + var model = loadModel(Path.fromString("src/test/cfg/application/embed/"), true); + var cluster = model.getContainerClusters().get("container"); + var embedderCfg = assertHuggingfaceEmbedderComponentPresent(cluster); + assertEquals("my_input_ids", embedderCfg.transformerInputIds()); + assertEquals("https://data.vespa.oath.cloud/onnx_models/e5-base-v2/model.onnx", modelReference(embedderCfg, "transformerModel").url().orElseThrow().value()); + var tokenizerCfg = assertHuggingfaceTokenizerComponentPresent(cluster); + assertEquals("https://data.vespa.oath.cloud/onnx_models/multilingual-e5-base/tokenizer.json", modelReference(tokenizerCfg.model().get(0), "path").url().orElseThrow().value()); + assertEquals(768, tokenizerCfg.maxLength()); } + @Test - void passesXmlValdiation() { - new VespaModelCreatorWithFilePkg("src/test/cfg/application/embed/").create(); + void bertEmbedder_selfhosted() throws Exception { + var model = loadModel(Path.fromString("src/test/cfg/application/embed/"), false); + var cluster = model.getContainerClusters().get("container"); + var embedderCfg = assertBertEmbedderComponentPresent(cluster); + assertEquals("application-url", modelReference(embedderCfg, "transformerModel").url().orElseThrow().value()); + assertEquals("files/vocab.txt", modelReference(embedderCfg, "tokenizerVocab").path().orElseThrow().value()); } @Test - void testApplicationPackageWithEmbedder_hosted() throws Exception { - Path applicationDir = Path.fromString("src/test/cfg/application/embed/"); - VespaModel model = loadModel(applicationDir, true); - ApplicationContainerCluster containerCluster = model.getContainerClusters().get("container"); + void bertEmbedder_hosted() throws Exception { + var model = loadModel(Path.fromString("src/test/cfg/application/embed/"), true); + var cluster = model.getContainerClusters().get("container"); + var embedderCfg = assertBertEmbedderComponentPresent(cluster); + assertEquals("https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx", + modelReference(embedderCfg, "transformerModel").url().orElseThrow().value()); + assertTrue(modelReference(embedderCfg, "tokenizerVocab").url().isEmpty()); + assertEquals("files/vocab.txt", modelReference(embedderCfg, "tokenizerVocab").path().orElseThrow().value()); + } - Component<?, ?> transformer = containerCluster.getComponentsMap().get(new ComponentId("transformer")); - ConfigPayloadBuilder config = transformer.getUserConfigs().get(new ConfigDefinitionKey("bert-base-embedder", "embedding")); - assertEquals("minilm-l6-v2 https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx \"\"", - config.getObject("transformerModel").getValue()); - assertEquals("\"\" \"\" files/vocab.txt", config.getObject("tokenizerVocab").getValue()); - assertEquals("4", config.getObject("onnxIntraOpThreads").getValue()); + @Test + void passesXmlValidation() { + new VespaModelCreatorWithFilePkg("src/test/cfg/application/embed/").create(); } @Test @@ -184,7 +153,7 @@ public class EmbedderTestCase { fail("Expected failure"); } catch (IllegalArgumentException e) { - assertEquals("transformerModel is configured with only a 'model-id'. Add a 'path' or 'url' to deploy this outside Vespa Cloud", + assertEquals("model is configured with only a 'model-id'. Add a 'path' or 'url' to deploy this outside Vespa Cloud", Exceptions.toMessageString(e)); } } @@ -244,4 +213,39 @@ public class EmbedderTestCase { return (Element) doc.getFirstChild(); } + private static HuggingFaceTokenizerConfig assertHuggingfaceTokenizerComponentPresent(ApplicationContainerCluster cluster) { + var hfTokenizer = (HuggingFaceTokenizer) cluster.getComponentsMap().get(new ComponentId("hf-tokenizer")); + assertEquals("com.yahoo.language.huggingface.HuggingFaceTokenizer", hfTokenizer.getClassId().getName()); + var cfgBuilder = new HuggingFaceTokenizerConfig.Builder(); + hfTokenizer.getConfig(cfgBuilder); + return cfgBuilder.build(); + } + + private static HuggingFaceEmbedderConfig assertHuggingfaceEmbedderComponentPresent(ApplicationContainerCluster cluster) { + var hfEmbedder = (HuggingFaceEmbedder) cluster.getComponentsMap().get(new ComponentId("hf-embedder")); + assertEquals("ai.vespa.embedding.huggingface.HuggingFaceEmbedder", hfEmbedder.getClassId().getName()); + var cfgBuilder = new HuggingFaceEmbedderConfig.Builder(); + hfEmbedder.getConfig(cfgBuilder); + return cfgBuilder.build(); + } + + private static BertBaseEmbedderConfig assertBertEmbedderComponentPresent(ApplicationContainerCluster cluster) { + var bertEmbedder = (BertEmbedder) cluster.getComponentsMap().get(new ComponentId("bert-embedder")); + assertEquals("ai.vespa.embedding.BertBaseEmbedder", bertEmbedder.getClassId().getName()); + var cfgBuilder = new BertBaseEmbedderConfig.Builder(); + bertEmbedder.getConfig(cfgBuilder); + return cfgBuilder.build(); + } + + // Ugly hack to read underlying model reference from config instance + private static ModelReference modelReference(InnerNode cfg, String name) { + try { + var f = cfg.getClass().getDeclaredField(name); + f.setAccessible(true); + return ((ModelNode) f.get(cfg)).getModelReference(); + } catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + } |