diff options
Diffstat (limited to 'config-model/src/test')
6 files changed, 69 insertions, 83 deletions
diff --git a/config-model/src/test/cfg/application/embed/configdefinitions/embedding.bert-base-embedder.def b/config-model/src/test/cfg/application/embed/configdefinitions/embedding.bert-base-embedder.def new file mode 100644 index 00000000000..a6544187140 --- /dev/null +++ b/config-model/src/test/cfg/application/embed/configdefinitions/embedding.bert-base-embedder.def @@ -0,0 +1,30 @@ +# Copy of this Vespa config stored here because Vespa config definitions are not +# available in unit tests, and are needed (by DomConfigPayloadBuilder.parseLeaf). +# Alternatively, we could make that not need it as it is not strictly necessary. + +namespace=embedding + +# Wordpiece tokenizer +tokenizerVocab model + +transformerModel model + +# Max length of token sequence model can handle +transformerMaxTokens int default=384 + +# Pooling strategy +poolingStrategy enum { cls, mean } default=mean + +# Input names +transformerInputIds string default=input_ids +transformerAttentionMask string default=attention_mask +transformerTokenTypeIds string default=token_type_ids + +# Output name +transformerOutput string default=output_0 + +# Settings for ONNX model evaluation +onnxExecutionMode enum { parallel, sequential } default=sequential +onnxInterOpThreads int default=1 +onnxIntraOpThreads int default=-4 # n=number of threads -> n<0: CPUs/(-n), n==0: CPUs, n>0: n + diff --git a/config-model/src/test/cfg/application/embed/services.xml b/config-model/src/test/cfg/application/embed/services.xml index 88558ace4bf..cdbcfd67f02 100644 --- a/config-model/src/test/cfg/application/embed/services.xml +++ b/config-model/src/test/cfg/application/embed/services.xml @@ -7,7 +7,7 @@ <component id="transformer" class="ai.vespa.embedding.BertBaseEmbedder" bundle="model-integration"> <config name="embedding.bert-base-embedder"> <!-- model specifics --> - <transformerModel id="minilm-l6-v2" url="application-url"/> + <transformerModel model-id="minilm-l6-v2" url="application-url"/> <tokenizerVocab 
path="files/vocab.txt"/> <!-- tunable parameters: number of threads etc --> diff --git a/config-model/src/test/cfg/application/embed_generic/configdefinitions/sentence-embedder.def b/config-model/src/test/cfg/application/embed_generic/configdefinitions/sentence-embedder.def index 81fc88dbf01..87b80f1051a 100644 --- a/config-model/src/test/cfg/application/embed_generic/configdefinitions/sentence-embedder.def +++ b/config-model/src/test/cfg/application/embed_generic/configdefinitions/sentence-embedder.def @@ -1,12 +1,9 @@ package=ai.vespa.example.paragraph -# Settings for wordpiece tokenizer -vocabPath path -vocabUrl string +# WordPiece tokenizer vocabulary +vocab model -# Transformer model settings -modelPath path -modelUrl string +model model myValue string diff --git a/config-model/src/test/cfg/application/embed_generic/services.xml b/config-model/src/test/cfg/application/embed_generic/services.xml index ea430f24e2f..d2c22c03343 100644 --- a/config-model/src/test/cfg/application/embed_generic/services.xml +++ b/config-model/src/test/cfg/application/embed_generic/services.xml @@ -8,7 +8,7 @@ class='ai.vespa.example.paragraph.ApplicationSpecificEmbedder' bundle='exampleEmbedder'> <config name='ai.vespa.example.paragraph.sentence-embedder'> - <model id="minilm-l6-v2" url="application-url" /> + <model model-id="minilm-l6-v2" url="application-url" /> <vocab path="files/vocab.txt"/> <myValue>foo</myValue> </config> diff --git a/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/DomConfigPayloadBuilderTest.java b/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/DomConfigPayloadBuilderTest.java index 88af584de90..e788fe5fc54 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/DomConfigPayloadBuilderTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/DomConfigPayloadBuilderTest.java @@ -130,7 +130,7 @@ public class DomConfigPayloadBuilderTest { new 
DomConfigPayloadBuilder(null).build(configRoot); fail("Expected exception for wrong tag name."); } catch (ConfigurationRuntimeException e) { - assertEquals("The root element must be 'config', but was 'configs'.", e.getMessage()); + assertEquals("The root element must be 'config', but was 'configs'", e.getMessage()); } } @@ -142,7 +142,7 @@ public class DomConfigPayloadBuilderTest { new DomConfigPayloadBuilder(null).build(configRoot); fail("Expected exception for mismatch between def-name and xml name attribute."); } catch (ConfigurationRuntimeException e) { - assertEquals("The 'config' element must have a 'name' attribute that matches the name of the config definition.", e.getMessage()); + assertEquals("The 'config' element must have a 'name' attribute that matches the name of the config definition", e.getMessage()); } } diff --git a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java index ffa7e52136f..60386be17db 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java @@ -28,7 +28,6 @@ import static org.junit.jupiter.api.Assertions.fail; public class EmbedderTestCase { - private static final String emptyPathFileName = "services.xml"; private static final String BUNDLED_EMBEDDER_CLASS = "ai.vespa.embedding.BertBaseEmbedder"; private static final String BUNDLED_EMBEDDER_CONFIG = "embedding.bert-base-embedder"; @@ -42,29 +41,8 @@ public class EmbedderTestCase { "</component>"; String component = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModelUrl>my-model-url</transformerModelUrl>" + - " <transformerModelPath>services.xml</transformerModelPath>" + - " 
<tokenizerVocabUrl>my-vocab-url</tokenizerVocabUrl>" + - " <tokenizerVocabPath>services.xml</tokenizerVocabPath>" + - " </config>" + - "</component>"; - assertTransform(input, component, false); - } - - @Test - void testPathHasPriority_selfhosted() throws IOException, SAXException { - String input = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + - " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel id='my_model_id' url='my-model-url' path='files/model.onnx' />" + - " <tokenizerVocab id='my_vocab_id' url='my-vocab-url' path='files/vocab.txt' />" + - " </config>" + - "</component>"; - String component = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + - " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModelUrl></transformerModelUrl>" + - " <transformerModelPath>files/model.onnx</transformerModelPath>" + - " <tokenizerVocabUrl></tokenizerVocabUrl>" + - " <tokenizerVocabPath>files/vocab.txt</tokenizerVocabPath>" + + " <transformerModel id='my_model_id' url='my-model-url' />" + + " <tokenizerVocab id='my_vocab_id' url='my-vocab-url' />" + " </config>" + "</component>"; assertTransform(input, component, false); @@ -74,35 +52,31 @@ public class EmbedderTestCase { void testBundledEmbedder_hosted() throws IOException, SAXException { String input = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel id='minilm-l6-v2' />" + - " <tokenizerVocab id='bert-base-uncased' />" + + " <transformerModel model-id='minilm-l6-v2' />" + + " <tokenizerVocab model-id='bert-base-uncased' />" + " </config>" + "</component>"; String component = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " 
<transformerModelUrl>https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx</transformerModelUrl>" + - " <transformerModelPath>services.xml</transformerModelPath>" + - " <tokenizerVocabUrl>https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt</tokenizerVocabUrl>" + - " <tokenizerVocabPath>services.xml</tokenizerVocabPath>" + + " <transformerModel model-id='minilm-l6-v2' url='https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx' />" + + " <tokenizerVocab model-id='bert-base-uncased' url='https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt' />" + " </config>" + "</component>"; assertTransform(input, component, true); } @Test - void testApplicationEmbedderWithBundledConfig_hosted() throws IOException, SAXException { + void testApplicationComponentWithModelReference_hosted() throws IOException, SAXException { String input = "<component id='test' class='ApplicationSpecificEmbedder' bundle='model-integration'>" + " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel id='minilm-l6-v2' />" + - " <tokenizerVocab id='bert-base-uncased' />" + + " <transformerModel model-id='minilm-l6-v2' />" + + " <tokenizerVocab model-id='bert-base-uncased' />" + " </config>" + "</component>"; String component = "<component id='test' class='ApplicationSpecificEmbedder' bundle='model-integration'>" + " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModelUrl>https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx</transformerModelUrl>" + - " <transformerModelPath>services.xml</transformerModelPath>" + - " <tokenizerVocabUrl>https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt</tokenizerVocabUrl>" + - " <tokenizerVocabPath>services.xml</tokenizerVocabPath>" + + " <transformerModel model-id='minilm-l6-v2' url='https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx' />" + + " <tokenizerVocab model-id='bert-base-uncased' 
url='https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt' />" + " </config>" + "</component>"; assertTransform(input, component, true); @@ -112,12 +86,12 @@ public class EmbedderTestCase { void testUnknownModelId_hosted() throws IOException, SAXException { String embedder = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "'>" + " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel id='my_model_id' />" + - " <tokenizerVocab id='my_vocab_id' />" + + " <transformerModel model-id='my_model_id' />" + + " <tokenizerVocab model-id='my_vocab_id' />" + " </config>" + "</component>"; assertTransformThrows(embedder, - "Unknown embedder model 'my_model_id'. " + + "Unknown model id 'my_model_id' on 'transformerModel'. " + "Available models are [bert-base-uncased, minilm-l6-v2]", true); } @@ -130,10 +104,8 @@ public class EmbedderTestCase { Component<?, ?> transformer = containerCluster.getComponentsMap().get(new ComponentId("transformer")); ConfigPayloadBuilder config = transformer.getUserConfigs().get(new ConfigDefinitionKey("bert-base-embedder", "embedding")); - assertEquals("application-url", config.getObject("transformerModelUrl").getValue()); - assertEquals(emptyPathFileName, config.getObject("transformerModelPath").getValue()); - assertEquals("", config.getObject("tokenizerVocabUrl").getValue()); - assertEquals("files/vocab.txt", config.getObject("tokenizerVocabPath").getValue()); + assertEquals("minilm-l6-v2 application-url \"\"", config.getObject("transformerModel").getValue()); + assertEquals("\"\" \"\" files/vocab.txt", config.getObject("tokenizerVocab").getValue()); assertEquals("4", config.getObject("onnxIntraOpThreads").getValue()); } @@ -145,11 +117,9 @@ public class EmbedderTestCase { Component<?, ?> transformer = containerCluster.getComponentsMap().get(new ComponentId("transformer")); ConfigPayloadBuilder config = transformer.getUserConfigs().get(new ConfigDefinitionKey("bert-base-embedder", "embedding")); - 
assertEquals("https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx", - config.getObject("transformerModelUrl").getValue()); - assertEquals(emptyPathFileName, config.getObject("transformerModelPath").getValue()); - assertEquals("", config.getObject("tokenizerVocabUrl").getValue()); - assertEquals("files/vocab.txt", config.getObject("tokenizerVocabPath").getValue()); + assertEquals("minilm-l6-v2 https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx \"\"", + config.getObject("transformerModel").getValue()); + assertEquals("\"\" \"\" files/vocab.txt", config.getObject("tokenizerVocab").getValue()); assertEquals("4", config.getObject("onnxIntraOpThreads").getValue()); } @@ -161,10 +131,8 @@ public class EmbedderTestCase { Component<?, ?> testComponent = containerCluster.getComponentsMap().get(new ComponentId("transformer")); ConfigPayloadBuilder config = testComponent.getUserConfigs().get(new ConfigDefinitionKey("sentence-embedder", "ai.vespa.example.paragraph")); - assertEquals("application-url", config.getObject("modelUrl").getValue()); - assertEquals(emptyPathFileName, config.getObject("modelPath").getValue()); - assertEquals("files/vocab.txt", config.getObject("vocabPath").getValue()); - assertEquals("foo", config.getObject("myValue").getValue()); + assertEquals("minilm-l6-v2 application-url \"\"", config.getObject("model").getValue()); + assertEquals("\"\" \"\" files/vocab.txt", config.getObject("vocab").getValue()); } @Test @@ -175,11 +143,9 @@ public class EmbedderTestCase { Component<?, ?> testComponent = containerCluster.getComponentsMap().get(new ComponentId("transformer")); ConfigPayloadBuilder config = testComponent.getUserConfigs().get(new ConfigDefinitionKey("sentence-embedder", "ai.vespa.example.paragraph")); - assertEquals("https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx", - config.getObject("modelUrl").getValue()); - assertEquals(emptyPathFileName, config.getObject("modelPath").getValue()); 
- assertEquals("files/vocab.txt", config.getObject("vocabPath").getValue()); - assertEquals("foo", config.getObject("myValue").getValue()); + assertEquals("minilm-l6-v2 https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx \"\"", + config.getObject("model").getValue()); + assertEquals("\"\" \"\" files/vocab.txt", config.getObject("vocab").getValue()); } private VespaModel loadModel(Path path, boolean hosted) throws Exception { @@ -189,13 +155,10 @@ public class EmbedderTestCase { return new VespaModel(state); } - private void assertTransform(String embedder, String component) throws IOException, SAXException { - assertTransform(embedder, component, false); - } - - private void assertTransform(String embedder, String expectedComponent, boolean hosted) throws IOException, SAXException { - assertSpec(createElement(expectedComponent), - ModelConfigTransformer.transform(createEmptyDeployState(hosted), createElement(embedder))); + private void assertTransform(String inputComponent, String expectedComponent, boolean hosted) throws IOException, SAXException { + Element component = createElement(inputComponent); + ModelIdResolver.resolveModelIds(component, hosted); + assertSpec(createElement(expectedComponent), component); } private void assertSpec(Element e1, Element e2) { @@ -209,8 +172,9 @@ public class EmbedderTestCase { private void assertAttributes(Element e1, Element e2) { NamedNodeMap map = e1.getAttributes(); for (int i = 0; i < map.getLength(); ++i) { - String attr = map.item(i).getNodeName(); - assertEquals(e1.getAttribute(attr), e2.getAttribute(attr)); + String attribute = map.item(i).getNodeName(); + assertEquals(e1.getAttribute(attribute), e2.getAttribute(attribute), + "Attribute '" + attribute + "' is equal"); } } @@ -227,7 +191,7 @@ public class EmbedderTestCase { private void assertTransformThrows(String embedder, String expectedMessage, boolean hosted) throws IOException, SAXException { try { - 
ModelConfigTransformer.transform(createEmptyDeployState(hosted), createElement(embedder)); + ModelIdResolver.resolveModelIds(createElement(embedder), hosted); fail("Expected exception was not thrown: " + expectedMessage); } catch (IllegalArgumentException e) { assertEquals(expectedMessage, e.getMessage()); @@ -239,9 +203,4 @@ public class EmbedderTestCase { return (Element) doc.getFirstChild(); } - private DeployState createEmptyDeployState(boolean hosted) { - TestProperties properties = new TestProperties().setHostedVespa(hosted); - return new DeployState.Builder().properties(properties).build(); - } - } |