summary | refs | log | tree | commit | diff | stats
path: root/config-model/src/test
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@gmail.com>, 2022-08-26 09:58:10 +0200
committer: GitHub <noreply@github.com>, 2022-08-26 09:58:10 +0200
commit: 40bb8680dbef01e603b8947a194c86e9acc14e30 (patch)
tree: 3badd5c97ff41449514805921e567c218661ab79 /config-model/src/test
parent: d227d62f0cef26ebdb30c0d5280a2462cd39767d (diff)
parent: ffab68b3f5c28034eaf3a606c1b220c14f7204fa (diff)
Merge pull request #23770 from vespa-engine/bratseth/embedder-syntax-5
Bratseth/embedder syntax 5
Diffstat (limited to 'config-model/src/test')
-rw-r--r-- config-model/src/test/cfg/application/embed/services.xml 21
-rw-r--r-- config-model/src/test/cfg/application/embed_generic/configdefinitions/sentence-embedder.def 5
-rw-r--r-- config-model/src/test/cfg/application/embed_generic/services.xml 16
-rw-r--r-- config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java 193
-rw-r--r-- config-model/src/test/schema-test-files/services.xml 1
5 files changed, 132 insertions, 104 deletions
diff --git a/config-model/src/test/cfg/application/embed/services.xml b/config-model/src/test/cfg/application/embed/services.xml
index 9a05337f954..88558ace4bf 100644
--- a/config-model/src/test/cfg/application/embed/services.xml
+++ b/config-model/src/test/cfg/application/embed/services.xml
@@ -4,19 +4,16 @@
<container version="1.0">
- <embedder id="test" class="ai.vespa.embedding.UndefinedEmbedder" bundle="dummy" def="test.dummy">
- <num>12</num>
- <str>some text</str>
- </embedder>
+ <component id="transformer" class="ai.vespa.embedding.BertBaseEmbedder" bindle="model-integration">
+ <config name="embedding.bert-base-embedder">
+ <!-- model specifics -->
+ <transformerModel id="minilm-l6-v2" url="application-url"/>
+ <tokenizerVocab path="files/vocab.txt"/>
- <embedder id="transformer" class="ai.vespa.embedding.BertBaseEmbedder">
- <!-- model specifics -->
- <transformerModel id="test-model-id" url="test-model-url"/>
- <tokenizerVocab path="files/vocab.txt"/>
-
- <!-- tunable parameters: number of threads etc -->
- <onnxIntraOpThreads>4</onnxIntraOpThreads>
- </embedder>
+ <!-- tunable parameters: number of threads etc -->
+ <onnxIntraOpThreads>4</onnxIntraOpThreads>
+ </config>
+ </component>
<nodes>
<node hostalias="node1" />
diff --git a/config-model/src/test/cfg/application/embed_generic/configdefinitions/sentence-embedder.def b/config-model/src/test/cfg/application/embed_generic/configdefinitions/sentence-embedder.def
index ac5c79d2714..81fc88dbf01 100644
--- a/config-model/src/test/cfg/application/embed_generic/configdefinitions/sentence-embedder.def
+++ b/config-model/src/test/cfg/application/embed_generic/configdefinitions/sentence-embedder.def
@@ -1,12 +1,15 @@
package=ai.vespa.example.paragraph
# Settings for wordpiece tokenizer
-vocab path
+vocabPath path
+vocabUrl string
# Transformer model settings
modelPath path
modelUrl string
+myValue string
+
# Max length of token sequence model can handle
transforerMaxTokens int default=128
diff --git a/config-model/src/test/cfg/application/embed_generic/services.xml b/config-model/src/test/cfg/application/embed_generic/services.xml
index ab2c1be9745..ea430f24e2f 100644
--- a/config-model/src/test/cfg/application/embed_generic/services.xml
+++ b/config-model/src/test/cfg/application/embed_generic/services.xml
@@ -4,13 +4,15 @@
<container version="1.0">
- <embedder id='transformer'
- class='ai.vespa.example.paragraph.ApplicationSpecificEmbedder'
- bundle='exampleEmbedder'
- def='ai.vespa.example.paragraph.sentence-embedder'>
- <model path="files/model.onnx" /> <!-- Embedder syntax for file path -->
- <vocab>files/vocab.txt</vocab> <!-- Generic config syntax for file path -->
- </embedder>
+ <component id='transformer'
+ class='ai.vespa.example.paragraph.ApplicationSpecificEmbedder'
+ bundle='exampleEmbedder'>
+ <config name='ai.vespa.example.paragraph.sentence-embedder'>
+ <model id="minilm-l6-v2" url="application-url" />
+ <vocab path="files/vocab.txt"/>
+ <myValue>foo</myValue>
+ </config>
+ </component>
<nodes>
<node hostalias='node1'/>
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java
index d64e726eb6a..ffa7e52136f 100644
--- a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java
+++ b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java
@@ -28,130 +28,158 @@ import static org.junit.jupiter.api.Assertions.fail;
public class EmbedderTestCase {
- private static final String PREDEFINED_EMBEDDER_CLASS = "ai.vespa.embedding.BertBaseEmbedder";
- private static final String PREDEFINED_EMBEDDER_CONFIG = "embedding.bert-base-embedder";
+ private static final String emptyPathFileName = "services.xml";
+ private static final String BUNDLED_EMBEDDER_CLASS = "ai.vespa.embedding.BertBaseEmbedder";
+ private static final String BUNDLED_EMBEDDER_CONFIG = "embedding.bert-base-embedder";
@Test
- void testGenericEmbedConfig() throws IOException, SAXException {
- String embedder = "<embedder id='test' class='ai.vespa.test' bundle='bundle' def='def.name'>" +
- " <val>123</val>" +
- "</embedder>";
- String component = "<component id='test' class='ai.vespa.test' bundle='bundle'>" +
- " <config name='def.name'>" +
- " <val>123</val>" +
- " </config>" +
- "</component>";
- assertTransform(embedder, component);
- }
-
- @Test
- void testPredefinedEmbedConfigSelfHosted() throws IOException, SAXException {
- String embedder = "<embedder id='test' class='" + PREDEFINED_EMBEDDER_CLASS + "'>" +
- " <transformerModel id='my_model_id' url='my-model-url' />" +
- " <tokenizerVocab id='my_vocab_id' url='my-vocab-url' />" +
- "</embedder>";
- String component = "<component id='test' class='" + PREDEFINED_EMBEDDER_CLASS + "' bundle='model-integration'>" +
- " <config name='" + PREDEFINED_EMBEDDER_CONFIG + "'>" +
+ void testBundledEmbedder_selfhosted() throws IOException, SAXException {
+ String input = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" +
+ " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" +
+ " <transformerModel id='my_model_id' url='my-model-url' />" +
+ " <tokenizerVocab id='my_vocab_id' url='my-vocab-url' />" +
+ " </config>" +
+ "</component>";
+ String component = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" +
+ " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" +
" <transformerModelUrl>my-model-url</transformerModelUrl>" +
- " <transformerModelPath></transformerModelPath>" +
+ " <transformerModelPath>services.xml</transformerModelPath>" +
" <tokenizerVocabUrl>my-vocab-url</tokenizerVocabUrl>" +
- " <tokenizerVocabPath></tokenizerVocabPath>" +
+ " <tokenizerVocabPath>services.xml</tokenizerVocabPath>" +
" </config>" +
"</component>";
- assertTransform(embedder, component, false);
+ assertTransform(input, component, false);
}
@Test
- void testPathHasPrioritySelfHosted() throws IOException, SAXException {
- String embedder = "<embedder id='test' class='" + PREDEFINED_EMBEDDER_CLASS + "'>" +
- " <transformerModel id='my_model_id' url='my-model-url' path='files/model.onnx' />" +
- " <tokenizerVocab id='my_vocab_id' url='my-vocab-url' path='files/vocab.txt' />" +
- "</embedder>";
- String component = "<component id='test' class='" + PREDEFINED_EMBEDDER_CLASS + "' bundle='model-integration'>" +
- " <config name='" + PREDEFINED_EMBEDDER_CONFIG + "'>" +
+ void testPathHasPriority_selfhosted() throws IOException, SAXException {
+ String input = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" +
+ " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" +
+ " <transformerModel id='my_model_id' url='my-model-url' path='files/model.onnx' />" +
+ " <tokenizerVocab id='my_vocab_id' url='my-vocab-url' path='files/vocab.txt' />" +
+ " </config>" +
+ "</component>";
+ String component = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" +
+ " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" +
" <transformerModelUrl></transformerModelUrl>" +
" <transformerModelPath>files/model.onnx</transformerModelPath>" +
" <tokenizerVocabUrl></tokenizerVocabUrl>" +
" <tokenizerVocabPath>files/vocab.txt</tokenizerVocabPath>" +
" </config>" +
"</component>";
- assertTransform(embedder, component, false);
+ assertTransform(input, component, false);
}
@Test
- void testPredefinedEmbedConfigCloud() throws IOException, SAXException {
- String embedder = "<embedder id='test' class='" + PREDEFINED_EMBEDDER_CLASS + "'>" +
- " <transformerModel id='test-model-id' />" +
- " <tokenizerVocab id='test-model-id' />" +
- "</embedder>";
- String component = "<component id='test' class='" + PREDEFINED_EMBEDDER_CLASS + "' bundle='model-integration'>" +
- " <config name='" + PREDEFINED_EMBEDDER_CONFIG + "'>" +
- " <transformerModelUrl>test-model-url</transformerModelUrl>" +
- " <transformerModelPath></transformerModelPath>" +
- " <tokenizerVocabUrl>test-model-url</tokenizerVocabUrl>" +
- " <tokenizerVocabPath></tokenizerVocabPath>" +
+ void testBundledEmbedder_hosted() throws IOException, SAXException {
+ String input = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" +
+ " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" +
+ " <transformerModel id='minilm-l6-v2' />" +
+ " <tokenizerVocab id='bert-base-uncased' />" +
+ " </config>" +
+ "</component>";
+ String component = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" +
+ " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" +
+ " <transformerModelUrl>https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx</transformerModelUrl>" +
+ " <transformerModelPath>services.xml</transformerModelPath>" +
+ " <tokenizerVocabUrl>https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt</tokenizerVocabUrl>" +
+ " <tokenizerVocabPath>services.xml</tokenizerVocabPath>" +
" </config>" +
"</component>";
- assertTransform(embedder, component, true);
+ assertTransform(input, component, true);
}
@Test
- void testCustomEmbedderWithPredefinedConfigCloud() throws IOException, SAXException {
- String embedder = "<embedder id='test' class='ApplicationSpecificEmbedder' def='" + PREDEFINED_EMBEDDER_CONFIG + "'>" +
- " <transformerModel id='test-model-id' />" +
- " <tokenizerVocab id='test-model-id' />" +
- "</embedder>";
+ void testApplicationEmbedderWithBundledConfig_hosted() throws IOException, SAXException {
+ String input = "<component id='test' class='ApplicationSpecificEmbedder' bundle='model-integration'>" +
+ " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" +
+ " <transformerModel id='minilm-l6-v2' />" +
+ " <tokenizerVocab id='bert-base-uncased' />" +
+ " </config>" +
+ "</component>";
String component = "<component id='test' class='ApplicationSpecificEmbedder' bundle='model-integration'>" +
- " <config name='" + PREDEFINED_EMBEDDER_CONFIG + "'>" +
- " <transformerModelUrl>test-model-url</transformerModelUrl>" +
- " <transformerModelPath></transformerModelPath>" +
- " <tokenizerVocabUrl>test-model-url</tokenizerVocabUrl>" +
- " <tokenizerVocabPath></tokenizerVocabPath>" +
+ " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" +
+ " <transformerModelUrl>https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx</transformerModelUrl>" +
+ " <transformerModelPath>services.xml</transformerModelPath>" +
+ " <tokenizerVocabUrl>https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt</tokenizerVocabUrl>" +
+ " <tokenizerVocabPath>services.xml</tokenizerVocabPath>" +
" </config>" +
"</component>";
- assertTransform(embedder, component, true);
+ assertTransform(input, component, true);
}
@Test
- void testUnknownModelIdCloud() throws IOException, SAXException {
- String embedder = "<embedder id='test' class='" + PREDEFINED_EMBEDDER_CLASS + "'>" +
- " <transformerModel id='my_model_id' />" +
- " <tokenizerVocab id='my_vocab_id' />" +
- "</embedder>";
- assertTransformThrows(embedder, "Unknown model id 'my_model_id'", true);
+ void testUnknownModelId_hosted() throws IOException, SAXException {
+ String embedder = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "'>" +
+ " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" +
+ " <transformerModel id='my_model_id' />" +
+ " <tokenizerVocab id='my_vocab_id' />" +
+ " </config>" +
+ "</component>";
+ assertTransformThrows(embedder,
+ "Unknown embedder model 'my_model_id'. " +
+ "Available models are [bert-base-uncased, minilm-l6-v2]",
+ true);
}
@Test
- void testApplicationWithEmbedConfig() throws Exception {
- final String emptyPathFileName = "services.xml";
-
+ void testApplicationPackageWithEmbedder_selfhosted() throws Exception {
Path applicationDir = Path.fromString("src/test/cfg/application/embed/");
VespaModel model = loadModel(applicationDir, false);
ApplicationContainerCluster containerCluster = model.getContainerClusters().get("container");
- Component<?, ?> testComponent = containerCluster.getComponentsMap().get(new ComponentId("test"));
- ConfigPayloadBuilder testConfig = testComponent.getUserConfigs().get(new ConfigDefinitionKey("dummy", "test"));
- assertEquals("12", testConfig.getObject("num").getValue());
- assertEquals("some text", testConfig.getObject("str").getValue());
+ Component<?, ?> transformer = containerCluster.getComponentsMap().get(new ComponentId("transformer"));
+ ConfigPayloadBuilder config = transformer.getUserConfigs().get(new ConfigDefinitionKey("bert-base-embedder", "embedding"));
+ assertEquals("application-url", config.getObject("transformerModelUrl").getValue());
+ assertEquals(emptyPathFileName, config.getObject("transformerModelPath").getValue());
+ assertEquals("", config.getObject("tokenizerVocabUrl").getValue());
+ assertEquals("files/vocab.txt", config.getObject("tokenizerVocabPath").getValue());
+ assertEquals("4", config.getObject("onnxIntraOpThreads").getValue());
+ }
+
+ @Test
+ void testApplicationPackageWithEmbedder_hosted() throws Exception {
+ Path applicationDir = Path.fromString("src/test/cfg/application/embed/");
+ VespaModel model = loadModel(applicationDir, true);
+ ApplicationContainerCluster containerCluster = model.getContainerClusters().get("container");
Component<?, ?> transformer = containerCluster.getComponentsMap().get(new ComponentId("transformer"));
- ConfigPayloadBuilder transformerConfig = transformer.getUserConfigs().get(new ConfigDefinitionKey("bert-base-embedder", "embedding"));
- assertEquals("test-model-url", transformerConfig.getObject("transformerModelUrl").getValue());
- assertEquals(emptyPathFileName, transformerConfig.getObject("transformerModelPath").getValue());
- assertEquals("", transformerConfig.getObject("tokenizerVocabUrl").getValue());
- assertEquals("files/vocab.txt", transformerConfig.getObject("tokenizerVocabPath").getValue());
+ ConfigPayloadBuilder config = transformer.getUserConfigs().get(new ConfigDefinitionKey("bert-base-embedder", "embedding"));
+ assertEquals("https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx",
+ config.getObject("transformerModelUrl").getValue());
+ assertEquals(emptyPathFileName, config.getObject("transformerModelPath").getValue());
+ assertEquals("", config.getObject("tokenizerVocabUrl").getValue());
+ assertEquals("files/vocab.txt", config.getObject("tokenizerVocabPath").getValue());
+ assertEquals("4", config.getObject("onnxIntraOpThreads").getValue());
}
@Test
- void testApplicationWithGenericEmbedConfig() throws Exception {
+ void testApplicationPackageWithApplicationEmbedder_selfhosted() throws Exception {
Path applicationDir = Path.fromString("src/test/cfg/application/embed_generic/");
VespaModel model = loadModel(applicationDir, false);
ApplicationContainerCluster containerCluster = model.getContainerClusters().get("container");
Component<?, ?> testComponent = containerCluster.getComponentsMap().get(new ComponentId("transformer"));
ConfigPayloadBuilder config = testComponent.getUserConfigs().get(new ConfigDefinitionKey("sentence-embedder", "ai.vespa.example.paragraph"));
- assertEquals("files/vocab.txt", config.getObject("vocab").getValue());
- assertEquals("files/model.onnx", config.getObject("modelPath").getValue());
+ assertEquals("application-url", config.getObject("modelUrl").getValue());
+ assertEquals(emptyPathFileName, config.getObject("modelPath").getValue());
+ assertEquals("files/vocab.txt", config.getObject("vocabPath").getValue());
+ assertEquals("foo", config.getObject("myValue").getValue());
+ }
+
+ @Test
+ void testApplicationPackageWithApplicationEmbedder_hosted() throws Exception {
+ Path applicationDir = Path.fromString("src/test/cfg/application/embed_generic/");
+ VespaModel model = loadModel(applicationDir, true);
+ ApplicationContainerCluster containerCluster = model.getContainerClusters().get("container");
+
+ Component<?, ?> testComponent = containerCluster.getComponentsMap().get(new ComponentId("transformer"));
+ ConfigPayloadBuilder config = testComponent.getUserConfigs().get(new ConfigDefinitionKey("sentence-embedder", "ai.vespa.example.paragraph"));
+ assertEquals("https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx",
+ config.getObject("modelUrl").getValue());
+ assertEquals(emptyPathFileName, config.getObject("modelPath").getValue());
+ assertEquals("files/vocab.txt", config.getObject("vocabPath").getValue());
+ assertEquals("foo", config.getObject("myValue").getValue());
}
private VespaModel loadModel(Path path, boolean hosted) throws Exception {
@@ -165,17 +193,16 @@ public class EmbedderTestCase {
assertTransform(embedder, component, false);
}
- private void assertTransform(String embedder, String component, boolean hosted) throws IOException, SAXException {
- Element emb = createElement(embedder);
- Element cmp = createElement(component);
- Element trans = EmbedderConfigTransformer.transform(createEmptyDeployState(hosted), emb);
- assertSpec(cmp, trans);
+ private void assertTransform(String embedder, String expectedComponent, boolean hosted) throws IOException, SAXException {
+ assertSpec(createElement(expectedComponent),
+ ModelConfigTransformer.transform(createEmptyDeployState(hosted), createElement(embedder)));
}
private void assertSpec(Element e1, Element e2) {
assertEquals(e1.getTagName(), e2.getTagName());
assertAttributes(e1, e2);
assertAttributes(e2, e1);
+ assertEquals(XML.getValue(e1).trim(), XML.getValue(e2).trim(), "Content of " + e1.getTagName() + "' is identical");
assertChildren(e1, e2);
}
@@ -200,7 +227,7 @@ public class EmbedderTestCase {
private void assertTransformThrows(String embedder, String expectedMessage, boolean hosted) throws IOException, SAXException {
try {
- EmbedderConfigTransformer.transform(createEmptyDeployState(hosted), createElement(embedder));
+ ModelConfigTransformer.transform(createEmptyDeployState(hosted), createElement(embedder));
fail("Expected exception was not thrown: " + expectedMessage);
} catch (IllegalArgumentException e) {
assertEquals(expectedMessage, e.getMessage());
diff --git a/config-model/src/test/schema-test-files/services.xml b/config-model/src/test/schema-test-files/services.xml
index ffb28726d9a..b32849bb55f 100644
--- a/config-model/src/test/schema-test-files/services.xml
+++ b/config-model/src/test/schema-test-files/services.xml
@@ -196,7 +196,6 @@
<component id="injected-to-handler">
<config name="foo"/>
</component>
- <embedder id="transformer" class="ai.vespa.example.SomeEmbedder" bundle="myBundle" def="my.def-file"/>
</handler>
<server id="server-provider">