about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@gmail.com> 2022-08-24 13:41:10 +0200
committer: Jon Bratseth <bratseth@gmail.com> 2022-08-24 13:41:10 +0200
commit: dd32f335484c133cf47ac354f8709c61c9d58bd3 (patch)
tree: 1068b373696700a7db64931d4e8967cfdb91cb15
parent: 69c3cfc1828aab9d759126df90667067e88490cb (diff)
More tests
-rw-r--r-- config-model/src/test/cfg/application/embed/services.xml 7
-rw-r--r-- config-model/src/test/cfg/application/embed_generic/configdefinitions/sentence-embedder.def 5
-rw-r--r-- config-model/src/test/cfg/application/embed_generic/services.xml 5
-rw-r--r-- config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java 98
4 files changed, 71 insertions, 44 deletions
diff --git a/config-model/src/test/cfg/application/embed/services.xml b/config-model/src/test/cfg/application/embed/services.xml
index eac6e62860e..62a50dd43a3 100644
--- a/config-model/src/test/cfg/application/embed/services.xml
+++ b/config-model/src/test/cfg/application/embed/services.xml
@@ -4,14 +4,9 @@
<container version="1.0">
- <embedder id="test" class="ai.vespa.embedding.UndefinedEmbedder" bundle="dummy" def="test.dummy">
- <num>12</num>
- <str>some text</str>
- </embedder>
-
<embedder id="transformer" class="ai.vespa.embedding.BertBaseEmbedder">
<!-- model specifics -->
- <transformerModel id="minilm-l6-v2" url="test-model-url"/>
+ <transformerModel id="minilm-l6-v2" url="application-url"/>
<tokenizerVocab path="files/vocab.txt"/>
<!-- tunable parameters: number of threads etc -->
diff --git a/config-model/src/test/cfg/application/embed_generic/configdefinitions/sentence-embedder.def b/config-model/src/test/cfg/application/embed_generic/configdefinitions/sentence-embedder.def
index ac5c79d2714..81fc88dbf01 100644
--- a/config-model/src/test/cfg/application/embed_generic/configdefinitions/sentence-embedder.def
+++ b/config-model/src/test/cfg/application/embed_generic/configdefinitions/sentence-embedder.def
@@ -1,12 +1,15 @@
package=ai.vespa.example.paragraph
# Settings for wordpiece tokenizer
-vocab path
+vocabPath path
+vocabUrl string
# Transformer model settings
modelPath path
modelUrl string
+myValue string
+
# Max length of token sequence model can handle
transforerMaxTokens int default=128
diff --git a/config-model/src/test/cfg/application/embed_generic/services.xml b/config-model/src/test/cfg/application/embed_generic/services.xml
index ab2c1be9745..2df4f2eda41 100644
--- a/config-model/src/test/cfg/application/embed_generic/services.xml
+++ b/config-model/src/test/cfg/application/embed_generic/services.xml
@@ -8,8 +8,9 @@
class='ai.vespa.example.paragraph.ApplicationSpecificEmbedder'
bundle='exampleEmbedder'
def='ai.vespa.example.paragraph.sentence-embedder'>
- <model path="files/model.onnx" /> <!-- Embedder syntax for file path -->
- <vocab>files/vocab.txt</vocab> <!-- Generic config syntax for file path -->
+ <model id="minilm-l6-v2" url="application-url" />
+ <vocab path="files/vocab.txt"/>
+ <myValue>foo</myValue>
</embedder>
<nodes>
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java
index 74fd00b2ebd..a987ce8951f 100644
--- a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java
+++ b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java
@@ -28,8 +28,9 @@ import static org.junit.jupiter.api.Assertions.fail;
public class EmbedderTestCase {
- private static final String PREDEFINED_EMBEDDER_CLASS = "ai.vespa.embedding.BertBaseEmbedder";
- private static final String PREDEFINED_EMBEDDER_CONFIG = "embedding.bert-base-embedder";
+ private static final String emptyPathFileName = "services.xml";
+ private static final String BUNDLED_EMBEDDER_CLASS = "ai.vespa.embedding.BertBaseEmbedder";
+ private static final String BUBNDLED_EMBEDDER_CONFIG = "embedding.bert-base-embedder";
@Test
void testApplicationEmbedder() throws IOException, SAXException {
@@ -54,7 +55,7 @@ public class EmbedderTestCase {
}
@Test
- void testApplicationEmbedderWithoutConfigAttemptsToSetConfig() throws IOException, SAXException {
+ void testApplicationEmbedderWithoutConfigCannotSetConfig() throws IOException, SAXException {
String embedder = "<embedder id='test' class='ai.vespa.test' bundle='bundle'>" +
" <val>123</val>" +
"</embedder>";
@@ -62,13 +63,13 @@ public class EmbedderTestCase {
}
@Test
- void testPredefinedEmbedConfigSelfHosted() throws IOException, SAXException {
- String embedder = "<embedder id='test' class='" + PREDEFINED_EMBEDDER_CLASS + "'>" +
+ void testBundledEmbedder_selfhosted() throws IOException, SAXException {
+ String embedder = "<embedder id='test' class='" + BUNDLED_EMBEDDER_CLASS + "'>" +
" <transformerModel id='my_model_id' url='my-model-url' />" +
" <tokenizerVocab id='my_vocab_id' url='my-vocab-url' />" +
"</embedder>";
- String component = "<component id='test' class='" + PREDEFINED_EMBEDDER_CLASS + "' bundle='model-integration'>" +
- " <config name='" + PREDEFINED_EMBEDDER_CONFIG + "'>" +
+ String component = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" +
+ " <config name='" + BUBNDLED_EMBEDDER_CONFIG + "'>" +
" <transformerModelUrl>my-model-url</transformerModelUrl>" +
" <transformerModelPath>services.xml</transformerModelPath>" +
" <tokenizerVocabUrl>my-vocab-url</tokenizerVocabUrl>" +
@@ -79,13 +80,13 @@ public class EmbedderTestCase {
}
@Test
- void testPathHasPrioritySelfHosted() throws IOException, SAXException {
- String embedder = "<embedder id='test' class='" + PREDEFINED_EMBEDDER_CLASS + "'>" +
+ void testPathHasPriority_selfhosted() throws IOException, SAXException {
+ String embedder = "<embedder id='test' class='" + BUNDLED_EMBEDDER_CLASS + "'>" +
" <transformerModel id='my_model_id' url='my-model-url' path='files/model.onnx' />" +
" <tokenizerVocab id='my_vocab_id' url='my-vocab-url' path='files/vocab.txt' />" +
"</embedder>";
- String component = "<component id='test' class='" + PREDEFINED_EMBEDDER_CLASS + "' bundle='model-integration'>" +
- " <config name='" + PREDEFINED_EMBEDDER_CONFIG + "'>" +
+ String component = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" +
+ " <config name='" + BUBNDLED_EMBEDDER_CONFIG + "'>" +
" <transformerModelUrl></transformerModelUrl>" +
" <transformerModelPath>files/model.onnx</transformerModelPath>" +
" <tokenizerVocabUrl></tokenizerVocabUrl>" +
@@ -96,13 +97,13 @@ public class EmbedderTestCase {
}
@Test
- void testPredefinedEmbedderCloud() throws IOException, SAXException {
- String embedder = "<embedder id='test' class='" + PREDEFINED_EMBEDDER_CLASS + "'>" +
+ void testBundledEmbedder_hosted() throws IOException, SAXException {
+ String embedder = "<embedder id='test' class='" + BUNDLED_EMBEDDER_CLASS + "'>" +
" <transformerModel id='minilm-l6-v2' />" +
" <tokenizerVocab id='bert-base-uncased' />" +
"</embedder>";
- String component = "<component id='test' class='" + PREDEFINED_EMBEDDER_CLASS + "' bundle='model-integration'>" +
- " <config name='" + PREDEFINED_EMBEDDER_CONFIG + "'>" +
+ String component = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" +
+ " <config name='" + BUBNDLED_EMBEDDER_CONFIG + "'>" +
" <transformerModelUrl>https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx</transformerModelUrl>" +
" <transformerModelPath>services.xml</transformerModelPath>" +
" <tokenizerVocabUrl>https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt</tokenizerVocabUrl>" +
@@ -113,13 +114,13 @@ public class EmbedderTestCase {
}
@Test
- void testCustomEmbedderWithPredefinedConfigCloud() throws IOException, SAXException {
- String embedder = "<embedder id='test' class='ApplicationSpecificEmbedder' def='" + PREDEFINED_EMBEDDER_CONFIG + "'>" +
+ void testApplicationEmbedderWithBundledConfig_hosted() throws IOException, SAXException {
+ String embedder = "<embedder id='test' class='ApplicationSpecificEmbedder' def='" + BUBNDLED_EMBEDDER_CONFIG + "'>" +
" <transformerModel id='minilm-l6-v2' />" +
" <tokenizerVocab id='bert-base-uncased' />" +
"</embedder>";
String component = "<component id='test' class='ApplicationSpecificEmbedder' bundle='model-integration'>" +
- " <config name='" + PREDEFINED_EMBEDDER_CONFIG + "'>" +
+ " <config name='" + BUBNDLED_EMBEDDER_CONFIG + "'>" +
" <transformerModelUrl>https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx</transformerModelUrl>" +
" <transformerModelPath>services.xml</transformerModelPath>" +
" <tokenizerVocabUrl>https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt</tokenizerVocabUrl>" +
@@ -130,8 +131,8 @@ public class EmbedderTestCase {
}
@Test
- void testUnknownModelIdCloud() throws IOException, SAXException {
- String embedder = "<embedder id='test' class='" + PREDEFINED_EMBEDDER_CLASS + "'>" +
+ void testUnknownModelId_hosted() throws IOException, SAXException {
+ String embedder = "<embedder id='test' class='" + BUNDLED_EMBEDDER_CLASS + "'>" +
" <transformerModel id='my_model_id' />" +
" <tokenizerVocab id='my_vocab_id' />" +
"</embedder>";
@@ -139,36 +140,63 @@ public class EmbedderTestCase {
}
@Test
- void testApplicationWithEmbedder() throws Exception {
- final String emptyPathFileName = "services.xml";
-
+ void testApplicationPackageWithEmbedder_selfhosted() throws Exception {
Path applicationDir = Path.fromString("src/test/cfg/application/embed/");
VespaModel model = loadModel(applicationDir, false);
ApplicationContainerCluster containerCluster = model.getContainerClusters().get("container");
- Component<?, ?> testComponent = containerCluster.getComponentsMap().get(new ComponentId("test"));
- ConfigPayloadBuilder testConfig = testComponent.getUserConfigs().get(new ConfigDefinitionKey("dummy", "test"));
- assertEquals("12", testConfig.getObject("num").getValue());
- assertEquals("some text", testConfig.getObject("str").getValue());
+ Component<?, ?> transformer = containerCluster.getComponentsMap().get(new ComponentId("transformer"));
+ ConfigPayloadBuilder config = transformer.getUserConfigs().get(new ConfigDefinitionKey("bert-base-embedder", "embedding"));
+ assertEquals("application-url", config.getObject("transformerModelUrl").getValue());
+ assertEquals(emptyPathFileName, config.getObject("transformerModelPath").getValue());
+ assertEquals("", config.getObject("tokenizerVocabUrl").getValue());
+ assertEquals("files/vocab.txt", config.getObject("tokenizerVocabPath").getValue());
+ assertEquals("4", config.getObject("onnxIntraOpThreads").getValue());
+ }
+
+ @Test
+ void testApplicationPackageWithEmbedder_hosted() throws Exception {
+ Path applicationDir = Path.fromString("src/test/cfg/application/embed/");
+ VespaModel model = loadModel(applicationDir, true);
+ ApplicationContainerCluster containerCluster = model.getContainerClusters().get("container");
Component<?, ?> transformer = containerCluster.getComponentsMap().get(new ComponentId("transformer"));
- ConfigPayloadBuilder transformerConfig = transformer.getUserConfigs().get(new ConfigDefinitionKey("bert-base-embedder", "embedding"));
- assertEquals("test-model-url", transformerConfig.getObject("transformerModelUrl").getValue());
- assertEquals(emptyPathFileName, transformerConfig.getObject("transformerModelPath").getValue());
- assertEquals("", transformerConfig.getObject("tokenizerVocabUrl").getValue());
- assertEquals("files/vocab.txt", transformerConfig.getObject("tokenizerVocabPath").getValue());
+ ConfigPayloadBuilder config = transformer.getUserConfigs().get(new ConfigDefinitionKey("bert-base-embedder", "embedding"));
+ assertEquals("https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx",
+ config.getObject("transformerModelUrl").getValue());
+ assertEquals(emptyPathFileName, config.getObject("transformerModelPath").getValue());
+ assertEquals("", config.getObject("tokenizerVocabUrl").getValue());
+ assertEquals("files/vocab.txt", config.getObject("tokenizerVocabPath").getValue());
+ assertEquals("4", config.getObject("onnxIntraOpThreads").getValue());
}
@Test
- void testApplicationWithGenericEmbedder() throws Exception {
+ void testApplicationPackageWithApplicationEmbedder_selfhosted() throws Exception {
Path applicationDir = Path.fromString("src/test/cfg/application/embed_generic/");
VespaModel model = loadModel(applicationDir, false);
ApplicationContainerCluster containerCluster = model.getContainerClusters().get("container");
Component<?, ?> testComponent = containerCluster.getComponentsMap().get(new ComponentId("transformer"));
ConfigPayloadBuilder config = testComponent.getUserConfigs().get(new ConfigDefinitionKey("sentence-embedder", "ai.vespa.example.paragraph"));
- assertEquals("files/vocab.txt", config.getObject("vocab").getValue());
- assertEquals("files/model.onnx", config.getObject("modelPath").getValue());
+ assertEquals("application-url", config.getObject("modelUrl").getValue());
+ assertEquals(emptyPathFileName, config.getObject("modelPath").getValue());
+ assertEquals("files/vocab.txt", config.getObject("vocabPath").getValue());
+ assertEquals("foo", config.getObject("myValue").getValue());
+ }
+
+ @Test
+ void testApplicationPackageWithApplicationEmbedder_hosted() throws Exception {
+ Path applicationDir = Path.fromString("src/test/cfg/application/embed_generic/");
+ VespaModel model = loadModel(applicationDir, true);
+ ApplicationContainerCluster containerCluster = model.getContainerClusters().get("container");
+
+ Component<?, ?> testComponent = containerCluster.getComponentsMap().get(new ComponentId("transformer"));
+ ConfigPayloadBuilder config = testComponent.getUserConfigs().get(new ConfigDefinitionKey("sentence-embedder", "ai.vespa.example.paragraph"));
+ assertEquals("https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx",
+ config.getObject("modelUrl").getValue());
+ assertEquals(emptyPathFileName, config.getObject("modelPath").getValue());
+ assertEquals("files/vocab.txt", config.getObject("vocabPath").getValue());
+ assertEquals("foo", config.getObject("myValue").getValue());
}
private VespaModel loadModel(Path path, boolean hosted) throws Exception {