diff options
author | Jon Bratseth <bratseth@gmail.com> | 2022-08-24 21:42:52 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2022-08-24 21:42:52 +0200 |
commit | ffab68b3f5c28034eaf3a606c1b220c14f7204fa (patch) | |
tree | ccecb49d4d491f002a97c18e86101e8267d9bd08 /config-model | |
parent | 3fe0ba83594545c773800e3eb8473de50c601966 (diff) |
Drop special embedder config
Diffstat (limited to 'config-model')
10 files changed, 137 insertions, 219 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index d43222a39e5..fc8a542b81c 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -351,21 +351,12 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { container.setProp("rotations", String.join(",", rotationsProperty)); } - private static void addEmbedderComponents(DeployState deployState, ApplicationContainerCluster cluster, Element parent) { - for (Element node : XML.getChildren(parent, "embedder")) { - Element transformed = EmbedderConfigTransformer.transform(deployState, node); - cluster.addComponent(new DomComponentBuilder().build(deployState, cluster, transformed)); - } - } - private void addConfiguredComponents(DeployState deployState, ApplicationContainerCluster cluster, Element parent) { for (Element components : XML.getChildren(parent, "components")) { addIncludes(components); addConfiguredComponents(deployState, cluster, components, "component"); - addEmbedderComponents(deployState, cluster, components); } addConfiguredComponents(deployState, cluster, parent, "component"); - addEmbedderComponents(deployState, cluster, parent); } protected void addStatusHandlers(ApplicationContainerCluster cluster, boolean isHostedVespa) { @@ -965,8 +956,9 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { private static void addConfiguredComponents(DeployState deployState, ContainerCluster<? 
extends Container> cluster, Element parent, String componentName) { - for (Element node : XML.getChildren(parent, componentName)) { - cluster.addComponent(new DomComponentBuilder().build(deployState, cluster, node)); + for (Element component : XML.getChildren(parent, componentName)) { + component = ModelConfigTransformer.transform(deployState, component); + cluster.addComponent(new DomComponentBuilder().build(deployState, cluster, component)); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/EmbedderConfigTransformer.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/EmbedderConfigTransformer.java deleted file mode 100644 index 456ad9fcd46..00000000000 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/EmbedderConfigTransformer.java +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.model.container.xml; - -import com.yahoo.config.model.deploy.DeployState; -import com.yahoo.text.XML; -import org.w3c.dom.Element; - -import java.util.Map; -import java.util.stream.Collectors; - -/** - * Translates config in services.xml of the form - * - * <embedder id="..." class="..." bundle="..." def="..."> - * <!-- options --> - * </embedder> - * - * to component configuration of the form - * - * <component id="..." class="..." bundle="..."> - * <config name=def> - * <!-- options --> - * </config> - * </component> - * - * with some added interpretations based on recognizing the class. 
- * - * @author lesters - * @author bratseth - */ -public class EmbedderConfigTransformer { - - private static final Map<String, String> providedModels = - Map.of("minilm-l6-v2", "https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx", - "bert-base-uncased", "https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt"); - - // Until we have optional path parameters, use services.xml as it is guaranteed to exist - private final static String dummyPath = "services.xml"; - - /** - * Transforms the <embedder ...> element to component configuration. - * - * @param deployState the deploy state - as config generation can depend on context - * @param embedder the XML element containing the <embedder ...> - * @return a new XML element containting the <component ...> configuration - */ - public static Element transform(DeployState deployState, Element embedder) { - String embedderId = XML.attribute("id", embedder).orElseThrow(); // Required by schema - String embedderClass = XML.attribute("class", embedder).orElse(embedderId); - - Element component = XML.getDocumentBuilder().newDocument().createElement("component"); - component.setAttribute("id", embedderId); - component.setAttribute("class", embedderClass); - component.setAttribute("bundle", XML.attribute("bundle", embedder).orElse("model-integration")); - - String configDef = embedderConfigFrom(embedder, embedderClass); - if ( ! configDef.isEmpty()) { - Element config = component.getOwnerDocument().createElement("config"); - config.setAttribute("name", configDef); - for (Element child : XML.getChildren(embedder)) - addConfigValue(child, config, deployState.isHosted()); - component.appendChild(config); - } - else if ( ! 
XML.getChildren(embedder).isEmpty()) { - throw new IllegalArgumentException("Embedder '" + embedder.getAttribute("id") + "' does not specify " + - "a 'def' parameter so it cannot contain config values"); - } - - return component; - } - - /** Adds a config value from an embedder element into a regular config. */ - private static void addConfigValue(Element value, Element config, boolean hosted) { - if (value.hasAttribute("path")) { - addChild(value.getTagName() + "Url", "", config); - addChild(value.getTagName() + "Path", value.getAttribute("path"), config); - } - else if (value.hasAttribute("id") && hosted) { - addChild(value.getTagName() + "Url", modelIdToUrl(value.getAttribute("id")), config); - addChild(value.getTagName() + "Path", dummyPath, config); - } - else if (value.hasAttribute("url")) { - addChild(value.getTagName() + "Url", value.getAttribute("url"), config); - addChild(value.getTagName() + "Path", dummyPath, config); - } - else { - addChild(value.getTagName(), XML.getValue(value), config); - } - } - - private static void addChild(String name, String value, Element parent) { - Element element = parent.getOwnerDocument().createElement(name); - element.setTextContent(value); - parent.appendChild(element); - } - - private static String embedderConfigFrom(Element embedder, String embedderClass) { - String explicitDefinition = embedder.getAttribute("def"); - if ( ! explicitDefinition.isEmpty()) return explicitDefinition; - - // Implicit from class name - return switch (embedderClass) { - case "ai.vespa.embedding.BertBaseEmbedder" -> "embedding.bert-base-embedder"; - default -> ""; - }; - } - - private static String modelIdToUrl(String id) { - if ( ! providedModels.containsKey(id)) - throw new IllegalArgumentException("Unknown embedder model '" + id + "'. 
Available models are [" + - providedModels.keySet().stream().sorted().collect(Collectors.joining(", ")) + "]"); - return providedModels.get(id); - } - -} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelConfigTransformer.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelConfigTransformer.java new file mode 100644 index 00000000000..0065a582145 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelConfigTransformer.java @@ -0,0 +1,73 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.model.container.xml; + +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.text.XML; +import org.w3c.dom.Element; + +import java.util.Map; +import java.util.stream.Collectors; + +/** + * Translates model references in component configs. + * + * @author lesters + * @author bratseth + */ +public class ModelConfigTransformer { + + private static final Map<String, String> providedModels = + Map.of("minilm-l6-v2", "https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx", + "bert-base-uncased", "https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt"); + + // Until we have optional path parameters, use services.xml as it is guaranteed to exist + private final static String dummyPath = "services.xml"; + + /** + * Expands model references in the config of a <component ...> element. 
+ * + * @param deployState the deploy state - as config generation can depend on context + * @param component the XML element containing the <component ...> + * @return the given element, with model references in its config expanded + */ + public static Element transform(DeployState deployState, Element component) { + for (Element config : XML.getChildren(component, "config")) { + for (Element value : XML.getChildren(config)) + transformModelValue(value, config, deployState.isHosted()); + } + return component; + } + + /** Expands a model config value into regular config values. */ + private static void transformModelValue(Element value, Element config, boolean hosted) { + if (value.hasAttribute("path")) { + addChild(value.getTagName() + "Url", "", config); + addChild(value.getTagName() + "Path", value.getAttribute("path"), config); + config.removeChild(value); + } + else if (value.hasAttribute("id") && hosted) { + addChild(value.getTagName() + "Url", modelIdToUrl(value.getAttribute("id")), config); + addChild(value.getTagName() + "Path", dummyPath, config); + config.removeChild(value); + } + else if (value.hasAttribute("url")) { + addChild(value.getTagName() + "Url", value.getAttribute("url"), config); + addChild(value.getTagName() + "Path", dummyPath, config); + config.removeChild(value); + } + } + + private static void addChild(String name, String value, Element parent) { + Element element = parent.getOwnerDocument().createElement(name); + element.setTextContent(value); + parent.appendChild(element); + } + + private static String modelIdToUrl(String id) { + if ( ! providedModels.containsKey(id)) + throw new IllegalArgumentException("Unknown embedder model '" + id + "'. 
Available models are [" + + providedModels.keySet().stream().sorted().collect(Collectors.joining(", ")) + "]"); + return providedModels.get(id); + } + +} diff --git a/config-model/src/main/resources/schema/common.rnc b/config-model/src/main/resources/schema/common.rnc index de44f9cc071..27f3b37b78b 100644 --- a/config-model/src/main/resources/schema/common.rnc +++ b/config-model/src/main/resources/schema/common.rnc @@ -65,15 +65,4 @@ ComponentDefinition = ComponentId & BundleSpec & GenericConfig* & - Component* & - Embedder* - -Embedder = element embedder { - attribute id { string } & - attribute class { xsd:Name | JavaId }? & - attribute bundle { xsd:Name }? & - attribute def { xsd:Name }? & - anyElement* -} - - + Component* diff --git a/config-model/src/main/resources/schema/containercluster.rnc b/config-model/src/main/resources/schema/containercluster.rnc index 4cc55ad75d8..9012462d2eb 100644 --- a/config-model/src/main/resources/schema/containercluster.rnc +++ b/config-model/src/main/resources/schema/containercluster.rnc @@ -17,7 +17,6 @@ ContainerServices = DocumentApi? & Components* & Component* & - Embedder* & Handler* & Server* & Http? & @@ -31,8 +30,7 @@ ClientAuthorize = element client-authorize { empty } Components = element components { Include* & - Component* & - Embedder* + Component* } Include = element \include { diff --git a/config-model/src/main/resources/schema/docproc.rnc b/config-model/src/main/resources/schema/docproc.rnc index b4db09f2fb8..11f8e14fb2d 100644 --- a/config-model/src/main/resources/schema/docproc.rnc +++ b/config-model/src/main/resources/schema/docproc.rnc @@ -50,7 +50,6 @@ ClusterV3 = element cluster { GenericConfig* & SchemaMapping? & Component* & - Embedder* & Handler* & DocprocChainsV3? 
} diff --git a/config-model/src/test/cfg/application/embed/services.xml b/config-model/src/test/cfg/application/embed/services.xml index 62a50dd43a3..88558ace4bf 100644 --- a/config-model/src/test/cfg/application/embed/services.xml +++ b/config-model/src/test/cfg/application/embed/services.xml @@ -4,14 +4,16 @@ <container version="1.0"> - <embedder id="transformer" class="ai.vespa.embedding.BertBaseEmbedder"> - <!-- model specifics --> - <transformerModel id="minilm-l6-v2" url="application-url"/> - <tokenizerVocab path="files/vocab.txt"/> + <component id="transformer" class="ai.vespa.embedding.BertBaseEmbedder" bundle="model-integration"> + <config name="embedding.bert-base-embedder"> + <!-- model specifics --> + <transformerModel id="minilm-l6-v2" url="application-url"/> + <tokenizerVocab path="files/vocab.txt"/> - <!-- tunable parameters: number of threads etc --> - <onnxIntraOpThreads>4</onnxIntraOpThreads> - </embedder> + <!-- tunable parameters: number of threads etc --> + <onnxIntraOpThreads>4</onnxIntraOpThreads> + </config> + </component> <nodes> <node hostalias="node1" /> diff --git a/config-model/src/test/cfg/application/embed_generic/services.xml b/config-model/src/test/cfg/application/embed_generic/services.xml index 2df4f2eda41..ea430f24e2f 100644 --- a/config-model/src/test/cfg/application/embed_generic/services.xml +++ b/config-model/src/test/cfg/application/embed_generic/services.xml @@ -4,14 +4,15 @@ <container version="1.0"> - <embedder id='transformer' - class='ai.vespa.example.paragraph.ApplicationSpecificEmbedder' - bundle='exampleEmbedder' - def='ai.vespa.example.paragraph.sentence-embedder'> - <model id="minilm-l6-v2" url="application-url" /> - <vocab path="files/vocab.txt"/> - <myValue>foo</myValue> - </embedder> + <component id='transformer' + class='ai.vespa.example.paragraph.ApplicationSpecificEmbedder' + bundle='exampleEmbedder'> + <config name='ai.vespa.example.paragraph.sentence-embedder'> + <model id="minilm-l6-v2" 
url="application-url" /> + <vocab path="files/vocab.txt"/> + <myValue>foo</myValue> + </config> + </component> <nodes> <node hostalias='node1'/> diff --git a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java index 05f848777d4..ffa7e52136f 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java @@ -30,112 +30,92 @@ public class EmbedderTestCase { private static final String emptyPathFileName = "services.xml"; private static final String BUNDLED_EMBEDDER_CLASS = "ai.vespa.embedding.BertBaseEmbedder"; - private static final String BUBNDLED_EMBEDDER_CONFIG = "embedding.bert-base-embedder"; - - @Test - void testApplicationEmbedder() throws IOException, SAXException { - String embedder = "<embedder id='test' class='ai.vespa.test' bundle='bundle' def='def.name'>" + - " <val>123</val>" + - "</embedder>"; - String component = "<component id='test' class='ai.vespa.test' bundle='bundle'>" + - " <config name='def.name'>" + - " <val>123</val>" + - " </config>" + - "</component>"; - assertTransform(embedder, component); - } - - @Test - void testApplicationEmbedderWithoutConfig() throws IOException, SAXException { - String embedder = "<embedder id='test' class='ai.vespa.test' bundle='bundle'>" + - "</embedder>"; - String component = "<component id='test' class='ai.vespa.test' bundle='bundle'>" + - "</component>"; - assertTransform(embedder, component); - } - - @Test - void testApplicationEmbedderWithoutConfigCannotSetConfig() throws IOException, SAXException { - String embedder = "<embedder id='test' class='ai.vespa.test' bundle='bundle'>" + - " <val>123</val>" + - "</embedder>"; - assertTransformThrows(embedder, "Embedder 'test' does not specify a 'def' parameter so it cannot contain config values", false); - } + private static 
final String BUNDLED_EMBEDDER_CONFIG = "embedding.bert-base-embedder"; @Test void testBundledEmbedder_selfhosted() throws IOException, SAXException { - String embedder = "<embedder id='test' class='" + BUNDLED_EMBEDDER_CLASS + "'>" + - " <transformerModel id='my_model_id' url='my-model-url' />" + - " <tokenizerVocab id='my_vocab_id' url='my-vocab-url' />" + - "</embedder>"; + String input = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + + " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + + " <transformerModel id='my_model_id' url='my-model-url' />" + + " <tokenizerVocab id='my_vocab_id' url='my-vocab-url' />" + + " </config>" + + "</component>"; String component = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + - " <config name='" + BUBNDLED_EMBEDDER_CONFIG + "'>" + + " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + " <transformerModelUrl>my-model-url</transformerModelUrl>" + " <transformerModelPath>services.xml</transformerModelPath>" + " <tokenizerVocabUrl>my-vocab-url</tokenizerVocabUrl>" + " <tokenizerVocabPath>services.xml</tokenizerVocabPath>" + " </config>" + "</component>"; - assertTransform(embedder, component, false); + assertTransform(input, component, false); } @Test void testPathHasPriority_selfhosted() throws IOException, SAXException { - String embedder = "<embedder id='test' class='" + BUNDLED_EMBEDDER_CLASS + "'>" + - " <transformerModel id='my_model_id' url='my-model-url' path='files/model.onnx' />" + - " <tokenizerVocab id='my_vocab_id' url='my-vocab-url' path='files/vocab.txt' />" + - "</embedder>"; + String input = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + + " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + + " <transformerModel id='my_model_id' url='my-model-url' path='files/model.onnx' />" + + " <tokenizerVocab id='my_vocab_id' url='my-vocab-url' path='files/vocab.txt' />" + + " </config>" + + 
"</component>"; String component = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + - " <config name='" + BUBNDLED_EMBEDDER_CONFIG + "'>" + + " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + " <transformerModelUrl></transformerModelUrl>" + " <transformerModelPath>files/model.onnx</transformerModelPath>" + " <tokenizerVocabUrl></tokenizerVocabUrl>" + " <tokenizerVocabPath>files/vocab.txt</tokenizerVocabPath>" + " </config>" + "</component>"; - assertTransform(embedder, component, false); + assertTransform(input, component, false); } @Test void testBundledEmbedder_hosted() throws IOException, SAXException { - String embedder = "<embedder id='test' class='" + BUNDLED_EMBEDDER_CLASS + "'>" + - " <transformerModel id='minilm-l6-v2' />" + - " <tokenizerVocab id='bert-base-uncased' />" + - "</embedder>"; + String input = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + + " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + + " <transformerModel id='minilm-l6-v2' />" + + " <tokenizerVocab id='bert-base-uncased' />" + + " </config>" + + "</component>"; String component = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + - " <config name='" + BUBNDLED_EMBEDDER_CONFIG + "'>" + + " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + " <transformerModelUrl>https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx</transformerModelUrl>" + " <transformerModelPath>services.xml</transformerModelPath>" + " <tokenizerVocabUrl>https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt</tokenizerVocabUrl>" + " <tokenizerVocabPath>services.xml</tokenizerVocabPath>" + " </config>" + "</component>"; - assertTransform(embedder, component, true); + assertTransform(input, component, true); } @Test void testApplicationEmbedderWithBundledConfig_hosted() throws IOException, SAXException { - String embedder = "<embedder id='test' 
class='ApplicationSpecificEmbedder' def='" + BUBNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel id='minilm-l6-v2' />" + - " <tokenizerVocab id='bert-base-uncased' />" + - "</embedder>"; + String input = "<component id='test' class='ApplicationSpecificEmbedder' bundle='model-integration'>" + + " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + + " <transformerModel id='minilm-l6-v2' />" + + " <tokenizerVocab id='bert-base-uncased' />" + + " </config>" + + "</component>"; String component = "<component id='test' class='ApplicationSpecificEmbedder' bundle='model-integration'>" + - " <config name='" + BUBNDLED_EMBEDDER_CONFIG + "'>" + + " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + " <transformerModelUrl>https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx</transformerModelUrl>" + " <transformerModelPath>services.xml</transformerModelPath>" + " <tokenizerVocabUrl>https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt</tokenizerVocabUrl>" + " <tokenizerVocabPath>services.xml</tokenizerVocabPath>" + " </config>" + "</component>"; - assertTransform(embedder, component, true); + assertTransform(input, component, true); } @Test void testUnknownModelId_hosted() throws IOException, SAXException { - String embedder = "<embedder id='test' class='" + BUNDLED_EMBEDDER_CLASS + "'>" + - " <transformerModel id='my_model_id' />" + - " <tokenizerVocab id='my_vocab_id' />" + - "</embedder>"; + String embedder = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "'>" + + " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + + " <transformerModel id='my_model_id' />" + + " <tokenizerVocab id='my_vocab_id' />" + + " </config>" + + "</component>"; assertTransformThrows(embedder, "Unknown embedder model 'my_model_id'. 
" + "Available models are [bert-base-uncased, minilm-l6-v2]", @@ -215,7 +195,7 @@ public class EmbedderTestCase { private void assertTransform(String embedder, String expectedComponent, boolean hosted) throws IOException, SAXException { assertSpec(createElement(expectedComponent), - EmbedderConfigTransformer.transform(createEmptyDeployState(hosted), createElement(embedder))); + ModelConfigTransformer.transform(createEmptyDeployState(hosted), createElement(embedder))); } private void assertSpec(Element e1, Element e2) { @@ -247,7 +227,7 @@ public class EmbedderTestCase { private void assertTransformThrows(String embedder, String expectedMessage, boolean hosted) throws IOException, SAXException { try { - EmbedderConfigTransformer.transform(createEmptyDeployState(hosted), createElement(embedder)); + ModelConfigTransformer.transform(createEmptyDeployState(hosted), createElement(embedder)); fail("Expected exception was not thrown: " + expectedMessage); } catch (IllegalArgumentException e) { assertEquals(expectedMessage, e.getMessage()); diff --git a/config-model/src/test/schema-test-files/services.xml b/config-model/src/test/schema-test-files/services.xml index ffb28726d9a..b32849bb55f 100644 --- a/config-model/src/test/schema-test-files/services.xml +++ b/config-model/src/test/schema-test-files/services.xml @@ -196,7 +196,6 @@ <component id="injected-to-handler"> <config name="foo"/> </component> - <embedder id="transformer" class="ai.vespa.example.SomeEmbedder" bundle="myBundle" def="my.def-file"/> </handler> <server id="server-provider"> |