diff options
author | Lester Solbakken <lesters@oath.com> | 2022-05-23 10:55:21 +0200 |
---|---|---|
committer | Lester Solbakken <lesters@oath.com> | 2022-05-23 10:55:21 +0200 |
commit | 7593c064d3ecf3649cd27f5b9c820b5510f225ee (patch) | |
tree | e8e0cf3e574dfddc4e97c670ad53e6104c8f675a /config-model/src/main/java/com | |
parent | e657c0a9618868c9dcf32cfa7e05ac73750b904c (diff) |
Add services.xml syntax for embedders
Diffstat (limited to 'config-model/src/main/java/com')
5 files changed, 425 insertions, 0 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index 0bd93c6d0df..1121a90693b 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -86,6 +86,7 @@ import com.yahoo.vespa.model.container.search.GUIHandler; import com.yahoo.vespa.model.container.search.PageTemplates; import com.yahoo.vespa.model.container.search.searchchain.SearchChains; import com.yahoo.vespa.model.container.xml.document.DocumentFactoryBuilder; +import com.yahoo.vespa.model.container.xml.embedder.EmbedderConfig; import com.yahoo.vespa.model.content.StorageGroup; import org.w3c.dom.Element; import org.w3c.dom.Node; @@ -197,9 +198,11 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { private void addClusterContent(ApplicationContainerCluster cluster, Element spec, ConfigModelContext context) { DeployState deployState = context.getDeployState(); DocumentFactoryBuilder.buildDocumentFactories(cluster, spec); + addConfiguredComponents(deployState, cluster, spec); addSecretStore(cluster, spec, deployState); + addEmbedderComponents(deployState, cluster, spec); addModelEvaluation(spec, cluster, context); addModelEvaluationBundles(cluster); @@ -382,6 +385,13 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { } } + private static void addEmbedderComponents(DeployState deployState, ApplicationContainerCluster cluster, Element spec) { + for (Element node : XML.getChildren(spec, "embedder")) { + Element transformed = EmbedderConfig.transform(deployState, node); + cluster.addComponent(new DomComponentBuilder().build(deployState, cluster, transformed)); + } + } + private void addConfiguredComponents(DeployState deployState, ApplicationContainerCluster cluster, 
Element spec) { for (Element components : XML.getChildren(spec, "components")) { addIncludes(components); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfig.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfig.java new file mode 100644 index 00000000000..1558f01c231 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfig.java @@ -0,0 +1,79 @@ +package com.yahoo.vespa.model.container.xml.embedder; + +import com.yahoo.config.model.deploy.DeployState; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +/** + * Translates config in services.xml of the form + * + * <embedder id="..." class="..." bundle="..." def="..."> + * <!-- options --> + * </embedder> + * + * to component configuration of the form + * + * <component id="..." class="..." bundle="..."> + * <config name=def> + * <!-- options --> + * </config> + * </component> + * + * with some added interpretations based on recognizing the class. + * + * @author lesters + */ +public class EmbedderConfig { + + static EmbedderConfigTransformer getEmbedderTransformer(Element spec, boolean hosted) { + String classId = getEmbedderClass(spec); + switch (classId) { + case "ai.vespa.embedding.BertBaseEmbedder": return new EmbedderConfigBertBaseTransformer(spec, hosted); + } + return new EmbedderConfigTransformer(spec, hosted); + } + + static String modelIdToUrl(String id) { + switch (id) { + case "test-model-id": + return "test-model-url"; + case "minilm-l6-v2": + return "https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx"; + case "bert-base-uncased": + return "https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt"; + } + throw new IllegalArgumentException("Unknown model id: '" + id + "'"); + } + + /** + * Transforms the <embedder ...> element to component configuration. 
+ * + * @param deployState the deploy state - as config generation can depend on context + * @param embedderSpec the XML element containing the <embedder ...> + * @return a new XML element containing the <component ...> configuration + */ + public static Element transform(DeployState deployState, Element embedderSpec) { + EmbedderConfigTransformer transformer = getEmbedderTransformer(embedderSpec, deployState.isHosted()); + NodeList children = embedderSpec.getChildNodes(); + for (int i = 0; i < children.getLength(); i++) { + Node child = children.item(i); + if (child instanceof Element) { + transformer.addOption((Element) child); + } + } + return transformer.createComponentConfig(deployState); + } + + private static String getEmbedderClass(Element spec) { + if (spec.hasAttribute("class")) { + return spec.getAttribute("class"); + } + if (spec.hasAttribute("id")) { + return spec.getAttribute("id"); + } + throw new IllegalArgumentException("Embedder specification does not have a required class attribute"); + } + + +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfigBertBaseTransformer.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfigBertBaseTransformer.java new file mode 100644 index 00000000000..9431926d088 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfigBertBaseTransformer.java @@ -0,0 +1,40 @@ +package com.yahoo.vespa.model.container.xml.embedder; + +import org.w3c.dom.Element; + +import java.util.Map; + +/** + * Transforms embedding configuration to component configuration for the + * BertBaseEmbedder using embedding.bert-base-embedder.def + * + * @author lesters + */ +public class EmbedderConfigBertBaseTransformer extends EmbedderConfigTransformer { + + private static final String BUNDLE = "model-integration"; + private static final String DEF = "embedding.bert-base-embedder"; + + public 
EmbedderConfigBertBaseTransformer(Element spec, boolean hosted) { + super(spec, hosted, BUNDLE, DEF); + + EmbedderOption.Builder modelOption = new EmbedderOption.Builder() + .name("model") + .required(true) + .optionTransformer(new EmbedderOption.ModelOptionTransformer("transformerModelPath", "transformerModelUrl")); + EmbedderOption.Builder vocabOption = new EmbedderOption.Builder() + .name("vocab") + .required(true) + .optionTransformer(new EmbedderOption.ModelOptionTransformer("tokenizerVocabPath", "tokenizerVocabUrl")); + + // Defaults + if (hosted) { + modelOption.attributes(Map.of("id", "minilm-l6-v2")).value(""); + vocabOption.attributes(Map.of("id", "bert-base-uncased")).value(""); + } + + addOption(modelOption.build()); + addOption(vocabOption.build()); + } + +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfigTransformer.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfigTransformer.java new file mode 100644 index 00000000000..527f631201d --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfigTransformer.java @@ -0,0 +1,104 @@ +package com.yahoo.vespa.model.container.xml.embedder; + + +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.text.XML; +import org.w3c.dom.Document; +import org.w3c.dom.Element; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +/** + * A specific embedder to component configuration transformer. 
+ * + * @author lesters + */ +public class EmbedderConfigTransformer { + + private final Document doc = XML.getDocumentBuilder().newDocument(); + + private final String id; + private final String className; + private final String bundle; + private final String def; + private final Map<String, EmbedderOption> options = new HashMap<>(); + + public EmbedderConfigTransformer(Element spec, boolean hosted) { + this(spec, hosted, null, null); + } + + public EmbedderConfigTransformer(Element spec, boolean hosted, String defaultBundle, String defaultDef) { + id = spec.getAttribute("id"); + className = spec.hasAttribute("class") ? spec.getAttribute("class") : id; + bundle = spec.hasAttribute("bundle") ? spec.getAttribute("bundle") : defaultBundle; + def = spec.hasAttribute("def") ? spec.getAttribute("def") : defaultDef; + + if (className == null || className.length() == 0) { + throw new IllegalArgumentException("Embedder class is empty"); + } + if (this.bundle == null || this.bundle.length() == 0) { + throw new IllegalArgumentException("Embedder configuration requires a bundle name"); + } + if (this.def == null || this.def.length() == 0) { + throw new IllegalArgumentException("Embedder configuration requires a config definition name"); + } + } + + Element createComponentConfig(DeployState deployState) { + checkRequiredOptions(); + + Element component = doc.createElement("component"); + component.setAttribute("id", id); + component.setAttribute("class", className); + component.setAttribute("bundle", bundle); + + if (options.size() > 0) { + Element config = doc.createElement("config"); + config.setAttribute("name", def); + for (Map.Entry<String, EmbedderOption> entry : options.entrySet()) { + entry.getValue().toElement(deployState, config); + } + component.appendChild(config); + } + + return component; + } + + // TODO: support nested options + void addOption(Element elem) { + String name = elem.getTagName(); + + EmbedderOption.Builder builder = new EmbedderOption.Builder(); + 
builder.name(name); + builder.value(elem.getTextContent()); + builder.attributes(elem); + + if (options.containsKey(name)) { + builder.required(options.get(name).required()); + builder.optionTransformer(options.get(name).optionTransformer()); + } + options.put(name, builder.build()); + } + + void addOption(EmbedderOption option) { + options.put(option.name(), option); + } + + private void checkRequiredOptions() { + List<String> missingOptions = new ArrayList<>(); + for (EmbedderOption option : options.values()) { + if ( ! option.isSet()) { + missingOptions.add(option.name()); + } + } + if (missingOptions.size() > 0) { + throw new IllegalArgumentException("Embedder '" + className + "' requires options for " + missingOptions); + } + } + + +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderOption.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderOption.java new file mode 100644 index 00000000000..5fd0ff9dee7 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderOption.java @@ -0,0 +1,192 @@ +package com.yahoo.vespa.model.container.xml.embedder; + +import com.yahoo.config.FileReference; +import com.yahoo.config.application.api.ApplicationPackage; +import com.yahoo.config.application.api.FileRegistry; +import com.yahoo.config.model.application.provider.FilesApplicationPackage; +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.io.IOUtils; +import com.yahoo.path.Path; +import org.w3c.dom.Element; +import org.w3c.dom.NamedNodeMap; + +import java.io.File; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; + + +/** + * Holds options for embedder configuration. This includes code for handling special + * options such as model specifiers. 
+ * + * @author lesters + */ +public class EmbedderOption { + + public static final OptionTransformer defaultOptionTransformer = new OptionTransformer(); + + private final String name; + private final boolean required; + private final String value; + private final Map<String, String> attributes; + private final OptionTransformer optionTransformer; + private final boolean set; + + private EmbedderOption(Builder builder) { + this.name = builder.name; + this.required = builder.required; + this.value = builder.value; + this.attributes = builder.attributes; + this.optionTransformer = builder.optionTransformer; + this.set = builder.set; + } + + public void toElement(DeployState deployState, Element parent) { + optionTransformer.transform(deployState, parent, this); + } + + public String name() { + return name; + } + + public String value() { + return value; + } + + public boolean required() { + return required; + } + + public OptionTransformer optionTransformer() { + return optionTransformer; + } + + public boolean isSet() { + return set; + } + + /** + * Basic option transformer. No special handling of options. + */ + public static class OptionTransformer { + public void transform(DeployState deployState, Element parent, EmbedderOption option) { + createElement(parent, option.name(), option.value()); + } + + public static Element createElement(Element parent, String name, String value) { + Element element = parent.getOwnerDocument().createElement(name); + element.setTextContent(value); + parent.appendChild(element); + return element; + } + } + + /** + * Transforms model options of type <x id="..." url="..." path="..." /> to the + * required fields in the config definition. 
+ */ + public static class ModelOptionTransformer extends OptionTransformer { + + private final String pathField; + private final String urlField; + + public ModelOptionTransformer(String pathField, String urlField) { + super(); + this.pathField = pathField; + this.urlField = urlField; + } + + @Override + public void transform(DeployState deployState, Element parent, EmbedderOption option) { + String id = option.attributes.get("id"); + String url = option.attributes.get("url"); + String path = option.attributes.get("path"); + + // Always use path if it is set + if (path != null && path.length() > 0) { + createElement(parent, pathField, path); + createElement(parent, urlField, ""); + return; + } + + // Only use the id if we're on cloud + if (deployState.isHosted() && id != null && id.length() > 0) { + createElement(parent, urlField, EmbedderConfig.modelIdToUrl(id)); + createElement(parent, pathField, createDummyPath(deployState)); + return; + } + + // Otherwise, use url + if (url != null && url.length() > 0) { + createElement(parent, urlField, url); + createElement(parent, pathField, createDummyPath(deployState)); + return; + } + + if ( ! 
deployState.isHosted() && id != null && id.length() > 0) { + throw new IllegalArgumentException("Model option 'id' is not valid here"); + } + throw new IllegalArgumentException("Model option requires either a 'path' or a 'url' attribute"); + } + + private String createDummyPath(DeployState deployState) { + // For now, until we have optional config parameters, return services.xml as it is guaranteed to exist + return "services.xml"; + } + + } + + public static class Builder { + private String name = ""; + private boolean required = false; + private String value = ""; + private Map<String, String> attributes = Map.of(); + private OptionTransformer optionTransformer = defaultOptionTransformer; + private boolean set = false; + + public Builder name(String name) { + this.name = name; + return this; + } + + public Builder required(boolean required) { + this.required = required; + return this; + } + + public Builder value(String value) { + this.value = value; + this.set = true; + return this; + } + + public Builder attributes(Map<String, String> attributes) { + this.attributes = attributes; + return this; + } + + public Builder attributes(Element element) { + NamedNodeMap map = element.getAttributes(); + if (map.getLength() > 0) { + this.attributes = new HashMap<>(map.getLength()); + for (int i = 0; i < map.getLength(); ++i) { + String attr = map.item(i).getNodeName(); + attributes.put(attr, element.getAttribute(attr)); + } + } + return this; + } + + public Builder optionTransformer(OptionTransformer optionTransformer) { + this.optionTransformer = optionTransformer; + return this; + } + + public EmbedderOption build() { + return new EmbedderOption(this); + } + + } + +} |