summaryrefslogtreecommitdiffstats
path: root/config-model/src/main/java/com
diff options
context:
space:
mode:
authorLester Solbakken <lesters@oath.com>2022-05-23 10:55:21 +0200
committerLester Solbakken <lesters@oath.com>2022-05-23 10:55:21 +0200
commit7593c064d3ecf3649cd27f5b9c820b5510f225ee (patch)
treee8e0cf3e574dfddc4e97c670ad53e6104c8f675a /config-model/src/main/java/com
parente657c0a9618868c9dcf32cfa7e05ac73750b904c (diff)
Add services.xml syntax for embedders
Diffstat (limited to 'config-model/src/main/java/com')
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java10
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfig.java79
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfigBertBaseTransformer.java40
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfigTransformer.java104
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderOption.java192
5 files changed, 425 insertions, 0 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
index 0bd93c6d0df..1121a90693b 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
@@ -86,6 +86,7 @@ import com.yahoo.vespa.model.container.search.GUIHandler;
import com.yahoo.vespa.model.container.search.PageTemplates;
import com.yahoo.vespa.model.container.search.searchchain.SearchChains;
import com.yahoo.vespa.model.container.xml.document.DocumentFactoryBuilder;
+import com.yahoo.vespa.model.container.xml.embedder.EmbedderConfig;
import com.yahoo.vespa.model.content.StorageGroup;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
@@ -197,9 +198,11 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
private void addClusterContent(ApplicationContainerCluster cluster, Element spec, ConfigModelContext context) {
DeployState deployState = context.getDeployState();
DocumentFactoryBuilder.buildDocumentFactories(cluster, spec);
+
addConfiguredComponents(deployState, cluster, spec);
addSecretStore(cluster, spec, deployState);
+ addEmbedderComponents(deployState, cluster, spec);
addModelEvaluation(spec, cluster, context);
addModelEvaluationBundles(cluster);
@@ -382,6 +385,13 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
}
}
+    /**
+     * Adds one component to the cluster for each "embedder" child element of the given spec.
+     * Each embedder element is first rewritten by EmbedderConfig.transform, and the resulting
+     * element is then built with a DomComponentBuilder.
+     */
+    private static void addEmbedderComponents(DeployState deployState, ApplicationContainerCluster cluster, Element spec) {
+        for (Element embedderSpec : XML.getChildren(spec, "embedder")) {
+            Element componentSpec = EmbedderConfig.transform(deployState, embedderSpec);
+            DomComponentBuilder componentBuilder = new DomComponentBuilder();
+            cluster.addComponent(componentBuilder.build(deployState, cluster, componentSpec));
+        }
+    }
+
private void addConfiguredComponents(DeployState deployState, ApplicationContainerCluster cluster, Element spec) {
for (Element components : XML.getChildren(spec, "components")) {
addIncludes(components);
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfig.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfig.java
new file mode 100644
index 00000000000..1558f01c231
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfig.java
@@ -0,0 +1,79 @@
+package com.yahoo.vespa.model.container.xml.embedder;
+
+import com.yahoo.config.model.deploy.DeployState;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+/**
+ * Translates config in services.xml of the form
+ *
+ * <pre>{@code
+ * <embedder id="..." class="..." bundle="..." def="...">
+ *     <!-- options -->
+ * </embedder>
+ * }</pre>
+ *
+ * to component configuration of the form
+ *
+ * <pre>{@code
+ * <component id="..." class="..." bundle="...">
+ *     <config name="...">
+ *         <!-- options -->
+ *     </config>
+ * </component>
+ * }</pre>
+ *
+ * where the config name is the value of the {@code def} attribute,
+ * with some added interpretations based on recognizing the class.
+ *
+ * @author lesters
+ */
+public class EmbedderConfig {
+
+    /**
+     * Picks the transformer to use for the embedder class named by the spec.
+     * The class "ai.vespa.embedding.BertBaseEmbedder" gets its dedicated transformer;
+     * any other class gets the generic transformer.
+     */
+    static EmbedderConfigTransformer getEmbedderTransformer(Element spec, boolean hosted) {
+        String embedderClass = getEmbedderClass(spec);
+        if (embedderClass.equals("ai.vespa.embedding.BertBaseEmbedder")) {
+            return new EmbedderConfigBertBaseTransformer(spec, hosted);
+        }
+        return new EmbedderConfigTransformer(spec, hosted);
+    }
+
+    /**
+     * Maps a known model id to the URL the model is fetched from.
+     *
+     * @param id the model id given in the option's id attribute
+     * @return the URL registered for the id
+     * @throws IllegalArgumentException if the id is not one of the known ids
+     */
+    static String modelIdToUrl(String id) {
+        if (id.equals("test-model-id")) {
+            return "test-model-url";
+        }
+        if (id.equals("minilm-l6-v2")) {
+            return "https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx";
+        }
+        if (id.equals("bert-base-uncased")) {
+            return "https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt";
+        }
+        throw new IllegalArgumentException("Unknown model id: '" + id + "'");
+    }
+
+    /**
+     * Transforms the {@code <embedder ...>} element to component configuration.
+     * Every child element of the embedder element is handed to the transformer as an option
+     * before the component configuration is created.
+     *
+     * @param deployState the deploy state - as config generation can depend on context
+     * @param embedderSpec the XML element containing the {@code <embedder ...>}
+     * @return a new XML element containing the {@code <component ...>} configuration
+     */
+    public static Element transform(DeployState deployState, Element embedderSpec) {
+        EmbedderConfigTransformer transformer = getEmbedderTransformer(embedderSpec, deployState.isHosted());
+        // Walk the direct children in document order; only element nodes are options
+        for (Node child = embedderSpec.getFirstChild(); child != null; child = child.getNextSibling()) {
+            if (child instanceof Element) {
+                transformer.addOption((Element) child);
+            }
+        }
+        return transformer.createComponentConfig(deployState);
+    }
+
+    /**
+     * Returns the embedder class name: the value of the "class" attribute when present,
+     * otherwise the value of the "id" attribute.
+     *
+     * @throws IllegalArgumentException if the spec has neither attribute
+     */
+    private static String getEmbedderClass(Element spec) {
+        // Candidates in order of precedence
+        for (String attribute : new String[] { "class", "id" }) {
+            if (spec.hasAttribute(attribute)) {
+                return spec.getAttribute(attribute);
+            }
+        }
+        throw new IllegalArgumentException("Embedder specification does not have a required class attribute");
+    }
+
+
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfigBertBaseTransformer.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfigBertBaseTransformer.java
new file mode 100644
index 00000000000..9431926d088
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfigBertBaseTransformer.java
@@ -0,0 +1,40 @@
+package com.yahoo.vespa.model.container.xml.embedder;
+
+import org.w3c.dom.Element;
+
+import java.util.Map;
+
+/**
+ * Transforms embedding configuration to component configuration for the
+ * BertBaseEmbedder using embedding.bert-base-embedder.def
+ *
+ * @author lesters
+ */
+public class EmbedderConfigBertBaseTransformer extends EmbedderConfigTransformer {
+
+ private static final String BUNDLE = "model-integration";
+ private static final String DEF = "embedding.bert-base-embedder";
+
+    /**
+     * Creates a transformer with the default bundle and config definition of this embedder,
+     * and registers the required "model" and "vocab" options. When hosted, the options
+     * default to the model ids "minilm-l6-v2" and "bert-base-uncased" respectively, with an
+     * empty value so that they count as set.
+     */
+    public EmbedderConfigBertBaseTransformer(Element spec, boolean hosted) {
+        super(spec, hosted, BUNDLE, DEF);
+        addOption(requiredModelOption("model", "transformerModelPath", "transformerModelUrl",
+                                      hosted ? "minilm-l6-v2" : null));
+        addOption(requiredModelOption("vocab", "tokenizerVocabPath", "tokenizerVocabUrl",
+                                      hosted ? "bert-base-uncased" : null));
+    }
+
+    /** Builds a required model option; a non-null default model id is stored as the "id" attribute. */
+    private static EmbedderOption requiredModelOption(String name, String pathField, String urlField, String defaultModelId) {
+        EmbedderOption.Builder builder = new EmbedderOption.Builder();
+        builder.name(name);
+        builder.required(true);
+        builder.optionTransformer(new EmbedderOption.ModelOptionTransformer(pathField, urlField));
+        if (defaultModelId != null) {
+            builder.attributes(Map.of("id", defaultModelId));
+            builder.value("");
+        }
+        return builder.build();
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfigTransformer.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfigTransformer.java
new file mode 100644
index 00000000000..527f631201d
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderConfigTransformer.java
@@ -0,0 +1,104 @@
+package com.yahoo.vespa.model.container.xml.embedder;
+
+
+import com.yahoo.config.model.deploy.DeployState;
+import com.yahoo.text.XML;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+
+/**
+ * A specific embedder to component configuration transformer.
+ *
+ * @author lesters
+ */
+public class EmbedderConfigTransformer {
+
+ private final Document doc = XML.getDocumentBuilder().newDocument();
+
+ private final String id;
+ private final String className;
+ private final String bundle;
+ private final String def;
+ private final Map<String, EmbedderOption> options = new HashMap<>();
+
+    /** Creates a transformer without default bundle and config definition names. */
+    public EmbedderConfigTransformer(Element spec, boolean hosted) {
+        this(spec, hosted, null, null);
+    }
+
+    /**
+     * Creates a transformer from the attributes of the embedder element. The class name falls
+     * back to the id, and the bundle and config definition fall back to the given defaults,
+     * when the corresponding attribute is not present.
+     *
+     * @throws IllegalArgumentException if the resulting class, bundle or config definition name is empty
+     */
+    public EmbedderConfigTransformer(Element spec, boolean hosted, String defaultBundle, String defaultDef) {
+        this.id = spec.getAttribute("id");
+        this.className = requireNonEmpty(attributeOrDefault(spec, "class", this.id),
+                                         "Embedder class is empty");
+        this.bundle = requireNonEmpty(attributeOrDefault(spec, "bundle", defaultBundle),
+                                      "Embedder configuration requires a bundle name");
+        this.def = requireNonEmpty(attributeOrDefault(spec, "def", defaultDef),
+                                   "Embedder configuration requires a config definition name");
+    }
+
+    /** Returns the attribute value if the attribute is present on the element, otherwise the fallback. */
+    private static String attributeOrDefault(Element spec, String attribute, String fallback) {
+        return spec.hasAttribute(attribute) ? spec.getAttribute(attribute) : fallback;
+    }
+
+    /** Returns the value unchanged, or throws with the given message if it is null or empty. */
+    private static String requireNonEmpty(String value, String errorMessage) {
+        if (value == null || value.isEmpty()) {
+            throw new IllegalArgumentException(errorMessage);
+        }
+        return value;
+    }
+
+    /**
+     * Creates the "component" element for this embedder, after verifying the registered options.
+     * A "config" child element named after the config definition is added only when
+     * at least one option is registered.
+     */
+    Element createComponentConfig(DeployState deployState) {
+        checkRequiredOptions();
+
+        Element componentElement = doc.createElement("component");
+        componentElement.setAttribute("id", id);
+        componentElement.setAttribute("class", className);
+        componentElement.setAttribute("bundle", bundle);
+
+        if (options.isEmpty()) {
+            return componentElement;
+        }
+
+        Element configElement = doc.createElement("config");
+        configElement.setAttribute("name", def);
+        for (EmbedderOption option : options.values()) {
+            option.toElement(deployState, configElement);
+        }
+        componentElement.appendChild(configElement);
+        return componentElement;
+    }
+
+    /**
+     * Registers an option read from an XML element: the tag name is the option name, the text
+     * content the value, and the element attributes the option attributes. If an option with
+     * the same name is already registered it is replaced, but its required flag and option
+     * transformer are carried over.
+     */
+    // TODO: support nested options
+    void addOption(Element elem) {
+        String optionName = elem.getTagName();
+        EmbedderOption.Builder builder = new EmbedderOption.Builder()
+                .name(optionName)
+                .value(elem.getTextContent())
+                .attributes(elem);
+
+        EmbedderOption predefined = options.get(optionName);
+        if (predefined != null) {
+            builder.required(predefined.required())
+                   .optionTransformer(predefined.optionTransformer());
+        }
+        options.put(optionName, builder.build());
+    }
+
+    /** Registers the option under its name, replacing any option already registered with that name. */
+    void addOption(EmbedderOption option) {
+        String optionName = option.name();
+        options.put(optionName, option);
+    }
+
+    /**
+     * Verifies that every option marked as required has been given a value.
+     *
+     * @throws IllegalArgumentException listing the names of the required options that are not set
+     */
+    private void checkRequiredOptions() {
+        List<String> missingOptions = new ArrayList<>();
+        for (EmbedderOption option : options.values()) {
+            // Only required options can be missing; optional ones are allowed to stay unset
+            if (option.required() && ! option.isSet()) {
+                missingOptions.add(option.name());
+            }
+        }
+        if ( ! missingOptions.isEmpty()) {
+            throw new IllegalArgumentException("Embedder '" + className + "' requires options for " + missingOptions);
+        }
+    }
+
+
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderOption.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderOption.java
new file mode 100644
index 00000000000..5fd0ff9dee7
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/embedder/EmbedderOption.java
@@ -0,0 +1,192 @@
+package com.yahoo.vespa.model.container.xml.embedder;
+
+import com.yahoo.config.FileReference;
+import com.yahoo.config.application.api.ApplicationPackage;
+import com.yahoo.config.application.api.FileRegistry;
+import com.yahoo.config.model.application.provider.FilesApplicationPackage;
+import com.yahoo.config.model.deploy.DeployState;
+import com.yahoo.io.IOUtils;
+import com.yahoo.path.Path;
+import org.w3c.dom.Element;
+import org.w3c.dom.NamedNodeMap;
+
+import java.io.File;
+import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
+import java.util.Map;
+
+
+/**
+ * Holds options for embedder configuration. This includes code for handling special
+ * options such as model specifiers.
+ *
+ * @author lesters
+ */
+public class EmbedderOption {
+
+ public static final OptionTransformer defaultOptionTransformer = new OptionTransformer();
+
+ private final String name;
+ private final boolean required;
+ private final String value;
+ private final Map<String, String> attributes;
+ private final OptionTransformer optionTransformer;
+ private final boolean set;
+
+    /** Creates an option holding the current state of the given builder. */
+    private EmbedderOption(Builder source) {
+        // What the option is
+        name = source.name;
+        required = source.required;
+        optionTransformer = source.optionTransformer;
+        // What the option has been given
+        value = source.value;
+        attributes = source.attributes;
+        set = source.set;
+    }
+
+    /** Lets the option transformer of this option write the option below the given parent element. */
+    public void toElement(DeployState deployState, Element parent) {
+        this.optionTransformer.transform(deployState, parent, this);
+    }
+
+    /** Returns the name of this option. */
+    public String name() {
+        return this.name;
+    }
+
+    /** Returns the value of this option. */
+    public String value() {
+        return this.value;
+    }
+
+    /** Returns whether this option is marked as required. */
+    public boolean required() {
+        return this.required;
+    }
+
+    /** Returns the transformer used to write this option as config. */
+    public OptionTransformer optionTransformer() {
+        return this.optionTransformer;
+    }
+
+    /** Returns whether a value was given when this option was built. */
+    public boolean isSet() {
+        return this.set;
+    }
+
+    /**
+     * Basic option transformer. No special handling of options.
+     */
+    public static class OptionTransformer {
+
+        /** Appends an element named after the option, with the option value as text, to the parent. */
+        public void transform(DeployState deployState, Element parent, EmbedderOption option) {
+            String elementName = option.name();
+            String elementText = option.value();
+            createElement(parent, elementName, elementText);
+        }
+
+        /**
+         * Creates an element in the parent's owner document, sets its text content and
+         * appends it to the parent.
+         *
+         * @return the newly created child element
+         */
+        public static Element createElement(Element parent, String name, String value) {
+            Element child = parent.getOwnerDocument().createElement(name);
+            child.setTextContent(value);
+            parent.appendChild(child);
+            return child;
+        }
+
+    }
+
+    /**
+     * Transforms model options of type {@code <x id="..." url="..." path="..." />} to the
+     * required fields in the config definition.
+     */
+    public static class ModelOptionTransformer extends OptionTransformer {
+
+        // For now, until we have optional config parameters, use services.xml as it is guaranteed to exist
+        private static final String DUMMY_PATH = "services.xml";
+
+        private final String pathField;
+        private final String urlField;
+
+        /**
+         * @param pathField name of the config field receiving the model path
+         * @param urlField name of the config field receiving the model url
+         */
+        public ModelOptionTransformer(String pathField, String urlField) {
+            super();
+            this.pathField = pathField;
+            this.urlField = urlField;
+        }
+
+        /**
+         * Writes the path and url config fields from the option attributes. A "path" attribute
+         * always wins and gives an empty url. Otherwise an "id" attribute is resolved to a url
+         * when hosted, and after that a "url" attribute is used as is; in both cases the path
+         * field gets a dummy path.
+         *
+         * @throws IllegalArgumentException if none of the attributes can be used
+         */
+        @Override
+        public void transform(DeployState deployState, Element parent, EmbedderOption option) {
+            Map<String, String> given = option.attributes;
+            String id = given.get("id");
+            String url = given.get("url");
+            String path = given.get("path");
+
+            if (isNonEmpty(path)) {
+                // Always use path if it is set
+                createElement(parent, pathField, path);
+                createElement(parent, urlField, "");
+            }
+            else if (deployState.isHosted() && isNonEmpty(id)) {
+                // Only use the id if we're on cloud
+                createElement(parent, urlField, EmbedderConfig.modelIdToUrl(id));
+                createElement(parent, pathField, DUMMY_PATH);
+            }
+            else if (isNonEmpty(url)) {
+                // Otherwise, use url
+                createElement(parent, urlField, url);
+                createElement(parent, pathField, DUMMY_PATH);
+            }
+            else if ( ! deployState.isHosted() && isNonEmpty(id)) {
+                throw new IllegalArgumentException("Model option 'id' is not valid here");
+            }
+            else {
+                throw new IllegalArgumentException("Model option requires either a 'path' or a 'url' attribute");
+            }
+        }
+
+        /** Returns whether the attribute value is present and not the empty string. */
+        private static boolean isNonEmpty(String attributeValue) {
+            return attributeValue != null && attributeValue.length() > 0;
+        }
+
+    }
+
+ public static class Builder {
+ private String name = "";
+ private boolean required = false;
+ private String value = "";
+ private Map<String, String> attributes = Map.of();
+ private OptionTransformer optionTransformer = defaultOptionTransformer;
+ private boolean set = false;
+
+ public Builder name(String name) {
+ this.name = name;
+ return this;
+ }
+
+ public Builder required(boolean required) {
+ this.required = required;
+ return this;
+ }
+
+ public Builder value(String value) {
+ this.value = value;
+ this.set = true;
+ return this;
+ }
+
+ public Builder attributes(Map<String, String> attributes) {
+ this.attributes = attributes;
+ return this;
+ }
+
+ public Builder attributes(Element element) {
+ NamedNodeMap map = element.getAttributes();
+ if (map.getLength() > 0) {
+ this.attributes = new HashMap<>(map.getLength());
+ for (int i = 0; i < map.getLength(); ++i) {
+ String attr = map.item(i).getNodeName();
+ attributes.put(attr, element.getAttribute(attr));
+ }
+ }
+ return this;
+ }
+
+ public Builder optionTransformer(OptionTransformer optionTransformer) {
+ this.optionTransformer = optionTransformer;
+ return this;
+ }
+
+ public EmbedderOption build() {
+ return new EmbedderOption(this);
+ }
+
+ }
+
+}