diff options
author | Marius Arhaug <mariusarhaug@hotmail.com> | 2024-04-09 16:33:27 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-09 16:33:27 +0200 |
commit | 887cb5b3c98472ae521f2104216b15ffde5d8acb (patch) | |
tree | 968498f53f1c4056b23b54f680bb1dd421639557 | |
parent | 07010100192978eea266f7cb15b315b57a95438e (diff) | |
parent | 65eedab29e587beab738a5daa19de460612a7295 (diff) |
Merge pull request #30819 from vespa-engine/marius/add-significance-config-model
Add significance config model registry
8 files changed, 194 insertions, 1 deletion
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/SignificanceModelRegistry.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/SignificanceModelRegistry.java new file mode 100644 index 00000000000..eb3f63cdf10 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/SignificanceModelRegistry.java @@ -0,0 +1,73 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.model.container.component; + +import com.yahoo.config.ModelReference; +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.container.bundle.BundleInstantiationSpecification; +import com.yahoo.osgi.provider.model.ComponentModel; +import com.yahoo.search.significance.config.SignificanceConfig; +import com.yahoo.text.XML; +import org.w3c.dom.Element; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import static com.yahoo.vespa.model.container.xml.ModelIdResolver.SIGNIFICANCE_MODEL; + +/** + * A registry for significance models. 
+ * + * @author MariusArhaug + * + */ +public class SignificanceModelRegistry extends SimpleComponent implements SignificanceConfig.Producer { + + private static final String CLASS = "com.yahoo.search.significance.impl.DefaultSignificanceModelRegistry"; + private static final String BUNDLE = "linguistics"; + + private final List<SignificanceModelConfig> configList; + + public SignificanceModelRegistry(DeployState deployState, Element spec) { + super(new ComponentModel(BundleInstantiationSpecification.fromStrings(CLASS, CLASS, BUNDLE))); + configList = new ArrayList<>(); + + for (Element modelElement : XML.getChildren(spec, "model")) { + addConfig( + modelElement.getAttribute("language"), + Model.fromXml(deployState, modelElement, Set.of(SIGNIFICANCE_MODEL)).modelReference()); + } + } + + + public void addConfig(String language, ModelReference path) { + configList.add( + new SignificanceModelConfig(language, path) + ); + } + + + @Override + public void getConfig(SignificanceConfig.Builder builder) { + builder.model( + configList.stream() + .map(config -> new SignificanceConfig.Model.Builder() + .language(config.language) + .path(config.path) + ).toList() + ); + } + + + class SignificanceModelConfig { + private final String language; + private final ModelReference path; + + public SignificanceModelConfig(String language, ModelReference path) { + this.language = language; + this.path = path; + } + + } +} + diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index 4abd6e89b15..456ce8fdbc2 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -83,6 +83,7 @@ import com.yahoo.vespa.model.container.component.Handler; import com.yahoo.vespa.model.container.component.SimpleComponent; 
import com.yahoo.vespa.model.container.component.SystemBindingPattern; import com.yahoo.vespa.model.container.component.UserBindingPattern; +import com.yahoo.vespa.model.container.component.SignificanceModelRegistry; import com.yahoo.vespa.model.container.docproc.ContainerDocproc; import com.yahoo.vespa.model.container.docproc.DocprocChains; import com.yahoo.vespa.model.container.http.AccessControl; @@ -765,6 +766,17 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { addSearchHandler(deployState, cluster, searchElement, context); validateAndAddConfiguredComponents(deployState, cluster, searchElement, "renderer", ContainerModelBuilder::validateRendererElement); + + addSignificance(deployState, searchElement, cluster); + } + + private void addSignificance(DeployState deployState, Element spec, ApplicationContainerCluster cluster) { + Element significanceElement = XML.getChild(spec, "significance"); + if (significanceElement == null) return; + + SignificanceModelRegistry significanceModelRegistry = new SignificanceModelRegistry(deployState, significanceElement); + cluster.addComponent(significanceModelRegistry); + } private void addModelEvaluation(Element spec, ApplicationContainerCluster cluster, ConfigModelContext context) { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java index 9ff9344edcb..f9993b770e5 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java @@ -25,6 +25,7 @@ public class ModelIdResolver { public static final String HF_TOKENIZER = "huggingface-tokenizer"; public static final String ONNX_MODEL = "onnx-model"; public static final String BERT_VOCAB = "bert-vocabulary"; + public static final String SIGNIFICANCE_MODEL = "significance-model"; private static 
Map<String, ProvidedModel> setupProvidedModels() { var m = new HashMap<String, ProvidedModel>(); diff --git a/config-model/src/main/resources/schema/containercluster.rnc b/config-model/src/main/resources/schema/containercluster.rnc index 9beaf7b7783..08092f10020 100644 --- a/config-model/src/main/resources/schema/containercluster.rnc +++ b/config-model/src/main/resources/schema/containercluster.rnc @@ -137,6 +137,10 @@ Threadpool = element threadpool { )) } +Significance = element significance { + element model { attribute language { xsd:string } & ModelReference }* +} + Clients = element clients { Client* } @@ -161,7 +165,8 @@ SearchInContainer = element search { Provider* & Renderer* & GenericConfig* & - Threadpool? + Threadpool? & + Significance? } SearchChain = element chain { diff --git a/config-model/src/test/cfg/significance/hosts.xml b/config-model/src/test/cfg/significance/hosts.xml new file mode 100644 index 00000000000..b06f676ac28 --- /dev/null +++ b/config-model/src/test/cfg/significance/hosts.xml @@ -0,0 +1,9 @@ +<?xml version="1.0" encoding="utf-8" ?> +<!-- Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> +<hosts> + <host name="mynode1"> + <alias>node0</alias> + </host> + +</hosts> + diff --git a/config-model/src/test/cfg/significance/services.xml b/config-model/src/test/cfg/significance/services.xml new file mode 100644 index 00000000000..6991f5498fb --- /dev/null +++ b/config-model/src/test/cfg/significance/services.xml @@ -0,0 +1,18 @@ +<?xml version="1.0" encoding="utf-8" ?> +<!-- Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
--> +<services> + <admin version="2.0"> + <adminserver hostalias="node0"/> + <logserver hostalias="node0"/> + </admin> + <container version="1.0"> + <search> + <significance> + <model language="en" model-id="idf-wiki-english" path="models/idf-english-wiki.json.zst"/> + <model language="no" path="models/idf-norwegian-wiki.json.zst" /> + <model language="ru" url="https://some/uri/blob.json" /> + </significance> + </search> + </container> +</services> + diff --git a/config-model/src/test/java/com/yahoo/vespa/model/significance/test/SignificanceModelTestCase.java b/config-model/src/test/java/com/yahoo/vespa/model/significance/test/SignificanceModelTestCase.java new file mode 100644 index 00000000000..acb9426b812 --- /dev/null +++ b/config-model/src/test/java/com/yahoo/vespa/model/significance/test/SignificanceModelTestCase.java @@ -0,0 +1,71 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.model.significance.test; + +import com.yahoo.component.ComponentId; +import com.yahoo.config.InnerNode; +import com.yahoo.config.ModelNode; +import com.yahoo.config.ModelReference; +import com.yahoo.search.significance.config.SignificanceConfig; +import com.yahoo.vespa.model.VespaModel; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; +import com.yahoo.vespa.model.container.component.SignificanceModelRegistry; +import com.yahoo.vespa.model.test.utils.VespaModelCreatorWithFilePkg; +import org.junit.jupiter.api.Test; + + +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * @author MariusArhaug + */ + +public class SignificanceModelTestCase { + private VespaModel createModel(String filename) { + return new VespaModelCreatorWithFilePkg(filename).create(); + } + + @Test + void testIndexGreaterThanNumNodes() { + VespaModel vespaModel = createModel("src/test/cfg/significance"); + ApplicationContainerCluster containerCluster = 
vespaModel.getContainerClusters().get("container"); + assertEquals(1, containerCluster.getContainers().size()); + } + + @Test + void testSignificance() { + VespaModel vespaModel = createModel("src/test/cfg/significance"); + ApplicationContainerCluster containerCluster = vespaModel.getContainerClusters().get("container"); + var significanceConfig = assertSignificancePresent(containerCluster); + assertEquals(3, significanceConfig.model().size()); + assertEquals("en", significanceConfig.model().get(0).language()); + assertEquals("no", significanceConfig.model().get(1).language()); + assertEquals("ru", significanceConfig.model().get(2).language()); + + assertEquals("models/idf-norwegian-wiki.json.zst", modelReference(significanceConfig.model().get(1), "path").path().orElseThrow().value()); + assertEquals("https://some/uri/blob.json", modelReference(significanceConfig.model().get(2), "path").url().orElseThrow().value()); + + + } + + private SignificanceConfig assertSignificancePresent(ApplicationContainerCluster cluster) { + + var id = new ComponentId("com.yahoo.search.significance.impl.DefaultSignificanceModelRegistry"); + var significance = (SignificanceModelRegistry) cluster.getComponentsMap().get(id); + assertEquals("com.yahoo.search.significance.impl.DefaultSignificanceModelRegistry", significance.getClassId().getName()); + var cfgBuilder = new SignificanceConfig.Builder(); + significance.getConfig(cfgBuilder); + return cfgBuilder.build(); + } + + // Ugly hack to read underlying model reference from config instance + private static ModelReference modelReference(InnerNode cfg, String name) { + try { + var f = cfg.getClass().getDeclaredField(name); + f.setAccessible(true); + return ((ModelNode) f.get(cfg)).getModelReference(); + } catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } +} + diff --git a/config-model/src/test/schema-test-files/services.xml b/config-model/src/test/schema-test-files/services.xml index 
63b05a0ddfe..7333ef5a87b 100644 --- a/config-model/src/test/schema-test-files/services.xml +++ b/config-model/src/test/schema-test-files/services.xml @@ -166,6 +166,10 @@ <min-threads>500</min-threads> <queue-size>0</queue-size> </threadpool> + + <significance> + <model language="en" model-id="idf-wiki-simple-english" path="models/idf-simple-english-wiki.json.zst" /> + </significance> </search> <processing> |