From 9af2a2c2b78510bec7b4f8017bcb98e1da7e3e2a Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Wed, 18 Oct 2023 11:32:46 +0000 Subject: add defaults extraction and unit test --- container-search/pom.xml | 12 ++++ .../com/yahoo/search/ranking/GlobalPhaseSetup.java | 69 +++++++++++++++++++++- .../yahoo/search/ranking/GlobalPhaseSetupTest.java | 62 +++++++++++++++++++ .../test/resources/config/medium/rank-profiles.cfg | 55 +++++++++++++++++ .../resources/config/qf_defaults/rank-profiles.cfg | 23 ++++++++ 5 files changed, 220 insertions(+), 1 deletion(-) create mode 100644 container-search/src/test/java/com/yahoo/search/ranking/GlobalPhaseSetupTest.java create mode 100644 container-search/src/test/resources/config/medium/rank-profiles.cfg create mode 100644 container-search/src/test/resources/config/qf_defaults/rank-profiles.cfg (limited to 'container-search') diff --git a/container-search/pom.xml b/container-search/pom.xml index 1e40539a79e..5e7c60d49c3 100644 --- a/container-search/pom.xml +++ b/container-search/pom.xml @@ -75,6 +75,18 @@ ${project.version} provided + + com.yahoo.vespa + model-integration + ${project.version} + provided + + + com.yahoo.vespa + container-onnxruntime + ${project.version} + provided + xerces diff --git a/container-search/src/main/java/com/yahoo/search/ranking/GlobalPhaseSetup.java b/container-search/src/main/java/com/yahoo/search/ranking/GlobalPhaseSetup.java index e5cd09d3a18..084c2c290eb 100644 --- a/container-search/src/main/java/com/yahoo/search/ranking/GlobalPhaseSetup.java +++ b/container-search/src/main/java/com/yahoo/search/ranking/GlobalPhaseSetup.java @@ -4,6 +4,7 @@ package com.yahoo.search.ranking; import ai.vespa.models.evaluation.FunctionEvaluator; import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.TensorType; import com.yahoo.vespa.config.search.RankProfilesConfig; import java.util.*; @@ -30,6 +31,71 @@ class GlobalPhaseSetup { this.defaultValues = defaultValues; } + static class DefaultQueryFeatureExtractor { + final String baseName; + final String qfName; + TensorType type = null; + Tensor value = null; + DefaultQueryFeatureExtractor(String unwrappedQueryFeature) { + baseName = unwrappedQueryFeature; + qfName = "query(" + baseName + ")"; + } + List lookingFor() { + return List.of(qfName, "vespa.type.query." + baseName); + } + void accept(String key, String propValue) { + if (key.equals(qfName)) { + this.value = Tensor.from(propValue); + } else { + this.type = TensorType.fromSpec(propValue); + } + } + Tensor extract() { + if (value != null) { + return value; + } + if (type != null) { + return Tensor.Builder.of(type).build(); + } + return Tensor.from(0.0); + } + } + + static private Map extraDefaultQueryFeatureValues(RankProfilesConfig.Rankprofile rp, + List fromQuery, + List normalizers) + { + Map extractors = new HashMap<>(); + for (String fn : fromQuery) { + extractors.put(fn, new DefaultQueryFeatureExtractor(fn)); + } + for (var n : normalizers) { + for (String fn : n.inputEvalSpec().fromQuery()) { + extractors.put(fn, new DefaultQueryFeatureExtractor(fn)); + } + } + Map targets = new HashMap<>(); + for (var extractor : extractors.values()) { + for (String key : extractor.lookingFor()) { + var old = targets.put(key, extractor); + if (old != null) { + throw new IllegalStateException("Multiple targets for key: " + key); + } + } + } + for (var prop : rp.fef().property()) { + var extractor = targets.get(prop.name()); + if (extractor != null) { + extractor.accept(prop.name(), prop.value()); + } + } + Map defaultValues = new HashMap<>(); + for (var extractor : extractors.values()) { + defaultValues.put(extractor.qfName, extractor.extract()); + } + return defaultValues; + } + static GlobalPhaseSetup maybeMakeSetup(RankProfilesConfig.Rankprofile rp, RankProfilesEvaluator modelEvaluator) { var model = modelEvaluator.modelForRankProfile(rp.name()); Map availableNormalizers = new HashMap<>(); @@ -104,7 +170,8 @@ class GlobalPhaseSetup { } Supplier supplier = SimpleEvaluator.wrap(functionEvaluatorSource); var gfun = new FunEvalSpec(supplier, fromQuery, fromMF); - return new GlobalPhaseSetup(gfun, rerankCount, namesToHide, normalizers, Collections.emptyMap()); + var defaultValues = extraDefaultQueryFeatureValues(rp, fromQuery, normalizers); + return new GlobalPhaseSetup(gfun, rerankCount, namesToHide, normalizers, defaultValues); } return null; } diff --git a/container-search/src/test/java/com/yahoo/search/ranking/GlobalPhaseSetupTest.java b/container-search/src/test/java/com/yahoo/search/ranking/GlobalPhaseSetupTest.java new file mode 100644 index 00000000000..7f4dfc4c9a7 --- /dev/null +++ b/container-search/src/test/java/com/yahoo/search/ranking/GlobalPhaseSetupTest.java @@ -0,0 +1,62 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.ranking; + +import com.yahoo.config.subscription.ConfigGetter; +import com.yahoo.filedistribution.fileacquirer.MockFileAcquirer; +import com.yahoo.tensor.Tensor; +import com.yahoo.vespa.config.search.RankProfilesConfig; +import com.yahoo.vespa.config.search.core.OnnxModelsConfig; +import com.yahoo.vespa.config.search.core.RankingConstantsConfig; +import com.yahoo.vespa.config.search.core.RankingExpressionsConfig; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +public class GlobalPhaseSetupTest { + private static final String CONFIG_DIR = "src/test/resources/config/"; + + @SuppressWarnings("deprecation") + RankProfilesConfig readConfig(String subDir) { + String cfgId = "file:" + CONFIG_DIR + subDir + "/rank-profiles.cfg"; + return ConfigGetter.getConfig(RankProfilesConfig.class, cfgId); + } + + @Test void mediumAdvancedSetup() { + RankProfilesConfig rpCfg = readConfig("medium"); + assertEquals(1, rpCfg.rankprofile().size()); + RankProfilesEvaluator rpEvaluator = createEvaluator(rpCfg); + var setup = GlobalPhaseSetup.maybeMakeSetup(rpCfg.rankprofile().get(0), rpEvaluator); + assertNotNull(setup); + assertEquals(42, setup.rerankCount); + assertEquals(0, setup.normalizers.size()); + assertEquals(9, setup.matchFeaturesToHide.size()); + assertEquals(1, setup.globalPhaseEvalSpec.fromQuery().size()); + assertEquals(9, setup.globalPhaseEvalSpec.fromMF().size()); + } + + @Test void queryFeaturesWithDefaults() { + RankProfilesConfig rpCfg = readConfig("qf_defaults"); + assertEquals(1, rpCfg.rankprofile().size()); + RankProfilesEvaluator rpEvaluator = createEvaluator(rpCfg); + var setup = GlobalPhaseSetup.maybeMakeSetup(rpCfg.rankprofile().get(0), rpEvaluator); + assertNotNull(setup); + assertEquals(0, setup.normalizers.size()); + assertEquals(0, setup.matchFeaturesToHide.size()); + assertEquals(5, setup.globalPhaseEvalSpec.fromQuery().size()); + assertEquals(2, setup.globalPhaseEvalSpec.fromMF().size()); + assertEquals(5, setup.defaultValues.size()); + assertEquals(Tensor.from(0.0), setup.defaultValues.get("query(w_no_def)")); + assertEquals(Tensor.from(1.0), setup.defaultValues.get("query(w_has_def)")); + assertEquals(Tensor.from("tensor(m{}):{}"), setup.defaultValues.get("query(m_no_def)")); + assertEquals(Tensor.from("tensor(v[3]):[0,0,0]"), setup.defaultValues.get("query(v_no_def)")); + assertEquals(Tensor.from("tensor(v[3]):[2,0.25,1.5]"), setup.defaultValues.get("query(v_has_def)")); + } + + private RankProfilesEvaluator createEvaluator(RankProfilesConfig config) { + RankingConstantsConfig constantsConfig = new RankingConstantsConfig.Builder().build(); + RankingExpressionsConfig expressionsConfig = new RankingExpressionsConfig.Builder().build(); + OnnxModelsConfig onnxModelsConfig = new OnnxModelsConfig.Builder().build(); + return new RankProfilesEvaluator(config, constantsConfig, expressionsConfig, onnxModelsConfig, MockFileAcquirer.returnFile(null)); + } +} diff --git a/container-search/src/test/resources/config/medium/rank-profiles.cfg b/container-search/src/test/resources/config/medium/rank-profiles.cfg new file mode 100644 index 00000000000..5a609f70cef --- /dev/null +++ b/container-search/src/test/resources/config/medium/rank-profiles.cfg @@ -0,0 +1,55 @@ +rankprofile[0].name "withglobalphase" +rankprofile[0].fef.property[0].name "rankingExpression(myplus).rankingScript" +rankprofile[0].fef.property[0].value "attribute(foo1) + attribute(foo2)" +rankprofile[0].fef.property[1].name "rankingExpression(mymul).rankingScript" +rankprofile[0].fef.property[1].value "attribute(t1) * query(fromq)" +rankprofile[0].fef.property[2].name "rankingExpression(mymul).type" +rankprofile[0].fef.property[2].value "tensor(m{},v[3])" +rankprofile[0].fef.property[3].name "vespa.type.feature.attribute(t1)" +rankprofile[0].fef.property[3].value "tensor(m{},v[3])" +rankprofile[0].fef.property[4].name "vespa.rank.firstphase" +rankprofile[0].fef.property[4].value "attribute(foo1)" +rankprofile[0].fef.property[5].name "vespa.rank.globalphase" +rankprofile[0].fef.property[5].value "rankingExpression(globalphase)" +rankprofile[0].fef.property[6].name "rankingExpression(globalphase).rankingScript" +rankprofile[0].fef.property[6].value "rankingExpression(myplus) + reduce(rankingExpression(mymul), sum) + firstPhase + term(0).significance + fieldLength(artist) + fieldTermMatch(title,0).occurrences + termDistance(title,1,2).reverse + closeness(field,t1)" +rankprofile[0].fef.property[7].name "vespa.match.feature" +rankprofile[0].fef.property[7].value "fieldLength(artist)" +rankprofile[0].fef.property[8].name "vespa.match.feature" +rankprofile[0].fef.property[8].value "term(0).significance" +rankprofile[0].fef.property[9].name "vespa.match.feature" +rankprofile[0].fef.property[9].value "closeness(field,t1)" +rankprofile[0].fef.property[10].name "vespa.match.feature" +rankprofile[0].fef.property[10].value "termDistance(title,1,2).reverse" +rankprofile[0].fef.property[11].name "vespa.match.feature" +rankprofile[0].fef.property[11].value "firstPhase" +rankprofile[0].fef.property[12].name "vespa.match.feature" +rankprofile[0].fef.property[12].value "attribute(t1)" +rankprofile[0].fef.property[13].name "vespa.match.feature" +rankprofile[0].fef.property[13].value "attribute(foo1)" +rankprofile[0].fef.property[14].name "vespa.match.feature" +rankprofile[0].fef.property[14].value "fieldTermMatch(title,0).occurrences" +rankprofile[0].fef.property[15].name "vespa.match.feature" +rankprofile[0].fef.property[15].value "attribute(foo2)" +rankprofile[0].fef.property[16].name "vespa.hidden.matchfeature" +rankprofile[0].fef.property[16].value "fieldLength(artist)" +rankprofile[0].fef.property[17].name "vespa.hidden.matchfeature" +rankprofile[0].fef.property[17].value "term(0).significance" +rankprofile[0].fef.property[18].name "vespa.hidden.matchfeature" +rankprofile[0].fef.property[18].value "closeness(field,t1)" +rankprofile[0].fef.property[19].name "vespa.hidden.matchfeature" +rankprofile[0].fef.property[19].value "termDistance(title,1,2).reverse" +rankprofile[0].fef.property[20].name "vespa.hidden.matchfeature" +rankprofile[0].fef.property[20].value "firstPhase" +rankprofile[0].fef.property[21].name "vespa.hidden.matchfeature" +rankprofile[0].fef.property[21].value "attribute(t1)" +rankprofile[0].fef.property[22].name "vespa.hidden.matchfeature" +rankprofile[0].fef.property[22].value "attribute(foo1)" +rankprofile[0].fef.property[23].name "vespa.hidden.matchfeature" +rankprofile[0].fef.property[23].value "fieldTermMatch(title,0).occurrences" +rankprofile[0].fef.property[24].name "vespa.hidden.matchfeature" +rankprofile[0].fef.property[24].value "attribute(foo2)" +rankprofile[0].fef.property[25].name "vespa.globalphase.rerankcount" +rankprofile[0].fef.property[25].value "42" +rankprofile[0].fef.property[26].name "vespa.type.attribute.t1" +rankprofile[0].fef.property[26].value "tensor(m{},v[3])" diff --git a/container-search/src/test/resources/config/qf_defaults/rank-profiles.cfg b/container-search/src/test/resources/config/qf_defaults/rank-profiles.cfg new file mode 100644 index 00000000000..731064c4dd6 --- /dev/null +++ b/container-search/src/test/resources/config/qf_defaults/rank-profiles.cfg @@ -0,0 +1,23 @@ +rankprofile[0].name "gp_with_qf_defaults" +rankprofile[0].fef.property[0].name "vespa.rank.firstphase" +rankprofile[0].fef.property[0].value "attribute(foo1)" +rankprofile[0].fef.property[1].name "vespa.rank.globalphase" +rankprofile[0].fef.property[1].value "rankingExpression(globalphase)" +rankprofile[0].fef.property[2].name "rankingExpression(globalphase).rankingScript" +rankprofile[0].fef.property[2].value "reduce(query(m_no_def) * query(v_no_def) * query(v_has_def) * attribute(t1), sum) + attribute(bar3) * query(w_no_def) * query(w_has_def)" +rankprofile[0].fef.property[3].name "vespa.match.feature" +rankprofile[0].fef.property[3].value "attribute(t1)" +rankprofile[0].fef.property[4].name "vespa.match.feature" +rankprofile[0].fef.property[4].value "attribute(bar3)" +rankprofile[0].fef.property[5].name "vespa.type.attribute.t1" +rankprofile[0].fef.property[5].value "tensor(m{},v[3])" +rankprofile[0].fef.property[6].name "vespa.type.query.m_no_def" +rankprofile[0].fef.property[6].value "tensor(m{})" +rankprofile[0].fef.property[7].name "vespa.type.query.v_no_def" +rankprofile[0].fef.property[7].value "tensor(v[3])" +rankprofile[0].fef.property[8].name "query(w_has_def)" +rankprofile[0].fef.property[8].value "1.0" +rankprofile[0].fef.property[9].name "vespa.type.query.v_has_def" +rankprofile[0].fef.property[9].value "tensor(v[3])" +rankprofile[0].fef.property[10].name "query(v_has_def)" +rankprofile[0].fef.property[10].value "tensor(v[3]):{{v:0}:2.0, {v:1}:0.25, {v:2}:1.5}" -- cgit v1.2.3