summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--container-search/pom.xml12
-rw-r--r--container-search/src/main/java/com/yahoo/search/ranking/GlobalPhaseSetup.java69
-rw-r--r--container-search/src/test/java/com/yahoo/search/ranking/GlobalPhaseSetupTest.java62
-rw-r--r--container-search/src/test/resources/config/medium/rank-profiles.cfg55
-rw-r--r--container-search/src/test/resources/config/qf_defaults/rank-profiles.cfg23
5 files changed, 220 insertions, 1 deletions
diff --git a/container-search/pom.xml b/container-search/pom.xml
index 1e40539a79e..5e7c60d49c3 100644
--- a/container-search/pom.xml
+++ b/container-search/pom.xml
@@ -75,6 +75,18 @@
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>model-integration</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>container-onnxruntime</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
<dependency>
<groupId>xerces</groupId>
diff --git a/container-search/src/main/java/com/yahoo/search/ranking/GlobalPhaseSetup.java b/container-search/src/main/java/com/yahoo/search/ranking/GlobalPhaseSetup.java
index e5cd09d3a18..084c2c290eb 100644
--- a/container-search/src/main/java/com/yahoo/search/ranking/GlobalPhaseSetup.java
+++ b/container-search/src/main/java/com/yahoo/search/ranking/GlobalPhaseSetup.java
@@ -4,6 +4,7 @@ package com.yahoo.search.ranking;
import ai.vespa.models.evaluation.FunctionEvaluator;
import com.yahoo.tensor.Tensor;
+import com.yahoo.tensor.TensorType;
import com.yahoo.vespa.config.search.RankProfilesConfig;
import java.util.*;
@@ -30,6 +31,71 @@ class GlobalPhaseSetup {
this.defaultValues = defaultValues;
}
+ static class DefaultQueryFeatureExtractor {
+ final String baseName;
+ final String qfName;
+ TensorType type = null;
+ Tensor value = null;
+ DefaultQueryFeatureExtractor(String unwrappedQueryFeature) {
+ baseName = unwrappedQueryFeature;
+ qfName = "query(" + baseName + ")";
+ }
+ List<String> lookingFor() {
+ return List.of(qfName, "vespa.type.query." + baseName);
+ }
+ void accept(String key, String propValue) {
+ if (key.equals(qfName)) {
+ this.value = Tensor.from(propValue);
+ } else {
+ this.type = TensorType.fromSpec(propValue);
+ }
+ }
+ Tensor extract() {
+ if (value != null) {
+ return value;
+ }
+ if (type != null) {
+ return Tensor.Builder.of(type).build();
+ }
+ return Tensor.from(0.0);
+ }
+ }
+
+ static private Map<String, Tensor> extraDefaultQueryFeatureValues(RankProfilesConfig.Rankprofile rp,
+ List<String> fromQuery,
+ List<NormalizerSetup> normalizers)
+ {
+ Map<String, DefaultQueryFeatureExtractor> extractors = new HashMap<>();
+ for (String fn : fromQuery) {
+ extractors.put(fn, new DefaultQueryFeatureExtractor(fn));
+ }
+ for (var n : normalizers) {
+ for (String fn : n.inputEvalSpec().fromQuery()) {
+ extractors.put(fn, new DefaultQueryFeatureExtractor(fn));
+ }
+ }
+ Map<String, DefaultQueryFeatureExtractor> targets = new HashMap<>();
+ for (var extractor : extractors.values()) {
+ for (String key : extractor.lookingFor()) {
+ var old = targets.put(key, extractor);
+ if (old != null) {
+ throw new IllegalStateException("Multiple targets for key: " + key);
+ }
+ }
+ }
+ for (var prop : rp.fef().property()) {
+ var extractor = targets.get(prop.name());
+ if (extractor != null) {
+ extractor.accept(prop.name(), prop.value());
+ }
+ }
+ Map<String, Tensor> defaultValues = new HashMap<>();
+ for (var extractor : extractors.values()) {
+ defaultValues.put(extractor.qfName, extractor.extract());
+ }
+ return defaultValues;
+ }
+
static GlobalPhaseSetup maybeMakeSetup(RankProfilesConfig.Rankprofile rp, RankProfilesEvaluator modelEvaluator) {
var model = modelEvaluator.modelForRankProfile(rp.name());
Map<String, RankProfilesConfig.Rankprofile.Normalizer> availableNormalizers = new HashMap<>();
@@ -104,7 +170,8 @@ class GlobalPhaseSetup {
}
Supplier<Evaluator> supplier = SimpleEvaluator.wrap(functionEvaluatorSource);
var gfun = new FunEvalSpec(supplier, fromQuery, fromMF);
- return new GlobalPhaseSetup(gfun, rerankCount, namesToHide, normalizers, Collections.emptyMap());
+ var defaultValues = extraDefaultQueryFeatureValues(rp, fromQuery, normalizers);
+ return new GlobalPhaseSetup(gfun, rerankCount, namesToHide, normalizers, defaultValues);
}
return null;
}
diff --git a/container-search/src/test/java/com/yahoo/search/ranking/GlobalPhaseSetupTest.java b/container-search/src/test/java/com/yahoo/search/ranking/GlobalPhaseSetupTest.java
new file mode 100644
index 00000000000..7f4dfc4c9a7
--- /dev/null
+++ b/container-search/src/test/java/com/yahoo/search/ranking/GlobalPhaseSetupTest.java
@@ -0,0 +1,62 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.ranking;
+
+import com.yahoo.config.subscription.ConfigGetter;
+import com.yahoo.filedistribution.fileacquirer.MockFileAcquirer;
+import com.yahoo.tensor.Tensor;
+import com.yahoo.vespa.config.search.RankProfilesConfig;
+import com.yahoo.vespa.config.search.core.OnnxModelsConfig;
+import com.yahoo.vespa.config.search.core.RankingConstantsConfig;
+import com.yahoo.vespa.config.search.core.RankingExpressionsConfig;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+public class GlobalPhaseSetupTest {
+ private static final String CONFIG_DIR = "src/test/resources/config/";
+
+ @SuppressWarnings("deprecation")
+ RankProfilesConfig readConfig(String subDir) {
+ String cfgId = "file:" + CONFIG_DIR + subDir + "/rank-profiles.cfg";
+ return ConfigGetter.getConfig(RankProfilesConfig.class, cfgId);
+ }
+
+ @Test void mediumAdvancedSetup() {
+ RankProfilesConfig rpCfg = readConfig("medium");
+ assertEquals(1, rpCfg.rankprofile().size());
+ RankProfilesEvaluator rpEvaluator = createEvaluator(rpCfg);
+ var setup = GlobalPhaseSetup.maybeMakeSetup(rpCfg.rankprofile().get(0), rpEvaluator);
+ assertNotNull(setup);
+ assertEquals(42, setup.rerankCount);
+ assertEquals(0, setup.normalizers.size());
+ assertEquals(9, setup.matchFeaturesToHide.size());
+ assertEquals(1, setup.globalPhaseEvalSpec.fromQuery().size());
+ assertEquals(9, setup.globalPhaseEvalSpec.fromMF().size());
+ }
+
+ @Test void queryFeaturesWithDefaults() {
+ RankProfilesConfig rpCfg = readConfig("qf_defaults");
+ assertEquals(1, rpCfg.rankprofile().size());
+ RankProfilesEvaluator rpEvaluator = createEvaluator(rpCfg);
+ var setup = GlobalPhaseSetup.maybeMakeSetup(rpCfg.rankprofile().get(0), rpEvaluator);
+ assertNotNull(setup);
+ assertEquals(0, setup.normalizers.size());
+ assertEquals(0, setup.matchFeaturesToHide.size());
+ assertEquals(5, setup.globalPhaseEvalSpec.fromQuery().size());
+ assertEquals(2, setup.globalPhaseEvalSpec.fromMF().size());
+ assertEquals(5, setup.defaultValues.size());
+ assertEquals(Tensor.from(0.0), setup.defaultValues.get("query(w_no_def)"));
+ assertEquals(Tensor.from(1.0), setup.defaultValues.get("query(w_has_def)"));
+ assertEquals(Tensor.from("tensor(m{}):{}"), setup.defaultValues.get("query(m_no_def)"));
+ assertEquals(Tensor.from("tensor(v[3]):[0,0,0]"), setup.defaultValues.get("query(v_no_def)"));
+ assertEquals(Tensor.from("tensor(v[3]):[2,0.25,1.5]"), setup.defaultValues.get("query(v_has_def)"));
+ }
+
+ private RankProfilesEvaluator createEvaluator(RankProfilesConfig config) {
+ RankingConstantsConfig constantsConfig = new RankingConstantsConfig.Builder().build();
+ RankingExpressionsConfig expressionsConfig = new RankingExpressionsConfig.Builder().build();
+ OnnxModelsConfig onnxModelsConfig = new OnnxModelsConfig.Builder().build();
+ return new RankProfilesEvaluator(config, constantsConfig, expressionsConfig, onnxModelsConfig, MockFileAcquirer.returnFile(null));
+ }
+}
diff --git a/container-search/src/test/resources/config/medium/rank-profiles.cfg b/container-search/src/test/resources/config/medium/rank-profiles.cfg
new file mode 100644
index 00000000000..5a609f70cef
--- /dev/null
+++ b/container-search/src/test/resources/config/medium/rank-profiles.cfg
@@ -0,0 +1,55 @@
+rankprofile[0].name "withglobalphase"
+rankprofile[0].fef.property[0].name "rankingExpression(myplus).rankingScript"
+rankprofile[0].fef.property[0].value "attribute(foo1) + attribute(foo2)"
+rankprofile[0].fef.property[1].name "rankingExpression(mymul).rankingScript"
+rankprofile[0].fef.property[1].value "attribute(t1) * query(fromq)"
+rankprofile[0].fef.property[2].name "rankingExpression(mymul).type"
+rankprofile[0].fef.property[2].value "tensor(m{},v[3])"
+rankprofile[0].fef.property[3].name "vespa.type.feature.attribute(t1)"
+rankprofile[0].fef.property[3].value "tensor(m{},v[3])"
+rankprofile[0].fef.property[4].name "vespa.rank.firstphase"
+rankprofile[0].fef.property[4].value "attribute(foo1)"
+rankprofile[0].fef.property[5].name "vespa.rank.globalphase"
+rankprofile[0].fef.property[5].value "rankingExpression(globalphase)"
+rankprofile[0].fef.property[6].name "rankingExpression(globalphase).rankingScript"
+rankprofile[0].fef.property[6].value "rankingExpression(myplus) + reduce(rankingExpression(mymul), sum) + firstPhase + term(0).significance + fieldLength(artist) + fieldTermMatch(title,0).occurrences + termDistance(title,1,2).reverse + closeness(field,t1)"
+rankprofile[0].fef.property[7].name "vespa.match.feature"
+rankprofile[0].fef.property[7].value "fieldLength(artist)"
+rankprofile[0].fef.property[8].name "vespa.match.feature"
+rankprofile[0].fef.property[8].value "term(0).significance"
+rankprofile[0].fef.property[9].name "vespa.match.feature"
+rankprofile[0].fef.property[9].value "closeness(field,t1)"
+rankprofile[0].fef.property[10].name "vespa.match.feature"
+rankprofile[0].fef.property[10].value "termDistance(title,1,2).reverse"
+rankprofile[0].fef.property[11].name "vespa.match.feature"
+rankprofile[0].fef.property[11].value "firstPhase"
+rankprofile[0].fef.property[12].name "vespa.match.feature"
+rankprofile[0].fef.property[12].value "attribute(t1)"
+rankprofile[0].fef.property[13].name "vespa.match.feature"
+rankprofile[0].fef.property[13].value "attribute(foo1)"
+rankprofile[0].fef.property[14].name "vespa.match.feature"
+rankprofile[0].fef.property[14].value "fieldTermMatch(title,0).occurrences"
+rankprofile[0].fef.property[15].name "vespa.match.feature"
+rankprofile[0].fef.property[15].value "attribute(foo2)"
+rankprofile[0].fef.property[16].name "vespa.hidden.matchfeature"
+rankprofile[0].fef.property[16].value "fieldLength(artist)"
+rankprofile[0].fef.property[17].name "vespa.hidden.matchfeature"
+rankprofile[0].fef.property[17].value "term(0).significance"
+rankprofile[0].fef.property[18].name "vespa.hidden.matchfeature"
+rankprofile[0].fef.property[18].value "closeness(field,t1)"
+rankprofile[0].fef.property[19].name "vespa.hidden.matchfeature"
+rankprofile[0].fef.property[19].value "termDistance(title,1,2).reverse"
+rankprofile[0].fef.property[20].name "vespa.hidden.matchfeature"
+rankprofile[0].fef.property[20].value "firstPhase"
+rankprofile[0].fef.property[21].name "vespa.hidden.matchfeature"
+rankprofile[0].fef.property[21].value "attribute(t1)"
+rankprofile[0].fef.property[22].name "vespa.hidden.matchfeature"
+rankprofile[0].fef.property[22].value "attribute(foo1)"
+rankprofile[0].fef.property[23].name "vespa.hidden.matchfeature"
+rankprofile[0].fef.property[23].value "fieldTermMatch(title,0).occurrences"
+rankprofile[0].fef.property[24].name "vespa.hidden.matchfeature"
+rankprofile[0].fef.property[24].value "attribute(foo2)"
+rankprofile[0].fef.property[25].name "vespa.globalphase.rerankcount"
+rankprofile[0].fef.property[25].value "42"
+rankprofile[0].fef.property[26].name "vespa.type.attribute.t1"
+rankprofile[0].fef.property[26].value "tensor(m{},v[3])"
diff --git a/container-search/src/test/resources/config/qf_defaults/rank-profiles.cfg b/container-search/src/test/resources/config/qf_defaults/rank-profiles.cfg
new file mode 100644
index 00000000000..731064c4dd6
--- /dev/null
+++ b/container-search/src/test/resources/config/qf_defaults/rank-profiles.cfg
@@ -0,0 +1,23 @@
+rankprofile[0].name "gp_with_qf_defaults"
+rankprofile[0].fef.property[0].name "vespa.rank.firstphase"
+rankprofile[0].fef.property[0].value "attribute(foo1)"
+rankprofile[0].fef.property[1].name "vespa.rank.globalphase"
+rankprofile[0].fef.property[1].value "rankingExpression(globalphase)"
+rankprofile[0].fef.property[2].name "rankingExpression(globalphase).rankingScript"
+rankprofile[0].fef.property[2].value "reduce(query(m_no_def) * query(v_no_def) * query(v_has_def) * attribute(t1), sum) + attribute(bar3) * query(w_no_def) * query(w_has_def)"
+rankprofile[0].fef.property[3].name "vespa.match.feature"
+rankprofile[0].fef.property[3].value "attribute(t1)"
+rankprofile[0].fef.property[4].name "vespa.match.feature"
+rankprofile[0].fef.property[4].value "attribute(bar3)"
+rankprofile[0].fef.property[5].name "vespa.type.attribute.t1"
+rankprofile[0].fef.property[5].value "tensor(m{},v[3])"
+rankprofile[0].fef.property[6].name "vespa.type.query.m_no_def"
+rankprofile[0].fef.property[6].value "tensor(m{})"
+rankprofile[0].fef.property[7].name "vespa.type.query.v_no_def"
+rankprofile[0].fef.property[7].value "tensor(v[3])"
+rankprofile[0].fef.property[8].name "query(w_has_def)"
+rankprofile[0].fef.property[8].value "1.0"
+rankprofile[0].fef.property[9].name "vespa.type.query.v_has_def"
+rankprofile[0].fef.property[9].value "tensor(v[3])"
+rankprofile[0].fef.property[10].name "query(v_has_def)"
+rankprofile[0].fef.property[10].value "tensor(v[3]):{{v:0}:2.0, {v:1}:0.25, {v:2}:1.5}"