about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Arne H Juul <arnej27959@users.noreply.github.com> 2023-05-11 16:15:05 +0200
committer GitHub <noreply@github.com> 2023-05-11 16:15:05 +0200
commit b7b0a062a9b231746e80ec08b2021a5d2cf401fe (patch)
tree 3b2b137c4dedf7c3d40f0f1b508bd0e9fa14d4b1
parent 4d0d9ac57e4ba7df5de57e425cd5b59490db60ab (diff)
parent 47b370c90715a15939474951fbdb6fad987073e9 (diff)
Merge pull request #27081 from vespa-engine/arnej/hide-some-match-features
add filtering to hide implicitly added match features
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/RankProfile.java 28
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java 5
-rw-r--r-- config-model/src/test/derived/globalphase_onnx_inside/rank-profiles.cfg 10
-rw-r--r-- config-model/src/test/derived/globalphase_token_functions/rank-profiles.cfg 4
-rw-r--r-- config-model/src/test/derived/rankingexpression/rank-profiles.cfg 18
-rw-r--r-- container-search/src/main/java/com/yahoo/search/ranking/GlobalPhaseRanker.java 28
-rw-r--r-- container-search/src/main/java/com/yahoo/search/ranking/RankProfilesEvaluator.java 11
-rw-r--r-- vespajlib/src/main/java/com/yahoo/data/access/helpers/MatchFeatureData.java 11
-rw-r--r-- vespajlib/src/main/java/com/yahoo/data/access/helpers/MatchFeatureFilter.java 40
9 files changed, 151 insertions, 4 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/RankProfile.java b/config-model/src/main/java/com/yahoo/schema/RankProfile.java
index 639930041c3..69f32daef4a 100644
--- a/config-model/src/main/java/com/yahoo/schema/RankProfile.java
+++ b/config-model/src/main/java/com/yahoo/schema/RankProfile.java
@@ -108,6 +108,7 @@ public class RankProfile implements Cloneable {
private String inheritedSummaryFeaturesProfileName;
private Set<ReferenceNode> matchFeatures;
+ private Set<String> hiddenMatchFeatures;
private String inheritedMatchFeaturesProfileName;
private Set<ReferenceNode> rankFeatures;
@@ -605,6 +606,12 @@ public class RankProfile implements Cloneable {
.orElse(Set.of());
}
+    /**
+     * Returns the names of the match features marked as hidden for this profile.
+     * When this profile has its own set, a read-only view of it is returned; otherwise the
+     * lookup is delegated to uniquelyInherited(), falling back to an empty set.
+     */
+    public Set<String> getHiddenMatchFeatures() {
+        if (hiddenMatchFeatures == null) {
+            return uniquelyInherited(profile -> profile.getHiddenMatchFeatures(),
+                                     hidden -> ! hidden.isEmpty(),
+                                     "hidden match features")
+                    .orElse(Set.of());
+        }
+        return Collections.unmodifiableSet(hiddenMatchFeatures);
+    }
+
private void addSummaryFeature(ReferenceNode feature) {
if (summaryFeatures == null)
summaryFeatures = new LinkedHashSet<>();
@@ -617,6 +624,21 @@ public class RankProfile implements Cloneable {
matchFeatures.add(feature);
}
+    /**
+     * Adds every feature in the given lists as a match feature unless this profile's
+     * match feature set already contains it, and records the string form of each newly
+     * added feature as hidden. Both sets are created here if they do not exist yet.
+     */
+    private void addImplicitMatchFeatures(List<FeatureList> list) {
+        if (hiddenMatchFeatures == null)
+            hiddenMatchFeatures = new LinkedHashSet<>();
+        if (matchFeatures == null)
+            matchFeatures = new LinkedHashSet<>();
+        for (FeatureList implicitFeatures : list) {
+            for (ReferenceNode candidate : implicitFeatures) {
+                // Set.add returns true only when the element was absent, so only new additions get hidden
+                if (matchFeatures.add(candidate))
+                    hiddenMatchFeatures.add(candidate.toString());
+            }
+        }
+    }
+
/** Adds the content of the given feature list to the internal list of summary features. */
public void addSummaryFeatures(FeatureList features) {
for (ReferenceNode feature : features) {
@@ -1037,16 +1059,18 @@ public class RankProfile implements Cloneable {
var needInputs = new HashSet<String>();
var recorder = new InputRecorder(needInputs);
recorder.process(globalPhaseRanking.function().getBody(), context);
+ List<FeatureList> addIfMissing = new ArrayList<>();
for (String input : needInputs) {
if (input.startsWith("constant(") || input.startsWith("query(")) {
continue;
}
try {
- addMatchFeatures(new FeatureList(input));
+ addIfMissing.add(new FeatureList(input));
} catch (com.yahoo.searchlib.rankingexpression.parser.ParseException e) {
throw new IllegalArgumentException("invalid input in global-phase expression: "+input);
}
}
+ addImplicitMatchFeatures(addIfMissing);
}
}
@@ -1132,7 +1156,7 @@ public class RankProfile implements Cloneable {
allImportedFields().forEach(field -> addAttributeFeatureTypes(field, featureTypes));
return featureTypes;
}
-
+
public MapEvaluationTypeContext typeContext(QueryProfileRegistry queryProfiles,
Map<Reference, TensorType> featureTypes) {
MapEvaluationTypeContext context = new MapEvaluationTypeContext(getExpressionFunctions(), featureTypes);
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java
index 6272563f833..349fe1404ed 100644
--- a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java
+++ b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java
@@ -133,6 +133,7 @@ public class RawRankProfile implements RankProfilesConfig.Producer {
private final Map<String, FieldRankSettings> fieldRankSettings = new java.util.LinkedHashMap<>();
private final Set<ReferenceNode> summaryFeatures;
private final Set<ReferenceNode> matchFeatures;
+ private final Collection<String> hiddenMatchFeatures;
private final Set<ReferenceNode> rankFeatures;
private final Map<String, String> featureRenames = new java.util.LinkedHashMap<>();
private final List<RankProfile.RankProperty> rankProperties;
@@ -183,6 +184,7 @@ public class RawRankProfile implements RankProfilesConfig.Producer {
globalPhaseRanking = compiled.getGlobalPhaseRanking();
summaryFeatures = new LinkedHashSet<>(compiled.getSummaryFeatures());
matchFeatures = new LinkedHashSet<>(compiled.getMatchFeatures());
+ hiddenMatchFeatures = compiled.getHiddenMatchFeatures();
rankFeatures = compiled.getRankFeatures();
rerankCount = compiled.getRerankCount();
globalPhaseRerankCount = compiled.getGlobalPhaseRerankCount();
@@ -394,6 +396,9 @@ public class RawRankProfile implements RankProfilesConfig.Producer {
for (ReferenceNode feature : matchFeatures) {
properties.add(new Pair<>("vespa.match.feature", feature.toString()));
}
+ for (String feature : hiddenMatchFeatures) {
+ properties.add(new Pair<>("vespa.hidden.matchfeature", feature));
+ }
for (ReferenceNode feature : rankFeatures) {
properties.add(new Pair<>("vespa.dump.feature", feature.toString()));
}
diff --git a/config-model/src/test/derived/globalphase_onnx_inside/rank-profiles.cfg b/config-model/src/test/derived/globalphase_onnx_inside/rank-profiles.cfg
index 9d21691a910..b28843736d8 100644
--- a/config-model/src/test/derived/globalphase_onnx_inside/rank-profiles.cfg
+++ b/config-model/src/test/derived/globalphase_onnx_inside/rank-profiles.cfg
@@ -29,6 +29,10 @@ rankprofile[].fef.property[].name "vespa.match.feature"
rankprofile[].fef.property[].value "attribute(aa)"
rankprofile[].fef.property[].name "vespa.match.feature"
rankprofile[].fef.property[].value "attribute(extra)"
+rankprofile[].fef.property[].name "vespa.hidden.matchfeature"
+rankprofile[].fef.property[].value "attribute(aa)"
+rankprofile[].fef.property[].name "vespa.hidden.matchfeature"
+rankprofile[].fef.property[].value "attribute(extra)"
rankprofile[].fef.property[].name "vespa.globalphase.rerankcount"
rankprofile[].fef.property[].value "13"
rankprofile[].fef.property[].name "vespa.type.attribute.aa"
@@ -64,6 +68,8 @@ rankprofile[].fef.property[].name "rankingExpression(globalphase).rankingScript"
rankprofile[].fef.property[].value "reduce(constant(ww) * (onnx(inside).foobar - rankingExpression(handicap)), sum)"
rankprofile[].fef.property[].name "vespa.match.feature"
rankprofile[].fef.property[].value "attribute(aa)"
+rankprofile[].fef.property[].name "vespa.hidden.matchfeature"
+rankprofile[].fef.property[].value "attribute(aa)"
rankprofile[].fef.property[].name "vespa.globalphase.rerankcount"
rankprofile[].fef.property[].value "13"
rankprofile[].fef.property[].name "vespa.type.attribute.aa"
@@ -95,6 +101,8 @@ rankprofile[].fef.property[].name "rankingExpression(globalphase).rankingScript"
rankprofile[].fef.property[].value "reduce(constant(ww) * onnx(twoside).foobar, sum)"
rankprofile[].fef.property[].name "vespa.match.feature"
rankprofile[].fef.property[].value "attribute(extra)"
+rankprofile[].fef.property[].name "vespa.hidden.matchfeature"
+rankprofile[].fef.property[].value "attribute(extra)"
rankprofile[].fef.property[].name "vespa.globalphase.rerankcount"
rankprofile[].fef.property[].value "42"
rankprofile[].fef.property[].name "vespa.type.attribute.aa"
@@ -126,6 +134,8 @@ rankprofile[].fef.property[].name "rankingExpression(globalphase).rankingScript"
rankprofile[].fef.property[].value "reduce(constant(ww) * onnx(another).foobar, sum)"
rankprofile[].fef.property[].name "vespa.match.feature"
rankprofile[].fef.property[].value "attribute(extra)"
+rankprofile[].fef.property[].name "vespa.hidden.matchfeature"
+rankprofile[].fef.property[].value "attribute(extra)"
rankprofile[].fef.property[].name "vespa.globalphase.rerankcount"
rankprofile[].fef.property[].value "1001"
rankprofile[].fef.property[].name "vespa.type.attribute.aa"
diff --git a/config-model/src/test/derived/globalphase_token_functions/rank-profiles.cfg b/config-model/src/test/derived/globalphase_token_functions/rank-profiles.cfg
index fc2453f3aa9..42ddad8abe7 100644
--- a/config-model/src/test/derived/globalphase_token_functions/rank-profiles.cfg
+++ b/config-model/src/test/derived/globalphase_token_functions/rank-profiles.cfg
@@ -37,6 +37,10 @@ rankprofile[].fef.property[].name "vespa.match.feature"
rankprofile[].fef.property[].value "attribute(tokens)"
rankprofile[].fef.property[].name "vespa.match.feature"
rankprofile[].fef.property[].value "attribute(outputidx)"
+rankprofile[].fef.property[].name "vespa.hidden.matchfeature"
+rankprofile[].fef.property[].value "attribute(tokens)"
+rankprofile[].fef.property[].name "vespa.hidden.matchfeature"
+rankprofile[].fef.property[].value "attribute(outputidx)"
rankprofile[].fef.property[].name "vespa.globalphase.rerankcount"
rankprofile[].fef.property[].value "1000"
rankprofile[].fef.property[].name "vespa.type.attribute.tokens"
diff --git a/config-model/src/test/derived/rankingexpression/rank-profiles.cfg b/config-model/src/test/derived/rankingexpression/rank-profiles.cfg
index 3db3f437698..1e0ec94fa82 100644
--- a/config-model/src/test/derived/rankingexpression/rank-profiles.cfg
+++ b/config-model/src/test/derived/rankingexpression/rank-profiles.cfg
@@ -429,6 +429,24 @@ rankprofile[].fef.property[].name "vespa.match.feature"
rankprofile[].fef.property[].value "fieldTermMatch(title,0).occurrences"
rankprofile[].fef.property[].name "vespa.match.feature"
rankprofile[].fef.property[].value "attribute(foo2)"
+rankprofile[].fef.property[].name "vespa.hidden.matchfeature"
+rankprofile[].fef.property[].value "fieldLength(artist)"
+rankprofile[].fef.property[].name "vespa.hidden.matchfeature"
+rankprofile[].fef.property[].value "term(0).significance"
+rankprofile[].fef.property[].name "vespa.hidden.matchfeature"
+rankprofile[].fef.property[].value "closeness(field,t1)"
+rankprofile[].fef.property[].name "vespa.hidden.matchfeature"
+rankprofile[].fef.property[].value "termDistance(title,1,2).reverse"
+rankprofile[].fef.property[].name "vespa.hidden.matchfeature"
+rankprofile[].fef.property[].value "firstPhase"
+rankprofile[].fef.property[].name "vespa.hidden.matchfeature"
+rankprofile[].fef.property[].value "attribute(t1)"
+rankprofile[].fef.property[].name "vespa.hidden.matchfeature"
+rankprofile[].fef.property[].value "attribute(foo1)"
+rankprofile[].fef.property[].name "vespa.hidden.matchfeature"
+rankprofile[].fef.property[].value "fieldTermMatch(title,0).occurrences"
+rankprofile[].fef.property[].name "vespa.hidden.matchfeature"
+rankprofile[].fef.property[].value "attribute(foo2)"
rankprofile[].fef.property[].name "vespa.globalphase.rerankcount"
rankprofile[].fef.property[].value "42"
rankprofile[].fef.property[].name "vespa.type.attribute.t1"
diff --git a/container-search/src/main/java/com/yahoo/search/ranking/GlobalPhaseRanker.java b/container-search/src/main/java/com/yahoo/search/ranking/GlobalPhaseRanker.java
index dd419d69315..2aa9fd32795 100644
--- a/container-search/src/main/java/com/yahoo/search/ranking/GlobalPhaseRanker.java
+++ b/container-search/src/main/java/com/yahoo/search/ranking/GlobalPhaseRanker.java
@@ -7,9 +7,15 @@ import com.yahoo.search.Result;
import com.yahoo.search.query.Sorting;
import com.yahoo.search.ranking.RankProfilesEvaluator.GlobalPhaseData;
import com.yahoo.search.result.ErrorMessage;
+import com.yahoo.search.result.FeatureData;
+import com.yahoo.search.result.Hit;
+import com.yahoo.search.result.HitGroup;
import com.yahoo.tensor.Tensor;
+import com.yahoo.data.access.helpers.MatchFeatureData;
+import com.yahoo.data.access.helpers.MatchFeatureFilter;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.function.Supplier;
@@ -57,6 +63,28 @@ public class GlobalPhaseRanker {
if (rerankCount < 0)
rerankCount = 100;
ResultReranker.rerankHits(result, new HitRescorer(supplier), rerankCount);
+ hideImplicitMatchFeatures(result, data.matchFeaturesToHide());
+ }
+
+    /**
+     * Strips the given feature names from the "matchfeatures" field of the hits in the result.
+     * Meta hits and hit groups are skipped, as are hits whose field is not a FeatureData backed
+     * by a MatchFeatureData.HitValue. When no fields remain after filtering the field is removed;
+     * otherwise it is replaced by the filtered HitValue itself (it is not wrapped in a new FeatureData).
+     * One filter instance is used for all hits, so it can reuse its cached result per name table.
+     */
+    private void hideImplicitMatchFeatures(Result result, Collection<String> namesToHide) {
+        if (namesToHide.isEmpty()) return;
+        var filter = new MatchFeatureFilter(namesToHide);
+        var hits = result.hits().deepIterator();
+        while (hits.hasNext()) {
+            Hit hit = hits.next();
+            if (hit.isMeta() || hit instanceof HitGroup) continue;
+            if (! (hit.getField("matchfeatures") instanceof FeatureData featureData)) continue;
+            if (! (featureData.inspect() instanceof MatchFeatureData.HitValue hitValue)) continue;
+            var remaining = hitValue.subsetFilter(filter);
+            if (remaining.fieldCount() != 0) {
+                hit.setField("matchfeatures", remaining);
+            } else {
+                hit.removeField("matchfeatures");
+            }
+        }
     }
private Optional<GlobalPhaseData> globalPhaseDataFor(Query query, String schema) {
diff --git a/container-search/src/main/java/com/yahoo/search/ranking/RankProfilesEvaluator.java b/container-search/src/main/java/com/yahoo/search/ranking/RankProfilesEvaluator.java
index 2ca91a3ea91..a89f0a5c3ea 100644
--- a/container-search/src/main/java/com/yahoo/search/ranking/RankProfilesEvaluator.java
+++ b/container-search/src/main/java/com/yahoo/search/ranking/RankProfilesEvaluator.java
@@ -14,10 +14,13 @@ import com.yahoo.vespa.config.search.core.OnnxModelsConfig;
import com.yahoo.vespa.config.search.core.RankingConstantsConfig;
import com.yahoo.vespa.config.search.core.RankingExpressionsConfig;
+import java.util.Collection;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
+import java.util.Set;
import java.util.function.Supplier;
import java.util.logging.Logger;
@@ -61,6 +64,7 @@ public class RankProfilesEvaluator extends AbstractComponent {
}
+ /** Global-phase settings kept per rank profile name: the evaluator supplier, the match feature names to hide from results, the rerank count (-1 when not configured) and the evaluator function's argument names. */
static record GlobalPhaseData(Supplier<FunctionEvaluator> functionEvaluatorSource,
+ Collection<String> matchFeaturesToHide,
int rerankCount,
List<String> needInputs) {}
@@ -76,7 +80,7 @@ public class RankProfilesEvaluator extends AbstractComponent {
Supplier<FunctionEvaluator> functionEvaluatorSource = null;
int rerankCount = -1;
List<String> needInputs = null;
-
+ Set<String> namesToHide = new HashSet<>();
for (var prop : rp.fef().property()) {
if (prop.name().equals("vespa.globalphase.rerankcount")) {
rerankCount = Integer.valueOf(prop.value());
@@ -87,9 +91,12 @@ public class RankProfilesEvaluator extends AbstractComponent {
var evaluator = functionEvaluatorSource.get();
needInputs = List.copyOf(evaluator.function().arguments());
}
+ if (prop.name().equals("vespa.hidden.matchfeature")) {
+ namesToHide.add(prop.value());
+ }
}
if (functionEvaluatorSource != null && needInputs != null) {
- profilesWithGlobalPhase.put(name, new GlobalPhaseData(functionEvaluatorSource, rerankCount, needInputs));
+ profilesWithGlobalPhase.put(name, new GlobalPhaseData(functionEvaluatorSource, namesToHide, rerankCount, needInputs));
}
}
}
diff --git a/vespajlib/src/main/java/com/yahoo/data/access/helpers/MatchFeatureData.java b/vespajlib/src/main/java/com/yahoo/data/access/helpers/MatchFeatureData.java
index f7d72ade20d..4f8bd64f85a 100644
--- a/vespajlib/src/main/java/com/yahoo/data/access/helpers/MatchFeatureData.java
+++ b/vespajlib/src/main/java/com/yahoo/data/access/helpers/MatchFeatureData.java
@@ -13,6 +13,7 @@ import java.util.ArrayList;
import java.util.AbstractMap.SimpleEntry;
import java.util.List;
import java.util.Map;
+import java.util.function.Function;
/**
* MatchFeatureData helps pack match features for hits into
@@ -85,6 +86,16 @@ public class MatchFeatureData {
}
return new Value.DoubleValue(doubleValues[index]);
}
+
+        /**
+         * Returns a new HitValue whose hashlet is the result of applying the given filter to
+         * this value's hashlet. The data arrays are passed on as-is (shared, not copied).
+         */
+        public HitValue subsetFilter(Function<Hashlet<String,Integer>, Hashlet<String,Integer>> filter) {
+            Hashlet<String,Integer> filteredNames = filter.apply(hashlet);
+            return new HitValue(filteredNames, dataValues, doubleValues);
+        }
+        /** Creates a HitValue directly from its three parts; used only from subsetFilter(). */
+        private HitValue(Hashlet<String,Integer> hashlet, byte[][] dataValues, double[] doubleValues) {
+            this.doubleValues = doubleValues;
+            this.dataValues = dataValues;
+            this.hashlet = hashlet;
+        }
}
public HitValue addHit() {
diff --git a/vespajlib/src/main/java/com/yahoo/data/access/helpers/MatchFeatureFilter.java b/vespajlib/src/main/java/com/yahoo/data/access/helpers/MatchFeatureFilter.java
new file mode 100644
index 00000000000..96451f35504
--- /dev/null
+++ b/vespajlib/src/main/java/com/yahoo/data/access/helpers/MatchFeatureFilter.java
@@ -0,0 +1,40 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.data.access.helpers;
+
+import com.yahoo.collections.Hashlet;
+
+import java.util.Collection;
+import java.util.IdentityHashMap;
+import java.util.function.Function;
+
+/**
+ * Helper class to remove (filter) some names from a Hashlet.
+ * Filtered results are cached per input instance (compared by identity) in a plain
+ * IdentityHashMap, which is not synchronized; do not share one filter between threads
+ * without external synchronization.
+ *
+ * @author arnej
+ */
+public class MatchFeatureFilter implements Function<Hashlet<String,Integer>, Hashlet<String,Integer>> {
+
+    /** Filtered result for each distinct input Hashlet instance seen so far, keyed by identity. */
+    private final IdentityHashMap<Hashlet<String,Integer>, Hashlet<String,Integer>> mappings = new IdentityHashMap<>();
+
+    /** Names to drop. Held by reference (not copied), so it should not be modified while this filter is in use. */
+    private final Collection<String> removeList;
+
+    /**
+     * Creates a filter removing the given names.
+     *
+     * @param removeList the names that must not be present in filtered Hashlets
+     */
+    public MatchFeatureFilter(Collection<String> removeList) {
+        this.removeList = removeList;
+    }
+
+    /**
+     * Builds a new Hashlet holding the entries of the input whose key is not in the remove list.
+     * Values are carried over unchanged.
+     */
+    Hashlet<String,Integer> filter(Hashlet<String,Integer> input) {
+        Hashlet<String,Integer> result = new Hashlet<>();
+        result.reserve(input.size());
+        for (int i = 0; i < input.size(); i++) {
+            String k = input.key(i);
+            if (! removeList.contains(k)) {
+                Integer v = input.value(i);
+                result.put(k, v);
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Returns the filtered version of the given Hashlet, computing it only the first time
+     * a particular input instance is seen and returning the cached result afterwards.
+     */
+    @Override
+    public Hashlet<String,Integer> apply(Hashlet<String,Integer> input) {
+        return mappings.computeIfAbsent(input, this::filter);
+    }
+
+}