diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2023-05-11 16:15:05 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-11 16:15:05 +0200 |
commit | b7b0a062a9b231746e80ec08b2021a5d2cf401fe (patch) | |
tree | 3b2b137c4dedf7c3d40f0f1b508bd0e9fa14d4b1 | |
parent | 4d0d9ac57e4ba7df5de57e425cd5b59490db60ab (diff) | |
parent | 47b370c90715a15939474951fbdb6fad987073e9 (diff) |
Merge pull request #27081 from vespa-engine/arnej/hide-some-match-features
add filtering to hide implicitly added match features
9 files changed, 151 insertions, 4 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/RankProfile.java b/config-model/src/main/java/com/yahoo/schema/RankProfile.java index 639930041c3..69f32daef4a 100644 --- a/config-model/src/main/java/com/yahoo/schema/RankProfile.java +++ b/config-model/src/main/java/com/yahoo/schema/RankProfile.java @@ -108,6 +108,7 @@ public class RankProfile implements Cloneable { private String inheritedSummaryFeaturesProfileName; private Set<ReferenceNode> matchFeatures; + private Set<String> hiddenMatchFeatures; private String inheritedMatchFeaturesProfileName; private Set<ReferenceNode> rankFeatures; @@ -605,6 +606,12 @@ public class RankProfile implements Cloneable { .orElse(Set.of()); } + public Set<String> getHiddenMatchFeatures() { + if (hiddenMatchFeatures != null) return Collections.unmodifiableSet(hiddenMatchFeatures); + return uniquelyInherited(p -> p.getHiddenMatchFeatures(), f -> ! f.isEmpty(), "hidden match features") + .orElse(Set.of()); + } + private void addSummaryFeature(ReferenceNode feature) { if (summaryFeatures == null) summaryFeatures = new LinkedHashSet<>(); @@ -617,6 +624,21 @@ public class RankProfile implements Cloneable { matchFeatures.add(feature); } + private void addImplicitMatchFeatures(List<FeatureList> list) { + if (matchFeatures == null) + matchFeatures = new LinkedHashSet<>(); + if (hiddenMatchFeatures == null) + hiddenMatchFeatures = new LinkedHashSet<>(); + for (var features : list) { + for (ReferenceNode feature : features) { + if (! matchFeatures.contains(feature)) { + matchFeatures.add(feature); + hiddenMatchFeatures.add(feature.toString()); + } + } + } + } + /** Adds the content of the given feature list to the internal list of summary features. */ public void addSummaryFeatures(FeatureList features) { for (ReferenceNode feature : features) { @@ -1037,16 +1059,18 @@ public class RankProfile implements Cloneable { var needInputs = new HashSet<String>(); var recorder = new InputRecorder(needInputs); recorder.process(globalPhaseRanking.function().getBody(), context); + List<FeatureList> addIfMissing = new ArrayList<>(); for (String input : needInputs) { if (input.startsWith("constant(") || input.startsWith("query(")) { continue; } try { - addMatchFeatures(new FeatureList(input)); + addIfMissing.add(new FeatureList(input)); } catch (com.yahoo.searchlib.rankingexpression.parser.ParseException e) { throw new IllegalArgumentException("invalid input in global-phase expression: "+input); } } + addImplicitMatchFeatures(addIfMissing); } } @@ -1132,7 +1156,7 @@ public class RankProfile implements Cloneable { allImportedFields().forEach(field -> addAttributeFeatureTypes(field, featureTypes)); return featureTypes; } - + public MapEvaluationTypeContext typeContext(QueryProfileRegistry queryProfiles, Map<Reference, TensorType> featureTypes) { MapEvaluationTypeContext context = new MapEvaluationTypeContext(getExpressionFunctions(), featureTypes); diff --git a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java index 6272563f833..349fe1404ed 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java @@ -133,6 +133,7 @@ public class RawRankProfile implements RankProfilesConfig.Producer { private final Map<String, FieldRankSettings> fieldRankSettings = new java.util.LinkedHashMap<>(); private final Set<ReferenceNode> summaryFeatures; private final Set<ReferenceNode> matchFeatures; + private final Collection<String> hiddenMatchFeatures; private final Set<ReferenceNode> rankFeatures; private final Map<String, String> featureRenames = new java.util.LinkedHashMap<>(); private final List<RankProfile.RankProperty> rankProperties; @@ -183,6 +184,7 @@ public class RawRankProfile implements RankProfilesConfig.Producer { globalPhaseRanking = compiled.getGlobalPhaseRanking(); summaryFeatures = new LinkedHashSet<>(compiled.getSummaryFeatures()); matchFeatures = new LinkedHashSet<>(compiled.getMatchFeatures()); + hiddenMatchFeatures = compiled.getHiddenMatchFeatures(); rankFeatures = compiled.getRankFeatures(); rerankCount = compiled.getRerankCount(); globalPhaseRerankCount = compiled.getGlobalPhaseRerankCount(); @@ -394,6 +396,9 @@ public class RawRankProfile implements RankProfilesConfig.Producer { for (ReferenceNode feature : matchFeatures) { properties.add(new Pair<>("vespa.match.feature", feature.toString())); } + for (String feature : hiddenMatchFeatures) { + properties.add(new Pair<>("vespa.hidden.matchfeature", feature)); + } for (ReferenceNode feature : rankFeatures) { properties.add(new Pair<>("vespa.dump.feature", feature.toString())); } diff --git a/config-model/src/test/derived/globalphase_onnx_inside/rank-profiles.cfg b/config-model/src/test/derived/globalphase_onnx_inside/rank-profiles.cfg index 9d21691a910..b28843736d8 100644 --- a/config-model/src/test/derived/globalphase_onnx_inside/rank-profiles.cfg +++ b/config-model/src/test/derived/globalphase_onnx_inside/rank-profiles.cfg @@ -29,6 +29,10 @@ rankprofile[].fef.property[].name "vespa.match.feature" rankprofile[].fef.property[].value "attribute(aa)" rankprofile[].fef.property[].name "vespa.match.feature" rankprofile[].fef.property[].value "attribute(extra)" +rankprofile[].fef.property[].name "vespa.hidden.matchfeature" +rankprofile[].fef.property[].value "attribute(aa)" +rankprofile[].fef.property[].name "vespa.hidden.matchfeature" +rankprofile[].fef.property[].value "attribute(extra)" rankprofile[].fef.property[].name "vespa.globalphase.rerankcount" rankprofile[].fef.property[].value "13" rankprofile[].fef.property[].name "vespa.type.attribute.aa" @@ -64,6 +68,8 @@ rankprofile[].fef.property[].name "rankingExpression(globalphase).rankingScript" rankprofile[].fef.property[].value "reduce(constant(ww) * (onnx(inside).foobar - rankingExpression(handicap)), sum)" rankprofile[].fef.property[].name "vespa.match.feature" rankprofile[].fef.property[].value "attribute(aa)" +rankprofile[].fef.property[].name "vespa.hidden.matchfeature" +rankprofile[].fef.property[].value "attribute(aa)" rankprofile[].fef.property[].name "vespa.globalphase.rerankcount" rankprofile[].fef.property[].value "13" rankprofile[].fef.property[].name "vespa.type.attribute.aa" @@ -95,6 +101,8 @@ rankprofile[].fef.property[].name "rankingExpression(globalphase).rankingScript" rankprofile[].fef.property[].value "reduce(constant(ww) * onnx(twoside).foobar, sum)" rankprofile[].fef.property[].name "vespa.match.feature" rankprofile[].fef.property[].value "attribute(extra)" +rankprofile[].fef.property[].name "vespa.hidden.matchfeature" +rankprofile[].fef.property[].value "attribute(extra)" rankprofile[].fef.property[].name "vespa.globalphase.rerankcount" rankprofile[].fef.property[].value "42" rankprofile[].fef.property[].name "vespa.type.attribute.aa" @@ -126,6 +134,8 @@ rankprofile[].fef.property[].name "rankingExpression(globalphase).rankingScript" rankprofile[].fef.property[].value "reduce(constant(ww) * onnx(another).foobar, sum)" rankprofile[].fef.property[].name "vespa.match.feature" rankprofile[].fef.property[].value "attribute(extra)" +rankprofile[].fef.property[].name "vespa.hidden.matchfeature" +rankprofile[].fef.property[].value "attribute(extra)" rankprofile[].fef.property[].name "vespa.globalphase.rerankcount" rankprofile[].fef.property[].value "1001" rankprofile[].fef.property[].name "vespa.type.attribute.aa" diff --git a/config-model/src/test/derived/globalphase_token_functions/rank-profiles.cfg b/config-model/src/test/derived/globalphase_token_functions/rank-profiles.cfg index fc2453f3aa9..42ddad8abe7 100644 --- a/config-model/src/test/derived/globalphase_token_functions/rank-profiles.cfg +++ b/config-model/src/test/derived/globalphase_token_functions/rank-profiles.cfg @@ -37,6 +37,10 @@ rankprofile[].fef.property[].name "vespa.match.feature" rankprofile[].fef.property[].value "attribute(tokens)" rankprofile[].fef.property[].name "vespa.match.feature" rankprofile[].fef.property[].value "attribute(outputidx)" +rankprofile[].fef.property[].name "vespa.hidden.matchfeature" +rankprofile[].fef.property[].value "attribute(tokens)" +rankprofile[].fef.property[].name "vespa.hidden.matchfeature" +rankprofile[].fef.property[].value "attribute(outputidx)" rankprofile[].fef.property[].name "vespa.globalphase.rerankcount" rankprofile[].fef.property[].value "1000" rankprofile[].fef.property[].name "vespa.type.attribute.tokens" diff --git a/config-model/src/test/derived/rankingexpression/rank-profiles.cfg b/config-model/src/test/derived/rankingexpression/rank-profiles.cfg index 3db3f437698..1e0ec94fa82 100644 --- a/config-model/src/test/derived/rankingexpression/rank-profiles.cfg +++ b/config-model/src/test/derived/rankingexpression/rank-profiles.cfg @@ -429,6 +429,24 @@ rankprofile[].fef.property[].name "vespa.match.feature" rankprofile[].fef.property[].value "fieldTermMatch(title,0).occurrences" rankprofile[].fef.property[].name "vespa.match.feature" rankprofile[].fef.property[].value "attribute(foo2)" +rankprofile[].fef.property[].name "vespa.hidden.matchfeature" +rankprofile[].fef.property[].value "fieldLength(artist)" +rankprofile[].fef.property[].name "vespa.hidden.matchfeature" +rankprofile[].fef.property[].value "term(0).significance" +rankprofile[].fef.property[].name "vespa.hidden.matchfeature" +rankprofile[].fef.property[].value "closeness(field,t1)" +rankprofile[].fef.property[].name "vespa.hidden.matchfeature" +rankprofile[].fef.property[].value "termDistance(title,1,2).reverse" +rankprofile[].fef.property[].name "vespa.hidden.matchfeature" +rankprofile[].fef.property[].value "firstPhase" +rankprofile[].fef.property[].name "vespa.hidden.matchfeature" +rankprofile[].fef.property[].value "attribute(t1)" +rankprofile[].fef.property[].name "vespa.hidden.matchfeature" +rankprofile[].fef.property[].value "attribute(foo1)" +rankprofile[].fef.property[].name "vespa.hidden.matchfeature" +rankprofile[].fef.property[].value "fieldTermMatch(title,0).occurrences" +rankprofile[].fef.property[].name "vespa.hidden.matchfeature" +rankprofile[].fef.property[].value "attribute(foo2)" rankprofile[].fef.property[].name "vespa.globalphase.rerankcount" rankprofile[].fef.property[].value "42" rankprofile[].fef.property[].name "vespa.type.attribute.t1" diff --git a/container-search/src/main/java/com/yahoo/search/ranking/GlobalPhaseRanker.java b/container-search/src/main/java/com/yahoo/search/ranking/GlobalPhaseRanker.java index dd419d69315..2aa9fd32795 100644 --- a/container-search/src/main/java/com/yahoo/search/ranking/GlobalPhaseRanker.java +++ b/container-search/src/main/java/com/yahoo/search/ranking/GlobalPhaseRanker.java @@ -7,9 +7,15 @@ import com.yahoo.search.Result; import com.yahoo.search.query.Sorting; import com.yahoo.search.ranking.RankProfilesEvaluator.GlobalPhaseData; import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.FeatureData; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; import com.yahoo.tensor.Tensor; +import com.yahoo.data.access.helpers.MatchFeatureData; +import com.yahoo.data.access.helpers.MatchFeatureFilter; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import java.util.Optional; import java.util.function.Supplier; @@ -57,6 +63,28 @@ public class GlobalPhaseRanker { if (rerankCount < 0) rerankCount = 100; ResultReranker.rerankHits(result, new HitRescorer(supplier), rerankCount); + hideImplicitMatchFeatures(result, data.matchFeaturesToHide()); + } + + private void hideImplicitMatchFeatures(Result result, Collection<String> namesToHide) { + if (namesToHide.size() == 0) return; + var filter = new MatchFeatureFilter(namesToHide); + for (var iterator = result.hits().deepIterator(); iterator.hasNext();) { + Hit hit = iterator.next(); + if (hit.isMeta() || hit instanceof HitGroup) { + continue; + } + if (hit.getField("matchfeatures") instanceof FeatureData matchFeatures) { + if (matchFeatures.inspect() instanceof MatchFeatureData.HitValue hitValue) { + var newValue = hitValue.subsetFilter(filter); + if (newValue.fieldCount() == 0) { + hit.removeField("matchfeatures"); + } else { + hit.setField("matchfeatures", newValue); + } + } + } + } } private Optional<GlobalPhaseData> globalPhaseDataFor(Query query, String schema) { diff --git a/container-search/src/main/java/com/yahoo/search/ranking/RankProfilesEvaluator.java b/container-search/src/main/java/com/yahoo/search/ranking/RankProfilesEvaluator.java index 2ca91a3ea91..a89f0a5c3ea 100644 --- a/container-search/src/main/java/com/yahoo/search/ranking/RankProfilesEvaluator.java +++ b/container-search/src/main/java/com/yahoo/search/ranking/RankProfilesEvaluator.java @@ -14,10 +14,13 @@ import com.yahoo.vespa.config.search.core.OnnxModelsConfig; import com.yahoo.vespa.config.search.core.RankingConstantsConfig; import com.yahoo.vespa.config.search.core.RankingExpressionsConfig; +import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.function.Supplier; import java.util.logging.Logger; @@ -61,6 +64,7 @@ public class RankProfilesEvaluator extends AbstractComponent { } static record GlobalPhaseData(Supplier<FunctionEvaluator> functionEvaluatorSource, + Collection<String> matchFeaturesToHide, int rerankCount, List<String> needInputs) {} @@ -76,7 +80,7 @@ public class RankProfilesEvaluator extends AbstractComponent { Supplier<FunctionEvaluator> functionEvaluatorSource = null; int rerankCount = -1; List<String> needInputs = null; - + Set<String> namesToHide = new HashSet<>(); for (var prop : rp.fef().property()) { if (prop.name().equals("vespa.globalphase.rerankcount")) { rerankCount = Integer.valueOf(prop.value()); @@ -87,9 +91,12 @@ public class RankProfilesEvaluator extends AbstractComponent { var evaluator = functionEvaluatorSource.get(); needInputs = List.copyOf(evaluator.function().arguments()); } + if (prop.name().equals("vespa.hidden.matchfeature")) { + namesToHide.add(prop.value()); + } } if (functionEvaluatorSource != null && needInputs != null) { - profilesWithGlobalPhase.put(name, new GlobalPhaseData(functionEvaluatorSource, rerankCount, needInputs)); + profilesWithGlobalPhase.put(name, new GlobalPhaseData(functionEvaluatorSource, namesToHide, rerankCount, needInputs)); } } } diff --git a/vespajlib/src/main/java/com/yahoo/data/access/helpers/MatchFeatureData.java b/vespajlib/src/main/java/com/yahoo/data/access/helpers/MatchFeatureData.java index f7d72ade20d..4f8bd64f85a 100644 --- a/vespajlib/src/main/java/com/yahoo/data/access/helpers/MatchFeatureData.java +++ b/vespajlib/src/main/java/com/yahoo/data/access/helpers/MatchFeatureData.java @@ -13,6 +13,7 @@ import java.util.ArrayList; import java.util.AbstractMap.SimpleEntry; import java.util.List; import java.util.Map; +import java.util.function.Function; /** * MatchFeatureData helps pack match features for hits into @@ -85,6 +86,16 @@ public class MatchFeatureData { } return new Value.DoubleValue(doubleValues[index]); } + + public HitValue subsetFilter(Function<Hashlet<String,Integer>, Hashlet<String,Integer>> filter) { + return new HitValue(filter.apply(hashlet), dataValues, doubleValues); + } + // used only from subsetFilter() above + private HitValue(Hashlet<String,Integer> hashlet, byte[][] dataValues, double[] doubleValues) { + this.hashlet = hashlet; + this.dataValues = dataValues; + this.doubleValues = doubleValues; + } } public HitValue addHit() { diff --git a/vespajlib/src/main/java/com/yahoo/data/access/helpers/MatchFeatureFilter.java b/vespajlib/src/main/java/com/yahoo/data/access/helpers/MatchFeatureFilter.java new file mode 100644 index 00000000000..96451f35504 --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/data/access/helpers/MatchFeatureFilter.java @@ -0,0 +1,40 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.data.access.helpers; + +import com.yahoo.collections.Hashlet; + +import java.util.Collection; +import java.util.IdentityHashMap; +import java.util.function.Function; + +/** + * Helper class to remove (filter) some names from a Hashlet + * @author arnej + */ +public class MatchFeatureFilter implements Function<Hashlet<String,Integer>, Hashlet<String,Integer>> { + + private final IdentityHashMap<Hashlet<String,Integer>, Hashlet<String,Integer>> mappings = new IdentityHashMap<>(); + private final Collection<String> removeList; + + public MatchFeatureFilter(Collection<String> removeList) { + this.removeList = removeList; + } + + Hashlet<String,Integer> filter(Hashlet<String,Integer> input) { + Hashlet<String,Integer> result = new Hashlet<>(); + result.reserve(input.size()); + for (int i = 0; i < input.size(); i++) { + String k = input.key(i); + if (! removeList.contains(k)) { + Integer v = input.value(i); + result.put(k, v); + } + } + return result; + } + + public Hashlet<String,Integer> apply(Hashlet<String,Integer> input) { + return mappings.computeIfAbsent(input, k -> filter(k)); + } + +} |