diff options
author | Geir Storli <geirst@yahooinc.com> | 2022-04-29 11:45:16 +0000 |
---|---|---|
committer | Geir Storli <geirst@yahooinc.com> | 2022-04-29 12:46:22 +0000 |
commit | 4541333bede340aa23572ec2b626fc0d14df7edd (patch) | |
tree | d1365751053fd9cc7296a41397a0ee74174b01bb /config-model | |
parent | 5b5f725ec9dd5ec81539e4d315e1d7adf054e0d1 (diff) |
Add support for approximate nearest neighbor threshold settings in rank profiles.
Diffstat (limited to 'config-model')
7 files changed, 165 insertions, 3 deletions
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/RankProfile.java b/config-model/src/main/java/com/yahoo/searchdefinition/RankProfile.java index 029c0efb55f..9c802075462 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/RankProfile.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/RankProfile.java @@ -93,6 +93,8 @@ public class RankProfile implements Cloneable { private int numSearchPartitions = -1; private Double termwiseLimit = null; + private Double postFilterThreshold = null; + private Double approximateThreshold = null; /** The drop limit used to drop hits with rank score less than or equal to this value */ private double rankScoreDropLimit = -Double.MAX_VALUE; @@ -688,6 +690,8 @@ public class RankProfile implements Cloneable { } public void setTermwiseLimit(double termwiseLimit) { this.termwiseLimit = termwiseLimit; } + public void setPostFilterThreshold(double threshold) { this.postFilterThreshold = threshold; } + public void setApproximateThreshold(double threshold) { this.approximateThreshold = threshold; } public OptionalDouble getTermwiseLimit() { if (termwiseLimit != null) return OptionalDouble.of(termwiseLimit); @@ -695,6 +699,20 @@ public class RankProfile implements Cloneable { .orElse(OptionalDouble.empty()); } + public OptionalDouble getPostFilterThreshold() { + if (postFilterThreshold != null) { + return OptionalDouble.of(postFilterThreshold); + } + return uniquelyInherited(p -> p.getPostFilterThreshold(), l -> l.isPresent(), "post-filter-threshold").orElse(OptionalDouble.empty()); + } + + public OptionalDouble getApproximateThreshold() { + if (approximateThreshold != null) { + return OptionalDouble.of(approximateThreshold); + } + return uniquelyInherited(p -> p.getApproximateThreshold(), l -> l.isPresent(), "approximate-threshold").orElse(OptionalDouble.empty()); + } + /** Whether we should ignore the default rank features. Set to null to use inherited */ public void setIgnoreDefaultRankFeatures(Boolean ignoreDefaultRankFeatures) { this.ignoreDefaultRankFeatures = ignoreDefaultRankFeatures; diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/derived/RawRankProfile.java b/config-model/src/main/java/com/yahoo/searchdefinition/derived/RawRankProfile.java index dba5397c6c3..3c14a2b9c63 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/derived/RawRankProfile.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/derived/RawRankProfile.java @@ -33,6 +33,7 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.ListIterator; import java.util.Map; +import java.util.OptionalDouble; import java.util.Set; import java.util.stream.Collectors; @@ -134,6 +135,8 @@ public class RawRankProfile implements RankProfilesConfig.Producer { private final int minHitsPerThread; private final int numSearchPartitions; private final double termwiseLimit; + private final OptionalDouble postFilterThreshold; + private final OptionalDouble approximateThreshold; private final double rankScoreDropLimit; private final boolean mapBackRankingExpressionFeatures; @@ -170,6 +173,8 @@ public class RawRankProfile implements RankProfilesConfig.Producer { minHitsPerThread = compiled.getMinHitsPerThread(); numSearchPartitions = compiled.getNumSearchPartitions(); termwiseLimit = compiled.getTermwiseLimit().orElse(deployProperties.featureFlags().defaultTermwiseLimit()); + postFilterThreshold = compiled.getPostFilterThreshold(); + approximateThreshold = compiled.getApproximateThreshold(); keepRankCount = compiled.getKeepRankCount(); rankScoreDropLimit = compiled.getRankScoreDropLimit(); mapBackRankingExpressionFeatures = deployProperties.featureFlags().avoidRenamingSummaryFeatures(); @@ -382,6 +387,12 @@ public class RawRankProfile implements RankProfilesConfig.Producer { if (termwiseLimit < 1.0) { properties.add(new Pair<>("vespa.matching.termwise_limit", termwiseLimit + "")); } + if (postFilterThreshold.isPresent()) { + properties.add(new Pair<>("vespa.matching.global_filter.upper_limit", String.valueOf(postFilterThreshold.getAsDouble()))); + } + if (approximateThreshold.isPresent()) { + properties.add(new Pair<>("vespa.matching.global_filter.lower_limit", String.valueOf(approximateThreshold.getAsDouble()))); + } if (matchPhaseSettings != null) { properties.add(new Pair<>("vespa.matchphase.degradation.attribute", matchPhaseSettings.getAttribute())); properties.add(new Pair<>("vespa.matchphase.degradation.ascendingorder", matchPhaseSettings.getAscending() + "")); diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/parser/ConvertParsedRanking.java b/config-model/src/main/java/com/yahoo/searchdefinition/parser/ConvertParsedRanking.java index d151776dd85..b50d5bf0cf8 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/parser/ConvertParsedRanking.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/parser/ConvertParsedRanking.java @@ -70,6 +70,10 @@ public class ConvertParsedRanking { (value -> profile.setRankScoreDropLimit(value)); parsed.getTermwiseLimit().ifPresent (value -> profile.setTermwiseLimit(value)); + parsed.getPostFilterThreshold().ifPresent + (value -> profile.setPostFilterThreshold(value)); + parsed.getApproximateThreshold().ifPresent + (value -> profile.setApproximateThreshold(value)); parsed.getKeepRankCount().ifPresent (value -> profile.setKeepRankCount(value)); parsed.getMinHitsPerThread().ifPresent diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/parser/ParsedRankProfile.java b/config-model/src/main/java/com/yahoo/searchdefinition/parser/ParsedRankProfile.java index 1918f31749d..118945369d3 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/parser/ParsedRankProfile.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/parser/ParsedRankProfile.java @@ -28,6 +28,8 @@ class ParsedRankProfile extends ParsedBlock { private boolean ignoreDefaultRankFeatures = false; private Double rankScoreDropLimit = null; private Double termwiseLimit = null; + private Double postFilterThreshold = null; + private Double approximateThreshold = null; private final List<FeatureList> matchFeatures = new ArrayList<>(); private final List<FeatureList> rankFeatures = new ArrayList<>(); private final List<FeatureList> summaryFeatures = new ArrayList<>(); @@ -59,6 +61,8 @@ class ParsedRankProfile extends ParsedBlock { boolean getIgnoreDefaultRankFeatures() { return this.ignoreDefaultRankFeatures; } Optional<Double> getRankScoreDropLimit() { return Optional.ofNullable(this.rankScoreDropLimit); } Optional<Double> getTermwiseLimit() { return Optional.ofNullable(this.termwiseLimit); } + Optional<Double> getPostFilterThreshold() { return Optional.ofNullable(this.postFilterThreshold); } + Optional<Double> getApproximateThreshold() { return Optional.ofNullable(this.approximateThreshold); } List<FeatureList> getMatchFeatures() { return List.copyOf(this.matchFeatures); } List<FeatureList> getRankFeatures() { return List.copyOf(this.rankFeatures); } List<FeatureList> getSummaryFeatures() { return List.copyOf(this.summaryFeatures); } @@ -202,6 +206,15 @@ class ParsedRankProfile extends ParsedBlock { verifyThat(termwiseLimit == null, "already has termwise-limit"); this.termwiseLimit = limit; } - - + + void setPostFilterThreshold(double threshold) { + verifyThat(postFilterThreshold == null, "already has post-filter-threshold"); + this.postFilterThreshold = threshold; + } + + void setApproximateThreshold(double threshold) { + verifyThat(approximateThreshold == null, "already has approximate-threshold"); + this.approximateThreshold = threshold; + } + } diff --git a/config-model/src/main/javacc/IntermediateParser.jj b/config-model/src/main/javacc/IntermediateParser.jj index 91dd5867307..d2d27a7a2d4 100644 --- a/config-model/src/main/javacc/IntermediateParser.jj +++ b/config-model/src/main/javacc/IntermediateParser.jj @@ -331,6 +331,8 @@ TOKEN : | < MINHITSPERTHREAD: "min-hits-per-thread" > | < NUMSEARCHPARTITIONS: "num-search-partitions" > | < TERMWISELIMIT: "termwise-limit" > +| < POSTFILTERTHRESHOLD: "post-filter-threshold" > +| < APPROXIMATETHRESHOLD: "approximate-threshold" > | < KEEPRANKCOUNT: "keep-rank-count" > | < RANKSCOREDROPLIMIT: "rank-score-drop-limit" > | < CONSTANTS: "constants" > @@ -1834,6 +1836,8 @@ void rankProfileItem(ParsedRankProfile profile) : { } | minHitsPerThread(profile) | numSearchPartitions(profile) | termwiseLimit(profile) + | postFilterThreshold(profile) + | approximateThreshold(profile) | rankFeatures(profile) | rankProperties(profile) | secondPhase(profile) @@ -2213,6 +2217,32 @@ void termwiseLimit(ParsedRankProfile profile) : } /** + * This rule consumes a post-filter-threshold statement for a rank profile. + * + * @param profile the rank profile to modify + */ +void postFilterThreshold(ParsedRankProfile profile) : +{ + double threshold; +} +{ + (<POSTFILTERTHRESHOLD> <COLON> threshold = consumeFloat()) { profile.setPostFilterThreshold(threshold); } +} + +/** + * This rule consumes an approximate-threshold statement for a rank profile. + * + * @param profile the rank profile to modify + */ +void approximateThreshold(ParsedRankProfile profile) : +{ + double threshold; +} +{ + (<APPROXIMATETHRESHOLD> <COLON> threshold = consumeFloat()) { profile.setApproximateThreshold(threshold); } +} + +/** * This rule consumes a rank-properties block of a rank profile. There * is a little trick within this rule to allow the final rank property * to skip the terminating newline token. diff --git a/config-model/src/main/javacc/SDParser.jj b/config-model/src/main/javacc/SDParser.jj index 81631a70dbd..0ff9513885f 100644 --- a/config-model/src/main/javacc/SDParser.jj +++ b/config-model/src/main/javacc/SDParser.jj @@ -361,6 +361,8 @@ TOKEN : | < MINHITSPERTHREAD: "min-hits-per-thread" > | < NUMSEARCHPARTITIONS: "num-search-partitions" > | < TERMWISELIMIT: "termwise-limit" > +| < POSTFILTERTHRESHOLD: "post-filter-threshold" > +| < APPROXIMATETHRESHOLD: "approximate-threshold" > | < KEEPRANKCOUNT: "keep-rank-count" > | < RANKSCOREDROPLIMIT: "rank-score-drop-limit" > | < CONSTANTS: "constants" > @@ -1974,6 +1976,8 @@ void rankProfileItem(RankProfile profile) : { } | minHitsPerThread(profile) | numSearchPartitions(profile) | termwiseLimit(profile) + | postFilterThreshold(profile) + | approximateThreshold(profile) | rankFeatures(profile) | rankProperties(profile) | secondPhase(profile) @@ -2332,7 +2336,7 @@ void numSearchPartitions(RankProfile profile) : } /** - * This rule consumes a num-threads-per-search statement for a rank profile. + * This rule consumes a termwise-limit statement for a rank profile. * * @param profile the rank profile to modify */ @@ -2343,6 +2347,33 @@ void termwiseLimit(RankProfile profile) : { (<TERMWISELIMIT> <COLON> num = consumeFloat()) { profile.setTermwiseLimit(num); } } + +/** + * This rule consumes a post-filter-threshold statement for a rank profile. + * + * @param profile the rank profile to modify + */ +void postFilterThreshold(RankProfile profile) : +{ + double threshold; +} +{ + (<POSTFILTERTHRESHOLD> <COLON> threshold = consumeFloat()) { profile.setPostFilterThreshold(threshold); } +} + +/** + * This rule consumes an approximate-threshold statement for a rank profile. + * + * @param profile the rank profile to modify + */ +void approximateThreshold(RankProfile profile) : +{ + double threshold; +} +{ + (<APPROXIMATETHRESHOLD> <COLON> threshold = consumeFloat()) { profile.setApproximateThreshold(threshold); } +} + /** * This rule consumes a rank-properties block of a rank profile. There is a little trick within this rule to allow the * final rank property to skip the terminating newline token. diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/RankProfileTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/RankProfileTestCase.java index 38ebb147cac..b89a2e09c76 100644 --- a/config-model/src/test/java/com/yahoo/searchdefinition/RankProfileTestCase.java +++ b/config-model/src/test/java/com/yahoo/searchdefinition/RankProfileTestCase.java @@ -385,4 +385,59 @@ public class RankProfileTestCase extends AbstractSchemaTestCase { return Optional.empty(); } + @Test + public void approximate_nearest_neighbor_threshold_settings_are_configurable() throws ParseException { + verifyApproximateNearestNeighborThresholdSettings(0.7, null); + verifyApproximateNearestNeighborThresholdSettings(null, 0.3); + verifyApproximateNearestNeighborThresholdSettings(0.7, 0.3); + } + + private void verifyApproximateNearestNeighborThresholdSettings(Double postFilterThreshold, Double approximateThreshold) throws ParseException { + verifyApproximateNearestNeighborThresholdSettings(postFilterThreshold, approximateThreshold, false); + verifyApproximateNearestNeighborThresholdSettings(postFilterThreshold, approximateThreshold, true); + } + + private void verifyApproximateNearestNeighborThresholdSettings(Double postFilterThreshold, Double approximateThreshold, + boolean experimentalSdParsing) throws ParseException { + var rankProfileRegistry = new RankProfileRegistry(); + var props = new TestProperties(); + props.setExperimentalSdParsing(experimentalSdParsing); + var queryProfileRegistry = new QueryProfileRegistry(); + var builder = new ApplicationBuilder(rankProfileRegistry, queryProfileRegistry, props); + builder.addSchema(createSDWithRankProfileThresholds(postFilterThreshold, approximateThreshold)); + builder.build(true); + + var schema = builder.getSchema(); + var rankProfile = rankProfileRegistry.get(schema, "my_profile"); + var rawRankProfile = new RawRankProfile(rankProfile, new LargeRankExpressions(new MockFileRegistry()), queryProfileRegistry, + new ImportedMlModels(), new AttributeFields(schema), props); + + if (postFilterThreshold != null) { + assertEquals((double)postFilterThreshold, rankProfile.getPostFilterThreshold().getAsDouble(), 0.000001); + assertEquals(String.valueOf(postFilterThreshold), findProperty(rawRankProfile.configProperties(), "vespa.matching.global_filter.upper_limit").get()); + } else { + assertTrue(rankProfile.getPostFilterThreshold().isEmpty()); + assertFalse(findProperty(rawRankProfile.configProperties(), "vespa.matching.global_filter.upper_limit").isPresent()); + } + + if (approximateThreshold != null) { + assertEquals((double)approximateThreshold, rankProfile.getApproximateThreshold().getAsDouble(), 0.000001); + assertEquals(String.valueOf(approximateThreshold), findProperty(rawRankProfile.configProperties(), "vespa.matching.global_filter.lower_limit").get()); + } else { + assertTrue(rankProfile.getApproximateThreshold().isEmpty()); + assertFalse(findProperty(rawRankProfile.configProperties(), "vespa.matching.global_filter.lower_limit").isPresent()); + } + } + + private String createSDWithRankProfileThresholds(Double postFilterThreshold, Double approximateThreshold) { + return joinLines( + "search test {", + " document test {}", + " rank-profile my_profile {", + (postFilterThreshold != null ? (" post-filter-threshold: " + postFilterThreshold) : ""), + (approximateThreshold != null ? (" approximate-threshold: " + approximateThreshold) : ""), + " }", + "}"); + } + } |