summary refs log tree commit diff stats
path: root/config-model
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2022-04-29 11:45:16 +0000
committer Geir Storli <geirst@yahooinc.com> 2022-04-29 12:46:22 +0000
commit 4541333bede340aa23572ec2b626fc0d14df7edd (patch)
tree d1365751053fd9cc7296a41397a0ee74174b01bb /config-model
parent 5b5f725ec9dd5ec81539e4d315e1d7adf054e0d1 (diff)
Add support for approximate nearest neighbor threshold settings in rank profiles.
Diffstat (limited to 'config-model')
-rw-r--r-- config-model/src/main/java/com/yahoo/searchdefinition/RankProfile.java 18
-rw-r--r-- config-model/src/main/java/com/yahoo/searchdefinition/derived/RawRankProfile.java 11
-rw-r--r-- config-model/src/main/java/com/yahoo/searchdefinition/parser/ConvertParsedRanking.java 4
-rw-r--r-- config-model/src/main/java/com/yahoo/searchdefinition/parser/ParsedRankProfile.java 17
-rw-r--r-- config-model/src/main/javacc/IntermediateParser.jj 30
-rw-r--r-- config-model/src/main/javacc/SDParser.jj 33
-rw-r--r-- config-model/src/test/java/com/yahoo/searchdefinition/RankProfileTestCase.java 55
7 files changed, 165 insertions, 3 deletions
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/RankProfile.java b/config-model/src/main/java/com/yahoo/searchdefinition/RankProfile.java
index 029c0efb55f..9c802075462 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/RankProfile.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/RankProfile.java
@@ -93,6 +93,8 @@ public class RankProfile implements Cloneable {
private int numSearchPartitions = -1;
private Double termwiseLimit = null;
+ private Double postFilterThreshold = null;
+ private Double approximateThreshold = null;
/** The drop limit used to drop hits with rank score less than or equal to this value */
private double rankScoreDropLimit = -Double.MAX_VALUE;
@@ -688,6 +690,8 @@ public class RankProfile implements Cloneable {
}
public void setTermwiseLimit(double termwiseLimit) { this.termwiseLimit = termwiseLimit; }
+ public void setPostFilterThreshold(double threshold) { postFilterThreshold = threshold; } // boxed into the nullable field
+ public void setApproximateThreshold(double threshold) { approximateThreshold = threshold; } // boxed into the nullable field
public OptionalDouble getTermwiseLimit() {
if (termwiseLimit != null) return OptionalDouble.of(termwiseLimit);
@@ -695,6 +699,20 @@ public class RankProfile implements Cloneable {
.orElse(OptionalDouble.empty());
}
+ public OptionalDouble getPostFilterThreshold() {
+     if (postFilterThreshold == null) { // not set on this profile: defer to uniquelyInherited, defaulting to empty
+         return uniquelyInherited(profile -> profile.getPostFilterThreshold(), value -> value.isPresent(), "post-filter-threshold").orElse(OptionalDouble.empty());
+     }
+     return OptionalDouble.of(postFilterThreshold); // a value set on this profile takes precedence
+ }
+
+ public OptionalDouble getApproximateThreshold() {
+     if (approximateThreshold == null) { // not set on this profile: defer to uniquelyInherited, defaulting to empty
+         return uniquelyInherited(profile -> profile.getApproximateThreshold(), value -> value.isPresent(), "approximate-threshold").orElse(OptionalDouble.empty());
+     }
+     return OptionalDouble.of(approximateThreshold); // a value set on this profile takes precedence
+ }
+
/** Whether we should ignore the default rank features. Set to null to use inherited */
public void setIgnoreDefaultRankFeatures(Boolean ignoreDefaultRankFeatures) {
this.ignoreDefaultRankFeatures = ignoreDefaultRankFeatures;
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/derived/RawRankProfile.java b/config-model/src/main/java/com/yahoo/searchdefinition/derived/RawRankProfile.java
index dba5397c6c3..3c14a2b9c63 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/derived/RawRankProfile.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/derived/RawRankProfile.java
@@ -33,6 +33,7 @@ import java.util.LinkedHashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
+import java.util.OptionalDouble;
import java.util.Set;
import java.util.stream.Collectors;
@@ -134,6 +135,8 @@ public class RawRankProfile implements RankProfilesConfig.Producer {
private final int minHitsPerThread;
private final int numSearchPartitions;
private final double termwiseLimit;
+ private final OptionalDouble postFilterThreshold;
+ private final OptionalDouble approximateThreshold;
private final double rankScoreDropLimit;
private final boolean mapBackRankingExpressionFeatures;
@@ -170,6 +173,8 @@ public class RawRankProfile implements RankProfilesConfig.Producer {
minHitsPerThread = compiled.getMinHitsPerThread();
numSearchPartitions = compiled.getNumSearchPartitions();
termwiseLimit = compiled.getTermwiseLimit().orElse(deployProperties.featureFlags().defaultTermwiseLimit());
+ postFilterThreshold = compiled.getPostFilterThreshold();
+ approximateThreshold = compiled.getApproximateThreshold();
keepRankCount = compiled.getKeepRankCount();
rankScoreDropLimit = compiled.getRankScoreDropLimit();
mapBackRankingExpressionFeatures = deployProperties.featureFlags().avoidRenamingSummaryFeatures();
@@ -382,6 +387,12 @@ public class RawRankProfile implements RankProfilesConfig.Producer {
if (termwiseLimit < 1.0) {
properties.add(new Pair<>("vespa.matching.termwise_limit", termwiseLimit + ""));
}
+ if (postFilterThreshold.isPresent()) {
+ properties.add(new Pair<>("vespa.matching.global_filter.upper_limit", String.valueOf(postFilterThreshold.getAsDouble())));
+ }
+ if (approximateThreshold.isPresent()) {
+ properties.add(new Pair<>("vespa.matching.global_filter.lower_limit", String.valueOf(approximateThreshold.getAsDouble())));
+ }
if (matchPhaseSettings != null) {
properties.add(new Pair<>("vespa.matchphase.degradation.attribute", matchPhaseSettings.getAttribute()));
properties.add(new Pair<>("vespa.matchphase.degradation.ascendingorder", matchPhaseSettings.getAscending() + ""));
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/parser/ConvertParsedRanking.java b/config-model/src/main/java/com/yahoo/searchdefinition/parser/ConvertParsedRanking.java
index d151776dd85..b50d5bf0cf8 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/parser/ConvertParsedRanking.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/parser/ConvertParsedRanking.java
@@ -70,6 +70,10 @@ public class ConvertParsedRanking {
(value -> profile.setRankScoreDropLimit(value));
parsed.getTermwiseLimit().ifPresent
(value -> profile.setTermwiseLimit(value));
+ parsed.getPostFilterThreshold().ifPresent
+ (value -> profile.setPostFilterThreshold(value));
+ parsed.getApproximateThreshold().ifPresent
+ (value -> profile.setApproximateThreshold(value));
parsed.getKeepRankCount().ifPresent
(value -> profile.setKeepRankCount(value));
parsed.getMinHitsPerThread().ifPresent
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/parser/ParsedRankProfile.java b/config-model/src/main/java/com/yahoo/searchdefinition/parser/ParsedRankProfile.java
index 1918f31749d..118945369d3 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/parser/ParsedRankProfile.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/parser/ParsedRankProfile.java
@@ -28,6 +28,8 @@ class ParsedRankProfile extends ParsedBlock {
private boolean ignoreDefaultRankFeatures = false;
private Double rankScoreDropLimit = null;
private Double termwiseLimit = null;
+ private Double postFilterThreshold = null;
+ private Double approximateThreshold = null;
private final List<FeatureList> matchFeatures = new ArrayList<>();
private final List<FeatureList> rankFeatures = new ArrayList<>();
private final List<FeatureList> summaryFeatures = new ArrayList<>();
@@ -59,6 +61,8 @@ class ParsedRankProfile extends ParsedBlock {
boolean getIgnoreDefaultRankFeatures() { return this.ignoreDefaultRankFeatures; }
Optional<Double> getRankScoreDropLimit() { return Optional.ofNullable(this.rankScoreDropLimit); }
Optional<Double> getTermwiseLimit() { return Optional.ofNullable(this.termwiseLimit); }
+ Optional<Double> getPostFilterThreshold() { return Optional.ofNullable(postFilterThreshold); } // empty while the field is null
+ Optional<Double> getApproximateThreshold() { return Optional.ofNullable(approximateThreshold); } // empty while the field is null
List<FeatureList> getMatchFeatures() { return List.copyOf(this.matchFeatures); }
List<FeatureList> getRankFeatures() { return List.copyOf(this.rankFeatures); }
List<FeatureList> getSummaryFeatures() { return List.copyOf(this.summaryFeatures); }
@@ -202,6 +206,15 @@ class ParsedRankProfile extends ParsedBlock {
verifyThat(termwiseLimit == null, "already has termwise-limit");
this.termwiseLimit = limit;
}
-
-
+
+ void setPostFilterThreshold(double threshold) {
+     verifyThat(this.postFilterThreshold == null, "already has post-filter-threshold"); // condition holds only while unset
+     postFilterThreshold = Double.valueOf(threshold);
+ }
+
+ void setApproximateThreshold(double threshold) {
+     verifyThat(this.approximateThreshold == null, "already has approximate-threshold"); // condition holds only while unset
+     approximateThreshold = Double.valueOf(threshold);
+ }
+
}
diff --git a/config-model/src/main/javacc/IntermediateParser.jj b/config-model/src/main/javacc/IntermediateParser.jj
index 91dd5867307..d2d27a7a2d4 100644
--- a/config-model/src/main/javacc/IntermediateParser.jj
+++ b/config-model/src/main/javacc/IntermediateParser.jj
@@ -331,6 +331,8 @@ TOKEN :
| < MINHITSPERTHREAD: "min-hits-per-thread" >
| < NUMSEARCHPARTITIONS: "num-search-partitions" >
| < TERMWISELIMIT: "termwise-limit" >
+| < POSTFILTERTHRESHOLD: "post-filter-threshold" >
+| < APPROXIMATETHRESHOLD: "approximate-threshold" >
| < KEEPRANKCOUNT: "keep-rank-count" >
| < RANKSCOREDROPLIMIT: "rank-score-drop-limit" >
| < CONSTANTS: "constants" >
@@ -1834,6 +1836,8 @@ void rankProfileItem(ParsedRankProfile profile) : { }
| minHitsPerThread(profile)
| numSearchPartitions(profile)
| termwiseLimit(profile)
+ | postFilterThreshold(profile)
+ | approximateThreshold(profile)
| rankFeatures(profile)
| rankProperties(profile)
| secondPhase(profile)
@@ -2213,6 +2217,32 @@ void termwiseLimit(ParsedRankProfile profile) :
}
/**
+ * This rule consumes a post-filter-threshold statement for a rank profile.
+ *
+ * @param profile the rank profile to modify
+ */
+void postFilterThreshold(ParsedRankProfile profile) :
+{
+    double value; // receives the result of consumeFloat()
+}
+{
+    <POSTFILTERTHRESHOLD> <COLON> value = consumeFloat() { profile.setPostFilterThreshold(value); }
+}
+
+/**
+ * This rule consumes an approximate-threshold statement for a rank profile.
+ *
+ * @param profile the rank profile to modify
+ */
+void approximateThreshold(ParsedRankProfile profile) :
+{
+    double value; // receives the result of consumeFloat()
+}
+{
+    <APPROXIMATETHRESHOLD> <COLON> value = consumeFloat() { profile.setApproximateThreshold(value); }
+}
+
+/**
* This rule consumes a rank-properties block of a rank profile. There
* is a little trick within this rule to allow the final rank property
* to skip the terminating newline token.
diff --git a/config-model/src/main/javacc/SDParser.jj b/config-model/src/main/javacc/SDParser.jj
index 81631a70dbd..0ff9513885f 100644
--- a/config-model/src/main/javacc/SDParser.jj
+++ b/config-model/src/main/javacc/SDParser.jj
@@ -361,6 +361,8 @@ TOKEN :
| < MINHITSPERTHREAD: "min-hits-per-thread" >
| < NUMSEARCHPARTITIONS: "num-search-partitions" >
| < TERMWISELIMIT: "termwise-limit" >
+| < POSTFILTERTHRESHOLD: "post-filter-threshold" >
+| < APPROXIMATETHRESHOLD: "approximate-threshold" >
| < KEEPRANKCOUNT: "keep-rank-count" >
| < RANKSCOREDROPLIMIT: "rank-score-drop-limit" >
| < CONSTANTS: "constants" >
@@ -1974,6 +1976,8 @@ void rankProfileItem(RankProfile profile) : { }
| minHitsPerThread(profile)
| numSearchPartitions(profile)
| termwiseLimit(profile)
+ | postFilterThreshold(profile)
+ | approximateThreshold(profile)
| rankFeatures(profile)
| rankProperties(profile)
| secondPhase(profile)
@@ -2332,7 +2336,7 @@ void numSearchPartitions(RankProfile profile) :
}
/**
- * This rule consumes a num-threads-per-search statement for a rank profile.
+ * This rule consumes a termwise-limit statement for a rank profile.
*
* @param profile the rank profile to modify
*/
@@ -2343,6 +2347,33 @@ void termwiseLimit(RankProfile profile) :
{
(<TERMWISELIMIT> <COLON> num = consumeFloat()) { profile.setTermwiseLimit(num); }
}
+
+/**
+ * This rule consumes a post-filter-threshold statement for a rank profile.
+ *
+ * @param profile the rank profile to modify
+ */
+void postFilterThreshold(RankProfile profile) :
+{
+    double value; // receives the result of consumeFloat()
+}
+{
+    <POSTFILTERTHRESHOLD> <COLON> value = consumeFloat() { profile.setPostFilterThreshold(value); }
+}
+
+/**
+ * This rule consumes an approximate-threshold statement for a rank profile.
+ *
+ * @param profile the rank profile to modify
+ */
+void approximateThreshold(RankProfile profile) :
+{
+    double value; // receives the result of consumeFloat()
+}
+{
+    <APPROXIMATETHRESHOLD> <COLON> value = consumeFloat() { profile.setApproximateThreshold(value); }
+}
+
/**
* This rule consumes a rank-properties block of a rank profile. There is a little trick within this rule to allow the
* final rank property to skip the terminating newline token.
diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/RankProfileTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/RankProfileTestCase.java
index 38ebb147cac..b89a2e09c76 100644
--- a/config-model/src/test/java/com/yahoo/searchdefinition/RankProfileTestCase.java
+++ b/config-model/src/test/java/com/yahoo/searchdefinition/RankProfileTestCase.java
@@ -385,4 +385,59 @@ public class RankProfileTestCase extends AbstractSchemaTestCase {
return Optional.empty();
}
+ @Test
+ public void approximate_nearest_neighbor_threshold_settings_are_configurable() throws ParseException {
+ verifyApproximateNearestNeighborThresholdSettings(0.7, null);
+ verifyApproximateNearestNeighborThresholdSettings(null, 0.3);
+ verifyApproximateNearestNeighborThresholdSettings(0.7, 0.3);
+ }
+
+ private void verifyApproximateNearestNeighborThresholdSettings(Double postFilterThreshold, Double approximateThreshold) throws ParseException {
+     for (boolean experimentalSdParsing : new boolean[] { false, true }) // check with the experimental-sd-parsing flag off, then on
+         verifyApproximateNearestNeighborThresholdSettings(postFilterThreshold, approximateThreshold, experimentalSdParsing);
+ }
+
+ private void verifyApproximateNearestNeighborThresholdSettings(Double postFilterThreshold, Double approximateThreshold, boolean experimentalSdParsing) throws ParseException {
+     // Build an application from a generated schema that declares only the non-null thresholds.
+     var properties = new TestProperties();
+     properties.setExperimentalSdParsing(experimentalSdParsing);
+     var rankProfiles = new RankProfileRegistry();
+     var queryProfiles = new QueryProfileRegistry();
+     var applicationBuilder = new ApplicationBuilder(rankProfiles, queryProfiles, properties);
+     applicationBuilder.addSchema(createSDWithRankProfileThresholds(postFilterThreshold, approximateThreshold));
+     applicationBuilder.build(true);
+
+     // Fetch the parsed profile and wrap it in a RawRankProfile to inspect the emitted config properties.
+     var schema = applicationBuilder.getSchema();
+     var profile = rankProfiles.get(schema, "my_profile");
+     var rawProfile = new RawRankProfile(profile, new LargeRankExpressions(new MockFileRegistry()), queryProfiles, new ImportedMlModels(), new AttributeFields(schema), properties);
+
+     if (postFilterThreshold == null) {
+         assertTrue(profile.getPostFilterThreshold().isEmpty());
+         assertFalse(findProperty(rawProfile.configProperties(), "vespa.matching.global_filter.upper_limit").isPresent());
+     } else {
+         assertEquals(postFilterThreshold.doubleValue(), profile.getPostFilterThreshold().getAsDouble(), 0.000001);
+         assertEquals(String.valueOf(postFilterThreshold), findProperty(rawProfile.configProperties(), "vespa.matching.global_filter.upper_limit").get());
+     }
+
+     if (approximateThreshold == null) {
+         assertTrue(profile.getApproximateThreshold().isEmpty());
+         assertFalse(findProperty(rawProfile.configProperties(), "vespa.matching.global_filter.lower_limit").isPresent());
+     } else {
+         assertEquals(approximateThreshold.doubleValue(), profile.getApproximateThreshold().getAsDouble(), 0.000001);
+         assertEquals(String.valueOf(approximateThreshold), findProperty(rawProfile.configProperties(), "vespa.matching.global_filter.lower_limit").get());
+     }
+ }
+
+ private String createSDWithRankProfileThresholds(Double postFilterThreshold, Double approximateThreshold) {
+     // A threshold passed as null contributes an empty string instead of a setting line.
+     String postFilterLine = (postFilterThreshold == null) ? "" : " post-filter-threshold: " + postFilterThreshold;
+     String approximateLine = (approximateThreshold == null) ? "" : " approximate-threshold: " + approximateThreshold;
+     return joinLines(
+             "search test {", " document test {}",
+             " rank-profile my_profile {",
+             postFilterLine, approximateLine,
+             " }", "}");
+ }
+
}