summaryrefslogtreecommitdiffstats
path: root/config-model/src/main/javacc
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2023-08-11 13:16:11 +0000
committerGeir Storli <geirst@yahooinc.com>2023-08-15 13:47:49 +0000
commit6fbe8e9a17f3bb90f8a8f539ad56308df601ac5b (patch)
treea4ef9b7f073b3fe91f53bfdb7d8d38cf89375cd8 /config-model/src/main/javacc
parent4902b1a4209eb26cfaa22c4527821be89566cc65 (diff)
Control the auto-adjustment of targetHits in ANN using post-filtering.
When searching the HNSW index in a post-filtering case, targetHits is auto-adjusted in an effort to still expose targetHits hits to first-phase ranking after post-filtering. The following formula is now used to ensure an upper bound of adjustedTargetHits, avoiding that the search in the HNSW index takes too long. adjustedTargetHits = min(targetHits / estimatedHitRatio, targetHits * targetHitsMaxAdjustmentFactor). The target-hits-max-adjustment-factor can be set in a rank profile and overriden per query. The value is in the range [1.0,inf], with the default being 20.0. When setting this to 1.0, auto-adjustment of targetHits is effectively disabled.
Diffstat (limited to 'config-model/src/main/javacc')
-rw-r--r--config-model/src/main/javacc/SchemaParser.jj16
1 files changed, 16 insertions, 0 deletions
diff --git a/config-model/src/main/javacc/SchemaParser.jj b/config-model/src/main/javacc/SchemaParser.jj
index b2cb258c0ab..42eeabb5ac7 100644
--- a/config-model/src/main/javacc/SchemaParser.jj
+++ b/config-model/src/main/javacc/SchemaParser.jj
@@ -326,6 +326,7 @@ TOKEN :
| < TERMWISE_LIMIT: "termwise-limit" >
| < POST_FILTER_THRESHOLD: "post-filter-threshold" >
| < APPROXIMATE_THRESHOLD: "approximate-threshold" >
+| < TARGET_HITS_MAX_ADJUSTMENT_FACTOR: "target-hits-max-adjustment-factor" >
| < KEEP_RANK_COUNT: "keep-rank-count" >
| < RANK_SCORE_DROP_LIMIT: "rank-score-drop-limit" >
| < CONSTANTS: "constants" >
@@ -1727,6 +1728,7 @@ void rankProfileItem(ParsedSchema schema, ParsedRankProfile profile) : { }
| termwiseLimit(profile)
| postFilterThreshold(profile)
| approximateThreshold(profile)
+ | targetHitsMaxAdjustmentFactor(profile)
| rankFeatures(profile)
| rankProperties(profile)
| secondPhase(profile)
@@ -2190,6 +2192,19 @@ void approximateThreshold(ParsedRankProfile profile) :
}
/**
+ * This rule consumes a target-hits-max-adjustment-factor statement for a rank profile.
+ *
+ * @param profile the rank profile to modify
+ */
+void targetHitsMaxAdjustmentFactor(ParsedRankProfile profile) :
+{
+ double factor;
+}
+{
+ (<TARGET_HITS_MAX_ADJUSTMENT_FACTOR> <COLON> factor = floatValue()) { profile.setTargetHitsMaxAdjustmentFactor(factor); }
+}
+
+/**
* Consumes a rank-properties block of a rank profile. There
* is a little trick within this rule to allow the final rank property
* to skip the terminating newline token.
@@ -2641,6 +2656,7 @@ String identifierWithDash() :
| <SECOND_PHASE>
| <STRUCT_FIELD>
| <SUMMARY_TO>
+ | <TARGET_HITS_MAX_ADJUSTMENT_FACTOR>
| <TERMWISE_LIMIT>
| <UPPER_BOUND>
) { return token.image; }