summaryrefslogtreecommitdiffstats
path: root/config-model/src/test/java/com/yahoo/schema/RankProfileTestCase.java
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2023-08-11 13:16:11 +0000
committerGeir Storli <geirst@yahooinc.com>2023-08-15 13:47:49 +0000
commit6fbe8e9a17f3bb90f8a8f539ad56308df601ac5b (patch)
treea4ef9b7f073b3fe91f53bfdb7d8d38cf89375cd8 /config-model/src/test/java/com/yahoo/schema/RankProfileTestCase.java
parent4902b1a4209eb26cfaa22c4527821be89566cc65 (diff)
Control the auto-adjustment of targetHits in ANN using post-filtering.
When searching the HNSW index in a post-filtering case, targetHits is auto-adjusted in an effort to still expose targetHits hits to first-phase ranking after post-filtering. The following formula is now used to ensure an upper bound of adjustedTargetHits, avoiding that the search in the HNSW index takes too long. adjustedTargetHits = min(targetHits / estimatedHitRatio, targetHits * targetHitsMaxAdjustmentFactor). The target-hits-max-adjustment-factor can be set in a rank profile and overridden per query. The value is in the range [1.0,inf], with the default being 20.0. When setting this to 1.0, auto-adjustment of targetHits is effectively disabled.
Diffstat (limited to 'config-model/src/test/java/com/yahoo/schema/RankProfileTestCase.java')
-rw-r--r--config-model/src/test/java/com/yahoo/schema/RankProfileTestCase.java59
1 files changed, 45 insertions, 14 deletions
diff --git a/config-model/src/test/java/com/yahoo/schema/RankProfileTestCase.java b/config-model/src/test/java/com/yahoo/schema/RankProfileTestCase.java
index 85225f0d255..380b458ea8c 100644
--- a/config-model/src/test/java/com/yahoo/schema/RankProfileTestCase.java
+++ b/config-model/src/test/java/com/yahoo/schema/RankProfileTestCase.java
@@ -459,17 +459,9 @@ public class RankProfileTestCase extends AbstractSchemaTestCase {
}
private void verifyApproximateNearestNeighborThresholdSettings(Double postFilterThreshold, Double approximateThreshold) throws ParseException {
- var rankProfileRegistry = new RankProfileRegistry();
- var props = new TestProperties();
- var queryProfileRegistry = new QueryProfileRegistry();
- var builder = new ApplicationBuilder(rankProfileRegistry, queryProfileRegistry, props);
- builder.addSchema(createSDWithRankProfileThresholds(postFilterThreshold, approximateThreshold));
- builder.build(true);
-
- var schema = builder.getSchema();
- var rankProfile = rankProfileRegistry.get(schema, "my_profile");
- var rawRankProfile = new RawRankProfile(rankProfile, new LargeRankingExpressions(new MockFileRegistry()), queryProfileRegistry,
- new ImportedMlModels(), new AttributeFields(schema), props);
+ var rp = createRankProfile(postFilterThreshold, approximateThreshold, null);
+ var rankProfile = rp.getFirst();
+ var rawRankProfile = rp.getSecond();
if (postFilterThreshold != null) {
assertEquals((double)postFilterThreshold, rankProfile.getPostFilterThreshold().getAsDouble(), 0.000001);
@@ -488,13 +480,52 @@ public class RankProfileTestCase extends AbstractSchemaTestCase {
}
}
- private String createSDWithRankProfileThresholds(Double postFilterThreshold, Double approximateThreshold) {
+ @Test
+ void target_hits_max_adjustment_factor_is_configurable() throws ParseException {
+ verifyTargetHitsMaxAdjustmentFactor(null);
+ verifyTargetHitsMaxAdjustmentFactor(2.0);
+ }
+
+ private void verifyTargetHitsMaxAdjustmentFactor(Double targetHitsMaxAdjustmentFactor) throws ParseException {
+ var rp = createRankProfile(null, null, targetHitsMaxAdjustmentFactor);
+ var rankProfile = rp.getFirst();
+ var rawRankProfile = rp.getSecond();
+ if (targetHitsMaxAdjustmentFactor != null) {
+ assertEquals((double)targetHitsMaxAdjustmentFactor, rankProfile.getTargetHitsMaxAdjustmentFactor().getAsDouble(), 0.000001);
+ assertEquals(String.valueOf(targetHitsMaxAdjustmentFactor), findProperty(rawRankProfile.configProperties(), "vespa.matching.nns.target_hits_max_adjustment_factor").get());
+ } else {
+ assertTrue(rankProfile.getTargetHitsMaxAdjustmentFactor().isEmpty());
+ assertFalse(findProperty(rawRankProfile.configProperties(), "vespa.matching.nns.target_hits_max_adjustment_factor").isPresent());
+ }
+ }
+
+ private Pair<RankProfile, RawRankProfile> createRankProfile(Double postFilterThreshold,
+ Double approximateThreshold,
+ Double targetHitsMaxAdjustmentFactor) throws ParseException {
+ var rankProfileRegistry = new RankProfileRegistry();
+ var props = new TestProperties();
+ var queryProfileRegistry = new QueryProfileRegistry();
+ var builder = new ApplicationBuilder(rankProfileRegistry, queryProfileRegistry, props);
+ builder.addSchema(createSDWithRankProfile(postFilterThreshold, approximateThreshold, targetHitsMaxAdjustmentFactor));
+ builder.build(true);
+
+ var schema = builder.getSchema();
+ var rankProfile = rankProfileRegistry.get(schema, "my_profile");
+ var rawRankProfile = new RawRankProfile(rankProfile, new LargeRankingExpressions(new MockFileRegistry()), queryProfileRegistry,
+ new ImportedMlModels(), new AttributeFields(schema), props);
+ return new Pair<>(rankProfile, rawRankProfile);
+ }
+
+ private String createSDWithRankProfile(Double postFilterThreshold,
+ Double approximateThreshold,
+ Double targetHitsMaxAdjustmentFactor) {
return joinLines(
"search test {",
" document test {}",
" rank-profile my_profile {",
- (postFilterThreshold != null ? (" post-filter-threshold: " + postFilterThreshold) : ""),
- (approximateThreshold != null ? (" approximate-threshold: " + approximateThreshold) : ""),
+ (postFilterThreshold != null ? (" post-filter-threshold: " + postFilterThreshold) : ""),
+ (approximateThreshold != null ? (" approximate-threshold: " + approximateThreshold) : ""),
+ (targetHitsMaxAdjustmentFactor != null ? (" target-hits-max-adjustment-factor: " + targetHitsMaxAdjustmentFactor) : ""),
" }",
"}");
}