aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/tests/attribute
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2023-08-11 13:16:11 +0000
committerGeir Storli <geirst@yahooinc.com>2023-08-15 13:47:49 +0000
commit6fbe8e9a17f3bb90f8a8f539ad56308df601ac5b (patch)
treea4ef9b7f073b3fe91f53bfdb7d8d38cf89375cd8 /searchlib/src/tests/attribute
parent4902b1a4209eb26cfaa22c4527821be89566cc65 (diff)
Control the auto-adjustment of targetHits in ANN using post-filtering.
When searching the HNSW index in a post-filtering case, targetHits is auto-adjusted in an effort to still expose targetHits hits to first-phase ranking after post-filtering. The following formula is now used to ensure an upper bound of adjustedTargetHits, avoiding that the search in the HNSW index takes too long. adjustedTargetHits = min(targetHits / estimatedHitRatio, targetHits * targetHitsMaxAdjustmentFactor). The target-hits-max-adjustment-factor can be set in a rank profile and overridden per query. The value is in the range [1.0,inf], with the default being 20.0. When setting this to 1.0, auto-adjustment of targetHits is effectively disabled.
Diffstat (limited to 'searchlib/src/tests/attribute')
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp22
1 files changed, 18 insertions, 4 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index 6ca7d298ee2..0475f8462fc 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -1320,15 +1320,16 @@ public:
return *_query_tensor;
}
- std::unique_ptr<NearestNeighborBlueprint> make_blueprint(bool approximate = true, double global_filter_lower_limit = 0.05) {
+ std::unique_ptr<NearestNeighborBlueprint> make_blueprint(bool approximate = true,
+ double global_filter_lower_limit = 0.05,
+ double target_hits_max_adjustment_factor = 20.0) {
search::queryeval::FieldSpec field("foo", 0, 0);
auto bp = std::make_unique<NearestNeighborBlueprint>(
field,
std::make_unique<DistanceCalculator>(this->as_dense_tensor(),
create_query_tensor(vec_2d(17, 42))),
- 3, approximate, 5,
- 100100.25,
- global_filter_lower_limit, 1.0, _no_doom.get_doom());
+ 3, approximate, 5, 100100.25,
+ global_filter_lower_limit, 1.0, target_hits_max_adjustment_factor, _no_doom.get_doom());
EXPECT_EQUAL(11u, bp->getState().estimate().estHits);
EXPECT_EQUAL(100100.25 * 100100.25, bp->get_distance_threshold());
return bp;
@@ -1362,6 +1363,19 @@ TEST_F("NN blueprint handles empty filter (post-filtering)", NearestNeighborBlue
EXPECT_EQUAL(NNBA::INDEX_TOP_K, bp->get_algorithm());
}
+TEST_F("NN blueprint adjustment of targetHits is bound (post-filtering)", NearestNeighborBlueprintFixture)
+{
+ auto bp = f.make_blueprint(true, 0.05, 3.5);
+ auto empty_filter = GlobalFilter::create();
+ bp->set_global_filter(*empty_filter, 0.2);
+ // targetHits is adjusted based on the estimated hit ratio of the query,
+ // but bound by target-hits-max-adjustment-factor
+ EXPECT_EQUAL(3u, bp->get_target_hits());
+ EXPECT_EQUAL(10u, bp->get_adjusted_target_hits());
+ EXPECT_EQUAL(10u, bp->getState().estimate().estHits);
+ EXPECT_EQUAL(NNBA::INDEX_TOP_K, bp->get_algorithm());
+}
+
TEST_F("NN blueprint handles strong filter (pre-filtering)", NearestNeighborBlueprintFixture)
{
auto bp = f.make_blueprint();