diff options
author | Geir Storli <geirst@verizonmedia.com> | 2020-06-23 15:37:52 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-23 15:37:52 +0200 |
commit | d7b5964ed9d54346e4f1b6166e02e70265c8b70a (patch) | |
tree | 1c8c9eede9b153c98e64b94f0bfc6839a1e915b9 | |
parent | 7a7279d2980cd4099b8070cbf0c298dc8d666301 (diff) | |
parent | a5a3a8effee04aa36c2c6af046df876b488bf980 (diff) |
Merge pull request #13665 from vespa-engine/toregge/check-brute-force-limit-in-nearest-neighbor-blueprint
Check brute force limit in nearest neighbor blueprint.
-rw-r--r-- | searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp | 16 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp | 19 |
2 files changed, 26 insertions, 9 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 6608959662f..c698a1d612b 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -887,13 +887,13 @@ public: return std::unique_ptr<QueryTensor>(tensor); } - std::unique_ptr<NearestNeighborBlueprint> make_blueprint() { + std::unique_ptr<NearestNeighborBlueprint> make_blueprint(double brute_force_limit = 0.05) { search::queryeval::FieldSpec field("foo", 0, 0); auto bp = std::make_unique<NearestNeighborBlueprint>( field, as_dense_tensor(), createDenseTensor(vec_2d(17, 42)), - 3, true, 5, 0.05); + 3, true, 5, brute_force_limit); EXPECT_EQUAL(11u, bp->getState().estimate().estHits); EXPECT_TRUE(bp->may_approximate()); return bp; @@ -938,4 +938,16 @@ TEST_F("NN blueprint handles weak filter", NearestNeighborBlueprintFixture) EXPECT_TRUE(bp->may_approximate()); } +TEST_F("NN blueprint handles strong filter triggering brute force search", NearestNeighborBlueprintFixture) +{ + auto bp = f.make_blueprint(0.2); + auto filter = search::BitVector::create(11); + filter->setBit(3); + filter->invalidateCachedCount(); + auto strong_filter = GlobalFilter::create(std::move(filter)); + bp->set_global_filter(*strong_filter); + EXPECT_EQUAL(11u, bp->getState().estimate().estHits); + EXPECT_FALSE(bp->may_approximate()); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp index fcf8b78056d..2cf6e7e29c4 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp @@ -98,15 +98,20 @@ NearestNeighborBlueprint::set_global_filter(const GlobalFilter &global_filter) if (_global_filter->has_filter()) { uint32_t max_hits = _global_filter->filter()->countTrueBits(); LOG(debug, "set_global_filter getNumDocs: %u / max_hits %u", est_hits, max_hits); - if (max_hits * 10 < est_hits) { - LOG(debug, "too many hits filtered out, consider using brute force implementation"); + double max_hit_ratio = static_cast<double>(max_hits) / est_hits; + if (max_hit_ratio < _brute_force_limit) { + _approximate = false; + LOG(debug, "too many hits filtered out, using brute force implementation"); + } else { + est_hits = std::min(est_hits, max_hits); } - est_hits = std::min(est_hits, max_hits); } - est_hits = std::min(est_hits, _target_num_hits); - setEstimate(HitEstimate(est_hits, false)); - perform_top_k(); - LOG(debug, "perform_top_k found %zu hits", _found_hits.size()); + if (_approximate) { + est_hits = std::min(est_hits, _target_num_hits); + setEstimate(HitEstimate(est_hits, false)); + perform_top_k(); + LOG(debug, "perform_top_k found %zu hits", _found_hits.size()); + } } } |