summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeir Storli <geirst@verizonmedia.com>2020-06-23 15:37:52 +0200
committerGitHub <noreply@github.com>2020-06-23 15:37:52 +0200
commitd7b5964ed9d54346e4f1b6166e02e70265c8b70a (patch)
tree1c8c9eede9b153c98e64b94f0bfc6839a1e915b9
parent7a7279d2980cd4099b8070cbf0c298dc8d666301 (diff)
parenta5a3a8effee04aa36c2c6af046df876b488bf980 (diff)
Merge pull request #13665 from vespa-engine/toregge/check-brute-force-limit-in-nearest-neighbor-blueprint
Check brute force limit in nearest neighbor blueprint.
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp16
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp19
2 files changed, 26 insertions, 9 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index 6608959662f..c698a1d612b 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -887,13 +887,13 @@ public:
return std::unique_ptr<QueryTensor>(tensor);
}
- std::unique_ptr<NearestNeighborBlueprint> make_blueprint() {
+ std::unique_ptr<NearestNeighborBlueprint> make_blueprint(double brute_force_limit = 0.05) {
search::queryeval::FieldSpec field("foo", 0, 0);
auto bp = std::make_unique<NearestNeighborBlueprint>(
field,
as_dense_tensor(),
createDenseTensor(vec_2d(17, 42)),
- 3, true, 5, 0.05);
+ 3, true, 5, brute_force_limit);
EXPECT_EQUAL(11u, bp->getState().estimate().estHits);
EXPECT_TRUE(bp->may_approximate());
return bp;
@@ -938,4 +938,16 @@ TEST_F("NN blueprint handles weak filter", NearestNeighborBlueprintFixture)
EXPECT_TRUE(bp->may_approximate());
}
+TEST_F("NN blueprint handles strong filter triggering brute force search", NearestNeighborBlueprintFixture)
+{
+ auto bp = f.make_blueprint(0.2);
+ auto filter = search::BitVector::create(11);
+ filter->setBit(3);
+ filter->invalidateCachedCount();
+ auto strong_filter = GlobalFilter::create(std::move(filter));
+ bp->set_global_filter(*strong_filter);
+ EXPECT_EQUAL(11u, bp->getState().estimate().estHits);
+ EXPECT_FALSE(bp->may_approximate());
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
index fcf8b78056d..2cf6e7e29c4 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
@@ -98,15 +98,20 @@ NearestNeighborBlueprint::set_global_filter(const GlobalFilter &global_filter)
if (_global_filter->has_filter()) {
uint32_t max_hits = _global_filter->filter()->countTrueBits();
LOG(debug, "set_global_filter getNumDocs: %u / max_hits %u", est_hits, max_hits);
- if (max_hits * 10 < est_hits) {
- LOG(debug, "too many hits filtered out, consider using brute force implementation");
+ double max_hit_ratio = static_cast<double>(max_hits) / est_hits;
+ if (max_hit_ratio < _brute_force_limit) {
+ _approximate = false;
+ LOG(debug, "too many hits filtered out, using brute force implementation");
+ } else {
+ est_hits = std::min(est_hits, max_hits);
}
- est_hits = std::min(est_hits, max_hits);
}
- est_hits = std::min(est_hits, _target_num_hits);
- setEstimate(HitEstimate(est_hits, false));
- perform_top_k();
- LOG(debug, "perform_top_k found %zu hits", _found_hits.size());
+ if (_approximate) {
+ est_hits = std::min(est_hits, _target_num_hits);
+ setEstimate(HitEstimate(est_hits, false));
+ perform_top_k();
+ LOG(debug, "perform_top_k found %zu hits", _found_hits.size());
+ }
}
}