diff options
author | Geir Storli <geirst@yahooinc.com> | 2022-04-22 14:37:30 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-22 14:37:30 +0200 |
commit | 17756ccc4bb3d6603f35480d8555adcafe0d49d4 (patch) | |
tree | 4c002f9e758bcd7062f895103f71a0bfaeb92337 | |
parent | cb0043f2d5d7d83972dcb7e666a6a0e2b6ad19f8 (diff) | |
parent | 3224bef1c9d8bef82d2bf1706db90b18644eaba4 (diff) |
Merge pull request #22217 from vespa-engine/geirst/improve-nearest-neighbor-visit-tracing
Improve nearest neighbor blueprint debug tracing
3 files changed, 20 insertions, 33 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 1d3305d2c1a..ec75a0d6d06 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -1041,7 +1041,6 @@ public: 100100.25, global_filter_lower_limit, 1.0); EXPECT_EQUAL(11u, bp->getState().estimate().estHits); - EXPECT_EQUAL(approximate, bp->may_approximate()); EXPECT_EQUAL(100100.25 * 100100.25, bp->get_distance_threshold()); return bp; } @@ -1068,7 +1067,6 @@ TEST_F("NN blueprint handles empty filter", NearestNeighborBlueprintFixture) auto empty_filter = GlobalFilter::create(); bp->set_global_filter(*empty_filter); EXPECT_EQUAL(3u, bp->getState().estimate().estHits); - EXPECT_TRUE(bp->may_approximate()); EXPECT_EQUAL(NNBA::INDEX_TOP_K, bp->get_algorithm()); } @@ -1081,7 +1079,6 @@ TEST_F("NN blueprint handles strong filter", NearestNeighborBlueprintFixture) auto strong_filter = GlobalFilter::create(std::move(filter)); bp->set_global_filter(*strong_filter); EXPECT_EQUAL(1u, bp->getState().estimate().estHits); - EXPECT_TRUE(bp->may_approximate()); EXPECT_EQUAL(NNBA::INDEX_TOP_K_WITH_FILTER, bp->get_algorithm()); } @@ -1099,7 +1096,6 @@ TEST_F("NN blueprint handles weak filter", NearestNeighborBlueprintFixture) auto weak_filter = GlobalFilter::create(std::move(filter)); bp->set_global_filter(*weak_filter); EXPECT_EQUAL(3u, bp->getState().estimate().estHits); - EXPECT_TRUE(bp->may_approximate()); EXPECT_EQUAL(NNBA::INDEX_TOP_K_WITH_FILTER, bp->get_algorithm()); } @@ -1112,7 +1108,6 @@ TEST_F("NN blueprint handles strong filter triggering brute force search", Neare auto strong_filter = GlobalFilter::create(std::move(filter)); bp->set_global_filter(*strong_filter); EXPECT_EQUAL(11u, bp->getState().estimate().estHits); - EXPECT_FALSE(bp->may_approximate()); EXPECT_EQUAL(NNBA::BRUTE_FORCE_FALLBACK, bp->get_algorithm()); } diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp index bdcbb3db633..73eaa773c53 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp @@ -89,6 +89,7 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f _found_hits(), _algorithm(Algorithm::BRUTE_FORCE), _global_filter(GlobalFilter::create()), + _global_filter_set(false), _global_filter_hits(), _global_filter_hit_ratio() { @@ -122,51 +123,41 @@ void NearestNeighborBlueprint::set_global_filter(const GlobalFilter &global_filter) { _global_filter = global_filter.shared_from_this(); + _global_filter_set = true; auto nns_index = _attr_tensor.nearest_neighbor_index(); - LOG(debug, "set_global_filter with: %s / %s / %s", - (_approximate ? "approximate" : "exact"), - (nns_index ? "nns_index" : "no_index"), - (_global_filter->has_filter() ? "has_filter" : "no_filter")); if (_approximate && nns_index) { uint32_t est_hits = _attr_tensor.get_num_docs(); if (_global_filter->has_filter()) { uint32_t max_hits = _global_filter->filter()->countTrueBits(); - LOG(debug, "set_global_filter getNumDocs: %u / max_hits %u", est_hits, max_hits); double max_hit_ratio = static_cast<double>(max_hits) / est_hits; if (max_hit_ratio < _global_filter_lower_limit) { - _approximate = false; _algorithm = Algorithm::BRUTE_FORCE_FALLBACK; - LOG(debug, "too many hits filtered out, using brute force implementation"); } else { est_hits = std::min(est_hits, max_hits); } _global_filter_hits = max_hits; _global_filter_hit_ratio = max_hit_ratio; } - if (_approximate) { + if (_algorithm != Algorithm::BRUTE_FORCE_FALLBACK) { est_hits = std::min(est_hits, _target_num_hits); setEstimate(HitEstimate(est_hits, false)); - perform_top_k(); - LOG(debug, "perform_top_k found %zu hits", _found_hits.size()); + perform_top_k(nns_index); } } } void -NearestNeighborBlueprint::perform_top_k() +NearestNeighborBlueprint::perform_top_k(const search::tensor::NearestNeighborIndex* nns_index) { - auto nns_index = _attr_tensor.nearest_neighbor_index(); - if (_approximate && nns_index) { - auto lhs = _query_tensor->cells(); - uint32_t k = _target_num_hits; - if (_global_filter->has_filter()) { - auto filter = _global_filter->filter(); - _found_hits = nns_index->find_top_k_with_filter(k, lhs, *filter, k + _explore_additional_hits, _distance_threshold); - _algorithm = Algorithm::INDEX_TOP_K_WITH_FILTER; - } else { - _found_hits = nns_index->find_top_k(k, lhs, k + _explore_additional_hits, _distance_threshold); - _algorithm = Algorithm::INDEX_TOP_K; - } + auto lhs = _query_tensor->cells(); + uint32_t k = _target_num_hits; + if (_global_filter->has_filter()) { + auto filter = _global_filter->filter(); + _found_hits = nns_index->find_top_k_with_filter(k, lhs, *filter, k + _explore_additional_hits, _distance_threshold); + _algorithm = Algorithm::INDEX_TOP_K_WITH_FILTER; + } else { + _found_hits = nns_index->find_top_k(k, lhs, k + _explore_additional_hits, _distance_threshold); + _algorithm = Algorithm::INDEX_TOP_K; } } @@ -191,14 +182,15 @@ NearestNeighborBlueprint::visitMembers(vespalib::ObjectVisitor& visitor) const visitor.visitString("query_tensor", _query_tensor->type().to_spec()); visitor.visitInt("target_num_hits", _target_num_hits); visitor.visitInt("explore_additional_hits", _explore_additional_hits); - visitor.visitBool("approximate", _approximate); + visitor.visitBool("wanted_approximate", _approximate); visitor.visitBool("has_index", _attr_tensor.nearest_neighbor_index()); visitor.visitString("algorithm", to_string(_algorithm)); visitor.visitInt("top_k_hits", _found_hits.size()); visitor.openStruct("global_filter", "GlobalFilter"); - visitor.visitBool("is_set", (_global_filter != nullptr)); - visitor.visitBool("has_filter", (_global_filter && _global_filter->has_filter())); + visitor.visitBool("wanted", getState().want_global_filter()); + visitor.visitBool("set", _global_filter_set); + visitor.visitBool("calculated", _global_filter->has_filter()); visitor.visitFloat("lower_limit", _global_filter_lower_limit); visitor.visitFloat("upper_limit", _global_filter_upper_limit); if (_global_filter_hits.has_value()) { diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h index 7922036dc42..7637c4dd6b7 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h @@ -41,10 +41,11 @@ private: std::vector<search::tensor::NearestNeighborIndex::Neighbor> _found_hits; Algorithm _algorithm; std::shared_ptr<const GlobalFilter> _global_filter; + bool _global_filter_set; std::optional<uint32_t> _global_filter_hits; std::optional<double> _global_filter_hit_ratio; - void perform_top_k(); + void perform_top_k(const search::tensor::NearestNeighborIndex* nns_index); public: NearestNeighborBlueprint(const queryeval::FieldSpec& field, const tensor::ITensorAttribute& attr_tensor, @@ -60,7 +61,6 @@ public: const vespalib::eval::Value& get_query_tensor() const { return *_query_tensor; } uint32_t get_target_num_hits() const { return _target_num_hits; } void set_global_filter(const GlobalFilter &global_filter) override; - bool may_approximate() const { return _approximate; } Algorithm get_algorithm() const { return _algorithm; } double get_distance_threshold() const { return _distance_threshold; } |