summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2022-04-22 14:37:30 +0200
committer GitHub <noreply@github.com> 2022-04-22 14:37:30 +0200
commit 17756ccc4bb3d6603f35480d8555adcafe0d49d4 (patch)
tree 4c002f9e758bcd7062f895103f71a0bfaeb92337
parent cb0043f2d5d7d83972dcb7e666a6a0e2b6ad19f8 (diff)
parent 3224bef1c9d8bef82d2bf1706db90b18644eaba4 (diff)
Merge pull request #22217 from vespa-engine/geirst/improve-nearest-neighbor-visit-tracing
Improve nearest neighbor blueprint debug tracing
-rw-r--r-- searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp 5
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp 44
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h 4
3 files changed, 20 insertions, 33 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index 1d3305d2c1a..ec75a0d6d06 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -1041,7 +1041,6 @@ public:
100100.25,
global_filter_lower_limit, 1.0);
EXPECT_EQUAL(11u, bp->getState().estimate().estHits);
- EXPECT_EQUAL(approximate, bp->may_approximate());
EXPECT_EQUAL(100100.25 * 100100.25, bp->get_distance_threshold());
return bp;
}
@@ -1068,7 +1067,6 @@ TEST_F("NN blueprint handles empty filter", NearestNeighborBlueprintFixture)
auto empty_filter = GlobalFilter::create();
bp->set_global_filter(*empty_filter);
EXPECT_EQUAL(3u, bp->getState().estimate().estHits);
- EXPECT_TRUE(bp->may_approximate());
EXPECT_EQUAL(NNBA::INDEX_TOP_K, bp->get_algorithm());
}
@@ -1081,7 +1079,6 @@ TEST_F("NN blueprint handles strong filter", NearestNeighborBlueprintFixture)
auto strong_filter = GlobalFilter::create(std::move(filter));
bp->set_global_filter(*strong_filter);
EXPECT_EQUAL(1u, bp->getState().estimate().estHits);
- EXPECT_TRUE(bp->may_approximate());
EXPECT_EQUAL(NNBA::INDEX_TOP_K_WITH_FILTER, bp->get_algorithm());
}
@@ -1099,7 +1096,6 @@ TEST_F("NN blueprint handles weak filter", NearestNeighborBlueprintFixture)
auto weak_filter = GlobalFilter::create(std::move(filter));
bp->set_global_filter(*weak_filter);
EXPECT_EQUAL(3u, bp->getState().estimate().estHits);
- EXPECT_TRUE(bp->may_approximate());
EXPECT_EQUAL(NNBA::INDEX_TOP_K_WITH_FILTER, bp->get_algorithm());
}
@@ -1112,7 +1108,6 @@ TEST_F("NN blueprint handles strong filter triggering brute force search", Neare
auto strong_filter = GlobalFilter::create(std::move(filter));
bp->set_global_filter(*strong_filter);
EXPECT_EQUAL(11u, bp->getState().estimate().estHits);
- EXPECT_FALSE(bp->may_approximate());
EXPECT_EQUAL(NNBA::BRUTE_FORCE_FALLBACK, bp->get_algorithm());
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
index bdcbb3db633..73eaa773c53 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
@@ -89,6 +89,7 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f
_found_hits(),
_algorithm(Algorithm::BRUTE_FORCE),
_global_filter(GlobalFilter::create()),
+ _global_filter_set(false),
_global_filter_hits(),
_global_filter_hit_ratio()
{
@@ -122,51 +123,41 @@ void
NearestNeighborBlueprint::set_global_filter(const GlobalFilter &global_filter)
{
_global_filter = global_filter.shared_from_this();
+ _global_filter_set = true;
auto nns_index = _attr_tensor.nearest_neighbor_index();
- LOG(debug, "set_global_filter with: %s / %s / %s",
- (_approximate ? "approximate" : "exact"),
- (nns_index ? "nns_index" : "no_index"),
- (_global_filter->has_filter() ? "has_filter" : "no_filter"));
if (_approximate && nns_index) {
uint32_t est_hits = _attr_tensor.get_num_docs();
if (_global_filter->has_filter()) {
uint32_t max_hits = _global_filter->filter()->countTrueBits();
- LOG(debug, "set_global_filter getNumDocs: %u / max_hits %u", est_hits, max_hits);
double max_hit_ratio = static_cast<double>(max_hits) / est_hits;
if (max_hit_ratio < _global_filter_lower_limit) {
- _approximate = false;
_algorithm = Algorithm::BRUTE_FORCE_FALLBACK;
- LOG(debug, "too many hits filtered out, using brute force implementation");
} else {
est_hits = std::min(est_hits, max_hits);
}
_global_filter_hits = max_hits;
_global_filter_hit_ratio = max_hit_ratio;
}
- if (_approximate) {
+ if (_algorithm != Algorithm::BRUTE_FORCE_FALLBACK) {
est_hits = std::min(est_hits, _target_num_hits);
setEstimate(HitEstimate(est_hits, false));
- perform_top_k();
- LOG(debug, "perform_top_k found %zu hits", _found_hits.size());
+ perform_top_k(nns_index);
}
}
}
void
-NearestNeighborBlueprint::perform_top_k()
+NearestNeighborBlueprint::perform_top_k(const search::tensor::NearestNeighborIndex* nns_index)
{
- auto nns_index = _attr_tensor.nearest_neighbor_index();
- if (_approximate && nns_index) {
- auto lhs = _query_tensor->cells();
- uint32_t k = _target_num_hits;
- if (_global_filter->has_filter()) {
- auto filter = _global_filter->filter();
- _found_hits = nns_index->find_top_k_with_filter(k, lhs, *filter, k + _explore_additional_hits, _distance_threshold);
- _algorithm = Algorithm::INDEX_TOP_K_WITH_FILTER;
- } else {
- _found_hits = nns_index->find_top_k(k, lhs, k + _explore_additional_hits, _distance_threshold);
- _algorithm = Algorithm::INDEX_TOP_K;
- }
+ auto lhs = _query_tensor->cells();
+ uint32_t k = _target_num_hits;
+ if (_global_filter->has_filter()) {
+ auto filter = _global_filter->filter();
+ _found_hits = nns_index->find_top_k_with_filter(k, lhs, *filter, k + _explore_additional_hits, _distance_threshold);
+ _algorithm = Algorithm::INDEX_TOP_K_WITH_FILTER;
+ } else {
+ _found_hits = nns_index->find_top_k(k, lhs, k + _explore_additional_hits, _distance_threshold);
+ _algorithm = Algorithm::INDEX_TOP_K;
}
}
@@ -191,14 +182,15 @@ NearestNeighborBlueprint::visitMembers(vespalib::ObjectVisitor& visitor) const
visitor.visitString("query_tensor", _query_tensor->type().to_spec());
visitor.visitInt("target_num_hits", _target_num_hits);
visitor.visitInt("explore_additional_hits", _explore_additional_hits);
- visitor.visitBool("approximate", _approximate);
+ visitor.visitBool("wanted_approximate", _approximate);
visitor.visitBool("has_index", _attr_tensor.nearest_neighbor_index());
visitor.visitString("algorithm", to_string(_algorithm));
visitor.visitInt("top_k_hits", _found_hits.size());
visitor.openStruct("global_filter", "GlobalFilter");
- visitor.visitBool("is_set", (_global_filter != nullptr));
- visitor.visitBool("has_filter", (_global_filter && _global_filter->has_filter()));
+ visitor.visitBool("wanted", getState().want_global_filter());
+ visitor.visitBool("set", _global_filter_set);
+ visitor.visitBool("calculated", _global_filter->has_filter());
visitor.visitFloat("lower_limit", _global_filter_lower_limit);
visitor.visitFloat("upper_limit", _global_filter_upper_limit);
if (_global_filter_hits.has_value()) {
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h
index 7922036dc42..7637c4dd6b7 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h
@@ -41,10 +41,11 @@ private:
std::vector<search::tensor::NearestNeighborIndex::Neighbor> _found_hits;
Algorithm _algorithm;
std::shared_ptr<const GlobalFilter> _global_filter;
+ bool _global_filter_set;
std::optional<uint32_t> _global_filter_hits;
std::optional<double> _global_filter_hit_ratio;
- void perform_top_k();
+ void perform_top_k(const search::tensor::NearestNeighborIndex* nns_index);
public:
NearestNeighborBlueprint(const queryeval::FieldSpec& field,
const tensor::ITensorAttribute& attr_tensor,
@@ -60,7 +61,6 @@ public:
const vespalib::eval::Value& get_query_tensor() const { return *_query_tensor; }
uint32_t get_target_num_hits() const { return _target_num_hits; }
void set_global_filter(const GlobalFilter &global_filter) override;
- bool may_approximate() const { return _approximate; }
Algorithm get_algorithm() const { return _algorithm; }
double get_distance_threshold() const { return _distance_threshold; }