summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author Arne Juul <arnej@verizonmedia.com> 2020-06-07 15:08:34 +0000
committer Arne Juul <arnej@verizonmedia.com> 2020-06-07 19:16:58 +0000
commit c5dbeedb10013c7e5931b5da27d00de5808e50d3 (patch)
tree ebebcf326ffeb909a6cce7ed0035f0bcd65a1915 /searchlib
parent 4368171fdc096a02e76e7d88000fccdf46b2d540 (diff)
perform TopK in set_global_filter
* global filter must be computed after fetchPostings, so move the actual TopK computation out of fetchPostings and into set_global_filter.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp 22
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h 1
2 files changed, 12 insertions, 11 deletions
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
index 6a27e8a9f14..bdd360aa9da 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
@@ -9,6 +9,9 @@
#include <vespa/eval/tensor/dense/dense_tensor.h>
#include <vespa/searchlib/tensor/dense_tensor_attribute.h>
#include <vespa/searchlib/tensor/distance_function_factory.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.queryeval.nearest_neighbor_blueprint");
using vespalib::tensor::DenseTensorView;
using vespalib::tensor::DenseTensor;
@@ -84,18 +87,24 @@ NearestNeighborBlueprint::set_global_filter(const GlobalFilter &global_filter)
{
_global_filter = global_filter.shared_from_this();
auto nns_index = _attr_tensor.nearest_neighbor_index();
+ LOG(debug, "set_global_filter with: %s / %s / %s",
+ (_approximate ? "approximate" : "exact"),
+ (nns_index ? "nns_index" : "no_index"),
+ (_global_filter->has_filter() ? "has_filter" : "no_filter"));
if (_approximate && nns_index) {
uint32_t est_hits = _attr_tensor.getNumDocs();
if (_global_filter->has_filter()) {
uint32_t max_hits = _global_filter->filter()->countTrueBits();
+ LOG(debug, "set_global_filter getNumDocs: %u / max_hits %u", est_hits, max_hits);
if (max_hits * 10 < est_hits) {
// too many hits filtered out, use brute force implementation:
_approximate = false;
return;
}
- est_hits = std::min(est_hits, max_hits);
}
- est_hits = std::min(est_hits, _target_num_hits);
+ perform_top_k();
+ est_hits = _found_hits.size();
+ LOG(debug, "perform_top_k found %u hits", est_hits);
setEstimate(HitEstimate(est_hits, false));
}
}
@@ -107,7 +116,7 @@ NearestNeighborBlueprint::perform_top_k()
if (_approximate && nns_index) {
auto lhs_type = _query_tensor->fast_type();
auto rhs_type = _attr_tensor.getTensorType();
- // different cell types should have be converted already
+ // different cell types should be converted already
if (lhs_type == rhs_type) {
auto lhs = _query_tensor->cellsRef();
uint32_t k = _target_num_hits;
@@ -121,13 +130,6 @@ NearestNeighborBlueprint::perform_top_k()
}
}
-void
-NearestNeighborBlueprint::fetchPostings(const ExecuteInfo &execInfo) {
- if (execInfo.isStrict()) {
- perform_top_k();
- }
-}
-
std::unique_ptr<SearchIterator>
NearestNeighborBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray& tfmda, bool strict) const
{
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h
index a713c73ad32..3e402b46a43 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h
@@ -49,7 +49,6 @@ public:
bool strict) const override;
void visitMembers(vespalib::ObjectVisitor& visitor) const override;
bool always_needs_unpack() const override;
- void fetchPostings(const ExecuteInfo &execInfo) override;
};
}