diff options
11 files changed, 48 insertions, 73 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/CMakeLists.txt b/searchcore/src/vespa/searchcore/proton/documentmetastore/CMakeLists.txt index 63ff53bb011..d257f29c9df 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/CMakeLists.txt +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/CMakeLists.txt @@ -16,7 +16,6 @@ vespa_add_library(searchcore_documentmetastore STATIC lidreusedelayer.cpp lidstatevector.cpp lid_hold_list.cpp - white_list_provider.cpp DEPENDS searchcore_attribute searchcore_bucketdb diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp index 31361e40c68..61662f621f1 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp @@ -1,7 +1,6 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "lid_allocator.h" -#include "white_list_provider.h" #include <vespa/searchlib/common/bitvectoriterator.h> #include <vespa/searchlib/fef/termfieldmatchdataarray.h> #include <vespa/searchlib/fef/matchdata.h> @@ -192,17 +191,13 @@ LidAllocator::constructFreeList(DocId lidLimit) namespace { -class WhiteListBlueprint : public SimpleLeafBlueprint, public WhiteListProvider +class WhiteListBlueprint : public SimpleLeafBlueprint { private: const search::GrowableBitVector &_activeLids; mutable std::mutex _lock; mutable std::vector<search::fef::TermFieldMatchData *> _matchDataVector; - search::BitVector::UP get_white_list_filter() const override { - return search::BitVector::create(_activeLids, 0, get_docid_limit()); - } - SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray &tfmda, bool strict) const override { @@ -210,16 +205,6 @@ private: (void) tfmda; return createFilterSearch(strict, FilterConstraint::UPPER_BOUND); } - - SearchIterator::UP createFilterSearch(bool strict, FilterConstraint) const override { - auto tfmd = new search::fef::TermFieldMatchData; - { - std::lock_guard<std::mutex> lock(_lock); - _matchDataVector.push_back(tfmd); - } - return search::BitVectorIterator::create(&_activeLids, get_docid_limit(), *tfmd, strict); - } - public: WhiteListBlueprint(const search::GrowableBitVector &activeLids) : SimpleLeafBlueprint(FieldSpecBaseList()), @@ -231,6 +216,15 @@ public: bool isWhiteList() const override { return true; } + SearchIterator::UP createFilterSearch(bool strict, FilterConstraint) const override { + auto tfmd = new search::fef::TermFieldMatchData; + { + std::lock_guard<std::mutex> lock(_lock); + _matchDataVector.push_back(tfmd); + } + return search::BitVectorIterator::create(&_activeLids, get_docid_limit(), *tfmd, strict); + } + ~WhiteListBlueprint() { for (auto matchData : _matchDataVector) { delete matchData; diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/white_list_provider.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/white_list_provider.cpp deleted file mode 100644 index 49ea1960005..00000000000 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/white_list_provider.cpp +++ /dev/null @@ -1,3 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "white_list_provider.h" diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/white_list_provider.h b/searchcore/src/vespa/searchcore/proton/documentmetastore/white_list_provider.h deleted file mode 100644 index 99e0bf8d103..00000000000 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/white_list_provider.h +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <memory> - -#pragma once - -namespace search { class BitVector; } - -namespace proton::documentmetastore { - -/** Interface for fetching a copy of the white list bitvector */ -struct WhiteListProvider { - virtual std::unique_ptr<search::BitVector> get_white_list_filter() const = 0; -protected: - ~WhiteListProvider() = default; -}; - -} // namespace diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index f7fce994bd1..f19b416b92f 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -185,6 +185,8 @@ MatchToolsFactory(QueryLimiter & queryLimiter, _query.optimize(); trace.addEvent(4, "MTF: Fetch Postings"); _query.fetchPostings(); + trace.addEvent(5, "MTF: Handle Global Filters"); + _query.handle_global_filters(searchContext.getDocIdLimit()); _query.freeze(); trace.addEvent(5, "MTF: prepareSharedState"); _rankSetup.prepareSharedState(_queryEnv, _queryEnv.getObjectStore()); diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp index d7c0ac04ce5..8fd686e235d 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp @@ -13,7 +13,6 @@ #include <vespa/searchlib/query/tree/point.h> #include <vespa/searchlib/query/tree/rectangle.h> #include <vespa/searchlib/queryeval/intermediate_blueprints.h> -#include <vespa/searchcore/proton/documentmetastore/white_list_provider.h> #include <vespa/log/log.h> LOG_SETUP(".proton.matching.query"); @@ -157,7 +156,6 @@ void Query::setWhiteListBlueprint(Blueprint::UP whiteListBlueprint) { _whiteListBlueprint = std::move(whiteListBlueprint); - _white_list_provider = dynamic_cast<WhiteListProvider *>(_whiteListBlueprint.get()); } void @@ -190,17 +188,7 @@ Query::reserveHandles(const IRequestContext & requestContext, ISearchContext &co void Query::optimize() { - using search::queryeval::GlobalFilter; _blueprint = Blueprint::optimize(std::move(_blueprint)); - if (_blueprint->getState().want_global_filter()) { - auto white_list = (_white_list_provider ? - _white_list_provider->get_white_list_filter() : - search::BitVector::UP()); - auto global_filter = GlobalFilter::create(std::move(white_list)); - _blueprint->set_global_filter(*global_filter); - // optimized order may change after accounting for global filter: - _blueprint = Blueprint::optimize(std::move(_blueprint)); - } LOG(debug, "optimized blueprint:\n%s\n", _blueprint->asString().c_str()); } @@ -211,6 +199,24 @@ Query::fetchPostings() } void +Query::handle_global_filters(uint32_t docid_limit) +{ + using search::queryeval::GlobalFilter; + if (_blueprint->getState().want_global_filter()) { + auto constraint = Blueprint::FilterConstraint::UPPER_BOUND; + bool strict = true; + auto filter_iterator = _blueprint->createFilterSearch(strict, constraint); + filter_iterator->initRange(1, docid_limit); + auto white_list = filter_iterator->get_hits(1); + auto global_filter = GlobalFilter::create(std::move(white_list)); + _blueprint->set_global_filter(*global_filter); + // optimized order may change after accounting for global filter: + _blueprint = Blueprint::optimize(std::move(_blueprint)); + LOG(debug, "blueprint after handle_global_filters:\n%s\n", _blueprint->asString().c_str()); + } +} + +void Query::freeze() { _blueprint->freeze(); diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h index 4ca66fb7a86..3ed6229830d 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.h +++ b/searchcore/src/vespa/searchcore/proton/matching/query.h @@ -10,8 +10,6 @@ #include <vespa/searchlib/queryeval/blueprint.h> #include <vespa/searchlib/queryeval/irequestcontext.h> -namespace proton::documentmetastore { struct WhiteListProvider; } - namespace proton::matching { class ViewResolver; @@ -21,12 +19,10 @@ class Query { private: using Blueprint=search::queryeval::Blueprint; - using WhiteListProvider=proton::documentmetastore::WhiteListProvider; search::query::Node::UP _query_tree; Blueprint::UP _blueprint; search::fef::Location _location; Blueprint::UP _whiteListBlueprint; - WhiteListProvider *_white_list_provider; public: Query(); @@ -93,6 +89,7 @@ public: **/ void optimize(); void fetchPostings(); + void handle_global_filters(uint32_t docidLimit); void freeze(); /** diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index ac14233f32d..74e67fb7fcd 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -847,8 +847,8 @@ TEST_F("NN blueprint handles strong filter", NearestNeighborBlueprintFixture) filter->invalidateCachedCount(); auto strong_filter = GlobalFilter::create(std::move(filter)); bp->set_global_filter(*strong_filter); - EXPECT_EQUAL(11u, bp->getState().estimate().estHits); - EXPECT_FALSE(bp->may_approximate()); + EXPECT_EQUAL(1u, bp->getState().estimate().estHits); + EXPECT_TRUE(bp->may_approximate()); } TEST_F("NN blueprint handles weak filter", NearestNeighborBlueprintFixture) diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h index d2e86ac65a4..5bba87c7091 100644 --- a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h @@ -15,11 +15,11 @@ class EmptyBlueprint : public SimpleLeafBlueprint { protected: SearchIterator::UP createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda, bool strict) const override; - SearchIterator::UP createFilterSearch(bool strict, FilterConstraint constraint) const override; public: EmptyBlueprint(const FieldSpecBaseList &fields); EmptyBlueprint(const FieldSpecBase &field); EmptyBlueprint(); + SearchIterator::UP createFilterSearch(bool strict, FilterConstraint constraint) const override; }; //----------------------------------------------------------------------------- @@ -31,16 +31,14 @@ private: SimpleResult _result; protected: - SearchIterator::UP + SearchIterator::UP createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda, bool strict) const override; - SearchIterator::UP - createFilterSearch(bool strict, FilterConstraint constraint) const override; - public: SimpleBlueprint(const SimpleResult &result); ~SimpleBlueprint(); SimpleBlueprint &tag(const vespalib::string &tag); const vespalib::string &tag() const { return _tag; } + SearchIterator::UP createFilterSearch(bool strict, FilterConstraint constraint) const override; }; //----------------------------------------------------------------------------- diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp index 6a27e8a9f14..55342f91e93 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp @@ -9,6 +9,9 @@ #include <vespa/eval/tensor/dense/dense_tensor.h> #include <vespa/searchlib/tensor/dense_tensor_attribute.h> #include <vespa/searchlib/tensor/distance_function_factory.h> +#include <vespa/log/log.h> + +LOG_SETUP(".searchlib.queryeval.nearest_neighbor_blueprint"); using vespalib::tensor::DenseTensorView; using vespalib::tensor::DenseTensor; @@ -84,19 +87,24 @@ NearestNeighborBlueprint::set_global_filter(const GlobalFilter &global_filter) { _global_filter = global_filter.shared_from_this(); auto nns_index = _attr_tensor.nearest_neighbor_index(); + LOG(debug, "set_global_filter with: %s / %s / %s", + (_approximate ? "approximate" : "exact"), + (nns_index ? "nns_index" : "no_index"), + (_global_filter->has_filter() ? "has_filter" : "no_filter")); if (_approximate && nns_index) { uint32_t est_hits = _attr_tensor.getNumDocs(); if (_global_filter->has_filter()) { uint32_t max_hits = _global_filter->filter()->countTrueBits(); + LOG(debug, "set_global_filter getNumDocs: %u / max_hits %u", est_hits, max_hits); if (max_hits * 10 < est_hits) { - // too many hits filtered out, use brute force implementation: - _approximate = false; - return; + LOG(debug, "too many hits filtered out, consider using brute force implementation"); } est_hits = std::min(est_hits, max_hits); } est_hits = std::min(est_hits, _target_num_hits); setEstimate(HitEstimate(est_hits, false)); + perform_top_k(); + LOG(debug, "perform_top_k found %zu hits", _found_hits.size()); } } @@ -107,7 +115,7 @@ NearestNeighborBlueprint::perform_top_k() if (_approximate && nns_index) { auto lhs_type = _query_tensor->fast_type(); auto rhs_type = _attr_tensor.getTensorType(); - // different cell types should have be converted already + // different cell types should be converted already if (lhs_type == rhs_type) { auto lhs = _query_tensor->cellsRef(); uint32_t k = _target_num_hits; @@ -121,13 +129,6 @@ NearestNeighborBlueprint::perform_top_k() } } -void -NearestNeighborBlueprint::fetchPostings(const ExecuteInfo &execInfo) { - if (execInfo.isStrict()) { - perform_top_k(); - } -} - std::unique_ptr<SearchIterator> NearestNeighborBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray& tfmda, bool strict) const { diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h index a713c73ad32..3e402b46a43 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h @@ -49,7 +49,6 @@ public: bool strict) const override; void visitMembers(vespalib::ObjectVisitor& visitor) const override; bool always_needs_unpack() const override; - void fetchPostings(const ExecuteInfo &execInfo) override; }; } |