diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-09-25 14:34:19 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-25 14:34:19 +0200 |
commit | 7facdd6177063f772c497000b9c12e4653a2db83 (patch) | |
tree | 1acdcde007215c4bd1b36107611b8a95493e875e | |
parent | 75d25f2d7e1ff1045d79b95e36a84cc09566baea (diff) | |
parent | 7c2b17a3a91890b592117986799d84e8e26b1ad7 (diff) |
Merge pull request #28631 from vespa-engine/balder/lift-single-filter-terms-out-from-ws
- Single filter terms can be lifted out from weighted sets.
8 files changed, 95 insertions, 56 deletions
diff --git a/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp index d7a854e0afc..6c6f05fd5e2 100644 --- a/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp +++ b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp @@ -29,14 +29,14 @@ using namespace search::attribute::test; namespace { void -setupAttributeManager(MockAttributeManager &manager) +setupAttributeManager(MockAttributeManager &manager, bool isFilter) { AttributeVector::DocId docId; { - AttributeVector::SP attr_sp = AttributeFactory::createAttribute("integer", Config(BasicType("int64"))); + AttributeVector::SP attr_sp = AttributeFactory::createAttribute("integer", Config(BasicType("int64")).setIsFilter(isFilter)); manager.addAttribute(attr_sp); - IntegerAttribute *attr = (IntegerAttribute*)(attr_sp.get()); + auto *attr = (IntegerAttribute*)(attr_sp.get()); for (size_t i = 1; i < 10; ++i) { attr->addDoc(docId); assert(i == docId); @@ -45,10 +45,10 @@ setupAttributeManager(MockAttributeManager &manager) } } { - AttributeVector::SP attr_sp = AttributeFactory::createAttribute("string", Config(BasicType("string"))); + AttributeVector::SP attr_sp = AttributeFactory::createAttribute("string", Config(BasicType("string")).setIsFilter(isFilter)); manager.addAttribute(attr_sp); - StringAttribute *attr = (StringAttribute*)(attr_sp.get()); + auto *attr = (StringAttribute*)(attr_sp.get()); for (size_t i = 1; i < 10; ++i) { attr->addDoc(docId); assert(i == docId); @@ -58,9 +58,9 @@ setupAttributeManager(MockAttributeManager &manager) } { AttributeVector::SP attr_sp = AttributeFactory::createAttribute( - "multi", Config(BasicType("int64"), search::attribute::CollectionType("array"))); + "multi", Config(BasicType("int64"), search::attribute::CollectionType("array")).setIsFilter(isFilter)); manager.addAttribute(attr_sp); - IntegerAttribute *attr = (IntegerAttribute*)(attr_sp.get()); + auto *attr = (IntegerAttribute*)(attr_sp.get()); for (size_t i = 1; i < 10; ++i) { attr->addDoc(docId); assert(i == docId); @@ -78,35 +78,43 @@ struct WS { TermFieldHandle handle; std::vector<std::pair<std::string, uint32_t> > tokens; - WS(IAttributeManager & manager) : attribute_manager(manager), layout(), handle(layout.allocTermField(fieldId)), tokens() { + explicit WS(IAttributeManager & manager) + : attribute_manager(manager), + layout(), handle(layout.allocTermField(fieldId)), + tokens() + { MatchData::UP tmp = layout.createMatchData(); ASSERT_TRUE(tmp->resolveTermField(handle)->getFieldId() == fieldId); } WS &add(const std::string &token, uint32_t weight) { - tokens.push_back(std::make_pair(token, weight)); + tokens.emplace_back(token, weight); return *this; } Node::UP createNode() const { - SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm(tokens.size(), "view", 0, Weight(0)); - for (size_t i = 0; i < tokens.size(); ++i) { - node->addTerm(tokens[i].first, Weight(tokens[i].second)); + auto *node = new SimpleWeightedSetTerm(tokens.size(), "view", 0, Weight(0)); + for (const auto & token : tokens) { + node->addTerm(token.first, Weight(token.second)); } return Node::UP(node); } - bool isGenericSearch(Searchable &searchable, const std::string &field, bool strict) const { + SearchIterator::UP + createSearch(Searchable &searchable, const std::string &field, bool strict) const { AttributeContext ac(attribute_manager); FakeRequestContext requestContext(&ac); MatchData::UP md = layout.createMatchData(); Node::UP node = createNode(); FieldSpecList fields; - fields.add(FieldSpec(field, fieldId, handle)); + fields.add(FieldSpec(field, fieldId, handle, ac.getAttribute(field)->getIsFilter())); queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node); bp->fetchPostings(queryeval::ExecuteInfo::create(strict)); SearchIterator::UP sb = bp->createSearch(*md, strict); - return (dynamic_cast<WeightedSetTermSearch*>(sb.get()) != 0); + return sb; + } + bool isWeightedSetTermSearch(Searchable &searchable, const std::string &field, bool strict) const { + return dynamic_cast<WeightedSetTermSearch *>(createSearch(searchable, field, strict).get()) != nullptr; } FakeResult search(Searchable &searchable, const std::string &field, bool strict) const { @@ -140,23 +148,58 @@ struct WS { } // namespace <unnamed> +void test_tokens(bool isFilter, const std::vector<uint32_t> & docs) { + MockAttributeManager manager; + setupAttributeManager(manager, isFilter); + AttributeBlueprintFactory adapter; + + FakeResult expect = FakeResult(); + WS ws = WS(manager); + for (uint32_t doc : docs) { + auto docS = vespalib::stringify(doc); + int32_t weight = doc * 10; + expect.doc(doc).weight(weight).pos(0); + ws.add(docS, weight); + } + + EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "integer", true)); + EXPECT_TRUE(!ws.isWeightedSetTermSearch(adapter, "integer", false)); + EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "string", true)); + EXPECT_TRUE(!ws.isWeightedSetTermSearch(adapter, "string", false)); + EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "multi", true)); + EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "multi", false)); + + EXPECT_EQUAL(expect, ws.search(adapter, "integer", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "integer", false)); + EXPECT_EQUAL(expect, ws.search(adapter, "string", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "string", false)); + EXPECT_EQUAL(expect, ws.search(adapter, "multi", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "multi", false)); +} TEST("attribute_weighted_set_test") { + test_tokens(false, {3, 5, 7}); + test_tokens(true, {3, 5, 7}); + test_tokens(false, {3}); +} + +TEST("attribute_weighted_set_single_token_filter_lifted_out") { MockAttributeManager manager; - setupAttributeManager(manager); + setupAttributeManager(manager, true); AttributeBlueprintFactory adapter; - FakeResult expect = FakeResult() - .doc(3).elem(0).weight(30).pos(0) - .doc(5).elem(0).weight(50).pos(0) - .doc(7).elem(0).weight(70).pos(0); - WS ws = WS(manager).add("7", 70).add("5", 50).add("3", 30); - - EXPECT_TRUE(ws.isGenericSearch(adapter, "integer", true)); - EXPECT_TRUE(!ws.isGenericSearch(adapter, "integer", false)); - EXPECT_TRUE(ws.isGenericSearch(adapter, "string", true)); - EXPECT_TRUE(!ws.isGenericSearch(adapter, "string", false)); - EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", true)); - EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", false)); + FakeResult expect = FakeResult().doc(3).elem(0).weight(30).pos(0); + WS ws = WS(manager).add("3", 30); + + EXPECT_EQUAL("search::FilterAttributeIteratorStrict<search::attribute::SingleNumericSearchContext<long, search::attribute::NumericMatcher<long> > >", + ws.createSearch(adapter, "integer", true)->getClassName()); + EXPECT_EQUAL("search::FilterAttributeIteratorT<search::attribute::SingleNumericSearchContext<long, search::attribute::NumericMatcher<long> > >", + ws.createSearch(adapter, "integer", false)->getClassName()); + EXPECT_EQUAL("search::FilterAttributeIteratorStrict<search::attribute::SingleEnumSearchContext<char const*, search::attribute::StringSearchContext> >", + ws.createSearch(adapter, "string", true)->getClassName()); + EXPECT_EQUAL("search::FilterAttributeIteratorT<search::attribute::SingleEnumSearchContext<char const*, search::attribute::StringSearchContext> >", + ws.createSearch(adapter, "string", false)->getClassName()); + EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "multi", true)); + EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "multi", false)); EXPECT_EQUAL(expect, ws.search(adapter, "integer", true)); EXPECT_EQUAL(expect, ws.search(adapter, "integer", false)); diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 1519bb14554..b4cdd621b71 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -337,10 +337,7 @@ public: if (tfmda.size() == 1) { // search in exactly one field fef::TermFieldMatchData &tfmd = *tfmda[0]; - return search::common::create_location_iterator(tfmd, - _attribute.getNumDocs(), - strict, - _location); + return common::create_location_iterator(tfmd, _attribute.getNumDocs(), strict, _location); } else { LOG(debug, "wrong size tfmda: %zu (fallback to old location iterator)\n", tfmda.size()); } diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp index 108128eeb39..94c560a0dae 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp @@ -30,7 +30,7 @@ protected: const attribute::IAttributeVector &attribute() const { return _attr; } public: - UseAttr(const attribute::IAttributeVector & attr) + explicit UseAttr(const attribute::IAttributeVector & attr) : _attr(attr) {} }; @@ -40,7 +40,7 @@ class UseStringEnum : public UseAttr { public: using TokenT = uint32_t; - UseStringEnum(const IAttributeVector & attr) + explicit UseStringEnum(const IAttributeVector & attr) : UseAttr(attr) {} auto mapToken(const ISearchContext &context) const { return attribute().findFoldedEnums(context.queryTerm()->getTerm()); @@ -56,7 +56,7 @@ class UseInteger : public UseAttr { public: using TokenT = uint64_t; - UseInteger(const IAttributeVector & attr) : UseAttr(attr) {} + explicit UseInteger(const IAttributeVector & attr) : UseAttr(attr) {} std::vector<int64_t> mapToken(const ISearchContext &context) const { std::vector<int64_t> result; Int64Range range(context.getAsIntegerTerm()); @@ -157,6 +157,10 @@ AttributeWeightedSetBlueprint::createLeafSearch(const fef::TermFieldMatchDataArr assert(tfmda.size() == 1); assert(getState().numFields() == 1); fef::TermFieldMatchData &tfmd = *tfmda[0]; + bool field_is_filter = getState().fields()[0].isFilter(); + if (field_is_filter && (_contexts.size() == 1)) { + return _contexts[0]->createIterator(&tfmd, strict); + } if (strict) { // use generic weighted set search fef::MatchDataLayout layout; auto handle = layout.allocTermField(tfmd.getFieldId()); @@ -167,7 +171,6 @@ AttributeWeightedSetBlueprint::createLeafSearch(const fef::TermFieldMatchDataArr // TODO: pass ownership with unique_ptr children[i] = _contexts[i]->createIterator(child_tfmd, true).release(); } - bool field_is_filter = getState().fields()[0].isFilter(); return queryeval::WeightedSetTermSearch::create(children, tfmd, field_is_filter, _weights, std::move(match_data)); } else { // use attribute filter optimization bool isString = (_attr.isStringType() && _attr.hasEnum()); @@ -182,18 +185,16 @@ AttributeWeightedSetBlueprint::createLeafSearch(const fef::TermFieldMatchDataArr } queryeval::SearchIterator::UP -AttributeWeightedSetBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const +AttributeWeightedSetBlueprint::createFilterSearch(bool strict, FilterConstraint) const { - (void) constraint; std::vector<std::unique_ptr<queryeval::SearchIterator>> children; children.reserve(_contexts.size()); for (auto& context : _contexts) { - auto wrapper = std::make_unique<search::queryeval::FilterWrapper>(1); + auto wrapper = std::make_unique<queryeval::FilterWrapper>(1); wrapper->wrap(context->createIterator(wrapper->tfmda()[0], strict)); children.emplace_back(std::move(wrapper)); } - search::queryeval::UnpackInfo unpack_info; - return search::queryeval::OrSearch::create(std::move(children), strict, unpack_info); + return queryeval::OrSearch::create(std::move(children), strict, queryeval::UnpackInfo()); } void diff --git a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp index 3c0bae00047..e2566c94f1c 100644 --- a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp +++ b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp @@ -11,8 +11,8 @@ class DocumentWeightOrFilterSearchImpl : public DocumentWeightOrFilterSearch { AttributeIteratorPack _children; public: - DocumentWeightOrFilterSearchImpl(AttributeIteratorPack&& children); - ~DocumentWeightOrFilterSearchImpl(); + explicit DocumentWeightOrFilterSearchImpl(AttributeIteratorPack&& children); + ~DocumentWeightOrFilterSearchImpl() override; void doSeek(uint32_t docId) override; @@ -67,7 +67,7 @@ DocumentWeightOrFilterSearchImpl::doUnpack(uint32_t) { } -std::unique_ptr<search::queryeval::SearchIterator> +std::unique_ptr<queryeval::SearchIterator> DocumentWeightOrFilterSearch::create(std::vector<DocumentWeightIterator>&& children) { if (children.empty()) { diff --git a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h index 62be883ab52..c601856573f 100644 --- a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h +++ b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h @@ -9,15 +9,15 @@ namespace search::attribute { * Filter iterator on top of document weight iterators with OR semantics used during * calculation of global filter for weighted set terms, wand terms and dot product terms. */ -class DocumentWeightOrFilterSearch : public search::queryeval::SearchIterator +class DocumentWeightOrFilterSearch : public queryeval::SearchIterator { protected: DocumentWeightOrFilterSearch() - : search::queryeval::SearchIterator() + : queryeval::SearchIterator() { } public: - static std::unique_ptr<search::queryeval::SearchIterator> create(std::vector<DocumentWeightIterator>&& children); + static std::unique_ptr<queryeval::SearchIterator> create(std::vector<DocumentWeightIterator>&& children); }; } diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp index 77d9875bf69..97f6bc2e6f8 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp @@ -33,10 +33,8 @@ WeightedSetTermMatchingElementsSearch::WeightedSetTermMatchingElementsSearch(con _search() { _tfmda.add(&_tfmd); - auto generic_search = bp.createLeafSearch(_tfmda, false); - auto weighted_set_term_search = dynamic_cast<WeightedSetTermSearch *>(generic_search.get()); - generic_search.release(); - _search.reset(weighted_set_term_search); + _search.reset(static_cast<WeightedSetTermSearch *>(bp.createLeafSearch(_tfmda, false).release())); + } WeightedSetTermMatchingElementsSearch::~WeightedSetTermMatchingElementsSearch() = default; diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h index 0e3c82444d7..9c8d6d88329 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h @@ -18,7 +18,7 @@ class WeightedSetTermBlueprint : public ComplexLeafBlueprint std::vector<Blueprint::UP> _terms; public: - WeightedSetTermBlueprint(const FieldSpec &field); + explicit WeightedSetTermBlueprint(const FieldSpec &field); WeightedSetTermBlueprint(const WeightedSetTermBlueprint &) = delete; WeightedSetTermBlueprint &operator=(const WeightedSetTermBlueprint &) = delete; ~WeightedSetTermBlueprint() override; diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp index ee3978705cf..8478a0d3c35 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp @@ -21,7 +21,7 @@ private: struct CmpDocId { const uint32_t *termPos; - CmpDocId(const uint32_t *tp) : termPos(tp) {} + explicit CmpDocId(const uint32_t *tp) : termPos(tp) {} bool operator()(const ref_t &a, const ref_t &b) const { return (termPos[a] < termPos[b]); } @@ -29,7 +29,7 @@ private: struct CmpWeight { const int32_t *weight; - CmpWeight(const int32_t *w) : weight(w) {} + explicit CmpWeight(const int32_t *w) : weight(w) {} bool operator()(const ref_t &a, const ref_t &b) const { return (weight[a] > weight[b]); } @@ -61,7 +61,7 @@ private: } public: - WeightedSetTermSearchImpl(search::fef::TermFieldMatchData &tmd, + WeightedSetTermSearchImpl(fef::TermFieldMatchData &tmd, bool field_is_filter, const std::vector<int32_t> &weights, IteratorPack &&iteratorPack) @@ -180,7 +180,7 @@ WeightedSetTermSearch::create(const std::vector<SearchIterator *> &children, //----------------------------------------------------------------------------- SearchIterator::UP -WeightedSetTermSearch::create(search::fef::TermFieldMatchData &tmd, +WeightedSetTermSearch::create(fef::TermFieldMatchData &tmd, bool field_is_filter, const std::vector<int32_t> &weights, std::vector<DocumentWeightIterator> &&iterators) |