From 704569423a2fab41ad063f8a1031549e01870e5f Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Mon, 22 Jan 2024 13:45:32 +0000 Subject: Support IDocidWithWeightPostingStore for more attribute data types. This includes array types (in addition to weighted set) and all integer types. This change also aligns the blueprint and iterator implementations used for WeightedSetTerm and InTerm, making the performance of WeightedSetTerm more similar to InTerm. In particular an attribute with "rank: filter" uses a more optimal search iterator for WeightedSetTerm. --- .../tests/attribute/bitvector/bitvector_test.cpp | 3 +- .../direct_posting_store_test.cpp | 8 ++- .../src/vespa/searchcommon/attribute/basictype.cpp | 9 +++ .../src/vespa/searchcommon/attribute/basictype.h | 1 + .../attribute/attribute_blueprint_factory.cpp | 65 +++++++++++----------- .../attribute/multinumericpostattribute.hpp | 2 +- .../attribute/multistringpostattribute.hpp | 3 +- .../attribute/singlenumericpostattribute.hpp | 13 +---- 8 files changed, 51 insertions(+), 53 deletions(-) diff --git a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp index f612bdda87f..6e479e1d9db 100644 --- a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp +++ b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp @@ -430,7 +430,8 @@ BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pre sc = getSearch(tv, filter); checkSearch(v, std::move(sc), 2, 1022, 205, !filter, true); const auto* dww = v->as_docid_with_weight_posting_store(); - if (dww != nullptr) { + if ((dww != nullptr) && (bt == BasicType::STRING)) { + // This way of doing lookup is only supported by string attributes. auto lres = dww->lookup(getSearchStr(), dww->get_dictionary_snapshot()); using DWSI = search::queryeval::DocidWithWeightSearchIterator; TermFieldMatchData md; diff --git a/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp b/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp index c1e12580559..2ddd211bd12 100644 --- a/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp +++ b/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp @@ -142,7 +142,11 @@ TEST(DirectPostingStoreApiTest, attributes_do_not_support_IDocidPostingStore_int TEST(DirectPostingStoreApiTest, attributes_support_IDocidWithWeightPostingStore_interface) { expect_docid_with_weight_posting_store(BasicType::INT64, CollectionType::WSET, true); + expect_docid_with_weight_posting_store(BasicType::INT32, CollectionType::WSET, true); expect_docid_with_weight_posting_store(BasicType::STRING, CollectionType::WSET, true); + expect_docid_with_weight_posting_store(BasicType::INT64, CollectionType::ARRAY, true); + expect_docid_with_weight_posting_store(BasicType::INT32, CollectionType::ARRAY, true); + expect_docid_with_weight_posting_store(BasicType::STRING, CollectionType::ARRAY, true); } TEST(DirectPostingStoreApiTest, attributes_do_not_support_IDocidWithWeightPostingStore_interface) { @@ -150,13 +154,11 @@ TEST(DirectPostingStoreApiTest, attributes_do_not_support_IDocidWithWeightPostin expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::ARRAY, false); expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::WSET, false); expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::SINGLE, true); - expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::ARRAY, true); expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::SINGLE, false); expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::ARRAY, false); expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::WSET, false); expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::SINGLE, true); - expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::ARRAY, true); - expect_not_docid_with_weight_posting_store(BasicType::INT32, CollectionType::WSET, true); + expect_not_docid_with_weight_posting_store(BasicType::DOUBLE, CollectionType::ARRAY, true); expect_not_docid_with_weight_posting_store(BasicType::DOUBLE, CollectionType::WSET, true); } diff --git a/searchlib/src/vespa/searchcommon/attribute/basictype.cpp b/searchlib/src/vespa/searchcommon/attribute/basictype.cpp index c63d07ca130..0312154690c 100644 --- a/searchlib/src/vespa/searchcommon/attribute/basictype.cpp +++ b/searchlib/src/vespa/searchcommon/attribute/basictype.cpp @@ -35,4 +35,13 @@ BasicType::asType(const vespalib::string &t) return NONE; } +bool +BasicType::is_integer_type() const noexcept +{ + return (_type == INT8) || + (_type == INT16) || + (_type == INT32) || + (_type == INT64); +} + } diff --git a/searchlib/src/vespa/searchcommon/attribute/basictype.h b/searchlib/src/vespa/searchcommon/attribute/basictype.h index 35200b3f62d..4bdee5ecfd9 100644 --- a/searchlib/src/vespa/searchcommon/attribute/basictype.h +++ b/searchlib/src/vespa/searchcommon/attribute/basictype.h @@ -36,6 +36,7 @@ class BasicType Type type() const noexcept { return _type; } const char * asString() const noexcept { return asString(_type); } size_t fixedSize() const noexcept { return fixedSize(_type); } + bool is_integer_type() const noexcept; static BasicType fromType(bool) noexcept { return BOOL; } static BasicType fromType(int8_t) noexcept { return INT8; } static BasicType fromType(int16_t) noexcept { return INT16; } diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 5d689f5bd81..5f00c1ccf2f 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -579,8 +579,7 @@ private: const IDocidWithWeightPostingStore *_dwwps; vespalib::string _scratchPad; - bool use_docid_with_weight_posting_store() const { - // TODO: Relax requirement on always having weight iterator for query operators where that makes sense. + bool has_always_btree_iterators_with_docid_and_weight() const { return (_dwwps != nullptr) && (_dwwps->has_always_btree_iterator()); } @@ -599,7 +598,7 @@ public: template void visitSimpleTerm(TermNode &n) { - if (use_docid_with_weight_posting_store() && !_field.isFilter() && n.isRanked() && !Term::isPossibleRangeTerm(n.getTerm())) { + if (has_always_btree_iterators_with_docid_and_weight() && !_field.isFilter() && n.isRanked() && !Term::isPossibleRangeTerm(n.getTerm())) { NodeAsKey key(n, _scratchPad); setResult(std::make_unique(_field, _attr, *_dwwps, key)); } else { @@ -680,32 +679,41 @@ public: return std::make_unique(term, QueryTermSimple::Type::WORD); } - void visit(query::WeightedSetTerm &n) override { - bool isSingleValue = !_attr.hasMultiValue(); - bool isString = (_attr.isStringType() && _attr.hasEnum()); - bool isInteger = _attr.isIntegerType(); - if (isSingleValue && (isString || isInteger)) { - auto ws = std::make_unique(_field, _attr); - SearchContextParams scParams = createContextParams(); - for (size_t i = 0; i < n.getNumTerms(); ++i) { - auto term = n.getAsString(i); - ws->addToken(_attr.createSearchContext(extractTerm(term.first, isInteger), scParams), term.second.percent()); - } - setResult(std::move(ws)); + template + void visit_wset_or_in_term(TermType& n) { + if (_dps != nullptr) { + auto* bp = new attribute::DirectMultiTermBlueprint + (_field, _attr, *_dps, n.getNumTerms()); + createDirectMultiTerm(bp, n); + } else if (_dwwps != nullptr) { + auto* bp = new attribute::DirectMultiTermBlueprint + (_field, _attr, *_dwwps, n.getNumTerms()); + createDirectMultiTerm(bp, n); } else { - if (use_docid_with_weight_posting_store()) { - auto *bp = new attribute::DirectMultiTermBlueprint - (_field, _attr, *_dwwps, n.getNumTerms()); - createDirectMultiTerm(bp, n); + bool isSingleValue = !_attr.hasMultiValue(); + bool isString = (_attr.isStringType() && _attr.hasEnum()); + bool isInteger = _attr.isIntegerType(); + if (isSingleValue && (isString || isInteger)) { + auto ws = std::make_unique(_field, _attr); + SearchContextParams scParams = createContextParams(); + for (size_t i = 0; i < n.getNumTerms(); ++i) { + auto term = n.getAsString(i); + ws->addToken(_attr.createSearchContext(extractTerm(term.first, isInteger), scParams), term.second.percent()); + } + setResult(std::move(ws)); } else { - auto *bp = new WeightedSetTermBlueprint(_field); + auto* bp = new WeightedSetTermBlueprint(_field); createShallowWeightedSet(bp, n, _field, _attr.isIntegerType()); } } } + void visit(query::WeightedSetTerm &n) override { + visit_wset_or_in_term(n); + } + void visit(query::DotProduct &n) override { - if (use_docid_with_weight_posting_store()) { + if (has_always_btree_iterators_with_docid_and_weight()) { auto *bp = new attribute::DirectMultiTermBlueprint (_field, _attr, *_dwwps, n.getNumTerms()); createDirectMultiTerm(bp, n); @@ -716,7 +724,7 @@ public: } void visit(query::WandTerm &n) override { - if (use_docid_with_weight_posting_store()) { + if (has_always_btree_iterators_with_docid_and_weight()) { auto *bp = new DirectWandBlueprint(_field, *_dwwps, n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(), n.getNumTerms()); @@ -731,18 +739,7 @@ public: } void visit(query::InTerm &n) override { - if (_dps != nullptr) { - auto* bp = new attribute::DirectMultiTermBlueprint - (_field, _attr, *_dps, n.getNumTerms()); - createDirectMultiTerm(bp, n); - } else if (_dwwps != nullptr) { - auto* bp = new attribute::DirectMultiTermBlueprint - (_field, _attr, *_dwwps, n.getNumTerms()); - createDirectMultiTerm(bp, n); - } else { - auto* bp = new WeightedSetTermBlueprint(_field); - createShallowWeightedSet(bp, n, _field, _attr.isIntegerType()); - } + visit_wset_or_in_term(n); } void fail_nearest_neighbor_term(query::NearestNeighborTerm&n, const vespalib::string& error_msg) { diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp index ea1058d88fb..9328857c919 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp @@ -89,7 +89,7 @@ template const IDocidWithWeightPostingStore* MultiValueNumericPostingAttribute::as_docid_with_weight_posting_store() const { - if (this->hasWeightedSetType() && (this->getBasicType() == AttributeVector::BasicType::INT64)) { + if (this->getConfig().basicType().is_integer_type()) { return &_posting_store_adapter; } return nullptr; diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp index b6e9b69a81d..371b8d920f9 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp @@ -108,8 +108,7 @@ template const IDocidWithWeightPostingStore* MultiValueStringPostingAttributeT::as_docid_with_weight_posting_store() const { - // TODO: Add support for handling bit vectors too, and lift restriction on isFilter. - if (this->hasWeightedSetType() && this->isStringType()) { + if (this->isStringType()) { return &_posting_store_adapter; } return nullptr; diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp index c57742ca4b6..c74e25fbf5a 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp @@ -150,22 +150,11 @@ SingleValueNumericPostingAttribute::getSearch(QueryTermSimple::UP qTerm, return std::make_unique(std::move(base_sc), params, *this); } -namespace { - -bool is_integer_type(attribute::BasicType type) { - return (type == attribute::BasicType::INT8) || - (type == attribute::BasicType::INT16) || - (type == attribute::BasicType::INT32) || - (type == attribute::BasicType::INT64); -} - -} - template const IDocidPostingStore* SingleValueNumericPostingAttribute::as_docid_posting_store() const { - if (is_integer_type(this->getBasicType())) { + if (this->getConfig().basicType().is_integer_type()) { return &_posting_store_adapter; } return nullptr; -- cgit v1.2.3