diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-24 11:46:44 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-24 11:46:44 +0100 |
commit | 33b1928a935d70fabc7bc638dd53425141a1ab04 (patch) | |
tree | d0841157c5489503272472d234027bbaf33d5280 | |
parent | 055b4ea8f9a37230c531624b27875d355794b54e (diff) | |
parent | 233da25510548e8a1033e1bbf0a506135ea85b42 (diff) |
Merge pull request #30036 from vespa-engine/geirst/reapply-weighted-set-and-in-term-alignment
Reapply Support IDocidWithWeightPostingStore for more attribute data types.
8 files changed, 50 insertions, 58 deletions
diff --git a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp index f612bdda87f..6e479e1d9db 100644 --- a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp +++ b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp @@ -430,7 +430,8 @@ BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pre sc = getSearch<VectorType>(tv, filter); checkSearch(v, std::move(sc), 2, 1022, 205, !filter, true); const auto* dww = v->as_docid_with_weight_posting_store(); - if (dww != nullptr) { + if ((dww != nullptr) && (bt == BasicType::STRING)) { + // This way of doing lookup is only supported by string attributes. auto lres = dww->lookup(getSearchStr<VectorType>(), dww->get_dictionary_snapshot()); using DWSI = search::queryeval::DocidWithWeightSearchIterator; TermFieldMatchData md; diff --git a/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp b/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp index c1e12580559..2ddd211bd12 100644 --- a/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp +++ b/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp @@ -142,7 +142,11 @@ TEST(DirectPostingStoreApiTest, attributes_do_not_support_IDocidPostingStore_int TEST(DirectPostingStoreApiTest, attributes_support_IDocidWithWeightPostingStore_interface) { expect_docid_with_weight_posting_store(BasicType::INT64, CollectionType::WSET, true); + expect_docid_with_weight_posting_store(BasicType::INT32, CollectionType::WSET, true); expect_docid_with_weight_posting_store(BasicType::STRING, CollectionType::WSET, true); + expect_docid_with_weight_posting_store(BasicType::INT64, CollectionType::ARRAY, true); + expect_docid_with_weight_posting_store(BasicType::INT32, CollectionType::ARRAY, true); + expect_docid_with_weight_posting_store(BasicType::STRING, CollectionType::ARRAY, true); } TEST(DirectPostingStoreApiTest, attributes_do_not_support_IDocidWithWeightPostingStore_interface) { @@ -150,13 +154,11 @@ TEST(DirectPostingStoreApiTest, attributes_do_not_support_IDocidWithWeightPostin expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::ARRAY, false); expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::WSET, false); expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::SINGLE, true); - expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::ARRAY, true); expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::SINGLE, false); expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::ARRAY, false); expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::WSET, false); expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::SINGLE, true); - expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::ARRAY, true); - expect_not_docid_with_weight_posting_store(BasicType::INT32, CollectionType::WSET, true); + expect_not_docid_with_weight_posting_store(BasicType::DOUBLE, CollectionType::ARRAY, true); expect_not_docid_with_weight_posting_store(BasicType::DOUBLE, CollectionType::WSET, true); } diff --git a/searchlib/src/vespa/searchcommon/attribute/basictype.cpp b/searchlib/src/vespa/searchcommon/attribute/basictype.cpp index c63d07ca130..0312154690c 100644 --- a/searchlib/src/vespa/searchcommon/attribute/basictype.cpp +++ b/searchlib/src/vespa/searchcommon/attribute/basictype.cpp @@ -35,4 +35,13 @@ BasicType::asType(const vespalib::string &t) return NONE; } +bool +BasicType::is_integer_type() const noexcept +{ + return (_type == INT8) || + (_type == INT16) || + (_type == INT32) || + (_type == INT64); +} + } diff --git a/searchlib/src/vespa/searchcommon/attribute/basictype.h b/searchlib/src/vespa/searchcommon/attribute/basictype.h index 35200b3f62d..4bdee5ecfd9 100644 --- a/searchlib/src/vespa/searchcommon/attribute/basictype.h +++ b/searchlib/src/vespa/searchcommon/attribute/basictype.h @@ -36,6 +36,7 @@ class BasicType Type type() const noexcept { return _type; } const char * asString() const noexcept { return asString(_type); } size_t fixedSize() const noexcept { return fixedSize(_type); } + bool is_integer_type() const noexcept; static BasicType fromType(bool) noexcept { return BOOL; } static BasicType fromType(int8_t) noexcept { return INT8; } static BasicType fromType(int16_t) noexcept { return INT16; } diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 56714bbf033..d5c67664a5e 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -389,12 +389,6 @@ private: //----------------------------------------------------------------------------- - - - - -//----------------------------------------------------------------------------- - class DirectWandBlueprint : public queryeval::ComplexLeafBlueprint { private: @@ -516,8 +510,7 @@ private: const IDocidWithWeightPostingStore *_dwwps; vespalib::string _scratchPad; - bool use_docid_with_weight_posting_store() const { - // TODO: Relax requirement on always having weight iterator for query operators where that makes sense. + bool has_always_btree_iterators_with_docid_and_weight() const { return (_dwwps != nullptr) && (_dwwps->has_always_btree_iterator()); } @@ -608,32 +601,41 @@ public: return std::make_unique<QueryTermUCS4>(term, QueryTermSimple::Type::WORD); } - void visit(query::WeightedSetTerm &n) override { - bool isSingleValue = !_attr.hasMultiValue(); - bool isString = (_attr.isStringType() && _attr.hasEnum()); - bool isInteger = _attr.isIntegerType(); - if (isSingleValue && (isString || isInteger)) { - auto ws = std::make_unique<AttributeWeightedSetBlueprint>(_field, _attr); - SearchContextParams scParams = createContextParams(); - for (size_t i = 0; i < n.getNumTerms(); ++i) { - auto term = n.getAsString(i); - ws->addToken(_attr.createSearchContext(extractTerm(term.first, isInteger), scParams), term.second.percent()); - } - setResult(std::move(ws)); + template <typename TermType, typename SearchType> + void visit_wset_or_in_term(TermType& n) { + if (_dps != nullptr) { + auto* bp = new attribute::DirectMultiTermBlueprint<IDocidPostingStore, SearchType> + (_field, _attr, *_dps, n.getNumTerms()); + createDirectMultiTerm(bp, n); + } else if (_dwwps != nullptr) { + auto* bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, SearchType> + (_field, _attr, *_dwwps, n.getNumTerms()); + createDirectMultiTerm(bp, n); } else { - if (use_docid_with_weight_posting_store()) { - auto *bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::WeightedSetTermSearch> - (_field, _attr, *_dwwps, n.getNumTerms()); - createDirectMultiTerm(bp, n); + bool isSingleValue = !_attr.hasMultiValue(); + bool isString = (_attr.isStringType() && _attr.hasEnum()); + bool isInteger = _attr.isIntegerType(); + if (isSingleValue && (isString || isInteger)) { + auto ws = std::make_unique<AttributeWeightedSetBlueprint>(_field, _attr); + SearchContextParams scParams = createContextParams(); + for (size_t i = 0; i < n.getNumTerms(); ++i) { + auto term = n.getAsString(i); + ws->addToken(_attr.createSearchContext(extractTerm(term.first, isInteger), scParams), term.second.percent()); + } + setResult(std::move(ws)); } else { - auto *bp = new WeightedSetTermBlueprint(_field); + auto* bp = new WeightedSetTermBlueprint(_field); createShallowWeightedSet(bp, n, _field, _attr.isIntegerType()); } } } + void visit(query::WeightedSetTerm &n) override { + visit_wset_or_in_term<query::WeightedSetTerm, queryeval::WeightedSetTermSearch>(n); + } + void visit(query::DotProduct &n) override { - if (use_docid_with_weight_posting_store()) { + if (has_always_btree_iterators_with_docid_and_weight()) { auto *bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::DotProductSearch> (_field, _attr, *_dwwps, n.getNumTerms()); createDirectMultiTerm(bp, n); @@ -644,7 +646,7 @@ public: } void visit(query::WandTerm &n) override { - if (use_docid_with_weight_posting_store()) { + if (has_always_btree_iterators_with_docid_and_weight()) { auto *bp = new DirectWandBlueprint(_field, *_dwwps, n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(), n.getNumTerms()); @@ -659,18 +661,7 @@ public: } void visit(query::InTerm &n) override { - if (_dps != nullptr) { - auto* bp = new attribute::DirectMultiTermBlueprint<IDocidPostingStore, attribute::InTermSearch> - (_field, _attr, *_dps, n.getNumTerms()); - createDirectMultiTerm(bp, n); - } else if (_dwwps != nullptr) { - auto* bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, attribute::InTermSearch> - (_field, _attr, *_dwwps, n.getNumTerms()); - createDirectMultiTerm(bp, n); - } else { - auto* bp = new WeightedSetTermBlueprint(_field); - createShallowWeightedSet(bp, n, _field, _attr.isIntegerType()); - } + visit_wset_or_in_term<query::InTerm, attribute::InTermSearch>(n); } void fail_nearest_neighbor_term(query::NearestNeighborTerm&n, const vespalib::string& error_msg) { diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp index ea1058d88fb..9328857c919 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp @@ -89,7 +89,7 @@ template <typename B, typename M> const IDocidWithWeightPostingStore* MultiValueNumericPostingAttribute<B, M>::as_docid_with_weight_posting_store() const { - if (this->hasWeightedSetType() && (this->getBasicType() == AttributeVector::BasicType::INT64)) { + if (this->getConfig().basicType().is_integer_type()) { return &_posting_store_adapter; } return nullptr; diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp index b6e9b69a81d..371b8d920f9 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp @@ -108,8 +108,7 @@ template <typename B, typename T> const IDocidWithWeightPostingStore* MultiValueStringPostingAttributeT<B, T>::as_docid_with_weight_posting_store() const { - // TODO: Add support for handling bit vectors too, and lift restriction on isFilter. - if (this->hasWeightedSetType() && this->isStringType()) { + if (this->isStringType()) { return &_posting_store_adapter; } return nullptr; diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp index c57742ca4b6..c74e25fbf5a 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp @@ -150,22 +150,11 @@ SingleValueNumericPostingAttribute<B>::getSearch(QueryTermSimple::UP qTerm, return std::make_unique<SC>(std::move(base_sc), params, *this); } -namespace { - -bool is_integer_type(attribute::BasicType type) { - return (type == attribute::BasicType::INT8) || - (type == attribute::BasicType::INT16) || - (type == attribute::BasicType::INT32) || - (type == attribute::BasicType::INT64); -} - -} - template <typename B> const IDocidPostingStore* SingleValueNumericPostingAttribute<B>::as_docid_posting_store() const { - if (is_integer_type(this->getBasicType())) { + if (this->getConfig().basicType().is_integer_type()) { return &_posting_store_adapter; } return nullptr; |