summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2024-01-24 11:46:44 +0100
committerGitHub <noreply@github.com>2024-01-24 11:46:44 +0100
commit33b1928a935d70fabc7bc638dd53425141a1ab04 (patch)
treed0841157c5489503272472d234027bbaf33d5280
parent055b4ea8f9a37230c531624b27875d355794b54e (diff)
parent233da25510548e8a1033e1bbf0a506135ea85b42 (diff)
Merge pull request #30036 from vespa-engine/geirst/reapply-weighted-set-and-in-term-alignment
Reapply Support IDocidWithWeightPostingStore for more attribute data types.
-rw-r--r--searchlib/src/tests/attribute/bitvector/bitvector_test.cpp3
-rw-r--r--searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp8
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/basictype.cpp9
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/basictype.h1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp69
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp3
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp13
8 files changed, 50 insertions, 58 deletions
diff --git a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp
index f612bdda87f..6e479e1d9db 100644
--- a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp
+++ b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp
@@ -430,7 +430,8 @@ BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pre
sc = getSearch<VectorType>(tv, filter);
checkSearch(v, std::move(sc), 2, 1022, 205, !filter, true);
const auto* dww = v->as_docid_with_weight_posting_store();
- if (dww != nullptr) {
+ if ((dww != nullptr) && (bt == BasicType::STRING)) {
+ // This way of doing lookup is only supported by string attributes.
auto lres = dww->lookup(getSearchStr<VectorType>(), dww->get_dictionary_snapshot());
using DWSI = search::queryeval::DocidWithWeightSearchIterator;
TermFieldMatchData md;
diff --git a/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp b/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp
index c1e12580559..2ddd211bd12 100644
--- a/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp
+++ b/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp
@@ -142,7 +142,11 @@ TEST(DirectPostingStoreApiTest, attributes_do_not_support_IDocidPostingStore_int
TEST(DirectPostingStoreApiTest, attributes_support_IDocidWithWeightPostingStore_interface) {
expect_docid_with_weight_posting_store(BasicType::INT64, CollectionType::WSET, true);
+ expect_docid_with_weight_posting_store(BasicType::INT32, CollectionType::WSET, true);
expect_docid_with_weight_posting_store(BasicType::STRING, CollectionType::WSET, true);
+ expect_docid_with_weight_posting_store(BasicType::INT64, CollectionType::ARRAY, true);
+ expect_docid_with_weight_posting_store(BasicType::INT32, CollectionType::ARRAY, true);
+ expect_docid_with_weight_posting_store(BasicType::STRING, CollectionType::ARRAY, true);
}
TEST(DirectPostingStoreApiTest, attributes_do_not_support_IDocidWithWeightPostingStore_interface) {
@@ -150,13 +154,11 @@ TEST(DirectPostingStoreApiTest, attributes_do_not_support_IDocidWithWeightPostin
expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::ARRAY, false);
expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::WSET, false);
expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::SINGLE, true);
- expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::ARRAY, true);
expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::SINGLE, false);
expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::ARRAY, false);
expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::WSET, false);
expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::SINGLE, true);
- expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::ARRAY, true);
- expect_not_docid_with_weight_posting_store(BasicType::INT32, CollectionType::WSET, true);
+ expect_not_docid_with_weight_posting_store(BasicType::DOUBLE, CollectionType::ARRAY, true);
expect_not_docid_with_weight_posting_store(BasicType::DOUBLE, CollectionType::WSET, true);
}
diff --git a/searchlib/src/vespa/searchcommon/attribute/basictype.cpp b/searchlib/src/vespa/searchcommon/attribute/basictype.cpp
index c63d07ca130..0312154690c 100644
--- a/searchlib/src/vespa/searchcommon/attribute/basictype.cpp
+++ b/searchlib/src/vespa/searchcommon/attribute/basictype.cpp
@@ -35,4 +35,13 @@ BasicType::asType(const vespalib::string &t)
return NONE;
}
+bool
+BasicType::is_integer_type() const noexcept
+{
+ return (_type == INT8) ||
+ (_type == INT16) ||
+ (_type == INT32) ||
+ (_type == INT64);
+}
+
}
diff --git a/searchlib/src/vespa/searchcommon/attribute/basictype.h b/searchlib/src/vespa/searchcommon/attribute/basictype.h
index 35200b3f62d..4bdee5ecfd9 100644
--- a/searchlib/src/vespa/searchcommon/attribute/basictype.h
+++ b/searchlib/src/vespa/searchcommon/attribute/basictype.h
@@ -36,6 +36,7 @@ class BasicType
Type type() const noexcept { return _type; }
const char * asString() const noexcept { return asString(_type); }
size_t fixedSize() const noexcept { return fixedSize(_type); }
+ bool is_integer_type() const noexcept;
static BasicType fromType(bool) noexcept { return BOOL; }
static BasicType fromType(int8_t) noexcept { return INT8; }
static BasicType fromType(int16_t) noexcept { return INT16; }
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 56714bbf033..d5c67664a5e 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -389,12 +389,6 @@ private:
//-----------------------------------------------------------------------------
-
-
-
-
-//-----------------------------------------------------------------------------
-
class DirectWandBlueprint : public queryeval::ComplexLeafBlueprint
{
private:
@@ -516,8 +510,7 @@ private:
const IDocidWithWeightPostingStore *_dwwps;
vespalib::string _scratchPad;
- bool use_docid_with_weight_posting_store() const {
- // TODO: Relax requirement on always having weight iterator for query operators where that makes sense.
+ bool has_always_btree_iterators_with_docid_and_weight() const {
return (_dwwps != nullptr) && (_dwwps->has_always_btree_iterator());
}
@@ -608,32 +601,41 @@ public:
return std::make_unique<QueryTermUCS4>(term, QueryTermSimple::Type::WORD);
}
- void visit(query::WeightedSetTerm &n) override {
- bool isSingleValue = !_attr.hasMultiValue();
- bool isString = (_attr.isStringType() && _attr.hasEnum());
- bool isInteger = _attr.isIntegerType();
- if (isSingleValue && (isString || isInteger)) {
- auto ws = std::make_unique<AttributeWeightedSetBlueprint>(_field, _attr);
- SearchContextParams scParams = createContextParams();
- for (size_t i = 0; i < n.getNumTerms(); ++i) {
- auto term = n.getAsString(i);
- ws->addToken(_attr.createSearchContext(extractTerm(term.first, isInteger), scParams), term.second.percent());
- }
- setResult(std::move(ws));
+ template <typename TermType, typename SearchType>
+ void visit_wset_or_in_term(TermType& n) {
+ if (_dps != nullptr) {
+ auto* bp = new attribute::DirectMultiTermBlueprint<IDocidPostingStore, SearchType>
+ (_field, _attr, *_dps, n.getNumTerms());
+ createDirectMultiTerm(bp, n);
+ } else if (_dwwps != nullptr) {
+ auto* bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, SearchType>
+ (_field, _attr, *_dwwps, n.getNumTerms());
+ createDirectMultiTerm(bp, n);
} else {
- if (use_docid_with_weight_posting_store()) {
- auto *bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::WeightedSetTermSearch>
- (_field, _attr, *_dwwps, n.getNumTerms());
- createDirectMultiTerm(bp, n);
+ bool isSingleValue = !_attr.hasMultiValue();
+ bool isString = (_attr.isStringType() && _attr.hasEnum());
+ bool isInteger = _attr.isIntegerType();
+ if (isSingleValue && (isString || isInteger)) {
+ auto ws = std::make_unique<AttributeWeightedSetBlueprint>(_field, _attr);
+ SearchContextParams scParams = createContextParams();
+ for (size_t i = 0; i < n.getNumTerms(); ++i) {
+ auto term = n.getAsString(i);
+ ws->addToken(_attr.createSearchContext(extractTerm(term.first, isInteger), scParams), term.second.percent());
+ }
+ setResult(std::move(ws));
} else {
- auto *bp = new WeightedSetTermBlueprint(_field);
+ auto* bp = new WeightedSetTermBlueprint(_field);
createShallowWeightedSet(bp, n, _field, _attr.isIntegerType());
}
}
}
+ void visit(query::WeightedSetTerm &n) override {
+ visit_wset_or_in_term<query::WeightedSetTerm, queryeval::WeightedSetTermSearch>(n);
+ }
+
void visit(query::DotProduct &n) override {
- if (use_docid_with_weight_posting_store()) {
+ if (has_always_btree_iterators_with_docid_and_weight()) {
auto *bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::DotProductSearch>
(_field, _attr, *_dwwps, n.getNumTerms());
createDirectMultiTerm(bp, n);
@@ -644,7 +646,7 @@ public:
}
void visit(query::WandTerm &n) override {
- if (use_docid_with_weight_posting_store()) {
+ if (has_always_btree_iterators_with_docid_and_weight()) {
auto *bp = new DirectWandBlueprint(_field, *_dwwps,
n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(),
n.getNumTerms());
@@ -659,18 +661,7 @@ public:
}
void visit(query::InTerm &n) override {
- if (_dps != nullptr) {
- auto* bp = new attribute::DirectMultiTermBlueprint<IDocidPostingStore, attribute::InTermSearch>
- (_field, _attr, *_dps, n.getNumTerms());
- createDirectMultiTerm(bp, n);
- } else if (_dwwps != nullptr) {
- auto* bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, attribute::InTermSearch>
- (_field, _attr, *_dwwps, n.getNumTerms());
- createDirectMultiTerm(bp, n);
- } else {
- auto* bp = new WeightedSetTermBlueprint(_field);
- createShallowWeightedSet(bp, n, _field, _attr.isIntegerType());
- }
+ visit_wset_or_in_term<query::InTerm, attribute::InTermSearch>(n);
}
void fail_nearest_neighbor_term(query::NearestNeighborTerm&n, const vespalib::string& error_msg) {
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
index ea1058d88fb..9328857c919 100644
--- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
@@ -89,7 +89,7 @@ template <typename B, typename M>
const IDocidWithWeightPostingStore*
MultiValueNumericPostingAttribute<B, M>::as_docid_with_weight_posting_store() const
{
- if (this->hasWeightedSetType() && (this->getBasicType() == AttributeVector::BasicType::INT64)) {
+ if (this->getConfig().basicType().is_integer_type()) {
return &_posting_store_adapter;
}
return nullptr;
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
index b6e9b69a81d..371b8d920f9 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
@@ -108,8 +108,7 @@ template <typename B, typename T>
const IDocidWithWeightPostingStore*
MultiValueStringPostingAttributeT<B, T>::as_docid_with_weight_posting_store() const
{
- // TODO: Add support for handling bit vectors too, and lift restriction on isFilter.
- if (this->hasWeightedSetType() && this->isStringType()) {
+ if (this->isStringType()) {
return &_posting_store_adapter;
}
return nullptr;
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp
index c57742ca4b6..c74e25fbf5a 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp
@@ -150,22 +150,11 @@ SingleValueNumericPostingAttribute<B>::getSearch(QueryTermSimple::UP qTerm,
return std::make_unique<SC>(std::move(base_sc), params, *this);
}
-namespace {
-
-bool is_integer_type(attribute::BasicType type) {
- return (type == attribute::BasicType::INT8) ||
- (type == attribute::BasicType::INT16) ||
- (type == attribute::BasicType::INT32) ||
- (type == attribute::BasicType::INT64);
-}
-
-}
-
template <typename B>
const IDocidPostingStore*
SingleValueNumericPostingAttribute<B>::as_docid_posting_store() const
{
- if (is_integer_type(this->getBasicType())) {
+ if (this->getConfig().basicType().is_integer_type()) {
return &_posting_store_adapter;
}
return nullptr;