summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2024-01-22 13:45:32 +0000
committerGeir Storli <geirst@yahooinc.com>2024-01-24 09:56:58 +0000
commit233da25510548e8a1033e1bbf0a506135ea85b42 (patch)
tree309838b1750e6c45c155098f539ada45f916bc96
parent2a43344d217ab065425e880cd35be54542423ce8 (diff)
Support IDocidWithWeightPostingStore for more attribute data types.
This includes array types (in addition to weighted set) and all integer types. This change also aligns the blueprint and iterator implementations used for WeightedSetTerm and InTerm, making the performance of WeightedSetTerm more similar to InTerm. In particular an attribute with "rank: filter" uses a more optimal search iterator for WeightedSetTerm.
-rw-r--r--searchlib/src/tests/attribute/bitvector/bitvector_test.cpp3
-rw-r--r--searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp8
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/basictype.cpp9
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/basictype.h1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp69
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp3
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp13
8 files changed, 50 insertions, 58 deletions
diff --git a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp
index f612bdda87f..6e479e1d9db 100644
--- a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp
+++ b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp
@@ -430,7 +430,8 @@ BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pre
sc = getSearch<VectorType>(tv, filter);
checkSearch(v, std::move(sc), 2, 1022, 205, !filter, true);
const auto* dww = v->as_docid_with_weight_posting_store();
- if (dww != nullptr) {
+ if ((dww != nullptr) && (bt == BasicType::STRING)) {
+ // This way of doing lookup is only supported by string attributes.
auto lres = dww->lookup(getSearchStr<VectorType>(), dww->get_dictionary_snapshot());
using DWSI = search::queryeval::DocidWithWeightSearchIterator;
TermFieldMatchData md;
diff --git a/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp b/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp
index c1e12580559..2ddd211bd12 100644
--- a/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp
+++ b/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp
@@ -142,7 +142,11 @@ TEST(DirectPostingStoreApiTest, attributes_do_not_support_IDocidPostingStore_int
TEST(DirectPostingStoreApiTest, attributes_support_IDocidWithWeightPostingStore_interface) {
expect_docid_with_weight_posting_store(BasicType::INT64, CollectionType::WSET, true);
+ expect_docid_with_weight_posting_store(BasicType::INT32, CollectionType::WSET, true);
expect_docid_with_weight_posting_store(BasicType::STRING, CollectionType::WSET, true);
+ expect_docid_with_weight_posting_store(BasicType::INT64, CollectionType::ARRAY, true);
+ expect_docid_with_weight_posting_store(BasicType::INT32, CollectionType::ARRAY, true);
+ expect_docid_with_weight_posting_store(BasicType::STRING, CollectionType::ARRAY, true);
}
TEST(DirectPostingStoreApiTest, attributes_do_not_support_IDocidWithWeightPostingStore_interface) {
@@ -150,13 +154,11 @@ TEST(DirectPostingStoreApiTest, attributes_do_not_support_IDocidWithWeightPostin
expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::ARRAY, false);
expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::WSET, false);
expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::SINGLE, true);
- expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::ARRAY, true);
expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::SINGLE, false);
expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::ARRAY, false);
expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::WSET, false);
expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::SINGLE, true);
- expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::ARRAY, true);
- expect_not_docid_with_weight_posting_store(BasicType::INT32, CollectionType::WSET, true);
+ expect_not_docid_with_weight_posting_store(BasicType::DOUBLE, CollectionType::ARRAY, true);
expect_not_docid_with_weight_posting_store(BasicType::DOUBLE, CollectionType::WSET, true);
}
diff --git a/searchlib/src/vespa/searchcommon/attribute/basictype.cpp b/searchlib/src/vespa/searchcommon/attribute/basictype.cpp
index c63d07ca130..0312154690c 100644
--- a/searchlib/src/vespa/searchcommon/attribute/basictype.cpp
+++ b/searchlib/src/vespa/searchcommon/attribute/basictype.cpp
@@ -35,4 +35,13 @@ BasicType::asType(const vespalib::string &t)
return NONE;
}
+bool
+BasicType::is_integer_type() const noexcept
+{
+ return (_type == INT8) ||
+ (_type == INT16) ||
+ (_type == INT32) ||
+ (_type == INT64);
+}
+
}
diff --git a/searchlib/src/vespa/searchcommon/attribute/basictype.h b/searchlib/src/vespa/searchcommon/attribute/basictype.h
index 35200b3f62d..4bdee5ecfd9 100644
--- a/searchlib/src/vespa/searchcommon/attribute/basictype.h
+++ b/searchlib/src/vespa/searchcommon/attribute/basictype.h
@@ -36,6 +36,7 @@ class BasicType
Type type() const noexcept { return _type; }
const char * asString() const noexcept { return asString(_type); }
size_t fixedSize() const noexcept { return fixedSize(_type); }
+ bool is_integer_type() const noexcept;
static BasicType fromType(bool) noexcept { return BOOL; }
static BasicType fromType(int8_t) noexcept { return INT8; }
static BasicType fromType(int16_t) noexcept { return INT16; }
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 56714bbf033..d5c67664a5e 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -389,12 +389,6 @@ private:
//-----------------------------------------------------------------------------
-
-
-
-
-//-----------------------------------------------------------------------------
-
class DirectWandBlueprint : public queryeval::ComplexLeafBlueprint
{
private:
@@ -516,8 +510,7 @@ private:
const IDocidWithWeightPostingStore *_dwwps;
vespalib::string _scratchPad;
- bool use_docid_with_weight_posting_store() const {
- // TODO: Relax requirement on always having weight iterator for query operators where that makes sense.
+ bool has_always_btree_iterators_with_docid_and_weight() const {
return (_dwwps != nullptr) && (_dwwps->has_always_btree_iterator());
}
@@ -608,32 +601,41 @@ public:
return std::make_unique<QueryTermUCS4>(term, QueryTermSimple::Type::WORD);
}
- void visit(query::WeightedSetTerm &n) override {
- bool isSingleValue = !_attr.hasMultiValue();
- bool isString = (_attr.isStringType() && _attr.hasEnum());
- bool isInteger = _attr.isIntegerType();
- if (isSingleValue && (isString || isInteger)) {
- auto ws = std::make_unique<AttributeWeightedSetBlueprint>(_field, _attr);
- SearchContextParams scParams = createContextParams();
- for (size_t i = 0; i < n.getNumTerms(); ++i) {
- auto term = n.getAsString(i);
- ws->addToken(_attr.createSearchContext(extractTerm(term.first, isInteger), scParams), term.second.percent());
- }
- setResult(std::move(ws));
+ template <typename TermType, typename SearchType>
+ void visit_wset_or_in_term(TermType& n) {
+ if (_dps != nullptr) {
+ auto* bp = new attribute::DirectMultiTermBlueprint<IDocidPostingStore, SearchType>
+ (_field, _attr, *_dps, n.getNumTerms());
+ createDirectMultiTerm(bp, n);
+ } else if (_dwwps != nullptr) {
+ auto* bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, SearchType>
+ (_field, _attr, *_dwwps, n.getNumTerms());
+ createDirectMultiTerm(bp, n);
} else {
- if (use_docid_with_weight_posting_store()) {
- auto *bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::WeightedSetTermSearch>
- (_field, _attr, *_dwwps, n.getNumTerms());
- createDirectMultiTerm(bp, n);
+ bool isSingleValue = !_attr.hasMultiValue();
+ bool isString = (_attr.isStringType() && _attr.hasEnum());
+ bool isInteger = _attr.isIntegerType();
+ if (isSingleValue && (isString || isInteger)) {
+ auto ws = std::make_unique<AttributeWeightedSetBlueprint>(_field, _attr);
+ SearchContextParams scParams = createContextParams();
+ for (size_t i = 0; i < n.getNumTerms(); ++i) {
+ auto term = n.getAsString(i);
+ ws->addToken(_attr.createSearchContext(extractTerm(term.first, isInteger), scParams), term.second.percent());
+ }
+ setResult(std::move(ws));
} else {
- auto *bp = new WeightedSetTermBlueprint(_field);
+ auto* bp = new WeightedSetTermBlueprint(_field);
createShallowWeightedSet(bp, n, _field, _attr.isIntegerType());
}
}
}
+ void visit(query::WeightedSetTerm &n) override {
+ visit_wset_or_in_term<query::WeightedSetTerm, queryeval::WeightedSetTermSearch>(n);
+ }
+
void visit(query::DotProduct &n) override {
- if (use_docid_with_weight_posting_store()) {
+ if (has_always_btree_iterators_with_docid_and_weight()) {
auto *bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::DotProductSearch>
(_field, _attr, *_dwwps, n.getNumTerms());
createDirectMultiTerm(bp, n);
@@ -644,7 +646,7 @@ public:
}
void visit(query::WandTerm &n) override {
- if (use_docid_with_weight_posting_store()) {
+ if (has_always_btree_iterators_with_docid_and_weight()) {
auto *bp = new DirectWandBlueprint(_field, *_dwwps,
n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(),
n.getNumTerms());
@@ -659,18 +661,7 @@ public:
}
void visit(query::InTerm &n) override {
- if (_dps != nullptr) {
- auto* bp = new attribute::DirectMultiTermBlueprint<IDocidPostingStore, attribute::InTermSearch>
- (_field, _attr, *_dps, n.getNumTerms());
- createDirectMultiTerm(bp, n);
- } else if (_dwwps != nullptr) {
- auto* bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, attribute::InTermSearch>
- (_field, _attr, *_dwwps, n.getNumTerms());
- createDirectMultiTerm(bp, n);
- } else {
- auto* bp = new WeightedSetTermBlueprint(_field);
- createShallowWeightedSet(bp, n, _field, _attr.isIntegerType());
- }
+ visit_wset_or_in_term<query::InTerm, attribute::InTermSearch>(n);
}
void fail_nearest_neighbor_term(query::NearestNeighborTerm&n, const vespalib::string& error_msg) {
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
index ea1058d88fb..9328857c919 100644
--- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
@@ -89,7 +89,7 @@ template <typename B, typename M>
const IDocidWithWeightPostingStore*
MultiValueNumericPostingAttribute<B, M>::as_docid_with_weight_posting_store() const
{
- if (this->hasWeightedSetType() && (this->getBasicType() == AttributeVector::BasicType::INT64)) {
+ if (this->getConfig().basicType().is_integer_type()) {
return &_posting_store_adapter;
}
return nullptr;
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
index b6e9b69a81d..371b8d920f9 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
@@ -108,8 +108,7 @@ template <typename B, typename T>
const IDocidWithWeightPostingStore*
MultiValueStringPostingAttributeT<B, T>::as_docid_with_weight_posting_store() const
{
- // TODO: Add support for handling bit vectors too, and lift restriction on isFilter.
- if (this->hasWeightedSetType() && this->isStringType()) {
+ if (this->isStringType()) {
return &_posting_store_adapter;
}
return nullptr;
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp
index c57742ca4b6..c74e25fbf5a 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp
@@ -150,22 +150,11 @@ SingleValueNumericPostingAttribute<B>::getSearch(QueryTermSimple::UP qTerm,
return std::make_unique<SC>(std::move(base_sc), params, *this);
}
-namespace {
-
-bool is_integer_type(attribute::BasicType type) {
- return (type == attribute::BasicType::INT8) ||
- (type == attribute::BasicType::INT16) ||
- (type == attribute::BasicType::INT32) ||
- (type == attribute::BasicType::INT64);
-}
-
-}
-
template <typename B>
const IDocidPostingStore*
SingleValueNumericPostingAttribute<B>::as_docid_posting_store() const
{
- if (is_integer_type(this->getBasicType())) {
+ if (this->getConfig().basicType().is_integer_type()) {
return &_posting_store_adapter;
}
return nullptr;