about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2023-12-13 14:57:08 +0000
committer Geir Storli <geirst@yahooinc.com> 2023-12-13 14:57:08 +0000
commit 6a5c9711d63aff13100e7ca4b4e6c1e91bce83a5 (patch)
tree 06e87fd36d54cecdec4033380cbb76d13886976a
parent 32b8a74701ade63457977a36aaa56cdcc5ddaf0a (diff)
Support direct posting store API for single integer attributes.
-rw-r--r-- searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp 63
-rw-r--r-- searchlib/src/vespa/searchcommon/attribute/iattributevector.h 12
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attributevector.cpp 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attributevector.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h 5
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp 22
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/direct_posting_store_adapter.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/direct_posting_store_adapter.hpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp 23
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h 9
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/i_docid_posting_store.h 22
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/i_docid_with_weight_posting_store.h 13
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/numeric_direct_posting_store_adapter.hpp 8
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/postingstore.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h 11
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp 29
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_direct_posting_store_adapter.hpp 8
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp 37
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h 5
-rw-r--r-- vespalib/src/vespa/vespalib/btree/btreestore.h 1
25 files changed, 219 insertions, 71 deletions
diff --git a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp
index cfdeb35e0fc..f2341d0968e 100644
--- a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp
+++ b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp
@@ -1,6 +1,8 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/searchlib/attribute/direct_multi_term_blueprint.h>
+#include <vespa/searchlib/attribute/i_docid_posting_store.h>
+#include <vespa/searchlib/attribute/i_docid_with_weight_posting_store.h>
#include <vespa/searchlib/attribute/integerbase.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/searchlib/queryeval/orsearch.h>
@@ -61,28 +63,28 @@ make_attribute(bool field_is_filter, CollectionType col_type)
IntegerAttribute& real = dynamic_cast<IntegerAttribute&>(*attr);
// Values 1 and 3 have btree (short) posting lists with weights.
- real.append(10, 1, 1);
- real.append(30, 3, 1);
- real.append(31, 3, 1);
+ real.update(10, 1);
+ real.update(30, 3);
+ real.update(31, 3);
// Values 100 and 300 have bitvector posting lists.
// We need at least 128 documents to get bitvector posting list (see PostingStoreBase2::resizeBitVectors())
for (auto docid : range(100, 128)) {
- real.append(docid, 100, 1);
+ real.update(docid, 100);
}
for (auto docid : range(300, 128)) {
- real.append(docid, 300, 1);
+ real.update(docid, 300);
}
attr->commit(true);
return attr;
}
void
-expect_has_weight_iterator(const IDirectPostingStore& store, int64_t term_value)
+expect_has_btree_iterator(const IDirectPostingStore& store, int64_t term_value)
{
auto snapshot = store.get_dictionary_snapshot();
auto res = store.lookup(IntegerKey(term_value), snapshot);
- EXPECT_TRUE(store.has_weight_iterator(res.posting_idx));
+ EXPECT_TRUE(store.has_btree_iterator(res.posting_idx));
}
void
@@ -94,13 +96,13 @@ expect_has_bitvector_iterator(const IDirectPostingStore& store, int64_t term_val
}
void
-validate_posting_lists(const IDocidWithWeightPostingStore& store)
+validate_posting_lists(const IDirectPostingStore& store)
{
- expect_has_weight_iterator(store, 1);
- expect_has_weight_iterator(store, 3);
- if (store.has_always_weight_iterator()) {
- expect_has_weight_iterator(store, 100);
- expect_has_weight_iterator(store, 300);
+ expect_has_btree_iterator(store, 1);
+ expect_has_btree_iterator(store, 3);
+ if (store.has_always_btree_iterator()) {
+ expect_has_btree_iterator(store, 100);
+ expect_has_btree_iterator(store, 300);
}
expect_has_bitvector_iterator(store, 100);
expect_has_bitvector_iterator(store, 300);
@@ -120,14 +122,19 @@ std::ostream& operator<<(std::ostream& os, const TestParam& param)
class DirectMultiTermBlueprintTest : public ::testing::TestWithParam<TestParam> {
public:
- using BlueprintType = DirectMultiTermBlueprint<IDocidWithWeightPostingStore, WeightedSetTermSearch>;
+ using SingleValueBlueprintType = DirectMultiTermBlueprint<IDocidPostingStore, WeightedSetTermSearch>;
+ using MultiValueBlueprintType = DirectMultiTermBlueprint<IDocidWithWeightPostingStore, WeightedSetTermSearch>;
std::shared_ptr<AttributeVector> attr;
- std::shared_ptr<BlueprintType> blueprint;
+ std::shared_ptr<SingleValueBlueprintType> single_blueprint;
+ std::shared_ptr<MultiValueBlueprintType> multi_blueprint;
+ queryeval::ComplexLeafBlueprint* blueprint;
Blueprint::HitEstimate estimate;
fef::TermFieldMatchData tfmd;
fef::TermFieldMatchDataArray tfmda;
DirectMultiTermBlueprintTest()
: attr(),
+ single_blueprint(),
+ multi_blueprint(),
blueprint(),
tfmd(),
tfmda()
@@ -136,10 +143,20 @@ public:
}
void setup(bool field_is_filter, bool need_term_field_match_data) {
attr = make_attribute(field_is_filter, GetParam().col_type);
- const auto* store = attr->as_docid_with_weight_posting_store();
- ASSERT_TRUE(store);
- validate_posting_lists(*store);
- blueprint = std::make_shared<BlueprintType>(FieldSpec(field_name, field_id, fef::TermFieldHandle(), field_is_filter), *attr, *store, 2);
+ FieldSpec spec(field_name, field_id, fef::TermFieldHandle(), field_is_filter);
+ if (GetParam().col_type == CollectionType::SINGLE) {
+ const auto* store = attr->as_docid_posting_store();
+ ASSERT_TRUE(store);
+ validate_posting_lists(*store);
+ single_blueprint = std::make_shared<SingleValueBlueprintType>(spec, *attr, *store, 2);
+ blueprint = single_blueprint.get();
+ } else {
+ const auto* store = attr->as_docid_with_weight_posting_store();
+ ASSERT_TRUE(store);
+ validate_posting_lists(*store);
+ multi_blueprint = std::make_shared<MultiValueBlueprintType>(spec, *attr, *store, 2);
+ blueprint = multi_blueprint.get();
+ }
blueprint->setDocIdLimit(doc_id_limit);
if (need_term_field_match_data) {
tfmd.needs_normal_features();
@@ -148,7 +165,11 @@ public:
}
}
void add_term(int64_t term_value) {
- blueprint->addTerm(IntegerKey(term_value), 1, estimate);
+ if (single_blueprint) {
+ single_blueprint->addTerm(IntegerKey(term_value), 1, estimate);
+ } else {
+ multi_blueprint->addTerm(IntegerKey(term_value), 1, estimate);
+ }
}
std::unique_ptr<SearchIterator> create_leaf_search() const {
return blueprint->createLeafSearch(tfmda, true);
@@ -180,7 +201,7 @@ expect_or_child(SearchIterator& itr, size_t child, const vespalib::string& exp_c
INSTANTIATE_TEST_SUITE_P(DefaultInstantiation,
DirectMultiTermBlueprintTest,
- testing::Values(CollectionType::WSET),
+ testing::Values(CollectionType::SINGLE, CollectionType::WSET),
testing::PrintToStringParamName());
TEST_P(DirectMultiTermBlueprintTest, weight_iterators_used_for_none_filter_field)
diff --git a/searchlib/src/vespa/searchcommon/attribute/iattributevector.h b/searchlib/src/vespa/searchcommon/attribute/iattributevector.h
index 0576dd85600..f27146ee67d 100644
--- a/searchlib/src/vespa/searchcommon/attribute/iattributevector.h
+++ b/searchlib/src/vespa/searchcommon/attribute/iattributevector.h
@@ -11,6 +11,7 @@
#include <vector>
namespace search {
+ class IDocidPostingStore;
class IDocidWithWeightPostingStore;
class QueryTermSimple;
}
@@ -293,9 +294,16 @@ public:
const SearchContextParams &params) const = 0;
/**
- * Type-safe down-cast to an attribute supporting direct access to posting lists with docid and weight.
+ * Type-safe down-cast to an interface supporting direct access to posting lists with docids.
*
- * @return document weight attribute or nullptr if not supported.
+ * @return posting store or nullptr if not supported.
+ */
+ virtual const IDocidPostingStore* as_docid_posting_store() const = 0;
+
+ /**
+ * Type-safe down-cast to an interface supporting direct access to posting lists with {docid, weight} tuples.
+ *
+ * @return posting store or nullptr if not supported.
*/
virtual const IDocidWithWeightPostingStore *as_docid_with_weight_posting_store() const = 0;
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 76c6171cac3..3a5f79ef665 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -527,7 +527,7 @@ public:
return bitvector_iterator;
}
}
- if (_attr.has_weight_iterator(_dict_entry.posting_idx)) {
+ if (_attr.has_btree_iterator(_dict_entry.posting_idx)) {
return std::make_unique<queryeval::DocumentWeightSearchIterator>(*tfmda[0], _attr, _dict_entry);
} else {
return _attr.make_bitvector_iterator(_dict_entry.posting_idx, get_docid_limit(), *tfmda[0], strict);
@@ -579,7 +579,7 @@ private:
bool use_docid_with_weight_posting_store() const {
// TODO: Relax requirement on always having weight iterator for query operators where that makes sense.
- return (_dww != nullptr) && (_dww->has_always_weight_iterator());
+ return (_dww != nullptr) && (_dww->has_always_btree_iterator());
}
public:
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
index bbb5e4096fc..4654cf435b1 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
@@ -445,6 +445,7 @@ AttributeVector::set_reserved_doc_values()
attribute::IPostingListAttributeBase *AttributeVector::getIPostingListAttributeBase() { return nullptr; }
const attribute::IPostingListAttributeBase *AttributeVector::getIPostingListAttributeBase() const { return nullptr; }
+const IDocidPostingStore* AttributeVector::as_docid_posting_store() const { return nullptr; }
const IDocidWithWeightPostingStore * AttributeVector::as_docid_with_weight_posting_store() const { return nullptr; }
const tensor::ITensorAttribute *AttributeVector::asTensorAttribute() const { return nullptr; }
const attribute::IMultiValueAttribute* AttributeVector::as_multi_value_attribute() const { return nullptr; }
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h
index c6aa538ceac..256aaf1c9d2 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributevector.h
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h
@@ -384,6 +384,7 @@ public:
////// Search API
+ const IDocidPostingStore* as_docid_posting_store() const override;
const IDocidWithWeightPostingStore *as_docid_with_weight_posting_store() const override;
const tensor::ITensorAttribute *asTensorAttribute() const override;
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp
index b8f2d0a1970..12ae226895e 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp
@@ -2,11 +2,14 @@
#include "direct_multi_term_blueprint.h"
#include "direct_multi_term_blueprint.hpp"
+#include "i_docid_posting_store.h"
+#include "i_docid_with_weight_posting_store.h"
#include <vespa/searchlib/queryeval/dot_product_search.h>
#include <vespa/searchlib/queryeval/weighted_set_term_search.h>
namespace search::attribute {
+template class DirectMultiTermBlueprint<IDocidPostingStore, queryeval::WeightedSetTermSearch>;
template class DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::WeightedSetTermSearch>;
template class DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::DotProductSearch>;
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
index d1bf242400b..668034ecd3d 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
@@ -3,7 +3,7 @@
#pragma once
#include "attribute_object_visitor.h"
-#include "i_docid_with_weight_posting_store.h"
+#include "i_direct_posting_store.h"
#include <vespa/searchcommon/attribute/iattributevector.h>
#include <vespa/searchlib/common/matching_elements_fields.h>
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
@@ -32,9 +32,10 @@ private:
const PostingStoreType &_attr;
vespalib::datastore::EntryRef _dictionary_snapshot;
+ using IteratorType = typename PostingStoreType::IteratorType;
using IteratorWeights = std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>>;
- IteratorWeights create_iterators(std::vector<DocidWithWeightIterator>& weight_iterators,
+ IteratorWeights create_iterators(std::vector<IteratorType>& btree_iterators,
std::vector<std::unique_ptr<queryeval::SearchIterator>>& bitvectors,
bool use_bitvector_when_available,
fef::TermFieldMatchData& tfmd, bool strict) const;
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
index 1fcc28342d5..5ca943a356d 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
@@ -40,7 +40,7 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::~DirectMultiTermBlueprin
template <typename PostingStoreType, typename SearchType>
typename DirectMultiTermBlueprint<PostingStoreType, SearchType>::IteratorWeights
-DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_iterators(std::vector<DocidWithWeightIterator>& weight_iterators,
+DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_iterators(std::vector<IteratorType>& btree_iterators,
std::vector<std::unique_ptr<SearchIterator>>& bitvectors,
bool use_bitvector_when_available,
fef::TermFieldMatchData& tfmd, bool strict) const
@@ -58,7 +58,7 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_iterators(std::ve
}
bitvectors.push_back(_attr.make_bitvector_iterator(r.posting_idx, get_docid_limit(), tfmd, strict));
} else {
- _attr.create(r.posting_idx, weight_iterators);
+ _attr.create(r.posting_idx, btree_iterators);
if (!bitvectors.empty()) {
result_weights.push_back(_weights[i]);
}
@@ -94,27 +94,27 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_search_helper(con
if (_terms.empty()) {
return std::make_unique<queryeval::EmptySearch>();
}
- std::vector<DocidWithWeightIterator> weight_iterators;
+ std::vector<IteratorType> btree_iterators;
std::vector<queryeval::SearchIterator::UP> bitvectors;
const size_t num_children = _terms.size();
- weight_iterators.reserve(num_children);
- bool use_bit_vector_when_available = is_filter_search || !_attr.has_always_weight_iterator();
- auto weights = create_iterators(weight_iterators, bitvectors, use_bit_vector_when_available, *tfmda[0], strict);
+ btree_iterators.reserve(num_children);
+ bool use_bit_vector_when_available = is_filter_search || !_attr.has_always_btree_iterator();
+ auto weights = create_iterators(btree_iterators, bitvectors, use_bit_vector_when_available, *tfmda[0], strict);
if (is_filter_search) {
- auto filter = !weight_iterators.empty() ? attribute::DocumentWeightOrFilterSearch::create(std::move(weight_iterators)) : std::unique_ptr<SearchIterator>();
+ auto filter = !btree_iterators.empty() ? attribute::DocumentWeightOrFilterSearch::create(std::move(btree_iterators)) : std::unique_ptr<SearchIterator>();
return combine_iterators(std::move(filter), std::move(bitvectors), strict);
}
bool field_is_filter = getState().fields()[0].isFilter();
if constexpr (std::is_same_v<SearchType, queryeval::WeightedSetTermSearch>) {
- auto multi_term = !weight_iterators.empty() ?
- SearchType::create(*tfmda[0], field_is_filter, std::move(weights), std::move(weight_iterators))
+ auto multi_term = !btree_iterators.empty() ?
+ SearchType::create(*tfmda[0], field_is_filter, std::move(weights), std::move(btree_iterators))
: std::unique_ptr<SearchIterator>();
return combine_iterators(std::move(multi_term), std::move(bitvectors), strict);
} else {
// In this case we should only have weight iterators.
- assert(weight_iterators.size() == _terms.size());
+ assert(btree_iterators.size() == _terms.size());
assert(weights.index() == 0);
- return SearchType::create(*tfmda[0], field_is_filter, std::get<0>(weights).get(), std::move(weight_iterators));
+ return SearchType::create(*tfmda[0], field_is_filter, std::get<0>(weights).get(), std::move(btree_iterators));
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_posting_store_adapter.h b/searchlib/src/vespa/searchlib/attribute/direct_posting_store_adapter.h
index 33941152602..74f22484756 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_posting_store_adapter.h
+++ b/searchlib/src/vespa/searchlib/attribute/direct_posting_store_adapter.h
@@ -30,14 +30,14 @@ public:
bool attr_is_filter);
vespalib::datastore::EntryRef get_dictionary_snapshot() const override;
- bool has_weight_iterator(vespalib::datastore::EntryRef posting_idx) const noexcept override;
+ bool has_btree_iterator(vespalib::datastore::EntryRef posting_idx) const noexcept override;
std::unique_ptr<queryeval::SearchIterator>
make_bitvector_iterator(vespalib::datastore::EntryRef posting_idx, uint32_t doc_id_limit,
fef::TermFieldMatchData& match_data, bool strict) const override;
bool has_bitvector(vespalib::datastore::EntryRef posting_idx) const noexcept override;
+ bool has_always_btree_iterator() const noexcept override { return !_attr_is_filter; }
void create(vespalib::datastore::EntryRef idx, std::vector<IteratorType>& dst) const override;
IteratorType create(vespalib::datastore::EntryRef idx) const override;
- bool has_always_weight_iterator() const noexcept override { return !_attr_is_filter; }
};
}
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_posting_store_adapter.hpp b/searchlib/src/vespa/searchlib/attribute/direct_posting_store_adapter.hpp
index 02fc1a84ec6..e3b936b993b 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_posting_store_adapter.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/direct_posting_store_adapter.hpp
@@ -40,7 +40,7 @@ make_bitvector_iterator(vespalib::datastore::EntryRef posting_idx, uint32_t doc_
template <typename ParentType, typename PostingStoreType, typename EnumStoreType>
bool
DirectPostingStoreAdapter<ParentType, PostingStoreType, EnumStoreType>::
-has_weight_iterator(vespalib::datastore::EntryRef posting_idx) const noexcept
+has_btree_iterator(vespalib::datastore::EntryRef posting_idx) const noexcept
{
return _posting_store.has_btree(posting_idx);
}
diff --git a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp
index 5159c2be06c..b910e64b665 100644
--- a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp
@@ -85,19 +85,36 @@ DocumentWeightOrFilterSearchImpl<IteratorPack>::doSeek(uint32_t docId)
setDocId(min_doc_id);
}
+namespace {
+
+template <typename IteratorType, typename IteratorPackType>
std::unique_ptr<queryeval::SearchIterator>
-DocumentWeightOrFilterSearch::create(std::vector<DocidWithWeightIterator>&& children)
+create_helper(std::vector<IteratorType>&& children)
{
if (children.empty()) {
return std::make_unique<queryeval::EmptySearch>();
} else {
std::sort(children.begin(), children.end(),
[](const auto & a, const auto & b) { return a.size() > b.size(); });
- using OrFilter = DocumentWeightOrFilterSearchImpl<DocidWithWeightIteratorPack>;
- return std::make_unique<OrFilter>(DocidWithWeightIteratorPack(std::move(children)));
+ using OrFilter = DocumentWeightOrFilterSearchImpl<IteratorPackType>;
+ return std::make_unique<OrFilter>(IteratorPackType(std::move(children)));
}
}
+}
+
+std::unique_ptr<queryeval::SearchIterator>
+DocumentWeightOrFilterSearch::create(std::vector<DocidIterator>&& children)
+{
+ return create_helper<DocidIterator, DocidIteratorPack>(std::move(children));
+}
+
+std::unique_ptr<queryeval::SearchIterator>
+DocumentWeightOrFilterSearch::create(std::vector<DocidWithWeightIterator>&& children)
+{
+ return create_helper<DocidWithWeightIterator, DocidWithWeightIteratorPack>(std::move(children));
+}
+
std::unique_ptr<queryeval::SearchIterator>
DocumentWeightOrFilterSearch::create(const std::vector<SearchIterator *>& children,
std::unique_ptr<fef::MatchData> md)
diff --git a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h
index cea30e83619..5ed0dd16d83 100644
--- a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h
+++ b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h
@@ -15,6 +15,7 @@ class DocumentWeightOrFilterSearch : public queryeval::SearchIterator
protected:
DocumentWeightOrFilterSearch() = default;
public:
+ static std::unique_ptr<SearchIterator> create(std::vector<DocidIterator>&& children);
static std::unique_ptr<SearchIterator> create(std::vector<DocidWithWeightIterator>&& children);
static std::unique_ptr<SearchIterator> create(const std::vector<SearchIterator *>& children,
std::unique_ptr<fef::MatchData> md);
diff --git a/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h b/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h
index 6b5251772e1..631aecf2bbe 100644
--- a/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h
+++ b/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h
@@ -54,10 +54,17 @@ public:
* (e.g. lowercased) value equals the folded value for enum_idx.
*/
virtual void collect_folded(vespalib::datastore::EntryRef enum_idx, vespalib::datastore::EntryRef dictionary_snapshot, const std::function<void(vespalib::datastore::EntryRef)>& callback) const = 0;
- virtual bool has_weight_iterator(vespalib::datastore::EntryRef posting_idx) const noexcept = 0;
+ virtual bool has_btree_iterator(vespalib::datastore::EntryRef posting_idx) const noexcept = 0;
virtual std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(vespalib::datastore::EntryRef posting_idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const = 0;
virtual bool has_bitvector(vespalib::datastore::EntryRef posting_idx) const noexcept = 0;
virtual int64_t get_integer_value(vespalib::datastore::EntryRef enum_idx) const noexcept = 0;
+
+ /**
+ * Returns true when btree posting list iterators are present for all terms.
+ *
+ * This means btree posting lists exist in addition to any bitvector posting lists.
+ */
+ virtual bool has_always_btree_iterator() const noexcept = 0;
virtual ~IDirectPostingStore() = default;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/i_docid_posting_store.h b/searchlib/src/vespa/searchlib/attribute/i_docid_posting_store.h
new file mode 100644
index 00000000000..4dbcfc44f56
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/i_docid_posting_store.h
@@ -0,0 +1,22 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "i_direct_posting_store.h"
+
+namespace search {
+
+/**
+ * Interface providing access to dictionary lookups and underlying posting lists that contain only docids.
+ *
+ * This posting store type is supported by some single-value attributes with fast-search.
+ */
+class IDocidPostingStore : public IDirectPostingStore {
+public:
+ using IteratorType = DocidIterator;
+
+ virtual void create(vespalib::datastore::EntryRef idx, std::vector<DocidIterator>& dst) const = 0;
+ virtual DocidIterator create(vespalib::datastore::EntryRef idx) const = 0;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/i_docid_with_weight_posting_store.h b/searchlib/src/vespa/searchlib/attribute/i_docid_with_weight_posting_store.h
index bdb4054b2d7..04b71188493 100644
--- a/searchlib/src/vespa/searchlib/attribute/i_docid_with_weight_posting_store.h
+++ b/searchlib/src/vespa/searchlib/attribute/i_docid_with_weight_posting_store.h
@@ -9,24 +9,15 @@ namespace search {
/**
* Interface providing access to dictionary lookups and underlying posting lists that contains {docid, weight} tuples.
*
- * This posting store type is supported by multi-value attributes with fast-search.
+ * This posting store type is supported by some multi-value attributes with fast-search.
*/
class IDocidWithWeightPostingStore : public IDirectPostingStore {
public:
using IteratorType = DocidWithWeightIterator;
- virtual void create(vespalib::datastore::EntryRef idx, std::vector<DocidWithWeightIterator> &dst) const = 0;
+ virtual void create(vespalib::datastore::EntryRef idx, std::vector<DocidWithWeightIterator>& dst) const = 0;
virtual DocidWithWeightIterator create(vespalib::datastore::EntryRef idx) const = 0;
-
- /**
- * Returns true when posting list iterators with weight are present for all terms.
- *
- * This means posting list iterators exist in addition to eventual bitvector posting lists.
- */
- virtual bool has_always_weight_iterator() const noexcept = 0;
};
-
-
}
diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.cpp b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.cpp
index bdec9d1d7e2..3ca05eecdb0 100644
--- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.cpp
@@ -117,6 +117,10 @@ std::unique_ptr<ISearchContext> ImportedAttributeVectorReadGuard::createSearchCo
return std::make_unique<ImportedSearchContext>(std::move(term), params, _imported_attribute, _target_attribute);
}
+const IDocidPostingStore* ImportedAttributeVectorReadGuard::as_docid_posting_store() const {
+ return nullptr;
+}
+
const IDocidWithWeightPostingStore *ImportedAttributeVectorReadGuard::as_docid_with_weight_posting_store() const {
return nullptr;
}
diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.h b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.h
index 643e2352668..1007934baf1 100644
--- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.h
+++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.h
@@ -59,6 +59,7 @@ public:
const char * getStringFromEnum(EnumHandle e) const override;
std::unique_ptr<ISearchContext> createSearchContext(std::unique_ptr<QueryTermSimple> term,
const SearchContextParams &params) const override;
+ const IDocidPostingStore* as_docid_posting_store() const override;
const IDocidWithWeightPostingStore *as_docid_with_weight_posting_store() const override;
const tensor::ITensorAttribute *asTensorAttribute() const override;
const attribute::IMultiValueAttribute* as_multi_value_attribute() const override;
diff --git a/searchlib/src/vespa/searchlib/attribute/numeric_direct_posting_store_adapter.hpp b/searchlib/src/vespa/searchlib/attribute/numeric_direct_posting_store_adapter.hpp
index b5a1282d09c..3b8f3e2334a 100644
--- a/searchlib/src/vespa/searchlib/attribute/numeric_direct_posting_store_adapter.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/numeric_direct_posting_store_adapter.hpp
@@ -30,8 +30,12 @@ lookup(const LookupKey& key, vespalib::datastore::EntryRef dictionary_snapshot)
if (find_result.first.valid()) {
auto pidx = find_result.second;
if (pidx.valid()) {
- auto minmax = this->_posting_store.getAggregated(pidx);
- return LookupResult(pidx, this->_posting_store.frozenSize(pidx), minmax.getMin(), minmax.getMax(), find_result.first);
+ if constexpr (PostingStoreType::AggrCalcType::hasAggregated()) {
+ auto minmax = this->_posting_store.getAggregated(pidx);
+ return LookupResult(pidx, this->_posting_store.frozenSize(pidx), minmax.getMin(), minmax.getMax(), find_result.first);
+ } else {
+ return LookupResult(pidx, this->_posting_store.frozenSize(pidx), 1, 1, find_result.first);
+ }
}
}
return LookupResult();
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.h b/searchlib/src/vespa/searchlib/attribute/postingstore.h
index 3e81b89b6e4..d3f850b5afe 100644
--- a/searchlib/src/vespa/searchlib/attribute/postingstore.h
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.h
@@ -78,6 +78,7 @@ public:
using ConstIterator = typename Parent::ConstIterator;
using KeyDataType = typename Parent::KeyDataType;
using AggregatedType = typename Parent::AggregatedType;
+ using AggrCalcType = typename Parent::AggrCalcType;
using BTreeTypeRefPair = typename Parent::BTreeTypeRefPair;
using Builder = typename Parent::Builder;
using CompactionSpec = vespalib::datastore::CompactionSpec;
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h
index fd055206a86..482dc90f6cd 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h
@@ -2,9 +2,11 @@
#pragma once
-#include "singlenumericenumattribute.h"
+#include "i_docid_posting_store.h"
+#include "numeric_direct_posting_store_adapter.h"
#include "postinglistattribute.h"
#include "postinglistsearchcontext.h"
+#include "singlenumericenumattribute.h"
namespace search {
@@ -44,11 +46,16 @@ private:
using DocId = typename B::BaseClass::DocId;
using EnumIndex = typename SingleValueEnumAttributeBase::EnumIndex;
using PostingMap = typename PostingParent::PostingMap;
+ using PostingStore = typename PostingParent::PostingStore;
using QueryTermSimpleUP = AttributeVector::QueryTermSimpleUP;
using SelfType = SingleValueNumericPostingAttribute<B>;
using ValueModifier = typename B::BaseClass::ValueModifier;
using generation_t = typename SingleValueNumericEnumAttribute<B>::generation_t;
+ using DirectPostingStoreAdapterType = attribute::NumericDirectPostingStoreAdapter<IDocidPostingStore,
+ PostingStore, EnumStore>;
+ DirectPostingStoreAdapterType _posting_store_adapter;
+
using PostingParent::_posting_store;
using PostingParent::clearAllPostings;
using PostingParent::handle_load_posting_lists;
@@ -75,6 +82,8 @@ public:
std::unique_ptr<attribute::SearchContext>
getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams & params) const override;
+ const IDocidPostingStore* as_docid_posting_store() const override;
+
// Delegates to forwardedOnAddDoc(), passing the current size and capacity of _enumIndices.
bool onAddDoc(DocId doc) override {
return forwardedOnAddDoc(doc, this->_enumIndices.size(), this->_enumIndices.capacity());
}
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp
index 6e9c6a73337..c57742ca4b6 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp
@@ -3,8 +3,9 @@
#pragma once
#include "singlenumericpostattribute.h"
-#include "enumstore.h"
#include "enumcomparator.h"
+#include "enumstore.h"
+#include "numeric_direct_posting_store_adapter.hpp"
#include "singlenumericenumattribute.hpp"
namespace search {
@@ -21,7 +22,8 @@ template <typename B>
SingleValueNumericPostingAttribute<B>::SingleValueNumericPostingAttribute(const vespalib::string & name,
const AttributeVector::Config & c) :
SingleValueNumericEnumAttribute<B>(name, c),
- PostingParent(*this, this->getEnumStore())
+ PostingParent(*this, this->getEnumStore()),
+ // Adapter returned by as_docid_posting_store() for integer attributes; built from the posting store, the enum store and the filter flag.
+ _posting_store_adapter(this->get_posting_store(), this->_enumStore, this->getIsFilter())
{
}
@@ -148,5 +150,26 @@ SingleValueNumericPostingAttribute<B>::getSearch(QueryTermSimple::UP qTerm,
return std::make_unique<SC>(std::move(base_sc), params, *this);
}
-} // namespace search
+namespace single_numeric_post_attribute_detail {
+
+/**
+ * Returns true when the given basic type is INT8, INT16, INT32 or INT64.
+ *
+ * Declared inline in a named namespace because this file is a header: an unnamed
+ * namespace would give every including translation unit its own copy of the helper,
+ * and the template member below would then refer to a different function per unit.
+ */
+inline bool is_integer_type(attribute::BasicType type) {
+    return (type == attribute::BasicType::INT8) ||
+           (type == attribute::BasicType::INT16) ||
+           (type == attribute::BasicType::INT32) ||
+           (type == attribute::BasicType::INT64);
+}
+
+}
+
+/**
+ * Exposes the posting store adapter through the IDocidPostingStore interface.
+ *
+ * @return pointer to the adapter when the attribute's basic type is an integer type,
+ *         nullptr for every other basic type.
+ */
+template <typename B>
+const IDocidPostingStore*
+SingleValueNumericPostingAttribute<B>::as_docid_posting_store() const
+{
+    if (single_numeric_post_attribute_detail::is_integer_type(this->getBasicType())) {
+        return &_posting_store_adapter;
+    }
+    return nullptr;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/string_direct_posting_store_adapter.hpp b/searchlib/src/vespa/searchlib/attribute/string_direct_posting_store_adapter.hpp
index 9f29fe0ef46..463f6f13f01 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_direct_posting_store_adapter.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/string_direct_posting_store_adapter.hpp
@@ -30,8 +30,12 @@ lookup(const LookupKey& key, vespalib::datastore::EntryRef dictionary_snapshot)
if (find_result.first.valid()) {
auto pidx = find_result.second;
if (pidx.valid()) {
- auto minmax = this->_posting_store.getAggregated(pidx);
- return LookupResult(pidx, this->_posting_store.frozenSize(pidx), minmax.getMin(), minmax.getMax(), find_result.first);
+ if constexpr (PostingStoreType::AggrCalcType::hasAggregated()) {
+ auto minmax = this->_posting_store.getAggregated(pidx);
+ return LookupResult(pidx, this->_posting_store.frozenSize(pidx), minmax.getMin(), minmax.getMax(), find_result.first);
+ } else {
+ return LookupResult(pidx, this->_posting_store.frozenSize(pidx), 1, 1, find_result.first);
+ }
}
}
return LookupResult();
diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
index 2a131c6cdc0..1cecbca7660 100644
--- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
@@ -186,19 +186,42 @@ WeightedSetTermSearch::create(const std::vector<SearchIterator *> &children,
//-----------------------------------------------------------------------------
+namespace {
+
+/**
+ * Builds a WeightedSetTermSearchImpl over the given iterators.
+ *
+ * The heap implementation is picked from the number of iterators: LeftArrayHeap
+ * when there are fewer than 128, LeftHeap otherwise. The weights and the iterators
+ * are moved into the created search.
+ */
+template <typename IteratorType, typename IteratorPackType>
+SearchIterator::UP
+create_helper(fef::TermFieldMatchData& tmd,
+              bool field_is_filter,
+              std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights,
+              std::vector<IteratorType>&& iterators)
+{
+    // At or above this many iterators the LeftHeap based implementation is used.
+    constexpr size_t array_heap_limit = 128;
+
+    if (iterators.size() >= array_heap_limit) {
+        using HeapImpl = WeightedSetTermSearchImpl<vespalib::LeftHeap, IteratorPackType>;
+        return SearchIterator::UP(new HeapImpl(tmd, field_is_filter, std::move(weights), IteratorPackType(std::move(iterators))));
+    }
+    using ArrayHeapImpl = WeightedSetTermSearchImpl<vespalib::LeftArrayHeap, IteratorPackType>;
+    return SearchIterator::UP(new ArrayHeapImpl(tmd, field_is_filter, std::move(weights), IteratorPackType(std::move(iterators))));
+}
+
+}
+
+/**
+ * Creates a weighted set term search over DocidIterator posting list iterators.
+ * All work is delegated to create_helper(), instantiated with DocidIteratorPack.
+ */
+SearchIterator::UP
+WeightedSetTermSearch::create(fef::TermFieldMatchData& tmd,
+                              bool field_is_filter,
+                              std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights,
+                              std::vector<DocidIterator>&& iterators)
+{
+    return create_helper<DocidIterator, DocidIteratorPack>(tmd,
+                                                           field_is_filter,
+                                                           std::move(weights),
+                                                           std::move(iterators));
+}
+
SearchIterator::UP
WeightedSetTermSearch::create(fef::TermFieldMatchData &tmd,
bool field_is_filter,
std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights,
std::vector<DocidWithWeightIterator> &&iterators)
{
- using ArrayHeapImpl = WeightedSetTermSearchImpl<vespalib::LeftArrayHeap, DocidWithWeightIteratorPack>;
- using HeapImpl = WeightedSetTermSearchImpl<vespalib::LeftHeap, DocidWithWeightIteratorPack>;
-
- if (iterators.size() < 128) {
- return SearchIterator::UP(new ArrayHeapImpl(tmd, field_is_filter, weights, DocidWithWeightIteratorPack(std::move(iterators))));
- }
- return SearchIterator::UP(new HeapImpl(tmd, field_is_filter, weights, DocidWithWeightIteratorPack(std::move(iterators))));
+ // Heap selection now lives in create_helper(), shared with the DocidIterator overload; weights are moved instead of copied.
+ return create_helper<DocidWithWeightIterator, DocidWithWeightIteratorPack>(tmd, field_is_filter, std::move(weights), std::move(iterators));
}
//-----------------------------------------------------------------------------
diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
index 830ee136842..a497a647ac6 100644
--- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
@@ -34,6 +34,11 @@ public:
const std::vector<int32_t> &weights,
fef::MatchData::UP match_data);
+ // Overload taking DocidIterator posting list iterators; the weights and the iterators are passed on to the created search.
+ static SearchIterator::UP create(search::fef::TermFieldMatchData& tmd,
+ bool field_is_filter,
+ std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights,
+ std::vector<DocidIterator>&& iterators);
+
static SearchIterator::UP create(search::fef::TermFieldMatchData &tmd,
bool field_is_filter,
std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights,
diff --git a/vespalib/src/vespa/vespalib/btree/btreestore.h b/vespalib/src/vespa/vespalib/btree/btreestore.h
index 0fbe1a1fcea..5ab3a317be8 100644
--- a/vespalib/src/vespa/vespalib/btree/btreestore.h
+++ b/vespalib/src/vespa/vespalib/btree/btreestore.h
@@ -24,6 +24,7 @@ public:
using KeyType = KeyT;
using DataType = DataT;
using AggregatedType = AggrT;
+ using AggrCalcType = AggrCalcT;
using DataStoreType = datastore::DataStoreT<datastore::EntryRefT<22> >;
using RefType = DataStoreType::RefType;
using KeyDataType = BTreeKeyData<KeyT, DataT>;