aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2023-09-25 14:34:19 +0200
committer GitHub <noreply@github.com> 2023-09-25 14:34:19 +0200
commit 7facdd6177063f772c497000b9c12e4653a2db83 (patch)
tree 1acdcde007215c4bd1b36107611b8a95493e875e
parent 75d25f2d7e1ff1045d79b95e36a84cc09566baea (diff)
parent 7c2b17a3a91890b592117986799d84e8e26b1ad7 (diff)
Merge pull request #28631 from vespa-engine/balder/lift-single-filter-terms-out-from-ws
- Single filter terms can be lifted out from weighted sets.
-rw-r--r-- searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp 99
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp 5
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp 19
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h 6
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp 8
8 files changed, 95 insertions, 56 deletions
diff --git a/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp
index d7a854e0afc..6c6f05fd5e2 100644
--- a/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp
+++ b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp
@@ -29,14 +29,14 @@ using namespace search::attribute::test;
namespace {
void
-setupAttributeManager(MockAttributeManager &manager)
+setupAttributeManager(MockAttributeManager &manager, bool isFilter)
{
AttributeVector::DocId docId;
{
- AttributeVector::SP attr_sp = AttributeFactory::createAttribute("integer", Config(BasicType("int64")));
+ AttributeVector::SP attr_sp = AttributeFactory::createAttribute("integer", Config(BasicType("int64")).setIsFilter(isFilter));
manager.addAttribute(attr_sp);
- IntegerAttribute *attr = (IntegerAttribute*)(attr_sp.get());
+ auto *attr = (IntegerAttribute*)(attr_sp.get());
for (size_t i = 1; i < 10; ++i) {
attr->addDoc(docId);
assert(i == docId);
@@ -45,10 +45,10 @@ setupAttributeManager(MockAttributeManager &manager)
}
}
{
- AttributeVector::SP attr_sp = AttributeFactory::createAttribute("string", Config(BasicType("string")));
+ AttributeVector::SP attr_sp = AttributeFactory::createAttribute("string", Config(BasicType("string")).setIsFilter(isFilter));
manager.addAttribute(attr_sp);
- StringAttribute *attr = (StringAttribute*)(attr_sp.get());
+ auto *attr = (StringAttribute*)(attr_sp.get());
for (size_t i = 1; i < 10; ++i) {
attr->addDoc(docId);
assert(i == docId);
@@ -58,9 +58,9 @@ setupAttributeManager(MockAttributeManager &manager)
}
{
AttributeVector::SP attr_sp = AttributeFactory::createAttribute(
- "multi", Config(BasicType("int64"), search::attribute::CollectionType("array")));
+ "multi", Config(BasicType("int64"), search::attribute::CollectionType("array")).setIsFilter(isFilter));
manager.addAttribute(attr_sp);
- IntegerAttribute *attr = (IntegerAttribute*)(attr_sp.get());
+ auto *attr = (IntegerAttribute*)(attr_sp.get());
for (size_t i = 1; i < 10; ++i) {
attr->addDoc(docId);
assert(i == docId);
@@ -78,35 +78,43 @@ struct WS {
TermFieldHandle handle;
std::vector<std::pair<std::string, uint32_t> > tokens;
- WS(IAttributeManager & manager) : attribute_manager(manager), layout(), handle(layout.allocTermField(fieldId)), tokens() {
+ explicit WS(IAttributeManager & manager)
+ : attribute_manager(manager),
+ layout(), handle(layout.allocTermField(fieldId)),
+ tokens()
+ {
MatchData::UP tmp = layout.createMatchData();
ASSERT_TRUE(tmp->resolveTermField(handle)->getFieldId() == fieldId);
}
WS &add(const std::string &token, uint32_t weight) {
- tokens.push_back(std::make_pair(token, weight));
+ tokens.emplace_back(token, weight);
return *this;
}
Node::UP createNode() const {
- SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm(tokens.size(), "view", 0, Weight(0));
- for (size_t i = 0; i < tokens.size(); ++i) {
- node->addTerm(tokens[i].first, Weight(tokens[i].second));
+ auto *node = new SimpleWeightedSetTerm(tokens.size(), "view", 0, Weight(0));
+ for (const auto & token : tokens) {
+ node->addTerm(token.first, Weight(token.second));
}
return Node::UP(node);
}
- bool isGenericSearch(Searchable &searchable, const std::string &field, bool strict) const {
+ SearchIterator::UP
+ createSearch(Searchable &searchable, const std::string &field, bool strict) const {
AttributeContext ac(attribute_manager);
FakeRequestContext requestContext(&ac);
MatchData::UP md = layout.createMatchData();
Node::UP node = createNode();
FieldSpecList fields;
- fields.add(FieldSpec(field, fieldId, handle));
+ fields.add(FieldSpec(field, fieldId, handle, ac.getAttribute(field)->getIsFilter()));
queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node);
bp->fetchPostings(queryeval::ExecuteInfo::create(strict));
SearchIterator::UP sb = bp->createSearch(*md, strict);
- return (dynamic_cast<WeightedSetTermSearch*>(sb.get()) != 0);
+ return sb;
+ }
+ bool isWeightedSetTermSearch(Searchable &searchable, const std::string &field, bool strict) const {
+ return dynamic_cast<WeightedSetTermSearch *>(createSearch(searchable, field, strict).get()) != nullptr;
}
FakeResult search(Searchable &searchable, const std::string &field, bool strict) const {
@@ -140,23 +148,58 @@ struct WS {
} // namespace <unnamed>
+void test_tokens(bool isFilter, const std::vector<uint32_t> & docs) {
+ MockAttributeManager manager;
+ setupAttributeManager(manager, isFilter);
+ AttributeBlueprintFactory adapter;
+
+ FakeResult expect = FakeResult();
+ WS ws = WS(manager);
+ for (uint32_t doc : docs) {
+ auto docS = vespalib::stringify(doc);
+ int32_t weight = doc * 10;
+ expect.doc(doc).weight(weight).pos(0);
+ ws.add(docS, weight);
+ }
+
+ EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "integer", true));
+ EXPECT_TRUE(!ws.isWeightedSetTermSearch(adapter, "integer", false));
+ EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "string", true));
+ EXPECT_TRUE(!ws.isWeightedSetTermSearch(adapter, "string", false));
+ EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "multi", true));
+ EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "multi", false));
+
+ EXPECT_EQUAL(expect, ws.search(adapter, "integer", true));
+ EXPECT_EQUAL(expect, ws.search(adapter, "integer", false));
+ EXPECT_EQUAL(expect, ws.search(adapter, "string", true));
+ EXPECT_EQUAL(expect, ws.search(adapter, "string", false));
+ EXPECT_EQUAL(expect, ws.search(adapter, "multi", true));
+ EXPECT_EQUAL(expect, ws.search(adapter, "multi", false));
+}
TEST("attribute_weighted_set_test") {
+ test_tokens(false, {3, 5, 7});
+ test_tokens(true, {3, 5, 7});
+ test_tokens(false, {3});
+}
+
+TEST("attribute_weighted_set_single_token_filter_lifted_out") {
MockAttributeManager manager;
- setupAttributeManager(manager);
+ setupAttributeManager(manager, true);
AttributeBlueprintFactory adapter;
- FakeResult expect = FakeResult()
- .doc(3).elem(0).weight(30).pos(0)
- .doc(5).elem(0).weight(50).pos(0)
- .doc(7).elem(0).weight(70).pos(0);
- WS ws = WS(manager).add("7", 70).add("5", 50).add("3", 30);
-
- EXPECT_TRUE(ws.isGenericSearch(adapter, "integer", true));
- EXPECT_TRUE(!ws.isGenericSearch(adapter, "integer", false));
- EXPECT_TRUE(ws.isGenericSearch(adapter, "string", true));
- EXPECT_TRUE(!ws.isGenericSearch(adapter, "string", false));
- EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", true));
- EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", false));
+ FakeResult expect = FakeResult().doc(3).elem(0).weight(30).pos(0);
+ WS ws = WS(manager).add("3", 30);
+
+ EXPECT_EQUAL("search::FilterAttributeIteratorStrict<search::attribute::SingleNumericSearchContext<long, search::attribute::NumericMatcher<long> > >",
+ ws.createSearch(adapter, "integer", true)->getClassName());
+ EXPECT_EQUAL("search::FilterAttributeIteratorT<search::attribute::SingleNumericSearchContext<long, search::attribute::NumericMatcher<long> > >",
+ ws.createSearch(adapter, "integer", false)->getClassName());
+ EXPECT_EQUAL("search::FilterAttributeIteratorStrict<search::attribute::SingleEnumSearchContext<char const*, search::attribute::StringSearchContext> >",
+ ws.createSearch(adapter, "string", true)->getClassName());
+ EXPECT_EQUAL("search::FilterAttributeIteratorT<search::attribute::SingleEnumSearchContext<char const*, search::attribute::StringSearchContext> >",
+ ws.createSearch(adapter, "string", false)->getClassName());
+ EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "multi", true));
+ EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "multi", false));
EXPECT_EQUAL(expect, ws.search(adapter, "integer", true));
EXPECT_EQUAL(expect, ws.search(adapter, "integer", false));
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 1519bb14554..b4cdd621b71 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -337,10 +337,7 @@ public:
if (tfmda.size() == 1) {
// search in exactly one field
fef::TermFieldMatchData &tfmd = *tfmda[0];
- return search::common::create_location_iterator(tfmd,
- _attribute.getNumDocs(),
- strict,
- _location);
+ return common::create_location_iterator(tfmd, _attribute.getNumDocs(), strict, _location);
} else {
LOG(debug, "wrong size tfmda: %zu (fallback to old location iterator)\n", tfmda.size());
}
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
index 108128eeb39..94c560a0dae 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
@@ -30,7 +30,7 @@ protected:
const attribute::IAttributeVector &attribute() const { return _attr; }
public:
- UseAttr(const attribute::IAttributeVector & attr)
+ explicit UseAttr(const attribute::IAttributeVector & attr)
: _attr(attr) {}
};
@@ -40,7 +40,7 @@ class UseStringEnum : public UseAttr
{
public:
using TokenT = uint32_t;
- UseStringEnum(const IAttributeVector & attr)
+ explicit UseStringEnum(const IAttributeVector & attr)
: UseAttr(attr) {}
auto mapToken(const ISearchContext &context) const {
return attribute().findFoldedEnums(context.queryTerm()->getTerm());
@@ -56,7 +56,7 @@ class UseInteger : public UseAttr
{
public:
using TokenT = uint64_t;
- UseInteger(const IAttributeVector & attr) : UseAttr(attr) {}
+ explicit UseInteger(const IAttributeVector & attr) : UseAttr(attr) {}
std::vector<int64_t> mapToken(const ISearchContext &context) const {
std::vector<int64_t> result;
Int64Range range(context.getAsIntegerTerm());
@@ -157,6 +157,10 @@ AttributeWeightedSetBlueprint::createLeafSearch(const fef::TermFieldMatchDataArr
assert(tfmda.size() == 1);
assert(getState().numFields() == 1);
fef::TermFieldMatchData &tfmd = *tfmda[0];
+ bool field_is_filter = getState().fields()[0].isFilter();
+ if (field_is_filter && (_contexts.size() == 1)) {
+ return _contexts[0]->createIterator(&tfmd, strict);
+ }
if (strict) { // use generic weighted set search
fef::MatchDataLayout layout;
auto handle = layout.allocTermField(tfmd.getFieldId());
@@ -167,7 +171,6 @@ AttributeWeightedSetBlueprint::createLeafSearch(const fef::TermFieldMatchDataArr
// TODO: pass ownership with unique_ptr
children[i] = _contexts[i]->createIterator(child_tfmd, true).release();
}
- bool field_is_filter = getState().fields()[0].isFilter();
return queryeval::WeightedSetTermSearch::create(children, tfmd, field_is_filter, _weights, std::move(match_data));
} else { // use attribute filter optimization
bool isString = (_attr.isStringType() && _attr.hasEnum());
@@ -182,18 +185,16 @@ AttributeWeightedSetBlueprint::createLeafSearch(const fef::TermFieldMatchDataArr
}
queryeval::SearchIterator::UP
-AttributeWeightedSetBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const
+AttributeWeightedSetBlueprint::createFilterSearch(bool strict, FilterConstraint) const
{
- (void) constraint;
std::vector<std::unique_ptr<queryeval::SearchIterator>> children;
children.reserve(_contexts.size());
for (auto& context : _contexts) {
- auto wrapper = std::make_unique<search::queryeval::FilterWrapper>(1);
+ auto wrapper = std::make_unique<queryeval::FilterWrapper>(1);
wrapper->wrap(context->createIterator(wrapper->tfmda()[0], strict));
children.emplace_back(std::move(wrapper));
}
- search::queryeval::UnpackInfo unpack_info;
- return search::queryeval::OrSearch::create(std::move(children), strict, unpack_info);
+ return queryeval::OrSearch::create(std::move(children), strict, queryeval::UnpackInfo());
}
void
diff --git a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp
index 3c0bae00047..e2566c94f1c 100644
--- a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp
@@ -11,8 +11,8 @@ class DocumentWeightOrFilterSearchImpl : public DocumentWeightOrFilterSearch
{
AttributeIteratorPack _children;
public:
- DocumentWeightOrFilterSearchImpl(AttributeIteratorPack&& children);
- ~DocumentWeightOrFilterSearchImpl();
+ explicit DocumentWeightOrFilterSearchImpl(AttributeIteratorPack&& children);
+ ~DocumentWeightOrFilterSearchImpl() override;
void doSeek(uint32_t docId) override;
@@ -67,7 +67,7 @@ DocumentWeightOrFilterSearchImpl::doUnpack(uint32_t)
{
}
-std::unique_ptr<search::queryeval::SearchIterator>
+std::unique_ptr<queryeval::SearchIterator>
DocumentWeightOrFilterSearch::create(std::vector<DocumentWeightIterator>&& children)
{
if (children.empty()) {
diff --git a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h
index 62be883ab52..c601856573f 100644
--- a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h
+++ b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h
@@ -9,15 +9,15 @@ namespace search::attribute {
* Filter iterator on top of document weight iterators with OR semantics used during
* calculation of global filter for weighted set terms, wand terms and dot product terms.
*/
-class DocumentWeightOrFilterSearch : public search::queryeval::SearchIterator
+class DocumentWeightOrFilterSearch : public queryeval::SearchIterator
{
protected:
DocumentWeightOrFilterSearch()
- : search::queryeval::SearchIterator()
+ : queryeval::SearchIterator()
{
}
public:
- static std::unique_ptr<search::queryeval::SearchIterator> create(std::vector<DocumentWeightIterator>&& children);
+ static std::unique_ptr<queryeval::SearchIterator> create(std::vector<DocumentWeightIterator>&& children);
};
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp
index 77d9875bf69..97f6bc2e6f8 100644
--- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp
@@ -33,10 +33,8 @@ WeightedSetTermMatchingElementsSearch::WeightedSetTermMatchingElementsSearch(con
_search()
{
_tfmda.add(&_tfmd);
- auto generic_search = bp.createLeafSearch(_tfmda, false);
- auto weighted_set_term_search = dynamic_cast<WeightedSetTermSearch *>(generic_search.get());
- generic_search.release();
- _search.reset(weighted_set_term_search);
+ _search.reset(static_cast<WeightedSetTermSearch *>(bp.createLeafSearch(_tfmda, false).release()));
+
}
WeightedSetTermMatchingElementsSearch::~WeightedSetTermMatchingElementsSearch() = default;
diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h
index 0e3c82444d7..9c8d6d88329 100644
--- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h
@@ -18,7 +18,7 @@ class WeightedSetTermBlueprint : public ComplexLeafBlueprint
std::vector<Blueprint::UP> _terms;
public:
- WeightedSetTermBlueprint(const FieldSpec &field);
+ explicit WeightedSetTermBlueprint(const FieldSpec &field);
WeightedSetTermBlueprint(const WeightedSetTermBlueprint &) = delete;
WeightedSetTermBlueprint &operator=(const WeightedSetTermBlueprint &) = delete;
~WeightedSetTermBlueprint() override;
diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
index ee3978705cf..8478a0d3c35 100644
--- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
@@ -21,7 +21,7 @@ private:
struct CmpDocId {
const uint32_t *termPos;
- CmpDocId(const uint32_t *tp) : termPos(tp) {}
+ explicit CmpDocId(const uint32_t *tp) : termPos(tp) {}
bool operator()(const ref_t &a, const ref_t &b) const {
return (termPos[a] < termPos[b]);
}
@@ -29,7 +29,7 @@ private:
struct CmpWeight {
const int32_t *weight;
- CmpWeight(const int32_t *w) : weight(w) {}
+ explicit CmpWeight(const int32_t *w) : weight(w) {}
bool operator()(const ref_t &a, const ref_t &b) const {
return (weight[a] > weight[b]);
}
@@ -61,7 +61,7 @@ private:
}
public:
- WeightedSetTermSearchImpl(search::fef::TermFieldMatchData &tmd,
+ WeightedSetTermSearchImpl(fef::TermFieldMatchData &tmd,
bool field_is_filter,
const std::vector<int32_t> &weights,
IteratorPack &&iteratorPack)
@@ -180,7 +180,7 @@ WeightedSetTermSearch::create(const std::vector<SearchIterator *> &children,
//-----------------------------------------------------------------------------
SearchIterator::UP
-WeightedSetTermSearch::create(search::fef::TermFieldMatchData &tmd,
+WeightedSetTermSearch::create(fef::TermFieldMatchData &tmd,
bool field_is_filter,
const std::vector<int32_t> &weights,
std::vector<DocumentWeightIterator> &&iterators)