summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Geir Storli <geirst@yahooinc.com> 2023-12-08 16:07:24 +0000
committer: Geir Storli <geirst@yahooinc.com> 2023-12-08 16:07:24 +0000
commit: 3dbbf33cfd54e0fda16ced62aedab94282957b44 (patch)
tree: 61b2dd82b8a3f89500b3ef008c7308a05ef00fb6
parent: 39032146a75f003bffb14459fee69bf8b9436fac (diff)
Make multi-term filter iterator available for other blueprints.
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp 87
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multi_term_filter.h 44
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multi_term_filter.hpp 51
3 files changed, 120 insertions, 62 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
index 1810b15f3f1..42b2cca06f7 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
@@ -1,6 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "attribute_weighted_set_blueprint.h"
+#include "multi_term_filter.hpp"
#include <vespa/searchcommon/attribute/i_search_context.h>
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/fef/matchdatalayout.h>
@@ -19,9 +20,8 @@ namespace {
using attribute::ISearchContext;
using attribute::IAttributeVector;
-//-----------------------------------------------------------------------------
-class UseAttr
+class AttrWrapper
{
private:
const attribute::IAttributeVector &_attr;
@@ -30,18 +30,16 @@ protected:
const attribute::IAttributeVector &attribute() const { return _attr; }
public:
- explicit UseAttr(const attribute::IAttributeVector & attr)
+ explicit AttrWrapper(const attribute::IAttributeVector & attr)
: _attr(attr) {}
};
-//-----------------------------------------------------------------------------
-
-class UseStringEnum : public UseAttr
+class StringEnumWrapper : public AttrWrapper
{
public:
using TokenT = uint32_t;
- explicit UseStringEnum(const IAttributeVector & attr)
- : UseAttr(attr) {}
+ explicit StringEnumWrapper(const IAttributeVector & attr)
+ : AttrWrapper(attr) {}
auto mapToken(const ISearchContext &context) const {
return attribute().findFoldedEnums(context.queryTerm()->getTerm());
}
@@ -50,13 +48,11 @@ public:
}
};
-//-----------------------------------------------------------------------------
-
-class UseInteger : public UseAttr
+class IntegerWrapper : public AttrWrapper
{
public:
using TokenT = uint64_t;
- explicit UseInteger(const IAttributeVector & attr) : UseAttr(attr) {}
+ explicit IntegerWrapper(const IAttributeVector & attr) : AttrWrapper(attr) {}
std::vector<int64_t> mapToken(const ISearchContext &context) const {
std::vector<int64_t> result;
Int64Range range(context.getAsIntegerTerm());
@@ -70,58 +66,25 @@ public:
}
};
-//-----------------------------------------------------------------------------
-
-template <typename T>
-class AttributeFilter final : public queryeval::SearchIterator
+// Builds a MultiTermFilter<WrapperType> search iterator over the given attribute.
+// Every token that contexts[i] maps to (via WrapperType::mapToken) is stored with weights[i];
+// if several contexts map to the same token, the last one assigned wins (plain map assignment).
+// weights is indexed by the context index, so it must hold at least contexts.size() entries.
+template <typename WrapperType>
+std::unique_ptr<queryeval::SearchIterator>
+make_multi_term_filter(fef::TermFieldMatchData& tfmd,
+ const IAttributeVector& attr,
+ const std::vector<int32_t>& weights,
+ const std::vector<ISearchContext*>& contexts)
{
-private:
- using Key = typename T::TokenT;
- using Map = vespalib::hash_map<Key, int32_t, vespalib::hash<Key>, std::equal_to<Key>, vespalib::hashtable_base::and_modulator>;
- using TFMD = fef::TermFieldMatchData;
-
- TFMD &_tfmd;
- T _attr;
- Map _map;
- int32_t _weight;
-
-public:
- AttributeFilter(fef::TermFieldMatchData &tfmd,
- const IAttributeVector & attr,
- const std::vector<int32_t> & weights,
- const std::vector<ISearchContext*> & contexts)
- : _tfmd(tfmd), _attr(attr), _map(), _weight(0)
- {
- for (size_t i = 0; i < contexts.size(); ++i) {
- for (int64_t token : _attr.mapToken(*contexts[i])) {
- _map[token] = weights[i];
- }
+ using FilterType = attribute::MultiTermFilter<WrapperType>;
+ typename FilterType::TokenMap tokens;
+ WrapperType wrapper(attr);
+ // Fill the token -> weight map before constructing the iterator; the map is moved into it below.
+ for (size_t i = 0; i < contexts.size(); ++i) {
+ for (auto token : wrapper.mapToken(*contexts[i])) {
+ tokens[token] = weights[i];
}
}
- void and_hits_into(BitVector & result,uint32_t begin_id) override {
- auto end = _map.end();
- result.foreach_truebit([&, end](uint32_t key) { if ( _map.find(_attr.getToken(key)) == end) { result.clearBit(key); }}, begin_id);
- }
-
- void doSeek(uint32_t docId) override {
- auto pos = _map.find(_attr.getToken(docId));
- if (pos != _map.end()) {
- _weight = pos->second;
- setDocId(docId);
- }
- }
- void doUnpack(uint32_t docId) override {
- _tfmd.reset(docId);
- fef::TermFieldMatchDataPosition pos;
- pos.setElementWeight(_weight);
- _tfmd.appendPosition(pos);
- }
- void visitMembers(vespalib::ObjectVisitor &) const override {}
-};
-
-//-----------------------------------------------------------------------------
+ return std::make_unique<FilterType>(tfmd, wrapper, std::move(tokens));
+}
-} // namespace search::<unnamed>
+}
AttributeWeightedSetBlueprint::AttributeWeightedSetBlueprint(const queryeval::FieldSpec &field, const IAttributeVector & attr)
: queryeval::ComplexLeafBlueprint(field),
@@ -176,10 +139,10 @@ AttributeWeightedSetBlueprint::createLeafSearch(const fef::TermFieldMatchDataArr
bool isString = (_attr.isStringType() && _attr.hasEnum());
assert(!_attr.hasMultiValue());
if (isString) {
- return std::make_unique<AttributeFilter<UseStringEnum>>(tfmd, _attr, _weights, _contexts);
+ return make_multi_term_filter<StringEnumWrapper>(tfmd, _attr, _weights, _contexts);
} else {
assert(_attr.isIntegerType());
- return std::make_unique<AttributeFilter<UseInteger>>(tfmd, _attr, _weights, _contexts);
+ return make_multi_term_filter<IntegerWrapper>(tfmd, _attr, _weights, _contexts);
}
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_term_filter.h b/searchlib/src/vespa/searchlib/attribute/multi_term_filter.h
new file mode 100644
index 00000000000..adbf37d2dcd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multi_term_filter.h
@@ -0,0 +1,44 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+
+namespace search::fef { class TermFieldMatchData; }
+
+namespace search::attribute {
+
+/**
+ * Search iterator used to match a multi-term query operator against a single value attribute.
+ *
+ * The caller must provide a hash map (token -> weight) containing all tokens in the multi-term operator.
+ * In doSeek() the attribute value for the docid is matched against the tokens hash map.
+ *
+ * @tparam WrapperType Type that wraps an attribute vector and provides access to the attribute value for a given docid.
+ */
+template <typename WrapperType>
+class MultiTermFilter final : public queryeval::SearchIterator
+{
+public:
+ // Key type of the token map; taken from the wrapper's TokenT.
+ using Key = typename WrapperType::TokenT;
+ // Hash map from token to the weight reported for a matching document.
+ using TokenMap = vespalib::hash_map<Key, int32_t, vespalib::hash<Key>, std::equal_to<Key>, vespalib::hashtable_base::and_modulator>;
+
+private:
+ // Match data that doUnpack() resets and appends a position to.
+ fef::TermFieldMatchData& _tfmd;
+ // Wrapper held by value; getToken(docid) is called on it to obtain the token to look up.
+ WrapperType _attr;
+ // Token -> weight map handed over by the caller.
+ TokenMap _map;
+ // Weight of the map entry found by the latest successful doSeek(); starts at 0.
+ int32_t _weight;
+
+public:
+ // Takes the wrapper by value and takes over the contents of 'map'.
+ MultiTermFilter(fef::TermFieldMatchData& tfmd,
+ WrapperType attr,
+ TokenMap&& map);
+
+ // Clears bits in 'result' for documents whose token is not in the token map.
+ void and_hits_into(BitVector& result, uint32_t begin_id) override;
+ // Sets the iterator's docid to 'docId' when the document's token is in the token map.
+ void doSeek(uint32_t docId) override;
+ // Writes the remembered weight into the term field match data for 'docId'.
+ void doUnpack(uint32_t docId) override;
+ // Intentionally empty: no members are reported to the visitor.
+ void visitMembers(vespalib::ObjectVisitor&) const override {}
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_term_filter.hpp b/searchlib/src/vespa/searchlib/attribute/multi_term_filter.hpp
new file mode 100644
index 00000000000..dc572aedbff
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multi_term_filter.hpp
@@ -0,0 +1,51 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "multi_term_filter.h"
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+
+namespace search::attribute {
+
+// Stores a reference to the match data, a copy of the attribute wrapper, and
+// takes over the caller's token map by move. The remembered weight starts at 0.
+template <typename WrapperType>
+MultiTermFilter<WrapperType>::MultiTermFilter(fef::TermFieldMatchData& tfmd,
+ WrapperType attr,
+ TokenMap&& map)
+ : _tfmd(tfmd),
+ _attr(attr),
+ _map(std::move(map)),
+ _weight(0)
+{
+}
+
+// Filters 'result' in place: for each bit the callback is invoked with, the bit is
+// cleared when the token of that document is not a key in the token map.
+// NOTE(review): presumably foreach_truebit visits the set bits starting at begin_id - confirm against BitVector.
+template <typename WrapperType>
+void
+MultiTermFilter<WrapperType>::and_hits_into(BitVector& result, uint32_t begin_id)
+{
+ // end() is fetched once and captured by value so it is not re-evaluated per bit.
+ auto end = _map.end();
+ result.foreach_truebit([&, end](uint32_t key) { if ( _map.find(_attr.getToken(key)) == end) { result.clearBit(key); }}, begin_id);
+}
+
+// Looks up the token of 'docId' in the token map. On a hit the entry's weight is
+// remembered and the iterator's docid is set to 'docId'; on a miss nothing is changed here.
+template <typename WrapperType>
+void
+MultiTermFilter<WrapperType>::doSeek(uint32_t docId)
+{
+ auto pos = _map.find(_attr.getToken(docId));
+ if (pos != _map.end()) {
+ // Keep the weight so that doUnpack() can report it.
+ _weight = pos->second;
+ setDocId(docId);
+ }
+}
+
+// Resets the term field match data for 'docId' and appends a single position whose
+// element weight is the weight remembered by the latest successful doSeek().
+// NOTE(review): assumes the caller unpacks only the docid it last seeked to - confirm against SearchIterator's contract.
+template <typename WrapperType>
+void
+MultiTermFilter<WrapperType>::doUnpack(uint32_t docId)
+{
+ _tfmd.reset(docId);
+ fef::TermFieldMatchDataPosition pos;
+ pos.setElementWeight(_weight);
+ _tfmd.appendPosition(pos);
+}
+
+}