diff options
author | Geir Storli <geirst@yahooinc.com> | 2023-12-08 21:34:01 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-08 21:34:01 +0100 |
commit | 8ee864d6a1ffed020d5d2e6158b49ba8037e6ebb (patch) | |
tree | 265fb65dd66fcbafb61a1bcb298e73368461ec75 | |
parent | 441579e0a00a859ae1e1ddf4dc995be1df760346 (diff) | |
parent | 3dbbf33cfd54e0fda16ced62aedab94282957b44 (diff) |
Merge pull request #29596 from vespa-engine/geirst/multi-term-filter-refactor
Make multi-term filter iterator available for other blueprints.
3 files changed, 120 insertions, 62 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp index 1810b15f3f1..42b2cca06f7 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "attribute_weighted_set_blueprint.h" +#include "multi_term_filter.hpp" #include <vespa/searchcommon/attribute/i_search_context.h> #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/fef/matchdatalayout.h> @@ -19,9 +20,8 @@ namespace { using attribute::ISearchContext; using attribute::IAttributeVector; -//----------------------------------------------------------------------------- -class UseAttr +class AttrWrapper { private: const attribute::IAttributeVector &_attr; @@ -30,18 +30,16 @@ protected: const attribute::IAttributeVector &attribute() const { return _attr; } public: - explicit UseAttr(const attribute::IAttributeVector & attr) + explicit AttrWrapper(const attribute::IAttributeVector & attr) : _attr(attr) {} }; -//----------------------------------------------------------------------------- - -class UseStringEnum : public UseAttr +class StringEnumWrapper : public AttrWrapper { public: using TokenT = uint32_t; - explicit UseStringEnum(const IAttributeVector & attr) - : UseAttr(attr) {} + explicit StringEnumWrapper(const IAttributeVector & attr) + : AttrWrapper(attr) {} auto mapToken(const ISearchContext &context) const { return attribute().findFoldedEnums(context.queryTerm()->getTerm()); } @@ -50,13 +48,11 @@ public: } }; -//----------------------------------------------------------------------------- - -class UseInteger : public UseAttr +class IntegerWrapper : public AttrWrapper { public: using TokenT = uint64_t; - explicit UseInteger(const IAttributeVector & attr) : UseAttr(attr) {} + explicit IntegerWrapper(const IAttributeVector & attr) : AttrWrapper(attr) {} std::vector<int64_t> mapToken(const ISearchContext &context) const { std::vector<int64_t> result; Int64Range range(context.getAsIntegerTerm()); @@ -70,58 +66,25 @@ public: } }; -//----------------------------------------------------------------------------- - -template <typename T> -class AttributeFilter final : public queryeval::SearchIterator +template <typename WrapperType> +std::unique_ptr<queryeval::SearchIterator> +make_multi_term_filter(fef::TermFieldMatchData& tfmd, + const IAttributeVector& attr, + const std::vector<int32_t>& weights, + const std::vector<ISearchContext*>& contexts) { -private: - using Key = typename T::TokenT; - using Map = vespalib::hash_map<Key, int32_t, vespalib::hash<Key>, std::equal_to<Key>, vespalib::hashtable_base::and_modulator>; - using TFMD = fef::TermFieldMatchData; - - TFMD &_tfmd; - T _attr; - Map _map; - int32_t _weight; - -public: - AttributeFilter(fef::TermFieldMatchData &tfmd, - const IAttributeVector & attr, - const std::vector<int32_t> & weights, - const std::vector<ISearchContext*> & contexts) - : _tfmd(tfmd), _attr(attr), _map(), _weight(0) - { - for (size_t i = 0; i < contexts.size(); ++i) { - for (int64_t token : _attr.mapToken(*contexts[i])) { - _map[token] = weights[i]; - } + using FilterType = attribute::MultiTermFilter<WrapperType>; + typename FilterType::TokenMap tokens; + WrapperType wrapper(attr); + for (size_t i = 0; i < contexts.size(); ++i) { + for (auto token : wrapper.mapToken(*contexts[i])) { + tokens[token] = weights[i]; } } - void and_hits_into(BitVector & result,uint32_t begin_id) override { - auto end = _map.end(); - result.foreach_truebit([&, end](uint32_t key) { if ( _map.find(_attr.getToken(key)) == end) { result.clearBit(key); }}, begin_id); - } - - void doSeek(uint32_t docId) override { - auto pos = _map.find(_attr.getToken(docId)); - if (pos != _map.end()) { - _weight = pos->second; - setDocId(docId); - } - } - void doUnpack(uint32_t docId) override { - _tfmd.reset(docId); - fef::TermFieldMatchDataPosition pos; - pos.setElementWeight(_weight); - _tfmd.appendPosition(pos); - } - void visitMembers(vespalib::ObjectVisitor &) const override {} -}; - -//----------------------------------------------------------------------------- + return std::make_unique<FilterType>(tfmd, wrapper, std::move(tokens)); +} -} // namespace search::<unnamed> +} AttributeWeightedSetBlueprint::AttributeWeightedSetBlueprint(const queryeval::FieldSpec &field, const IAttributeVector & attr) : queryeval::ComplexLeafBlueprint(field), @@ -176,10 +139,10 @@ AttributeWeightedSetBlueprint::createLeafSearch(const fef::TermFieldMatchDataArr bool isString = (_attr.isStringType() && _attr.hasEnum()); assert(!_attr.hasMultiValue()); if (isString) { - return std::make_unique<AttributeFilter<UseStringEnum>>(tfmd, _attr, _weights, _contexts); + return make_multi_term_filter<StringEnumWrapper>(tfmd, _attr, _weights, _contexts); } else { assert(_attr.isIntegerType()); - return std::make_unique<AttributeFilter<UseInteger>>(tfmd, _attr, _weights, _contexts); + return make_multi_term_filter<IntegerWrapper>(tfmd, _attr, _weights, _contexts); } } } diff --git a/searchlib/src/vespa/searchlib/attribute/multi_term_filter.h b/searchlib/src/vespa/searchlib/attribute/multi_term_filter.h new file mode 100644 index 00000000000..adbf37d2dcd --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multi_term_filter.h @@ -0,0 +1,44 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/vespalib/stllike/hash_map.h> + +namespace search::fef { class TermFieldMatchData; } + +namespace search::attribute { + +/** + * Search iterator used to match a multi-term query operator against a single value attribute. + * + * The caller must provide a hash map (token -> weight) containing all tokens in the multi-term operator. + * In doSeek() the attribute value for the docid is matched against the tokens hash map. + * + * @tparam WrapperType Type that wraps an attribute vector and provides access to the attribute value for a given docid. + */ +template <typename WrapperType> +class MultiTermFilter final : public queryeval::SearchIterator +{ +public: + using Key = typename WrapperType::TokenT; + using TokenMap = vespalib::hash_map<Key, int32_t, vespalib::hash<Key>, std::equal_to<Key>, vespalib::hashtable_base::and_modulator>; + +private: + fef::TermFieldMatchData& _tfmd; + WrapperType _attr; + TokenMap _map; + int32_t _weight; + +public: + MultiTermFilter(fef::TermFieldMatchData& tfmd, + WrapperType attr, + TokenMap&& map); + + void and_hits_into(BitVector& result, uint32_t begin_id) override; + void doSeek(uint32_t docId) override; + void doUnpack(uint32_t docId) override; + void visitMembers(vespalib::ObjectVisitor&) const override {} +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/multi_term_filter.hpp b/searchlib/src/vespa/searchlib/attribute/multi_term_filter.hpp new file mode 100644 index 00000000000..dc572aedbff --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multi_term_filter.hpp @@ -0,0 +1,51 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "multi_term_filter.h" +#include <vespa/searchlib/common/bitvector.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> + +namespace search::attribute { + +template <typename WrapperType> +MultiTermFilter<WrapperType>::MultiTermFilter(fef::TermFieldMatchData& tfmd, + WrapperType attr, + TokenMap&& map) + : _tfmd(tfmd), + _attr(attr), + _map(std::move(map)), + _weight(0) +{ +} + +template <typename WrapperType> +void +MultiTermFilter<WrapperType>::and_hits_into(BitVector& result, uint32_t begin_id) +{ + auto end = _map.end(); + result.foreach_truebit([&, end](uint32_t key) { if ( _map.find(_attr.getToken(key)) == end) { result.clearBit(key); }}, begin_id); +} + +template <typename WrapperType> +void +MultiTermFilter<WrapperType>::doSeek(uint32_t docId) +{ + auto pos = _map.find(_attr.getToken(docId)); + if (pos != _map.end()) { + _weight = pos->second; + setDocId(docId); + } +} + +template <typename WrapperType> +void +MultiTermFilter<WrapperType>::doUnpack(uint32_t docId) +{ + _tfmd.reset(docId); + fef::TermFieldMatchDataPosition pos; + pos.setElementWeight(_weight); + _tfmd.appendPosition(pos); +} + +} |