diff options
author | Tor Egge <Tor.Egge@online.no> | 2022-04-02 18:03:52 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2022-04-02 18:03:52 +0200 |
commit | a265c854388a8764607915bf117383af01585b90 (patch) | |
tree | 20672c78f14abdc6fdb39726bbd0c331c31e38ea | |
parent | d0317f54368c56bcc26758e7e3d11cf79bdd9f91 (diff) |
Factor out MultiStringEnumSearchContext from MultiStringAttribute.
14 files changed, 122 insertions, 192 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index e267eaab06e..3c34bf8a57d 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -66,6 +66,8 @@ vespa_add_library(searchlib_attribute OBJECT loadedvalue.cpp multi_numeric_enum_search_context.cpp multi_numeric_search_context.cpp + multi_string_enum_search_context.cpp + multi_string_enum_hint_search_context.cpp multi_value_mapping.cpp multi_value_mapping_base.cpp multienumattribute.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.cpp new file mode 100644 index 00000000000..55886ac85fa --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.cpp @@ -0,0 +1,15 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "multi_string_enum_hint_search_context.hpp" +#include <vespa/searchcommon/attribute/multivalue.h> + +using ValueRef = search::multivalue::Value<vespalib::datastore::AtomicEntryRef>; +using WeightedValueRef = search::multivalue::WeightedValue<vespalib::datastore::AtomicEntryRef>; + +namespace search::attribute { + +template class MultiStringEnumHintSearchContext<ValueRef>; + +template class MultiStringEnumHintSearchContext<WeightedValueRef>; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h new file mode 100644 index 00000000000..92650851116 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h @@ -0,0 +1,24 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "multi_string_enum_search_context.h" +#include "enumhintsearchcontext.h" + +namespace search::attribute { + +/* + * MultiStringEnumHintSearchContext handles the creation of search iterators + * for a query term on a multi value string enumerated attribute vector using + * dictionary information to eliminate searches for nonexisting words. + */ +template <typename M> +class MultiStringEnumHintSearchContext : public MultiStringEnumSearchContext<M>, + public EnumHintSearchContext +{ +public: + MultiStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const MultiValueMapping<M>& mv_mapping, const EnumStoreT<const char*>& enum_store, uint32_t doc_id_limit, uint64_t num_values); + ~MultiStringEnumHintSearchContext() override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp new file mode 100644 index 00000000000..a6b0f3f5eb9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp @@ -0,0 +1,20 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "multi_string_enum_hint_search_context.h" +#include <vespa/searchlib/query/query_term_ucs4.h> + +namespace search::attribute { + +template <typename M> +MultiStringEnumHintSearchContext<M>::MultiStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const MultiValueMapping<M>& mv_mapping, const EnumStoreT<const char*>& enum_store, uint32_t doc_id_limit, uint64_t num_values) + : MultiStringEnumSearchContext<M>(std::move(qTerm), cased, toBeSearched, mv_mapping, enum_store), + EnumHintSearchContext(enum_store.get_dictionary(), + doc_id_limit, num_values) +{ + this->setup_enum_hint_sc(enum_store, *this); +} + +template <typename M> +MultiStringEnumHintSearchContext<M>::~MultiStringEnumHintSearchContext() = default; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.cpp new file mode 100644 index 00000000000..4abaf02e2e8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.cpp @@ -0,0 +1,15 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "multi_string_enum_search_context.hpp" +#include <vespa/searchcommon/attribute/multivalue.h> + +using ValueRef = search::multivalue::Value<vespalib::datastore::AtomicEntryRef>; +using WeightedValueRef = search::multivalue::WeightedValue<vespalib::datastore::AtomicEntryRef>; + +namespace search::attribute { + +template class MultiStringEnumSearchContext<ValueRef>; + +template class MultiStringEnumSearchContext<WeightedValueRef>; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h new file mode 100644 index 00000000000..a4f05a5c9cc --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h @@ -0,0 +1,21 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "multi_enum_search_context.h" +#include "string_search_context.h" + +namespace search::attribute { + +/* + * MultiStringEnumSearchContext handles the creation of search iterators for + * a query term on a multi value string enumerated attribute vector. + */ +template <typename M> +class MultiStringEnumSearchContext : public MultiEnumSearchContext<const char*, StringSearchContext, M> +{ +public: + MultiStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const MultiValueMapping<M>& mv_mapping, const EnumStoreT<const char*>& enum_store); +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp new file mode 100644 index 00000000000..02a740b06dc --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp @@ -0,0 +1,17 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "multi_string_enum_search_context.h" +#include "multi_enum_search_context.hpp" +#include <vespa/searchlib/query/query_term_simple.h> + +namespace search::attribute { + +template <typename M> +MultiStringEnumSearchContext<M>::MultiStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const MultiValueMapping<M>& mv_mapping, const EnumStoreT<const char*>& enum_store) + : MultiEnumSearchContext<const char*, StringSearchContext, M>(StringMatcher(std::move(qTerm), cased), toBeSearched, mv_mapping, enum_store) +{ +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringattribute.h index e832f53777b..cf4169138fe 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.h @@ -7,7 +7,6 @@ #include "enumstore.h" #include "multienumattribute.h" #include "multi_value_mapping.h" -#include "enumhintsearchcontext.h" #include <vespa/searchcommon/attribute/multivalue.h> namespace search { @@ -104,58 +103,6 @@ public: return getWeightedHelper(doc, v, sz); } - /* - * Specialization of SearchContext for weighted set type - */ - class StringImplSearchContext : public StringAttribute::StringSearchContext { - public: - StringImplSearchContext(QueryTermSimpleUP qTerm, const StringAttribute & toBeSearched) : - StringAttribute::StringSearchContext(std::move(qTerm), toBeSearched) - { } - protected: - const MultiValueStringAttributeT<B, M> & myAttribute() const { - return static_cast< const MultiValueStringAttributeT<B, M> & > (attribute()); - } - int32_t onFind(DocId docId, int32_t elemId) const override; - - template <typename Collector> - int32_t findNextWeight(DocId doc, int32_t elemId, int32_t & weight, Collector & collector) const; - }; - - /* - * Specialization of SearchContext for weighted set type - */ - class StringSetImplSearchContext : public StringImplSearchContext { - public: - StringSetImplSearchContext(attribute::SearchContext::QueryTermSimpleUP qTerm, const StringAttribute & toBeSearched) : - StringImplSearchContext(std::move(qTerm), toBeSearched) - { } - protected: - int32_t onFind(DocId docId, int32_t elemId, int32_t &weight) const override; - }; - - /* - * Specialization of SearchContext for array type - */ - class StringArrayImplSearchContext : public StringImplSearchContext { - public: - StringArrayImplSearchContext(attribute::SearchContext::QueryTermSimpleUP qTerm, const StringAttribute & toBeSearched) : - StringImplSearchContext(std::move(qTerm), toBeSearched) - { } - protected: - int32_t onFind(DocId docId, int32_t elemId, int32_t &weight) const override; - }; - - template <typename BT> - class StringTemplSearchContext : public BT, - public attribute::EnumHintSearchContext - { - using BT::queryTerm; - using AttrType = MultiValueStringAttributeT<B, M>; - public: - StringTemplSearchContext(attribute::SearchContext::QueryTermSimpleUP qTerm, const AttrType & toBeSearched); - }; - std::unique_ptr<attribute::SearchContext> getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams & params) const override; }; diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp index a6825cfb9bd..212a71dad74 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp @@ -6,6 +6,7 @@ #include "multistringattribute.h" #include "enumattribute.hpp" #include "multienumattribute.hpp" +#include "multi_string_enum_hint_search_context.h" #include <vespa/vespalib/text/utf8.h> #include <vespa/vespalib/text/lowercase.h> #include <vespa/searchlib/util/bufferwriter.h> @@ -41,80 +42,8 @@ std::unique_ptr<attribute::SearchContext> MultiValueStringAttributeT<B, M>::getSearch(QueryTermSimpleUP qTerm, const attribute::SearchContextParams &) const { - if (this->getCollectionType() == attribute::CollectionType::WSET) { - return std::make_unique<StringTemplSearchContext<StringSetImplSearchContext>>(std::move(qTerm), *this); - } else { - return std::make_unique<StringTemplSearchContext<StringArrayImplSearchContext>>(std::move(qTerm), *this); - } -} - -namespace { - -template <typename E> -class EnumAccessor { -public: - EnumAccessor(const E & enumStore) : _enumStore(enumStore) { } - const char * get(typename E::Index index) const { return _enumStore.get_value(index); } -private: - const E & _enumStore; -}; - -} - -template <typename B, typename M> -int32_t -MultiValueStringAttributeT<B, M>::StringSetImplSearchContext::onFind(DocId doc, int32_t elemId, int32_t &weight) const -{ - StringAttribute::StringSearchContext::CollectWeight collector; - return this->findNextWeight(doc, elemId, weight, collector); -} - -template <typename B, typename M> -int32_t -MultiValueStringAttributeT<B, M>::StringArrayImplSearchContext::onFind(DocId doc, int32_t elemId, int32_t &weight) const -{ - StringAttribute::StringSearchContext::CollectHitCount collector; - return this->findNextWeight(doc, elemId, weight, collector); -} - -template <typename B, typename M> -template <typename Collector> -int32_t -MultiValueStringAttributeT<B, M>::StringImplSearchContext::findNextWeight(DocId doc, int32_t elemId, int32_t & weight, Collector & collector) const -{ - WeightedIndexArrayRef indices(myAttribute()._mvMapping.get(doc)); - - EnumAccessor<typename B::EnumStore> accessor(myAttribute()._enumStore); - int32_t foundElem = findNextMatch(indices, elemId, accessor, collector); - weight = collector.getWeight(); - return foundElem; -} - -template <typename B, typename M> -int32_t -MultiValueStringAttributeT<B, M>::StringImplSearchContext::onFind(DocId doc, int32_t elemId) const -{ - const auto& attr = static_cast<const MultiValueStringAttributeT<B, M>&>(attribute()); - WeightedIndexArrayRef indices(attr._mvMapping.get(doc)); - for (uint32_t i(elemId); i < indices.size(); i++) { - if (isMatch(attr._enumStore.get_value(indices[i].value_ref().load_acquire()))) { - return i; - } - } - - return -1; -} - -template <typename B, typename M> -template <typename BT> -MultiValueStringAttributeT<B, M>::StringTemplSearchContext<BT>:: -StringTemplSearchContext(QueryTermSimpleUP qTerm, const AttrType & toBeSearched) : - BT(std::move(qTerm), toBeSearched), - EnumHintSearchContext(toBeSearched.getEnumStore().get_dictionary(), - toBeSearched.getCommittedDocIdLimit(), - toBeSearched.getStatus().getNumValues()) -{ - this->setup_enum_hint_sc(toBeSearched.getEnumStore(), *this); + bool cased = this->get_match_is_cased(); + return std::make_unique<attribute::MultiStringEnumHintSearchContext<M>>(std::move(qTerm), cased, *this, this->_mvMapping, this->_enumStore, this->getCommittedDocIdLimit(), this->getStatus().getNumValues()); } } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h index 9f8827028cc..17a67a67ddf 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h @@ -54,10 +54,6 @@ private: using PostingMap = typename PostingParent::PostingMap; using QueryTermSimpleUP = AttributeVector::QueryTermSimpleUP; using SelfType = MultiValueStringPostingAttributeT<B, T>; - using StringArrayImplSearchContext = typename MultiValueStringAttributeT<B, T>::StringArrayImplSearchContext; - using StringArrayPostingSearchContext = attribute::StringPostingSearchContext<StringArrayImplSearchContext, SelfType, int32_t>; - using StringSetImplSearchContext = typename MultiValueStringAttributeT<B, T>::StringSetImplSearchContext; - using StringSetPostingSearchContext = attribute::StringPostingSearchContext<StringSetImplSearchContext, SelfType, int32_t>; using WeightedIndex = typename MultiValueStringAttributeT<B, T>::WeightedIndex; using generation_t = typename MultiValueStringAttributeT<B, T>::generation_t; diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp index 13de3bc6493..2c2ac48979d 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp @@ -5,6 +5,7 @@ #include "stringattribute.h" #include "multistringpostattribute.h" #include "multistringattribute.hpp" +#include "multi_string_enum_search_context.h" #include <vespa/searchlib/query/query_term_simple.h> namespace search { @@ -89,9 +90,10 @@ std::unique_ptr<attribute::SearchContext> MultiValueStringPostingAttributeT<B, T>::getSearch(QueryTermSimpleUP qTerm, const attribute::SearchContextParams & params) const { - using BaseSC = std::conditional_t<T::_hasWeight, StringSetImplSearchContext, StringArrayImplSearchContext>; - using SC = std::conditional_t<T::_hasWeight, StringSetPostingSearchContext, StringArrayPostingSearchContext>; - BaseSC base_sc(std::move(qTerm), *this); + using BaseSC = attribute::MultiStringEnumSearchContext<T>; + using SC = attribute::StringPostingSearchContext<BaseSC, SelfType, int32_t>; + bool cased = this->get_match_is_cased(); + BaseSC base_sc(std::move(qTerm), cased, *this, this->_mvMapping, this->_enumStore); return std::make_unique<SC>(std::move(base_sc), params.useBitVector(), *this); } diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp index 56c35d4b0b2..70023b27802 100644 --- a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp +++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp @@ -2,7 +2,6 @@ #include "single_string_enum_hint_search_context.h" #include <vespa/searchlib/query/query_term_ucs4.h> -#include <vespa/vespalib/util/regexp.h> namespace search::attribute { diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp index f30792099f8..b60ec269383 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp @@ -152,16 +152,6 @@ StringAttribute::onSerializeForDescendingSort(DocId doc, void * serTo, long avai return buf.size(); } -StringAttribute::StringSearchContext::StringSearchContext(QueryTermSimple::UP qTerm, - const StringAttribute & toBeSearched) - : attribute::StringSearchContext(toBeSearched, std::move(qTerm), toBeSearched.getConfig().get_match() == Config::Match::CASED) -{ -} - -StringAttribute::StringSearchContext::StringSearchContext(StringSearchContext&&) noexcept = default; - -StringAttribute::StringSearchContext::~StringSearchContext() = default; - uint32_t StringAttribute::clearDoc(DocId doc) { diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h index c29626c13ea..e5e14829118 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.h +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h @@ -89,53 +89,6 @@ private: long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; - -protected: - class StringSearchContext : public attribute::StringSearchContext { - public: - StringSearchContext(QueryTermSimpleUP qTerm, const StringAttribute & toBeSearched); - StringSearchContext(StringSearchContext&&) noexcept; - ~StringSearchContext() override; - protected: - bool isMatch(const char *src) const { return match(src); } - - class CollectHitCount { - public: - CollectHitCount() : _hitCount(0) { } - void addWeight(int32_t w) { - (void) w; - _hitCount++; - } - int32_t getWeight() const { return _hitCount; } - bool hasMatch() const { return _hitCount != 0; } - private: - uint32_t _hitCount; - }; - class CollectWeight { - public: - CollectWeight() : _hitCount(0), _weight(0) { } - void addWeight(int32_t w) { - _weight += w; - _hitCount++; - } - int32_t getWeight() const { return _weight; } - bool hasMatch() const { return _hitCount != 0; } - private: - uint32_t _hitCount; - int32_t _weight; - }; - - template<typename WeightedT, typename Accessor, typename Collector> - int32_t findNextMatch(vespalib::ConstArrayRef<WeightedT> w, int32_t elemId, const Accessor & ac, Collector & collector) const { - for (uint32_t i(elemId); i < w.size(); i++) { - if (isMatch(ac.get(w[i].value_ref().load_acquire()))) { - collector.addWeight(w[i].weight()); - return i; - } - } - return -1; - } - }; }; } |