diff options
author | Tor Egge <Tor.Egge@online.no> | 2022-04-02 12:59:24 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2022-04-02 12:59:24 +0200 |
commit | 5b4ac491b078bc486ad16b1819a13bf80bdd03b5 (patch) | |
tree | 9933aab986f36b2f693aa99cebf1d16a7c0600de | |
parent | caa488658d2b5c812e2450f7d91d2a9fb672618f (diff) |
Factor out SingleStringEnumSearchContext from SingleStringAttribute.
14 files changed, 102 insertions, 53 deletions
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp index fd10bfcf47c..1f570e0a381 100644 --- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp @@ -389,7 +389,7 @@ TEST("testSingleValue") { EXPECT_EQUAL(24u, sizeof(SearchContext)); EXPECT_EQUAL(56u, sizeof(StringSearchHelper)); - EXPECT_EQUAL(88u, sizeof(SingleValueStringAttribute::StringSingleImplSearchContext)); + EXPECT_EQUAL(104u, sizeof(attribute::SingleStringEnumSearchContext)); { Config cfg(BasicType::STRING, CollectionType::SINGLE); SingleValueStringAttribute svsa("svsa", cfg); diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index 2558b9878ed..e267eaab06e 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -108,6 +108,8 @@ vespa_add_library(searchlib_attribute OBJECT singlestringpostattribute.cpp single_numeric_enum_search_context.cpp single_numeric_search_context.cpp + single_string_enum_search_context.cpp + single_string_enum_hint_search_context.cpp sourceselector.cpp stringattribute.cpp stringbase.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h index 4e3bd259cc6..61f578d9f2b 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.h +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h @@ -386,7 +386,7 @@ public: /** Return the fixed length of the attribute. If 0 then you must inquire each document. */ size_t getFixedWidth() const override { return _config.basicType().fixedSize(); } - const Config &getConfig() const { return _config; } + const Config &getConfig() const noexcept { return _config; } void update_config(const Config& cfg); BasicType getInternalBasicType() const { return _config.basicType(); } CollectionType getInternalCollectionType() const { return _config.collectionType(); } diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h index 78e372ed218..9ee56a27a85 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h @@ -301,7 +301,7 @@ StringPostingSearchContext<BaseSC, AttrT, DataT>::useThis(const PostingListSearc ? this->getRegex().partial_match(_enumStore.get_value(it.getKey().load_acquire())) : false; } else if ( this->isCased() ) { - return this->isMatch(_enumStore.get_value(it.getKey().load_acquire())); + return this->match(_enumStore.get_value(it.getKey().load_acquire())); } else if (this->isFuzzy()) { return this->getFuzzyMatcher().isMatch(_enumStore.get_value(it.getKey().load_acquire())); } diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp new file mode 100644 index 00000000000..56c35d4b0b2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp @@ -0,0 +1,19 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "single_string_enum_hint_search_context.h" +#include <vespa/searchlib/query/query_term_ucs4.h> +#include <vespa/vespalib/util/regexp.h> + +namespace search::attribute { + +SingleStringEnumHintSearchContext::SingleStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const vespalib::datastore::AtomicEntryRef* enum_indices, const EnumStoreT<const char*>& enum_store, uint32_t doc_id_limit, uint64_t num_values) + : SingleStringEnumSearchContext(std::move(qTerm), cased, toBeSearched, enum_indices, enum_store), + EnumHintSearchContext(enum_store.get_dictionary(), + doc_id_limit, num_values) +{ + setup_enum_hint_sc(enum_store, *this); +} + +SingleStringEnumHintSearchContext::~SingleStringEnumHintSearchContext() = default; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h new file mode 100644 index 00000000000..f9d44454cd0 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h @@ -0,0 +1,23 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "single_string_enum_search_context.h" +#include "enumhintsearchcontext.h" + +namespace search::attribute { + +/* + * SingleStringEnumHintSearchContext handles the creation of search iterators + * for a query term on a single value string enumerated attribute vector using + * dictionary information to eliminate searches for nonexisting words. + */ +class SingleStringEnumHintSearchContext : public SingleStringEnumSearchContext, + public EnumHintSearchContext +{ +public: + SingleStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const vespalib::datastore::AtomicEntryRef* enum_indices, const EnumStoreT<const char*>& enum_store, uint32_t doc_id_limit, uint64_t num_values); + ~SingleStringEnumHintSearchContext() override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp new file mode 100644 index 00000000000..cba1d207501 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp @@ -0,0 +1,18 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "single_string_enum_search_context.h" +#include "single_enum_search_context.hpp" +#include <vespa/searchlib/query/query_term_simple.h> + +namespace search::attribute { + +SingleStringEnumSearchContext::SingleStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const vespalib::datastore::AtomicEntryRef* enum_indices, const EnumStoreT<const char*>& enum_store) + : SingleEnumSearchContext<const char*, StringSearchContext>(StringMatcher(std::move(qTerm), cased), toBeSearched, enum_indices, enum_store) +{ +} + +SingleStringEnumSearchContext::SingleStringEnumSearchContext(SingleStringEnumSearchContext&&) noexcept = default; + +SingleStringEnumSearchContext::~SingleStringEnumSearchContext() = default; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h new file mode 100644 index 00000000000..6a9ed38b4ea --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h @@ -0,0 +1,22 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "single_enum_search_context.h" +#include "string_search_context.h" + +namespace search::attribute { + +/* + * SingleStringEnumSearchContext handles the creation of search iterators for + * a query term on a single value string enumerated attribute vector. + */ +class SingleStringEnumSearchContext : public SingleEnumSearchContext<const char*, StringSearchContext> +{ +public: + SingleStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const vespalib::datastore::AtomicEntryRef* enum_indices, const EnumStoreT<const char*>& enum_store); + SingleStringEnumSearchContext(SingleStringEnumSearchContext&&) noexcept; + ~SingleStringEnumSearchContext() override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.h b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.h index 7a3f7ee2e07..e9bde76d4ec 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.h @@ -5,7 +5,6 @@ #include <vespa/searchlib/attribute/stringbase.h> #include <vespa/searchlib/attribute/enumattribute.h> #include <vespa/searchlib/attribute/singleenumattribute.h> -#include "enumhintsearchcontext.h" namespace search { @@ -79,35 +78,6 @@ public: std::unique_ptr<attribute::SearchContext> getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams & params) const override; - - class StringSingleImplSearchContext : public StringAttribute::StringSearchContext { - public: - StringSingleImplSearchContext(QueryTermSimpleUP qTerm, const StringAttribute & toBeSearched) : - StringSearchContext(std::move(qTerm), toBeSearched) - { } - StringSingleImplSearchContext(StringSingleImplSearchContext&&) noexcept = default; - protected: - int32_t onFind(DocId doc, int32_t elemId, int32_t &weight) const override { - weight = 1; - return onFind(doc, elemId); - } - - int32_t onFind(DocId doc, int32_t elemId) const override { - if ( elemId != 0) return -1; - const SingleValueStringAttributeT<B> & attr(static_cast<const SingleValueStringAttributeT<B> &>(attribute())); - return isMatch(attr._enumStore.get_value(attr.acquire_enum_entry_ref(doc))) ? 0 : -1; - } - - }; - - class StringTemplSearchContext : public StringSingleImplSearchContext, - public attribute::EnumHintSearchContext - { - using AttrType = SingleValueStringAttributeT<B>; - using StringSingleImplSearchContext::queryTerm; - public: - StringTemplSearchContext(QueryTermSimpleUP qTerm, const AttrType & toBeSearched); - }; }; using SingleValueStringAttribute = SingleValueStringAttributeT<EnumAttribute<StringAttribute> >; diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp index e4027c928a0..50ea4ef42b6 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp @@ -6,6 +6,7 @@ #include "stringattribute.h" #include "singleenumattribute.hpp" #include "attributevector.hpp" +#include "single_string_enum_hint_search_context.h" #include <vespa/vespalib/text/utf8.h> #include <vespa/vespalib/text/lowercase.h> #include <vespa/searchlib/util/bufferwriter.h> @@ -40,17 +41,8 @@ std::unique_ptr<attribute::SearchContext> SingleValueStringAttributeT<B>::getSearch(QueryTermSimpleUP qTerm, const attribute::SearchContextParams &) const { - return std::make_unique<StringTemplSearchContext>(std::move(qTerm), *this); -} - -template <typename B> -SingleValueStringAttributeT<B>::StringTemplSearchContext::StringTemplSearchContext(QueryTermSimple::UP qTerm, const AttrType & toBeSearched) : - StringSingleImplSearchContext(std::move(qTerm), toBeSearched), - EnumHintSearchContext(toBeSearched.getEnumStore().get_dictionary(), - toBeSearched.getCommittedDocIdLimit(), - toBeSearched.getStatus().getNumValues()) -{ - this->setup_enum_hint_sc(toBeSearched.getEnumStore(), *this); + bool cased = this->get_match_is_cased(); + return std::make_unique<attribute::SingleStringEnumHintSearchContext>(std::move(qTerm), cased, *this, &this->_enumIndices.acquire_elem_ref(0), this->_enumStore, this->getCommittedDocIdLimit(), this->getStatus().getNumValues()); } } diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h index f75926e5a60..2643f11eaf3 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h @@ -40,10 +40,6 @@ private: using PostingMap = typename PostingParent::PostingMap; using QueryTermSimpleUP = AttributeVector::QueryTermSimpleUP; using SelfType = SingleValueStringPostingAttributeT<B>; - using StringSingleImplSearchContext = typename SingleValueStringAttributeT<B>::StringSingleImplSearchContext; - using StringSinglePostingSearchContext = attribute::StringPostingSearchContext<StringSingleImplSearchContext, - SelfType, - vespalib::btree::BTreeNoLeafData>; using ValueModifier = typename SingleValueStringAttributeT<B>::ValueModifier; using generation_t = typename SingleValueStringAttributeT<B>::generation_t; diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp index cbffe1a2662..ddbc24d2b75 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp @@ -3,6 +3,7 @@ #pragma once #include "singlestringpostattribute.h" +#include "single_string_enum_search_context.h" #include <vespa/searchlib/query/query_term_ucs4.h> namespace search { @@ -139,10 +140,13 @@ std::unique_ptr<attribute::SearchContext> SingleValueStringPostingAttributeT<B>::getSearch(QueryTermSimpleUP qTerm, const attribute::SearchContextParams & params) const { - StringSingleImplSearchContext base_sc(std::move(qTerm), *this); - return std::make_unique<StringSinglePostingSearchContext>(std::move(base_sc), - params.useBitVector(), - *this); + using BaseSC = attribute::SingleStringEnumSearchContext; + using SC = attribute::StringPostingSearchContext<BaseSC, SelfType, vespalib::btree::BTreeNoLeafData>; + bool cased = this->get_match_is_cased(); + BaseSC base_sc(std::move(qTerm), cased, *this, &this->_enumIndices.acquire_elem_ref(0), this->_enumStore); + return std::make_unique<SC>(std::move(base_sc), + params.useBitVector(), + *this); } } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/string_matcher.h b/searchlib/src/vespa/searchlib/attribute/string_matcher.h index 51cd3d238a6..ea4debecc0d 100644 --- a/searchlib/src/vespa/searchlib/attribute/string_matcher.h +++ b/searchlib/src/vespa/searchlib/attribute/string_matcher.h @@ -17,10 +17,11 @@ class StringMatcher private: std::unique_ptr<QueryTermUCS4> _query_term; attribute::StringSearchHelper _helper; -protected: +public: StringMatcher(std::unique_ptr<QueryTermSimple> qTerm, bool cased); StringMatcher(StringMatcher&&) noexcept; ~StringMatcher(); +protected: bool isValid() const; bool match(const char *src) const { return _helper.isMatch(src); } bool isPrefix() const { return _helper.isPrefix(); } diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h index 366fd89c62d..c29626c13ea 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.h +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h @@ -72,6 +72,8 @@ protected: bool onAddDoc(DocId doc) override; vespalib::MemoryUsage getChangeVectorMemoryUsage() const override; + + bool get_match_is_cased() const noexcept { return getConfig().get_match() == attribute::Config::Match::CASED; } private: virtual void load_posting_lists(LoadedVector& loaded); virtual void load_enum_store(LoadedVector& loaded); |