aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2022-04-03 14:43:31 +0200
committer GitHub <noreply@github.com> 2022-04-03 14:43:31 +0200
commit a8ebc51eaeb26c977e997e6a9f1dab2123cdddcf (patch)
tree 20672c78f14abdc6fdb39726bbd0c331c31e38ea
parent d0317f54368c56bcc26758e7e3d11cf79bdd9f91 (diff)
parent a265c854388a8764607915bf117383af01585b90 (diff)
Merge pull request #21955 from vespa-engine/toregge/factor-out-multi-string-enum-search-context-from-multi-string-attribute
Factor out MultiStringEnumSearchContext from MultiStringAttribute.
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/CMakeLists.txt 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.cpp 15
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h 24
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp 20
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.cpp 15
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h 21
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp 17
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multistringattribute.h 53
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp 77
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp 8
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/stringbase.cpp 10
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/stringbase.h 47
14 files changed, 122 insertions, 192 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
index e267eaab06e..3c34bf8a57d 100644
--- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
@@ -66,6 +66,8 @@ vespa_add_library(searchlib_attribute OBJECT
loadedvalue.cpp
multi_numeric_enum_search_context.cpp
multi_numeric_search_context.cpp
+ multi_string_enum_search_context.cpp
+ multi_string_enum_hint_search_context.cpp
multi_value_mapping.cpp
multi_value_mapping_base.cpp
multienumattribute.cpp
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.cpp
new file mode 100644
index 00000000000..55886ac85fa
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.cpp
@@ -0,0 +1,15 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "multi_string_enum_hint_search_context.hpp"
+#include <vespa/searchcommon/attribute/multivalue.h>
+
+using ValueRef = search::multivalue::Value<vespalib::datastore::AtomicEntryRef>;
+using WeightedValueRef = search::multivalue::WeightedValue<vespalib::datastore::AtomicEntryRef>;
+
+namespace search::attribute {
+
+template class MultiStringEnumHintSearchContext<ValueRef>;
+
+template class MultiStringEnumHintSearchContext<WeightedValueRef>;
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h
new file mode 100644
index 00000000000..92650851116
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h
@@ -0,0 +1,24 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "multi_string_enum_search_context.h"
+#include "enumhintsearchcontext.h"
+
+namespace search::attribute {
+
+/*
+ * MultiStringEnumHintSearchContext handles the creation of search iterators
+ * for a query term on a multi value string enumerated attribute vector using
+ * dictionary information to eliminate searches for nonexisting words.
+ */
+template <typename M>
+class MultiStringEnumHintSearchContext : public MultiStringEnumSearchContext<M>,
+ public EnumHintSearchContext
+{
+public:
+ MultiStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const MultiValueMapping<M>& mv_mapping, const EnumStoreT<const char*>& enum_store, uint32_t doc_id_limit, uint64_t num_values);
+ ~MultiStringEnumHintSearchContext() override;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp
new file mode 100644
index 00000000000..a6b0f3f5eb9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp
@@ -0,0 +1,20 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "multi_string_enum_hint_search_context.h"
+#include <vespa/searchlib/query/query_term_ucs4.h>
+
+namespace search::attribute {
+
+template <typename M>
+MultiStringEnumHintSearchContext<M>::MultiStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const MultiValueMapping<M>& mv_mapping, const EnumStoreT<const char*>& enum_store, uint32_t doc_id_limit, uint64_t num_values)
+ : MultiStringEnumSearchContext<M>(std::move(qTerm), cased, toBeSearched, mv_mapping, enum_store),
+ EnumHintSearchContext(enum_store.get_dictionary(),
+ doc_id_limit, num_values)
+{
+ this->setup_enum_hint_sc(enum_store, *this);
+}
+
+template <typename M>
+MultiStringEnumHintSearchContext<M>::~MultiStringEnumHintSearchContext() = default;
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.cpp
new file mode 100644
index 00000000000..4abaf02e2e8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.cpp
@@ -0,0 +1,15 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "multi_string_enum_search_context.hpp"
+#include <vespa/searchcommon/attribute/multivalue.h>
+
+using ValueRef = search::multivalue::Value<vespalib::datastore::AtomicEntryRef>;
+using WeightedValueRef = search::multivalue::WeightedValue<vespalib::datastore::AtomicEntryRef>;
+
+namespace search::attribute {
+
+template class MultiStringEnumSearchContext<ValueRef>;
+
+template class MultiStringEnumSearchContext<WeightedValueRef>;
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h
new file mode 100644
index 00000000000..a4f05a5c9cc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h
@@ -0,0 +1,21 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "multi_enum_search_context.h"
+#include "string_search_context.h"
+
+namespace search::attribute {
+
+/*
+ * MultiStringEnumSearchContext handles the creation of search iterators for
+ * a query term on a multi value string enumerated attribute vector.
+ */
+template <typename M>
+class MultiStringEnumSearchContext : public MultiEnumSearchContext<const char*, StringSearchContext, M>
+{
+public:
+ MultiStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const MultiValueMapping<M>& mv_mapping, const EnumStoreT<const char*>& enum_store);
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp
new file mode 100644
index 00000000000..02a740b06dc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp
@@ -0,0 +1,17 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "multi_string_enum_search_context.h"
+#include "multi_enum_search_context.hpp"
+#include <vespa/searchlib/query/query_term_simple.h>
+
+namespace search::attribute {
+
+template <typename M>
+MultiStringEnumSearchContext<M>::MultiStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const MultiValueMapping<M>& mv_mapping, const EnumStoreT<const char*>& enum_store)
+ : MultiEnumSearchContext<const char*, StringSearchContext, M>(StringMatcher(std::move(qTerm), cased), toBeSearched, mv_mapping, enum_store)
+{
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringattribute.h
index e832f53777b..cf4169138fe 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.h
@@ -7,7 +7,6 @@
#include "enumstore.h"
#include "multienumattribute.h"
#include "multi_value_mapping.h"
-#include "enumhintsearchcontext.h"
#include <vespa/searchcommon/attribute/multivalue.h>
namespace search {
@@ -104,58 +103,6 @@ public:
return getWeightedHelper(doc, v, sz);
}
- /*
- * Specialization of SearchContext for weighted set type
- */
- class StringImplSearchContext : public StringAttribute::StringSearchContext {
- public:
- StringImplSearchContext(QueryTermSimpleUP qTerm, const StringAttribute & toBeSearched) :
- StringAttribute::StringSearchContext(std::move(qTerm), toBeSearched)
- { }
- protected:
- const MultiValueStringAttributeT<B, M> & myAttribute() const {
- return static_cast< const MultiValueStringAttributeT<B, M> & > (attribute());
- }
- int32_t onFind(DocId docId, int32_t elemId) const override;
-
- template <typename Collector>
- int32_t findNextWeight(DocId doc, int32_t elemId, int32_t & weight, Collector & collector) const;
- };
-
- /*
- * Specialization of SearchContext for weighted set type
- */
- class StringSetImplSearchContext : public StringImplSearchContext {
- public:
- StringSetImplSearchContext(attribute::SearchContext::QueryTermSimpleUP qTerm, const StringAttribute & toBeSearched) :
- StringImplSearchContext(std::move(qTerm), toBeSearched)
- { }
- protected:
- int32_t onFind(DocId docId, int32_t elemId, int32_t &weight) const override;
- };
-
- /*
- * Specialization of SearchContext for array type
- */
- class StringArrayImplSearchContext : public StringImplSearchContext {
- public:
- StringArrayImplSearchContext(attribute::SearchContext::QueryTermSimpleUP qTerm, const StringAttribute & toBeSearched) :
- StringImplSearchContext(std::move(qTerm), toBeSearched)
- { }
- protected:
- int32_t onFind(DocId docId, int32_t elemId, int32_t &weight) const override;
- };
-
- template <typename BT>
- class StringTemplSearchContext : public BT,
- public attribute::EnumHintSearchContext
- {
- using BT::queryTerm;
- using AttrType = MultiValueStringAttributeT<B, M>;
- public:
- StringTemplSearchContext(attribute::SearchContext::QueryTermSimpleUP qTerm, const AttrType & toBeSearched);
- };
-
std::unique_ptr<attribute::SearchContext>
getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams & params) const override;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
index a6825cfb9bd..212a71dad74 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
@@ -6,6 +6,7 @@
#include "multistringattribute.h"
#include "enumattribute.hpp"
#include "multienumattribute.hpp"
+#include "multi_string_enum_hint_search_context.h"
#include <vespa/vespalib/text/utf8.h>
#include <vespa/vespalib/text/lowercase.h>
#include <vespa/searchlib/util/bufferwriter.h>
@@ -41,80 +42,8 @@ std::unique_ptr<attribute::SearchContext>
MultiValueStringAttributeT<B, M>::getSearch(QueryTermSimpleUP qTerm,
const attribute::SearchContextParams &) const
{
- if (this->getCollectionType() == attribute::CollectionType::WSET) {
- return std::make_unique<StringTemplSearchContext<StringSetImplSearchContext>>(std::move(qTerm), *this);
- } else {
- return std::make_unique<StringTemplSearchContext<StringArrayImplSearchContext>>(std::move(qTerm), *this);
- }
-}
-
-namespace {
-
-template <typename E>
-class EnumAccessor {
-public:
- EnumAccessor(const E & enumStore) : _enumStore(enumStore) { }
- const char * get(typename E::Index index) const { return _enumStore.get_value(index); }
-private:
- const E & _enumStore;
-};
-
-}
-
-template <typename B, typename M>
-int32_t
-MultiValueStringAttributeT<B, M>::StringSetImplSearchContext::onFind(DocId doc, int32_t elemId, int32_t &weight) const
-{
- StringAttribute::StringSearchContext::CollectWeight collector;
- return this->findNextWeight(doc, elemId, weight, collector);
-}
-
-template <typename B, typename M>
-int32_t
-MultiValueStringAttributeT<B, M>::StringArrayImplSearchContext::onFind(DocId doc, int32_t elemId, int32_t &weight) const
-{
- StringAttribute::StringSearchContext::CollectHitCount collector;
- return this->findNextWeight(doc, elemId, weight, collector);
-}
-
-template <typename B, typename M>
-template <typename Collector>
-int32_t
-MultiValueStringAttributeT<B, M>::StringImplSearchContext::findNextWeight(DocId doc, int32_t elemId, int32_t & weight, Collector & collector) const
-{
- WeightedIndexArrayRef indices(myAttribute()._mvMapping.get(doc));
-
- EnumAccessor<typename B::EnumStore> accessor(myAttribute()._enumStore);
- int32_t foundElem = findNextMatch(indices, elemId, accessor, collector);
- weight = collector.getWeight();
- return foundElem;
-}
-
-template <typename B, typename M>
-int32_t
-MultiValueStringAttributeT<B, M>::StringImplSearchContext::onFind(DocId doc, int32_t elemId) const
-{
- const auto& attr = static_cast<const MultiValueStringAttributeT<B, M>&>(attribute());
- WeightedIndexArrayRef indices(attr._mvMapping.get(doc));
- for (uint32_t i(elemId); i < indices.size(); i++) {
- if (isMatch(attr._enumStore.get_value(indices[i].value_ref().load_acquire()))) {
- return i;
- }
- }
-
- return -1;
-}
-
-template <typename B, typename M>
-template <typename BT>
-MultiValueStringAttributeT<B, M>::StringTemplSearchContext<BT>::
-StringTemplSearchContext(QueryTermSimpleUP qTerm, const AttrType & toBeSearched) :
- BT(std::move(qTerm), toBeSearched),
- EnumHintSearchContext(toBeSearched.getEnumStore().get_dictionary(),
- toBeSearched.getCommittedDocIdLimit(),
- toBeSearched.getStatus().getNumValues())
-{
- this->setup_enum_hint_sc(toBeSearched.getEnumStore(), *this);
+ bool cased = this->get_match_is_cased();
+ return std::make_unique<attribute::MultiStringEnumHintSearchContext<M>>(std::move(qTerm), cased, *this, this->_mvMapping, this->_enumStore, this->getCommittedDocIdLimit(), this->getStatus().getNumValues());
}
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h
index 9f8827028cc..17a67a67ddf 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h
@@ -54,10 +54,6 @@ private:
using PostingMap = typename PostingParent::PostingMap;
using QueryTermSimpleUP = AttributeVector::QueryTermSimpleUP;
using SelfType = MultiValueStringPostingAttributeT<B, T>;
- using StringArrayImplSearchContext = typename MultiValueStringAttributeT<B, T>::StringArrayImplSearchContext;
- using StringArrayPostingSearchContext = attribute::StringPostingSearchContext<StringArrayImplSearchContext, SelfType, int32_t>;
- using StringSetImplSearchContext = typename MultiValueStringAttributeT<B, T>::StringSetImplSearchContext;
- using StringSetPostingSearchContext = attribute::StringPostingSearchContext<StringSetImplSearchContext, SelfType, int32_t>;
using WeightedIndex = typename MultiValueStringAttributeT<B, T>::WeightedIndex;
using generation_t = typename MultiValueStringAttributeT<B, T>::generation_t;
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
index 13de3bc6493..2c2ac48979d 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
@@ -5,6 +5,7 @@
#include "stringattribute.h"
#include "multistringpostattribute.h"
#include "multistringattribute.hpp"
+#include "multi_string_enum_search_context.h"
#include <vespa/searchlib/query/query_term_simple.h>
namespace search {
@@ -89,9 +90,10 @@ std::unique_ptr<attribute::SearchContext>
MultiValueStringPostingAttributeT<B, T>::getSearch(QueryTermSimpleUP qTerm,
const attribute::SearchContextParams & params) const
{
- using BaseSC = std::conditional_t<T::_hasWeight, StringSetImplSearchContext, StringArrayImplSearchContext>;
- using SC = std::conditional_t<T::_hasWeight, StringSetPostingSearchContext, StringArrayPostingSearchContext>;
- BaseSC base_sc(std::move(qTerm), *this);
+ using BaseSC = attribute::MultiStringEnumSearchContext<T>;
+ using SC = attribute::StringPostingSearchContext<BaseSC, SelfType, int32_t>;
+ bool cased = this->get_match_is_cased();
+ BaseSC base_sc(std::move(qTerm), cased, *this, this->_mvMapping, this->_enumStore);
return std::make_unique<SC>(std::move(base_sc), params.useBitVector(), *this);
}
diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp
index 56c35d4b0b2..70023b27802 100644
--- a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp
@@ -2,7 +2,6 @@
#include "single_string_enum_hint_search_context.h"
#include <vespa/searchlib/query/query_term_ucs4.h>
-#include <vespa/vespalib/util/regexp.h>
namespace search::attribute {
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
index f30792099f8..b60ec269383 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
@@ -152,16 +152,6 @@ StringAttribute::onSerializeForDescendingSort(DocId doc, void * serTo, long avai
return buf.size();
}
-StringAttribute::StringSearchContext::StringSearchContext(QueryTermSimple::UP qTerm,
- const StringAttribute & toBeSearched)
- : attribute::StringSearchContext(toBeSearched, std::move(qTerm), toBeSearched.getConfig().get_match() == Config::Match::CASED)
-{
-}
-
-StringAttribute::StringSearchContext::StringSearchContext(StringSearchContext&&) noexcept = default;
-
-StringAttribute::StringSearchContext::~StringSearchContext() = default;
-
uint32_t
StringAttribute::clearDoc(DocId doc)
{
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h
index c29626c13ea..e5e14829118 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.h
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h
@@ -89,53 +89,6 @@ private:
long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
-
-protected:
- class StringSearchContext : public attribute::StringSearchContext {
- public:
- StringSearchContext(QueryTermSimpleUP qTerm, const StringAttribute & toBeSearched);
- StringSearchContext(StringSearchContext&&) noexcept;
- ~StringSearchContext() override;
- protected:
- bool isMatch(const char *src) const { return match(src); }
-
- class CollectHitCount {
- public:
- CollectHitCount() : _hitCount(0) { }
- void addWeight(int32_t w) {
- (void) w;
- _hitCount++;
- }
- int32_t getWeight() const { return _hitCount; }
- bool hasMatch() const { return _hitCount != 0; }
- private:
- uint32_t _hitCount;
- };
- class CollectWeight {
- public:
- CollectWeight() : _hitCount(0), _weight(0) { }
- void addWeight(int32_t w) {
- _weight += w;
- _hitCount++;
- }
- int32_t getWeight() const { return _weight; }
- bool hasMatch() const { return _hitCount != 0; }
- private:
- uint32_t _hitCount;
- int32_t _weight;
- };
-
- template<typename WeightedT, typename Accessor, typename Collector>
- int32_t findNextMatch(vespalib::ConstArrayRef<WeightedT> w, int32_t elemId, const Accessor & ac, Collector & collector) const {
- for (uint32_t i(elemId); i < w.size(); i++) {
- if (isMatch(ac.get(w[i].value_ref().load_acquire()))) {
- collector.addWeight(w[i].weight());
- return i;
- }
- }
- return -1;
- }
- };
};
}