summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2023-11-22 17:16:47 +0100
committer GitHub <noreply@github.com> 2023-11-22 17:16:47 +0100
commit 40fca839012e79e792b94f22cb7de5846acd1433 (patch)
tree f833fa6832b7639c72de9b1297a923b9c5d88858 /searchlib
parent d74ed011c5c88c0a770bf85757ba8435273ee261 (diff)
parent df7e3fbec13aafb54682fca0bb5385f76d17780a (diff)
Merge pull request #29429 from vespa-engine/geirst/direct-weighted-set-blueprint-refactor
Move DirectWeightedSetBlueprint to separate file(s).
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/CMakeLists.txt 40
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp 124
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_object_visitor.cpp 38
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_object_visitor.h 16
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.cpp 14
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.h 72
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.hpp 67
7 files changed, 234 insertions, 137 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
index 1d47b4d02ff..6ec78daecd1 100644
--- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
@@ -6,6 +6,7 @@ vespa_add_library(searchlib_attribute OBJECT
attribute.cpp
attribute_blueprint_factory.cpp
attribute_header.cpp
+ attribute_object_visitor.cpp
attribute_operation.cpp
attribute_read_guard.cpp
attribute_weighted_set_blueprint.cpp
@@ -38,27 +39,27 @@ vespa_add_library(searchlib_attribute OBJECT
defines.cpp
dfa_fuzzy_matcher.cpp
dfa_string_comparator.cpp
+ direct_weighted_set_blueprint.cpp
distance_metric_utils.cpp
diversity.cpp
dociditerator.cpp
document_weight_or_filter_search.cpp
- searchcontextelementiterator.cpp
empty_search_context.cpp
+ enum_store_compaction_spec.cpp
+ enum_store_dictionary.cpp
+ enum_store_loaders.cpp
enumattribute.cpp
enumattributesaver.cpp
enumcomparator.cpp
+ enumerated_multi_value_read_view.cpp
enumhintsearchcontext.cpp
enummodifier.cpp
- enum_store_compaction_spec.cpp
- enum_store_dictionary.cpp
- enum_store_loaders.cpp
enumstore.cpp
- enumerated_multi_value_read_view.cpp
- extendableattributes.cpp
extendable_numeric_array_multi_value_read_view.cpp
extendable_numeric_weighted_set_multi_value_read_view.cpp
extendable_string_array_multi_value_read_view.cpp
extendable_string_weighted_set_multi_value_read_view.cpp
+ extendableattributes.cpp
fixedsourceselector.cpp
flagattribute.cpp
floatbase.cpp
@@ -82,8 +83,8 @@ vespa_add_library(searchlib_attribute OBJECT
multi_numeric_enum_search_context.cpp
multi_numeric_flag_search_context.cpp
multi_numeric_search_context.cpp
- multi_string_enum_search_context.cpp
multi_string_enum_hint_search_context.cpp
+ multi_string_enum_search_context.cpp
multi_value_mapping.cpp
multi_value_mapping_base.cpp
multienumattribute.cpp
@@ -98,11 +99,11 @@ vespa_add_library(searchlib_attribute OBJECT
multivalueattributesaver.cpp
multivalueattributesaverutils.cpp
not_implemented_attribute.cpp
- numericbase.cpp
numeric_matcher.cpp
numeric_range_matcher.cpp
numeric_search_context.cpp
numeric_sort_blob_writer.cpp
+ numericbase.cpp
posting_list_merger.cpp
postingchange.cpp
postinglistattribute.cpp
@@ -121,6 +122,17 @@ vespa_add_library(searchlib_attribute OBJECT
reference_mappings.cpp
save_utils.cpp
search_context.cpp
+ searchcontextelementiterator.cpp
+ single_enum_search_context.cpp
+ single_numeric_enum_search_context.cpp
+ single_numeric_search_context.cpp
+ single_raw_attribute.cpp
+ single_raw_attribute_loader.cpp
+ single_raw_attribute_saver.cpp
+ single_raw_ext_attribute.cpp
+ single_small_numeric_search_context.cpp
+ single_string_enum_hint_search_context.cpp
+ single_string_enum_search_context.cpp
singleboolattribute.cpp
singleenumattribute.cpp
singleenumattributesaver.cpp
@@ -131,22 +143,12 @@ vespa_add_library(searchlib_attribute OBJECT
singlesmallnumericattribute.cpp
singlestringattribute.cpp
singlestringpostattribute.cpp
- single_enum_search_context.cpp
- single_numeric_enum_search_context.cpp
- single_numeric_search_context.cpp
- single_raw_attribute.cpp
- single_raw_attribute_loader.cpp
- single_raw_attribute_saver.cpp
- single_raw_ext_attribute.cpp
- single_small_numeric_search_context.cpp
- single_string_enum_search_context.cpp
- single_string_enum_hint_search_context.cpp
sourceselector.cpp
- stringbase.cpp
string_matcher.cpp
string_search_context.cpp
string_search_helper.cpp
string_sort_blob_writer.cpp
+ stringbase.cpp
valuemodifier.cpp
DEPENDS
)
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index cf8cbe3177f..a289cc7a2f6 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -1,12 +1,14 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "attribute_blueprint_factory.h"
+#include "attribute_blueprint_params.h"
+#include "attribute_object_visitor.h"
#include "attribute_weighted_set_blueprint.h"
+#include "direct_weighted_set_blueprint.h"
+#include "document_weight_or_filter_search.h"
#include "i_document_weight_attribute.h"
#include "iterator_pack.h"
#include "predicate_attribute.h"
-#include "attribute_blueprint_params.h"
-#include "document_weight_or_filter_search.h"
#include <vespa/eval/eval/value.h>
#include <vespa/searchlib/common/location.h>
#include <vespa/searchlib/common/locationiterators.h>
@@ -191,29 +193,7 @@ AttributeFieldBlueprint::AttributeFieldBlueprint(FieldSpecBase field, const IAtt
}
}
-vespalib::string
-get_type(const IAttributeVector& attr)
-{
- auto coll_type = CollectionType(attr.getCollectionType());
- auto basic_type = BasicType(attr.getBasicType());
- if (coll_type.type() == CollectionType::SINGLE) {
- return basic_type.asString();
- }
- std::ostringstream oss;
- oss << coll_type.asString() << "<" << basic_type.asString() << ">";
- return oss.str();
-}
-void
-visit_attribute(vespalib::ObjectVisitor& visitor, const IAttributeVector& attr)
-{
- visitor.openStruct("attribute", "IAttributeVector");
- visitor.visitString("name", attr.getName());
- visitor.visitString("type", get_type(attr));
- visitor.visitBool("fast_search", attr.getIsFastSearch());
- visitor.visitBool("filter", attr.getIsFilter());
- visitor.closeStruct();
-}
void
AttributeFieldBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const
@@ -408,101 +388,9 @@ private:
//-----------------------------------------------------------------------------
-template <typename SearchType>
-class DirectWeightedSetBlueprint : public ComplexLeafBlueprint
-{
-private:
- std::vector<int32_t> _weights;
- std::vector<IDocumentWeightAttribute::LookupResult> _terms;
- const IAttributeVector &_iattr;
- const IDocumentWeightAttribute &_attr;
- vespalib::datastore::EntryRef _dictionary_snapshot;
-public:
- DirectWeightedSetBlueprint(const FieldSpec &field, const IAttributeVector &iattr, const IDocumentWeightAttribute &attr, size_t size_hint)
- : ComplexLeafBlueprint(field),
- _weights(),
- _terms(),
- _iattr(iattr),
- _attr(attr),
- _dictionary_snapshot(_attr.get_dictionary_snapshot())
- {
- set_allow_termwise_eval(true);
- _weights.reserve(size_hint);
- _terms.reserve(size_hint);
- }
- ~DirectWeightedSetBlueprint() override;
- void addTerm(const IDocumentWeightAttribute::LookupKey & key, int32_t weight, HitEstimate & estimate) {
- IDocumentWeightAttribute::LookupResult result = _attr.lookup(key, _dictionary_snapshot);
- HitEstimate childEst(result.posting_size, (result.posting_size == 0));
- if (!childEst.empty) {
- if (estimate.empty) {
- estimate = childEst;
- } else {
- estimate.estHits += childEst.estHits;
- }
- _weights.push_back(weight);
- _terms.push_back(result);
- }
- }
- void complete(HitEstimate estimate) {
- setEstimate(estimate);
- }
- SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray &tfmda, bool) const override;
-
- std::unique_ptr<SearchIterator> createFilterSearch(bool strict, FilterConstraint constraint) const override;
- std::unique_ptr<queryeval::MatchingElementsSearch> create_matching_elements_search(const MatchingElementsFields &fields) const override {
- if (fields.has_field(_iattr.getName())) {
- return queryeval::MatchingElementsSearch::create(_iattr, _dictionary_snapshot, vespalib::ConstArrayRef<IDocumentWeightAttribute::LookupResult>(_terms));
- } else {
- return {};
- }
- }
- void visitMembers(vespalib::ObjectVisitor& visitor) const override {
- LeafBlueprint::visitMembers(visitor);
- visit_attribute(visitor, _iattr);
- }
-};
-
-template <typename SearchType>
-SearchIterator::UP
-DirectWeightedSetBlueprint<SearchType>::createLeafSearch(const TermFieldMatchDataArray &tfmda, bool) const
-{
- assert(tfmda.size() == 1);
- assert(getState().numFields() == 1);
- if (_terms.empty()) {
- return std::make_unique<queryeval::EmptySearch>();
- }
- std::vector<DocumentWeightIterator> iterators;
- const size_t numChildren = _terms.size();
- iterators.reserve(numChildren);
- for (const IDocumentWeightAttribute::LookupResult &r : _terms) {
- _attr.create(r.posting_idx, iterators);
- }
- bool field_is_filter = getState().fields()[0].isFilter();
- if (field_is_filter && tfmda[0]->isNotNeeded()) {
- return attribute::DocumentWeightOrFilterSearch::create(std::move(iterators));
- }
- return SearchType::create(*tfmda[0], field_is_filter, _weights, std::move(iterators));
-}
-
-
-template <typename SearchType>
-DirectWeightedSetBlueprint<SearchType>::~DirectWeightedSetBlueprint() = default;
-
-template <typename SearchType>
-std::unique_ptr<SearchIterator>
-DirectWeightedSetBlueprint<SearchType>::createFilterSearch(bool, FilterConstraint) const
-{
- std::vector<DocumentWeightIterator> iterators;
- iterators.reserve(_terms.size());
- for (const IDocumentWeightAttribute::LookupResult &r : _terms) {
- _attr.create(r.posting_idx, iterators);
- }
- return attribute::DocumentWeightOrFilterSearch::create(std::move(iterators));
-}
//-----------------------------------------------------------------------------
@@ -798,7 +686,7 @@ public:
setResult(std::move(ws));
} else {
if (_dwa != nullptr) {
- auto *bp = new DirectWeightedSetBlueprint<queryeval::WeightedSetTermSearch>(_field, _attr, *_dwa, n.getNumTerms());
+ auto *bp = new attribute::DirectWeightedSetBlueprint<queryeval::WeightedSetTermSearch>(_field, _attr, *_dwa, n.getNumTerms());
createDirectWeightedSet(bp, n);
} else {
auto *bp = new WeightedSetTermBlueprint(_field);
@@ -809,7 +697,7 @@ public:
void visit(query::DotProduct &n) override {
if (_dwa != nullptr) {
- auto *bp = new DirectWeightedSetBlueprint<queryeval::DotProductSearch>(_field, _attr, *_dwa, n.getNumTerms());
+ auto *bp = new attribute::DirectWeightedSetBlueprint<queryeval::DotProductSearch>(_field, _attr, *_dwa, n.getNumTerms());
createDirectWeightedSet(bp, n);
} else {
auto *bp = new DotProductBlueprint(_field);
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_object_visitor.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_object_visitor.cpp
new file mode 100644
index 00000000000..39f39212d5c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_object_visitor.cpp
@@ -0,0 +1,38 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "attribute_object_visitor.h"
+#include <vespa/searchcommon/attribute/iattributevector.h>
+#include <vespa/vespalib/objects/objectvisitor.h>
+#include <sstream>
+
+namespace search::attribute {
+
+namespace {
+/**
+ * Builds the textual type description of an attribute vector.
+ *
+ * For the SINGLE collection type the result is the basic type name alone.
+ * For every other collection type the result is the collection type name
+ * followed by the basic type name enclosed in angle brackets.
+ */
+vespalib::string
+get_type(const IAttributeVector& attr)
+{
+ auto coll_type = CollectionType(attr.getCollectionType());
+ auto basic_type = BasicType(attr.getBasicType());
+ if (coll_type.type() == CollectionType::SINGLE) {
+ return basic_type.asString();
+ }
+ std::ostringstream oss;
+ oss << coll_type.asString() << "<" << basic_type.asString() << ">";
+ return oss.str();
+}
+
+}
+/**
+ * Reports the basic properties of an attribute vector to the visitor.
+ *
+ * One struct is opened and closed on the visitor. Inside it the attribute
+ * name, the type string produced by get_type(), and the fast_search and
+ * filter flags are visited, in that order.
+ */
+void
+visit_attribute(vespalib::ObjectVisitor& visitor, const IAttributeVector& attr)
+{
+ visitor.openStruct("attribute", "IAttributeVector");
+ visitor.visitString("name", attr.getName());
+ visitor.visitString("type", get_type(attr));
+ visitor.visitBool("fast_search", attr.getIsFastSearch());
+ visitor.visitBool("filter", attr.getIsFilter());
+ visitor.closeStruct();
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_object_visitor.h b/searchlib/src/vespa/searchlib/attribute/attribute_object_visitor.h
new file mode 100644
index 00000000000..29c7e1556b6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_object_visitor.h
@@ -0,0 +1,16 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace vespalib { class ObjectVisitor; }
+
+namespace search::attribute {
+
+class IAttributeVector;
+
+/**
+ * Function used to visit the basic properties of an IAttributeVector.
+ *
+ * The properties are reported as a single struct on the visitor with the
+ * fields name, type, fast_search and filter.
+ *
+ * @param visitor receives the struct and its fields.
+ * @param attr the attribute vector whose properties are reported.
+ */
+void visit_attribute(vespalib::ObjectVisitor& visitor, const IAttributeVector& attr);
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.cpp
new file mode 100644
index 00000000000..01b683f3b6d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.cpp
@@ -0,0 +1,14 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "direct_weighted_set_blueprint.h"
+#include "direct_weighted_set_blueprint.hpp"
+#include <vespa/searchlib/queryeval/dot_product_search.h>
+#include <vespa/searchlib/queryeval/weighted_set_term_search.h>
+
+namespace search::attribute {
+// Explicit instantiations for the two supported search types; the member definitions come from the included .hpp file.
+template class DirectWeightedSetBlueprint<queryeval::WeightedSetTermSearch>;
+template class DirectWeightedSetBlueprint<queryeval::DotProductSearch>;
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.h b/searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.h
new file mode 100644
index 00000000000..e50c7688ac7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.h
@@ -0,0 +1,72 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "attribute_object_visitor.h"
+#include "i_document_weight_attribute.h"
+#include <vespa/searchcommon/attribute/iattributevector.h>
+#include <vespa/searchlib/common/matching_elements_fields.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/searchlib/queryeval/field_spec.h>
+#include <vespa/searchlib/queryeval/matching_elements_search.h>
+
+namespace search::queryeval { class SearchIterator; }
+
+namespace search::attribute {
+
+/**
+ * Blueprint used for WeightedSetTerm or DotProduct over a multi-value attribute
+ * which supports the IDocumentWeightAttribute interface.
+ *
+ * This allows access to low-level posting lists, which speeds up query execution.
+ *
+ * Terms are registered one at a time with addTerm(), which looks each key up
+ * against the dictionary snapshot taken in the constructor and keeps only the
+ * terms whose posting list is non-empty. The estimate accumulated by the caller
+ * is handed over with complete().
+ *
+ * SearchType has to provide the static create() function that
+ * createLeafSearch() calls. The constructor, destructor, createLeafSearch()
+ * and createFilterSearch() are defined in direct_weighted_set_blueprint.hpp.
+ */
+template <typename SearchType>
+class DirectWeightedSetBlueprint : public queryeval::ComplexLeafBlueprint
+{
+private:
+ std::vector<int32_t> _weights; // weight of each kept term, filled in step with _terms
+ std::vector<IDocumentWeightAttribute::LookupResult> _terms; // lookup result of each kept term
+ const IAttributeVector &_iattr; // supplies the field name and the properties shown by visitMembers()
+ const IDocumentWeightAttribute &_attr; // used for term lookups and for creating iterators
+ vespalib::datastore::EntryRef _dictionary_snapshot; // taken once in the constructor and passed to every lookup
+
+public:
+ DirectWeightedSetBlueprint(const queryeval::FieldSpec &field, const IAttributeVector &iattr, const IDocumentWeightAttribute &attr, size_t size_hint);
+ ~DirectWeightedSetBlueprint() override;
+ /**
+ * Looks up key and, when its posting list is non-empty, records the term
+ * together with its weight. The posting size is added to estimate, or
+ * replaces estimate when estimate was still marked empty. Terms without
+ * postings are dropped and leave estimate untouched.
+ */
+ void addTerm(const IDocumentWeightAttribute::LookupKey & key, int32_t weight, HitEstimate & estimate) {
+ IDocumentWeightAttribute::LookupResult result = _attr.lookup(key, _dictionary_snapshot);
+ HitEstimate childEst(result.posting_size, (result.posting_size == 0));
+ if (!childEst.empty) {
+ if (estimate.empty) {
+ estimate = childEst;
+ } else {
+ estimate.estHits += childEst.estHits;
+ }
+ _weights.push_back(weight);
+ _terms.push_back(result);
+ }
+ }
+ void complete(HitEstimate estimate) { // forwards the final estimate to setEstimate()
+ setEstimate(estimate);
+ }
+
+ std::unique_ptr<queryeval::SearchIterator> createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool) const override;
+
+ std::unique_ptr<queryeval::SearchIterator> createFilterSearch(bool strict, FilterConstraint constraint) const override;
+ std::unique_ptr<queryeval::MatchingElementsSearch> create_matching_elements_search(const MatchingElementsFields &fields) const override {
+ if (fields.has_field(_iattr.getName())) {
+ return queryeval::MatchingElementsSearch::create(_iattr, _dictionary_snapshot, vespalib::ConstArrayRef<IDocumentWeightAttribute::LookupResult>(_terms));
+ } else {
+ return {}; // this attribute is not among the requested fields
+ }
+ }
+ void visitMembers(vespalib::ObjectVisitor& visitor) const override {
+ LeafBlueprint::visitMembers(visitor);
+ visit_attribute(visitor, _iattr); // adds the attribute properties after the base class members
+ }
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.hpp b/searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.hpp
new file mode 100644
index 00000000000..bf6410c347c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.hpp
@@ -0,0 +1,67 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "direct_weighted_set_blueprint.h"
+#include "document_weight_or_filter_search.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <memory>
+
+namespace search::attribute {
+/**
+ * Stores the two attribute references, takes the dictionary snapshot from
+ * attr, enables termwise evaluation and reserves room for size_hint entries
+ * in both the weight and the term vectors.
+ */
+template <typename SearchType>
+DirectWeightedSetBlueprint<SearchType>::DirectWeightedSetBlueprint(const queryeval::FieldSpec &field,
+ const IAttributeVector &iattr,
+ const IDocumentWeightAttribute &attr,
+ size_t size_hint)
+ : ComplexLeafBlueprint(field),
+ _weights(),
+ _terms(),
+ _iattr(iattr),
+ _attr(attr),
+ _dictionary_snapshot(_attr.get_dictionary_snapshot()) // reads the member _attr, which is declared before _dictionary_snapshot
+{
+ set_allow_termwise_eval(true);
+ _weights.reserve(size_hint);
+ _terms.reserve(size_hint);
+}
+// The destructor is defaulted here, outside the class body.
+template <typename SearchType>
+DirectWeightedSetBlueprint<SearchType>::~DirectWeightedSetBlueprint() = default;
+/**
+ * Creates the search iterator for this blueprint.
+ *
+ * Exactly one match data entry and one field are expected (asserted).
+ * An EmptySearch is returned when no terms were kept. Otherwise
+ * _attr.create() is called once per kept term to fill the iterator vector.
+ * For a filter field whose match data is not needed the iterators go to
+ * DocumentWeightOrFilterSearch; in every other case SearchType::create()
+ * receives the match data, the filter flag, the weights and the iterators.
+ * The bool argument is unnamed and not used.
+ *
+ * NOTE(review): assert is used here but cassert is not among the includes
+ * of this file; presumably it arrives through another header - confirm.
+ */
+template <typename SearchType>
+std::unique_ptr<queryeval::SearchIterator>
+DirectWeightedSetBlueprint<SearchType>::createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool) const
+{
+ assert(tfmda.size() == 1);
+ assert(getState().numFields() == 1);
+ if (_terms.empty()) {
+ return std::make_unique<queryeval::EmptySearch>();
+ }
+ std::vector<DocumentWeightIterator> iterators;
+ const size_t numChildren = _terms.size();
+ iterators.reserve(numChildren);
+ for (const IDocumentWeightAttribute::LookupResult &r : _terms) {
+ _attr.create(r.posting_idx, iterators);
+ }
+ bool field_is_filter = getState().fields()[0].isFilter();
+ if (field_is_filter && tfmda[0]->isNotNeeded()) {
+ return attribute::DocumentWeightOrFilterSearch::create(std::move(iterators));
+ }
+ return SearchType::create(*tfmda[0], field_is_filter, _weights, std::move(iterators));
+}
+/**
+ * Creates a filter iterator over all kept terms.
+ *
+ * _attr.create() is called once per kept term and the resulting iterators
+ * are handed to DocumentWeightOrFilterSearch. Both parameters are unnamed
+ * and not consulted. Unlike createLeafSearch(), there is no special case
+ * for an empty term list here.
+ */
+template <typename SearchType>
+std::unique_ptr<queryeval::SearchIterator>
+DirectWeightedSetBlueprint<SearchType>::createFilterSearch(bool, FilterConstraint) const
+{
+ std::vector<DocumentWeightIterator> iterators;
+ iterators.reserve(_terms.size());
+ for (const IDocumentWeightAttribute::LookupResult &r : _terms) {
+ _attr.create(r.posting_idx, iterators);
+ }
+ return attribute::DocumentWeightOrFilterSearch::create(std::move(iterators));
+}
+
+}