diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-11-22 17:16:47 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-22 17:16:47 +0100 |
commit | 40fca839012e79e792b94f22cb7de5846acd1433 (patch) | |
tree | f833fa6832b7639c72de9b1297a923b9c5d88858 /searchlib | |
parent | d74ed011c5c88c0a770bf85757ba8435273ee261 (diff) | |
parent | df7e3fbec13aafb54682fca0bb5385f76d17780a (diff) |
Merge pull request #29429 from vespa-engine/geirst/direct-weighted-set-blueprint-refactor
Move DirectWeightedSetBlueprint to separate file(s).
Diffstat (limited to 'searchlib')
7 files changed, 234 insertions, 137 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index 1d47b4d02ff..6ec78daecd1 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -6,6 +6,7 @@ vespa_add_library(searchlib_attribute OBJECT attribute.cpp attribute_blueprint_factory.cpp attribute_header.cpp + attribute_object_visitor.cpp attribute_operation.cpp attribute_read_guard.cpp attribute_weighted_set_blueprint.cpp @@ -38,27 +39,27 @@ vespa_add_library(searchlib_attribute OBJECT defines.cpp dfa_fuzzy_matcher.cpp dfa_string_comparator.cpp + direct_weighted_set_blueprint.cpp distance_metric_utils.cpp diversity.cpp dociditerator.cpp document_weight_or_filter_search.cpp - searchcontextelementiterator.cpp empty_search_context.cpp + enum_store_compaction_spec.cpp + enum_store_dictionary.cpp + enum_store_loaders.cpp enumattribute.cpp enumattributesaver.cpp enumcomparator.cpp + enumerated_multi_value_read_view.cpp enumhintsearchcontext.cpp enummodifier.cpp - enum_store_compaction_spec.cpp - enum_store_dictionary.cpp - enum_store_loaders.cpp enumstore.cpp - enumerated_multi_value_read_view.cpp - extendableattributes.cpp extendable_numeric_array_multi_value_read_view.cpp extendable_numeric_weighted_set_multi_value_read_view.cpp extendable_string_array_multi_value_read_view.cpp extendable_string_weighted_set_multi_value_read_view.cpp + extendableattributes.cpp fixedsourceselector.cpp flagattribute.cpp floatbase.cpp @@ -82,8 +83,8 @@ vespa_add_library(searchlib_attribute OBJECT multi_numeric_enum_search_context.cpp multi_numeric_flag_search_context.cpp multi_numeric_search_context.cpp - multi_string_enum_search_context.cpp multi_string_enum_hint_search_context.cpp + multi_string_enum_search_context.cpp multi_value_mapping.cpp multi_value_mapping_base.cpp multienumattribute.cpp @@ -98,11 +99,11 @@ vespa_add_library(searchlib_attribute OBJECT multivalueattributesaver.cpp multivalueattributesaverutils.cpp not_implemented_attribute.cpp - numericbase.cpp numeric_matcher.cpp numeric_range_matcher.cpp numeric_search_context.cpp numeric_sort_blob_writer.cpp + numericbase.cpp posting_list_merger.cpp postingchange.cpp postinglistattribute.cpp @@ -121,6 +122,17 @@ vespa_add_library(searchlib_attribute OBJECT reference_mappings.cpp save_utils.cpp search_context.cpp + searchcontextelementiterator.cpp + single_enum_search_context.cpp + single_numeric_enum_search_context.cpp + single_numeric_search_context.cpp + single_raw_attribute.cpp + single_raw_attribute_loader.cpp + single_raw_attribute_saver.cpp + single_raw_ext_attribute.cpp + single_small_numeric_search_context.cpp + single_string_enum_hint_search_context.cpp + single_string_enum_search_context.cpp singleboolattribute.cpp singleenumattribute.cpp singleenumattributesaver.cpp @@ -131,22 +143,12 @@ vespa_add_library(searchlib_attribute OBJECT singlesmallnumericattribute.cpp singlestringattribute.cpp singlestringpostattribute.cpp - single_enum_search_context.cpp - single_numeric_enum_search_context.cpp - single_numeric_search_context.cpp - single_raw_attribute.cpp - single_raw_attribute_loader.cpp - single_raw_attribute_saver.cpp - single_raw_ext_attribute.cpp - single_small_numeric_search_context.cpp - single_string_enum_search_context.cpp - single_string_enum_hint_search_context.cpp sourceselector.cpp - stringbase.cpp string_matcher.cpp string_search_context.cpp string_search_helper.cpp string_sort_blob_writer.cpp + stringbase.cpp valuemodifier.cpp DEPENDS ) diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index cf8cbe3177f..a289cc7a2f6 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -1,12 +1,14 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "attribute_blueprint_factory.h" +#include "attribute_blueprint_params.h" +#include "attribute_object_visitor.h" #include "attribute_weighted_set_blueprint.h" +#include "direct_weighted_set_blueprint.h" +#include "document_weight_or_filter_search.h" #include "i_document_weight_attribute.h" #include "iterator_pack.h" #include "predicate_attribute.h" -#include "attribute_blueprint_params.h" -#include "document_weight_or_filter_search.h" #include <vespa/eval/eval/value.h> #include <vespa/searchlib/common/location.h> #include <vespa/searchlib/common/locationiterators.h> @@ -191,29 +193,7 @@ AttributeFieldBlueprint::AttributeFieldBlueprint(FieldSpecBase field, const IAtt } } -vespalib::string -get_type(const IAttributeVector& attr) -{ - auto coll_type = CollectionType(attr.getCollectionType()); - auto basic_type = BasicType(attr.getBasicType()); - if (coll_type.type() == CollectionType::SINGLE) { - return basic_type.asString(); - } - std::ostringstream oss; - oss << coll_type.asString() << "<" << basic_type.asString() << ">"; - return oss.str(); -} -void -visit_attribute(vespalib::ObjectVisitor& visitor, const IAttributeVector& attr) -{ - visitor.openStruct("attribute", "IAttributeVector"); - visitor.visitString("name", attr.getName()); - visitor.visitString("type", get_type(attr)); - visitor.visitBool("fast_search", attr.getIsFastSearch()); - visitor.visitBool("filter", attr.getIsFilter()); - visitor.closeStruct(); -} void AttributeFieldBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const @@ -408,101 +388,9 @@ private: //----------------------------------------------------------------------------- -template <typename SearchType> -class DirectWeightedSetBlueprint : public ComplexLeafBlueprint -{ -private: - std::vector<int32_t> _weights; - std::vector<IDocumentWeightAttribute::LookupResult> _terms; - const IAttributeVector &_iattr; - const IDocumentWeightAttribute &_attr; - vespalib::datastore::EntryRef _dictionary_snapshot; -public: - DirectWeightedSetBlueprint(const FieldSpec &field, const IAttributeVector &iattr, const IDocumentWeightAttribute &attr, size_t size_hint) - : ComplexLeafBlueprint(field), - _weights(), - _terms(), - _iattr(iattr), - _attr(attr), - _dictionary_snapshot(_attr.get_dictionary_snapshot()) - { - set_allow_termwise_eval(true); - _weights.reserve(size_hint); - _terms.reserve(size_hint); - } - ~DirectWeightedSetBlueprint() override; - void addTerm(const IDocumentWeightAttribute::LookupKey & key, int32_t weight, HitEstimate & estimate) { - IDocumentWeightAttribute::LookupResult result = _attr.lookup(key, _dictionary_snapshot); - HitEstimate childEst(result.posting_size, (result.posting_size == 0)); - if (!childEst.empty) { - if (estimate.empty) { - estimate = childEst; - } else { - estimate.estHits += childEst.estHits; - } - _weights.push_back(weight); - _terms.push_back(result); - } - } - void complete(HitEstimate estimate) { - setEstimate(estimate); - } - SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray &tfmda, bool) const override; - - std::unique_ptr<SearchIterator> createFilterSearch(bool strict, FilterConstraint constraint) const override; - std::unique_ptr<queryeval::MatchingElementsSearch> create_matching_elements_search(const MatchingElementsFields &fields) const override { - if (fields.has_field(_iattr.getName())) { - return queryeval::MatchingElementsSearch::create(_iattr, _dictionary_snapshot, vespalib::ConstArrayRef<IDocumentWeightAttribute::LookupResult>(_terms)); - } else { - return {}; - } - } - void visitMembers(vespalib::ObjectVisitor& visitor) const override { - LeafBlueprint::visitMembers(visitor); - visit_attribute(visitor, _iattr); - } -}; - -template <typename SearchType> -SearchIterator::UP -DirectWeightedSetBlueprint<SearchType>::createLeafSearch(const TermFieldMatchDataArray &tfmda, bool) const -{ - assert(tfmda.size() == 1); - assert(getState().numFields() == 1); - if (_terms.empty()) { - return std::make_unique<queryeval::EmptySearch>(); - } - std::vector<DocumentWeightIterator> iterators; - const size_t numChildren = _terms.size(); - iterators.reserve(numChildren); - for (const IDocumentWeightAttribute::LookupResult &r : _terms) { - _attr.create(r.posting_idx, iterators); - } - bool field_is_filter = getState().fields()[0].isFilter(); - if (field_is_filter && tfmda[0]->isNotNeeded()) { - return attribute::DocumentWeightOrFilterSearch::create(std::move(iterators)); - } - return SearchType::create(*tfmda[0], field_is_filter, _weights, std::move(iterators)); -} - - -template <typename SearchType> -DirectWeightedSetBlueprint<SearchType>::~DirectWeightedSetBlueprint() = default; - -template <typename SearchType> -std::unique_ptr<SearchIterator> -DirectWeightedSetBlueprint<SearchType>::createFilterSearch(bool, FilterConstraint) const -{ - std::vector<DocumentWeightIterator> iterators; - iterators.reserve(_terms.size()); - for (const IDocumentWeightAttribute::LookupResult &r : _terms) { - _attr.create(r.posting_idx, iterators); - } - return attribute::DocumentWeightOrFilterSearch::create(std::move(iterators)); -} //----------------------------------------------------------------------------- @@ -798,7 +686,7 @@ public: setResult(std::move(ws)); } else { if (_dwa != nullptr) { - auto *bp = new DirectWeightedSetBlueprint<queryeval::WeightedSetTermSearch>(_field, _attr, *_dwa, n.getNumTerms()); + auto *bp = new attribute::DirectWeightedSetBlueprint<queryeval::WeightedSetTermSearch>(_field, _attr, *_dwa, n.getNumTerms()); createDirectWeightedSet(bp, n); } else { auto *bp = new WeightedSetTermBlueprint(_field); @@ -809,7 +697,7 @@ public: void visit(query::DotProduct &n) override { if (_dwa != nullptr) { - auto *bp = new DirectWeightedSetBlueprint<queryeval::DotProductSearch>(_field, _attr, *_dwa, n.getNumTerms()); + auto *bp = new attribute::DirectWeightedSetBlueprint<queryeval::DotProductSearch>(_field, _attr, *_dwa, n.getNumTerms()); createDirectWeightedSet(bp, n); } else { auto *bp = new DotProductBlueprint(_field); diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_object_visitor.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_object_visitor.cpp new file mode 100644 index 00000000000..39f39212d5c --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/attribute_object_visitor.cpp @@ -0,0 +1,38 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "attribute_object_visitor.h" +#include <vespa/searchcommon/attribute/iattributevector.h> +#include <vespa/vespalib/objects/objectvisitor.h> +#include <sstream> + +namespace search::attribute { + +namespace { + +vespalib::string +get_type(const IAttributeVector& attr) +{ + auto coll_type = CollectionType(attr.getCollectionType()); + auto basic_type = BasicType(attr.getBasicType()); + if (coll_type.type() == CollectionType::SINGLE) { + return basic_type.asString(); + } + std::ostringstream oss; + oss << coll_type.asString() << "<" << basic_type.asString() << ">"; + return oss.str(); +} + +} + +void +visit_attribute(vespalib::ObjectVisitor& visitor, const IAttributeVector& attr) +{ + visitor.openStruct("attribute", "IAttributeVector"); + visitor.visitString("name", attr.getName()); + visitor.visitString("type", get_type(attr)); + visitor.visitBool("fast_search", attr.getIsFastSearch()); + visitor.visitBool("filter", attr.getIsFilter()); + visitor.closeStruct(); +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_object_visitor.h b/searchlib/src/vespa/searchlib/attribute/attribute_object_visitor.h new file mode 100644 index 00000000000..29c7e1556b6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/attribute_object_visitor.h @@ -0,0 +1,16 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace vespalib { class ObjectVisitor; } + +namespace search::attribute { + +class IAttributeVector; + +/** + * Function used to visit the basic properties of an IAttributeVector. + */ +void visit_attribute(vespalib::ObjectVisitor& visitor, const IAttributeVector& attr); + +} diff --git a/searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.cpp new file mode 100644 index 00000000000..01b683f3b6d --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.cpp @@ -0,0 +1,14 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "direct_weighted_set_blueprint.h" +#include "direct_weighted_set_blueprint.hpp" +#include <vespa/searchlib/queryeval/dot_product_search.h> +#include <vespa/searchlib/queryeval/weighted_set_term_search.h> + +namespace search::attribute { + +template class DirectWeightedSetBlueprint<queryeval::WeightedSetTermSearch>; +template class DirectWeightedSetBlueprint<queryeval::DotProductSearch>; + +} + diff --git a/searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.h b/searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.h new file mode 100644 index 00000000000..e50c7688ac7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.h @@ -0,0 +1,72 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "attribute_object_visitor.h" +#include "i_document_weight_attribute.h" +#include <vespa/searchcommon/attribute/iattributevector.h> +#include <vespa/searchlib/common/matching_elements_fields.h> +#include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/searchlib/queryeval/field_spec.h> +#include <vespa/searchlib/queryeval/matching_elements_search.h> + +namespace search::queryeval { class SearchIterator; } + +namespace search::attribute { + +/** + * Blueprint used for WeightedSetTerm or DotProduct over a multi-value attribute + * which supports the IDocumentWeightAttribute interface. + * + * This allows access to low-level posting lists, which speeds up query execution. + */ +template <typename SearchType> +class DirectWeightedSetBlueprint : public queryeval::ComplexLeafBlueprint +{ +private: + std::vector<int32_t> _weights; + std::vector<IDocumentWeightAttribute::LookupResult> _terms; + const IAttributeVector &_iattr; + const IDocumentWeightAttribute &_attr; + vespalib::datastore::EntryRef _dictionary_snapshot; + +public: + DirectWeightedSetBlueprint(const queryeval::FieldSpec &field, const IAttributeVector &iattr, const IDocumentWeightAttribute &attr, size_t size_hint); + ~DirectWeightedSetBlueprint() override; + + void addTerm(const IDocumentWeightAttribute::LookupKey & key, int32_t weight, HitEstimate & estimate) { + IDocumentWeightAttribute::LookupResult result = _attr.lookup(key, _dictionary_snapshot); + HitEstimate childEst(result.posting_size, (result.posting_size == 0)); + if (!childEst.empty) { + if (estimate.empty) { + estimate = childEst; + } else { + estimate.estHits += childEst.estHits; + } + _weights.push_back(weight); + _terms.push_back(result); + } + } + void complete(HitEstimate estimate) { + setEstimate(estimate); + } + + std::unique_ptr<queryeval::SearchIterator> createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool) const override; + + std::unique_ptr<queryeval::SearchIterator> createFilterSearch(bool strict, FilterConstraint constraint) const override; + std::unique_ptr<queryeval::MatchingElementsSearch> create_matching_elements_search(const MatchingElementsFields &fields) const override { + if (fields.has_field(_iattr.getName())) { + return queryeval::MatchingElementsSearch::create(_iattr, _dictionary_snapshot, vespalib::ConstArrayRef<IDocumentWeightAttribute::LookupResult>(_terms)); + } else { + return {}; + } + } + void visitMembers(vespalib::ObjectVisitor& visitor) const override { + LeafBlueprint::visitMembers(visitor); + visit_attribute(visitor, _iattr); + } +}; + +} + diff --git a/searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.hpp b/searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.hpp new file mode 100644 index 00000000000..bf6410c347c --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/direct_weighted_set_blueprint.hpp @@ -0,0 +1,67 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "direct_weighted_set_blueprint.h" +#include "document_weight_or_filter_search.h" +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/queryeval/emptysearch.h> +#include <memory> + +namespace search::attribute { + +template <typename SearchType> +DirectWeightedSetBlueprint<SearchType>::DirectWeightedSetBlueprint(const queryeval::FieldSpec &field, + const IAttributeVector &iattr, + const IDocumentWeightAttribute &attr, + size_t size_hint) + : ComplexLeafBlueprint(field), + _weights(), + _terms(), + _iattr(iattr), + _attr(attr), + _dictionary_snapshot(_attr.get_dictionary_snapshot()) +{ + set_allow_termwise_eval(true); + _weights.reserve(size_hint); + _terms.reserve(size_hint); +} + +template <typename SearchType> +DirectWeightedSetBlueprint<SearchType>::~DirectWeightedSetBlueprint() = default; + +template <typename SearchType> +std::unique_ptr<queryeval::SearchIterator> +DirectWeightedSetBlueprint<SearchType>::createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool) const +{ + assert(tfmda.size() == 1); + assert(getState().numFields() == 1); + if (_terms.empty()) { + return std::make_unique<queryeval::EmptySearch>(); + } + std::vector<DocumentWeightIterator> iterators; + const size_t numChildren = _terms.size(); + iterators.reserve(numChildren); + for (const IDocumentWeightAttribute::LookupResult &r : _terms) { + _attr.create(r.posting_idx, iterators); + } + bool field_is_filter = getState().fields()[0].isFilter(); + if (field_is_filter && tfmda[0]->isNotNeeded()) { + return attribute::DocumentWeightOrFilterSearch::create(std::move(iterators)); + } + return SearchType::create(*tfmda[0], field_is_filter, _weights, std::move(iterators)); +} + +template <typename SearchType> +std::unique_ptr<queryeval::SearchIterator> +DirectWeightedSetBlueprint<SearchType>::createFilterSearch(bool, FilterConstraint) const +{ + std::vector<DocumentWeightIterator> iterators; + iterators.reserve(_terms.size()); + for (const IDocumentWeightAttribute::LookupResult &r : _terms) { + _attr.create(r.posting_idx, iterators); + } + return attribute::DocumentWeightOrFilterSearch::create(std::move(iterators)); +} + +} |