diff options
author | Geir Storli <geirst@vespa.ai> | 2024-03-05 16:25:46 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-05 16:25:46 +0100 |
commit | d6e55aade839b51243eac107cce220e4d063c898 (patch) | |
tree | 523500fb934b0a9d91962d3fe79777b6aa7ee03a | |
parent | c70ca1e6d3b86df2f3f2f9575d2fe863b371b1c0 (diff) | |
parent | 65895c139c33e1fc7dd86b580530916decc2e27d (diff) |
Merge pull request #30493 from vespa-engine/geirst/flow-tuning-integration
Integrate flow tuning for attribute, memory and disk index search.
4 files changed, 47 insertions, 13 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index a5ca37906ba..2129ac40724 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -11,6 +11,7 @@ #include "multi_term_or_filter_search.h" #include "predicate_attribute.h" #include <vespa/eval/eval/value.h> +#include <vespa/searchcommon/attribute/config.h> #include <vespa/searchlib/common/location.h> #include <vespa/searchlib/common/locationiterators.h> #include <vespa/searchlib/query/query_term_decoder.h> @@ -46,10 +47,11 @@ LOG_SETUP(".searchlib.attribute.attribute_blueprint_factory"); using search::attribute::BasicType; -using search::attribute::SearchContextParams; using search::attribute::CollectionType; +using search::attribute::Config; using search::attribute::IAttributeVector; using search::attribute::ISearchContext; +using search::attribute::SearchContextParams; using search::fef::TermFieldMatchData; using search::fef::TermFieldMatchDataArray; using search::fef::TermFieldMatchDataPosition; @@ -87,6 +89,10 @@ using search::queryeval::SearchIterator; using search::queryeval::Searchable; using search::queryeval::SimpleLeafBlueprint; using search::queryeval::WeightedSetTermBlueprint; +using search::queryeval::flow::btree_cost; +using search::queryeval::flow::btree_strict_cost; +using search::queryeval::flow::lookup_cost; +using search::queryeval::flow::lookup_strict_cost; using search::tensor::DenseTensorAttribute; using search::tensor::ITensorAttribute; using vespalib::Issue; @@ -115,6 +121,19 @@ private: }; //----------------------------------------------------------------------------- +size_t +get_num_indirections(const BasicType& basic_type, const CollectionType& col_type) +{ + size_t res = 0; + if (basic_type == BasicType::STRING) { + res += 1; + } + if (col_type != CollectionType::SINGLE) { + res += 1; + } + return res; +} + /** * Blueprint for creating regular, stack-based attribute iterators. **/ @@ -141,11 +160,12 @@ public: if (_hit_estimate.is_unknown()) { // E.g. attributes without fast-search are not able to provide a hit estimate. // In this case we just assume matching half of the document corpus. - // In addition, we are not able to skip documents efficiently when being strict. - return {0.5, 1.0, 1.0}; + // In addition, matching is lookup based, and we are not able to skip documents efficiently when being strict. + size_t indirections = get_num_indirections(_attr.getBasicType(), _attr.getCollectionType()); + return {0.5, lookup_cost(indirections), lookup_strict_cost(indirections)}; } else { double rel_est = abs_to_rel_est(_hit_estimate.est_hits(), docid_limit); - return {rel_est, 1.0, rel_est}; + return {rel_est, btree_cost(), btree_strict_cost(rel_est)}; } } @@ -480,9 +500,12 @@ public: double estimate(const IDirectPostingStore::LookupResult &term) const noexcept { return abs_to_rel_est(term.posting_size, docid_limit); } - double cost(const IDirectPostingStore::LookupResult &) const noexcept { return 1.0; } + double cost(const IDirectPostingStore::LookupResult &) const noexcept { + return btree_cost(); + } double strict_cost(const IDirectPostingStore::LookupResult &term) const noexcept { - return abs_to_rel_est(term.posting_size, docid_limit); + double rel_est = abs_to_rel_est(term.posting_size, docid_limit); + return btree_strict_cost(rel_est); } }; double child_est = OrFlow::estimate_of(MyAdapter(docid_limit), _terms); diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h index 076c375091a..5aafe4af72b 100644 --- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h +++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h @@ -80,9 +80,12 @@ public: double estimate(const IDirectPostingStore::LookupResult &term) const noexcept { return abs_to_rel_est(term.posting_size, docid_limit); } - double cost(const IDirectPostingStore::LookupResult &) const noexcept { return 1.0; } + double cost(const IDirectPostingStore::LookupResult &) const noexcept { + return search::queryeval::flow::btree_cost(); + } double strict_cost(const IDirectPostingStore::LookupResult &term) const noexcept { - return abs_to_rel_est(term.posting_size, docid_limit); + double rel_est = abs_to_rel_est(term.posting_size, docid_limit); + return search::queryeval::flow::btree_strict_cost(rel_est); } }; double est = OrFlow::estimate_of(MyAdapter(docid_limit), _terms); diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp index 97c8dd391ba..99be653a398 100644 --- a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp @@ -4,6 +4,7 @@ #include <vespa/searchlib/common/bitvectoriterator.h> #include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h> #include <vespa/searchlib/queryeval/filter_wrapper.h> +#include <vespa/searchlib/queryeval/flow_tuning.h> #include <vespa/searchlib/queryeval/intermediate_blueprints.h> #include <vespa/vespalib/objects/visit.h> #include <vespa/vespalib/util/stringfmt.h> @@ -14,12 +15,14 @@ LOG_SETUP(".diskindex.disktermblueprint"); using search::BitVectorIterator; using search::fef::TermFieldMatchDataArray; using search::index::Schema; +using search::queryeval::Blueprint; using search::queryeval::BooleanMatchIteratorWrapper; using search::queryeval::FieldSpec; using search::queryeval::FieldSpecBaseList; -using search::queryeval::SearchIterator; using search::queryeval::LeafBlueprint; -using search::queryeval::Blueprint; +using search::queryeval::SearchIterator; +using search::queryeval::flow::disk_index_cost; +using search::queryeval::flow::disk_index_strict_cost; namespace search::diskindex { @@ -68,7 +71,8 @@ DiskTermBlueprint::fetchPostings(const queryeval::ExecuteInfo &execInfo) queryeval::FlowStats DiskTermBlueprint::calculate_flow_stats(uint32_t docid_limit) const { - return default_flow_stats(docid_limit, _lookupRes->counts._numDocs, 0); + double rel_est = abs_to_rel_est(_lookupRes->counts._numDocs, docid_limit); + return {rel_est, disk_index_cost(), disk_index_strict_cost(rel_est)}; } SearchIterator::UP diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp index db3a0019d94..e2bb5e76751 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp @@ -4,9 +4,10 @@ #include "ordered_field_index_inserter.h" #include "posting_iterator.h" #include <vespa/searchlib/bitcompression/posocccompression.h> -#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h> #include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h> #include <vespa/searchlib/queryeval/filter_wrapper.h> +#include <vespa/searchlib/queryeval/flow_tuning.h> #include <vespa/searchlib/queryeval/searchiterator.h> #include <vespa/vespalib/btree/btree.hpp> #include <vespa/vespalib/btree/btreeiterator.hpp> @@ -30,6 +31,8 @@ using search::queryeval::BooleanMatchIteratorWrapper; using search::queryeval::FieldSpecBase; using search::queryeval::SearchIterator; using search::queryeval::SimpleLeafBlueprint; +using search::queryeval::flow::btree_cost; +using search::queryeval::flow::btree_strict_cost; using vespalib::GenerationHandler; namespace search::memoryindex { @@ -257,7 +260,8 @@ public: } queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override { - return default_flow_stats(docid_limit, _posting_itr.size(), 0); + double rel_est = abs_to_rel_est(_posting_itr.size(), docid_limit); + return {rel_est, btree_cost(), btree_strict_cost(rel_est)}; } SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray& tfmda, bool) const override { |