diff options
author | Geir Storli <geirst@yahooinc.com> | 2024-04-23 07:56:19 +0000 |
---|---|---|
committer | Geir Storli <geirst@yahooinc.com> | 2024-04-23 07:56:19 +0000 |
commit | 39fede2d5fe7e668f1baf306711e43726a6fee36 (patch) | |
tree | 1fcb406c186cb46aa7e6ef8d9f4e74e6071b2039 /searchlib | |
parent | e1165fba6e2b6d42fc71372567f886ddfdd4b998 (diff) |
Share code to calculate FlowStats based on attribute::HitEstimate.
This also changes to using the formulas in flow_tuning.h for lookup and btree costs.
Diffstat (limited to 'searchlib')
4 files changed, 69 insertions, 48 deletions
diff --git a/searchlib/src/vespa/searchcommon/attribute/hit_estimate_flow_stats_adapter.h b/searchlib/src/vespa/searchcommon/attribute/hit_estimate_flow_stats_adapter.h new file mode 100644 index 00000000000..33031069d8e --- /dev/null +++ b/searchlib/src/vespa/searchcommon/attribute/hit_estimate_flow_stats_adapter.h @@ -0,0 +1,33 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "hit_estimate.h" +#include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/searchlib/queryeval/flow_tuning.h> + +namespace search::attribute { + +/** + * Adapter used when calculating FlowStats based on HitEstimate per term. + */ +struct HitEstimateFlowStatsAdapter { + uint32_t docid_limit; + uint32_t num_indirections; + explicit HitEstimateFlowStatsAdapter(uint32_t docid_limit_in, uint32_t num_indirections_in) noexcept + : docid_limit(docid_limit_in), num_indirections(num_indirections_in) {} + double abs_to_rel_est(const HitEstimate& est) const noexcept { + return queryeval::Blueprint::abs_to_rel_est(est.est_hits(), docid_limit); + } + double estimate(const HitEstimate& est) const noexcept { + return est.is_unknown() ? 0.5 : abs_to_rel_est(est); + } + double cost(const HitEstimate& est) const noexcept { + return est.is_unknown() ? queryeval::flow::lookup_cost(num_indirections) : queryeval::flow::btree_cost(abs_to_rel_est(est)); + } + double strict_cost(const HitEstimate &est) const noexcept { + return est.is_unknown() ? queryeval::flow::lookup_strict_cost(num_indirections) : queryeval::flow::btree_strict_cost(abs_to_rel_est(est)); + } +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index cafff1d25cd..5b17b491a20 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -13,6 +13,7 @@ #include "predicate_attribute.h" #include <vespa/eval/eval/value.h> #include <vespa/searchcommon/attribute/config.h> +#include <vespa/searchcommon/attribute/hit_estimate_flow_stats_adapter.h> #include <vespa/searchlib/common/location.h> #include <vespa/searchlib/common/locationiterators.h> #include <vespa/searchlib/query/query_term_decoder.h> @@ -25,23 +26,23 @@ #include <vespa/searchlib/queryeval/emptysearch.h> #include <vespa/searchlib/queryeval/field_spec.hpp> #include <vespa/searchlib/queryeval/filter_wrapper.h> +#include <vespa/searchlib/queryeval/flow_tuning.h> #include <vespa/searchlib/queryeval/get_weight_from_node.h> #include <vespa/searchlib/queryeval/intermediate_blueprints.h> +#include <vespa/searchlib/queryeval/irequestcontext.h> #include <vespa/searchlib/queryeval/leaf_blueprints.h> #include <vespa/searchlib/queryeval/nearest_neighbor_blueprint.h> #include <vespa/searchlib/queryeval/orlikesearch.h> -#include <vespa/searchlib/queryeval/flow_tuning.h> #include <vespa/searchlib/queryeval/predicate_blueprint.h> #include <vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h> #include <vespa/searchlib/queryeval/wand/parallel_weak_and_search.h> #include <vespa/searchlib/queryeval/weighted_set_term_blueprint.h> #include <vespa/searchlib/queryeval/weighted_set_term_search.h> -#include <vespa/searchlib/queryeval/irequestcontext.h> #include <vespa/searchlib/tensor/dense_tensor_attribute.h> -#include <vespa/vespalib/util/regexp.h> -#include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/util/exceptions.h> #include <vespa/vespalib/util/issue.h> +#include <vespa/vespalib/util/regexp.h> +#include <vespa/vespalib/util/stringfmt.h> #include <charconv> #include <vespa/log/log.h> @@ -93,6 +94,7 @@ using search::queryeval::StrictHeapOrSearch; using search::queryeval::WeightedSetTermBlueprint; using search::queryeval::flow::btree_cost; using search::queryeval::flow::btree_strict_cost; +using search::queryeval::flow::get_num_indirections; using search::queryeval::flow::lookup_cost; using search::queryeval::flow::lookup_strict_cost; using search::tensor::DenseTensorAttribute; @@ -123,19 +125,6 @@ private: }; //----------------------------------------------------------------------------- -size_t -get_num_indirections(const BasicType& basic_type, const CollectionType& col_type) -{ - size_t res = 0; - if (basic_type == BasicType::STRING) { - res += 1; - } - if (col_type != CollectionType::SINGLE) { - res += 1; - } - return res; -} - /** * Blueprint for creating regular, stack-based attribute iterators. **/ @@ -293,20 +282,11 @@ public: } queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override { using OrFlow = search::queryeval::OrFlow; - struct MyAdapter { - uint32_t docid_limit; - explicit MyAdapter(uint32_t docid_limit_in) noexcept : docid_limit(docid_limit_in) {} - double estimate(const AttrHitEstimate &est) const noexcept { - return est.is_unknown() ? 0.5 : abs_to_rel_est(est.est_hits(), docid_limit); - } - double cost(const AttrHitEstimate &) const noexcept { return 1.0; } - double strict_cost(const AttrHitEstimate &est) const noexcept { - return est.is_unknown() ? 1.0 : abs_to_rel_est(est.est_hits(), docid_limit); - } - }; - double est = OrFlow::estimate_of(MyAdapter(docid_limit), _estimates); - return {est, OrFlow::cost_of(MyAdapter(docid_limit), _estimates, false), - OrFlow::cost_of(MyAdapter(docid_limit), _estimates, true) + queryeval::flow::heap_cost(est, _estimates.size())}; + using MyAdapter = attribute::HitEstimateFlowStatsAdapter; + size_t indirections = queryeval::flow::get_num_indirections(_attribute.getBasicType(), _attribute.getCollectionType()); + double est = OrFlow::estimate_of(MyAdapter(docid_limit, indirections), _estimates); + return {est, OrFlow::cost_of(MyAdapter(docid_limit, indirections), _estimates, false), + OrFlow::cost_of(MyAdapter(docid_limit, indirections), _estimates, true) + queryeval::flow::heap_cost(est, _estimates.size())}; } SearchIterator::UP diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp index 2ad67c85429..928023c0f94 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp @@ -2,20 +2,19 @@ #include "attribute_weighted_set_blueprint.h" #include "multi_term_hash_filter.hpp" +#include <vespa/searchcommon/attribute/hit_estimate_flow_stats_adapter.h> #include <vespa/searchcommon/attribute/i_search_context.h> -#include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/fef/matchdatalayout.h> #include <vespa/searchlib/query/query_term_ucs4.h> #include <vespa/searchlib/queryeval/filter_wrapper.h> -#include <vespa/searchlib/queryeval/orsearch.h> #include <vespa/searchlib/queryeval/flow.h> #include <vespa/searchlib/queryeval/flow_tuning.h> +#include <vespa/searchlib/queryeval/orsearch.h> #include <vespa/searchlib/queryeval/weighted_set_term_search.h> #include <vespa/vespalib/objects/visit.h> #include <vespa/vespalib/stllike/hash_map.hpp> #include <vespa/vespalib/util/stringfmt.h> - namespace search { namespace { @@ -130,20 +129,11 @@ AttributeWeightedSetBlueprint::sort(queryeval::InFlow in_flow) queryeval::FlowStats AttributeWeightedSetBlueprint::calculate_flow_stats(uint32_t docid_limit) const { - struct MyAdapter { - uint32_t docid_limit; - MyAdapter(uint32_t docid_limit_in) noexcept : docid_limit(docid_limit_in) {} - double estimate(const AttrHitEstimate &est) const noexcept { - return est.is_unknown() ? 0.5 : abs_to_rel_est(est.est_hits(), docid_limit); - } - double cost(const AttrHitEstimate &) const noexcept { return 1.0; } - double strict_cost(const AttrHitEstimate &est) const noexcept { - return est.is_unknown() ? 1.0 : abs_to_rel_est(est.est_hits(), docid_limit); - } - }; - double est = OrFlow::estimate_of(MyAdapter(docid_limit), _estimates); - return {est, OrFlow::cost_of(MyAdapter(docid_limit), _estimates, false), - OrFlow::cost_of(MyAdapter(docid_limit), _estimates, true) + queryeval::flow::heap_cost(est, _estimates.size())}; + using MyAdapter = attribute::HitEstimateFlowStatsAdapter; + size_t num_indirections = queryeval::flow::get_num_indirections(_attr.getBasicType(), _attr.getCollectionType()); + double est = OrFlow::estimate_of(MyAdapter(docid_limit, num_indirections), _estimates); + return {est, OrFlow::cost_of(MyAdapter(docid_limit, num_indirections), _estimates, false), + OrFlow::cost_of(MyAdapter(docid_limit, num_indirections), _estimates, true) + queryeval::flow::heap_cost(est, _estimates.size())}; } queryeval::SearchIterator::UP diff --git a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h index e8a58bba9dc..6389de35264 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h @@ -1,6 +1,8 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once +#include <vespa/searchcommon/attribute/basictype.h> +#include <vespa/searchcommon/attribute/collectiontype.h> #include <cmath> #include <cstddef> @@ -41,6 +43,22 @@ inline double heap_cost(double my_est, size_t num_children) { return my_est * std::log2(std::max(size_t(1), num_children)); } +/** + * Returns the number of memory indirections needed when doing lookups + * in an attribute with the given type. + */ +inline size_t get_num_indirections(const attribute::BasicType& basic_type, + const attribute::CollectionType& col_type) { + size_t res = 0; + if (basic_type == attribute::BasicType::STRING) { + res += 1; + } + if (col_type != attribute::CollectionType::SINGLE) { + res += 1; + } + return res; +} + // Non-strict cost of lookup based matching in an attribute (not fast-search). // Test used: IteratorBenchmark::analyze_term_search_in_attributes_non_strict inline double lookup_cost(size_t num_indirections) { |