aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2024-04-23 07:56:19 +0000
committerGeir Storli <geirst@yahooinc.com>2024-04-23 07:56:19 +0000
commit39fede2d5fe7e668f1baf306711e43726a6fee36 (patch)
tree1fcb406c186cb46aa7e6ef8d9f4e74e6071b2039 /searchlib
parente1165fba6e2b6d42fc71372567f886ddfdd4b998 (diff)
Share code to calculate FlowStats based on attribute::HitEstimate.
This also changes to using the formulas in flow_tuning.h for lookup and btree costs.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/hit_estimate_flow_stats_adapter.h33
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp42
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp24
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/flow_tuning.h18
4 files changed, 69 insertions, 48 deletions
diff --git a/searchlib/src/vespa/searchcommon/attribute/hit_estimate_flow_stats_adapter.h b/searchlib/src/vespa/searchcommon/attribute/hit_estimate_flow_stats_adapter.h
new file mode 100644
index 00000000000..33031069d8e
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/hit_estimate_flow_stats_adapter.h
@@ -0,0 +1,33 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "hit_estimate.h"
+#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/searchlib/queryeval/flow_tuning.h>
+
+namespace search::attribute {
+
+/**
+ * Adapter used when calculating FlowStats based on HitEstimate per term.
+ */
+struct HitEstimateFlowStatsAdapter {
+ uint32_t docid_limit;
+ uint32_t num_indirections;
+ explicit HitEstimateFlowStatsAdapter(uint32_t docid_limit_in, uint32_t num_indirections_in) noexcept
+ : docid_limit(docid_limit_in), num_indirections(num_indirections_in) {}
+ double abs_to_rel_est(const HitEstimate& est) const noexcept {
+ return queryeval::Blueprint::abs_to_rel_est(est.est_hits(), docid_limit);
+ }
+ double estimate(const HitEstimate& est) const noexcept {
+ return est.is_unknown() ? 0.5 : abs_to_rel_est(est);
+ }
+ double cost(const HitEstimate& est) const noexcept {
+ return est.is_unknown() ? queryeval::flow::lookup_cost(num_indirections) : queryeval::flow::btree_cost(abs_to_rel_est(est));
+ }
+ double strict_cost(const HitEstimate &est) const noexcept {
+ return est.is_unknown() ? queryeval::flow::lookup_strict_cost(num_indirections) : queryeval::flow::btree_strict_cost(abs_to_rel_est(est));
+ }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index cafff1d25cd..5b17b491a20 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -13,6 +13,7 @@
#include "predicate_attribute.h"
#include <vespa/eval/eval/value.h>
#include <vespa/searchcommon/attribute/config.h>
+#include <vespa/searchcommon/attribute/hit_estimate_flow_stats_adapter.h>
#include <vespa/searchlib/common/location.h>
#include <vespa/searchlib/common/locationiterators.h>
#include <vespa/searchlib/query/query_term_decoder.h>
@@ -25,23 +26,23 @@
#include <vespa/searchlib/queryeval/emptysearch.h>
#include <vespa/searchlib/queryeval/field_spec.hpp>
#include <vespa/searchlib/queryeval/filter_wrapper.h>
+#include <vespa/searchlib/queryeval/flow_tuning.h>
#include <vespa/searchlib/queryeval/get_weight_from_node.h>
#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
+#include <vespa/searchlib/queryeval/irequestcontext.h>
#include <vespa/searchlib/queryeval/leaf_blueprints.h>
#include <vespa/searchlib/queryeval/nearest_neighbor_blueprint.h>
#include <vespa/searchlib/queryeval/orlikesearch.h>
-#include <vespa/searchlib/queryeval/flow_tuning.h>
#include <vespa/searchlib/queryeval/predicate_blueprint.h>
#include <vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h>
#include <vespa/searchlib/queryeval/wand/parallel_weak_and_search.h>
#include <vespa/searchlib/queryeval/weighted_set_term_blueprint.h>
#include <vespa/searchlib/queryeval/weighted_set_term_search.h>
-#include <vespa/searchlib/queryeval/irequestcontext.h>
#include <vespa/searchlib/tensor/dense_tensor_attribute.h>
-#include <vespa/vespalib/util/regexp.h>
-#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/vespalib/util/exceptions.h>
#include <vespa/vespalib/util/issue.h>
+#include <vespa/vespalib/util/regexp.h>
+#include <vespa/vespalib/util/stringfmt.h>
#include <charconv>
#include <vespa/log/log.h>
@@ -93,6 +94,7 @@ using search::queryeval::StrictHeapOrSearch;
using search::queryeval::WeightedSetTermBlueprint;
using search::queryeval::flow::btree_cost;
using search::queryeval::flow::btree_strict_cost;
+using search::queryeval::flow::get_num_indirections;
using search::queryeval::flow::lookup_cost;
using search::queryeval::flow::lookup_strict_cost;
using search::tensor::DenseTensorAttribute;
@@ -123,19 +125,6 @@ private:
};
//-----------------------------------------------------------------------------
-size_t
-get_num_indirections(const BasicType& basic_type, const CollectionType& col_type)
-{
- size_t res = 0;
- if (basic_type == BasicType::STRING) {
- res += 1;
- }
- if (col_type != CollectionType::SINGLE) {
- res += 1;
- }
- return res;
-}
-
/**
* Blueprint for creating regular, stack-based attribute iterators.
**/
@@ -293,20 +282,11 @@ public:
}
queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override {
using OrFlow = search::queryeval::OrFlow;
- struct MyAdapter {
- uint32_t docid_limit;
- explicit MyAdapter(uint32_t docid_limit_in) noexcept : docid_limit(docid_limit_in) {}
- double estimate(const AttrHitEstimate &est) const noexcept {
- return est.is_unknown() ? 0.5 : abs_to_rel_est(est.est_hits(), docid_limit);
- }
- double cost(const AttrHitEstimate &) const noexcept { return 1.0; }
- double strict_cost(const AttrHitEstimate &est) const noexcept {
- return est.is_unknown() ? 1.0 : abs_to_rel_est(est.est_hits(), docid_limit);
- }
- };
- double est = OrFlow::estimate_of(MyAdapter(docid_limit), _estimates);
- return {est, OrFlow::cost_of(MyAdapter(docid_limit), _estimates, false),
- OrFlow::cost_of(MyAdapter(docid_limit), _estimates, true) + queryeval::flow::heap_cost(est, _estimates.size())};
+ using MyAdapter = attribute::HitEstimateFlowStatsAdapter;
+ size_t indirections = queryeval::flow::get_num_indirections(_attribute.getBasicType(), _attribute.getCollectionType());
+ double est = OrFlow::estimate_of(MyAdapter(docid_limit, indirections), _estimates);
+ return {est, OrFlow::cost_of(MyAdapter(docid_limit, indirections), _estimates, false),
+ OrFlow::cost_of(MyAdapter(docid_limit, indirections), _estimates, true) + queryeval::flow::heap_cost(est, _estimates.size())};
}
SearchIterator::UP
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
index 2ad67c85429..928023c0f94 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
@@ -2,20 +2,19 @@
#include "attribute_weighted_set_blueprint.h"
#include "multi_term_hash_filter.hpp"
+#include <vespa/searchcommon/attribute/hit_estimate_flow_stats_adapter.h>
#include <vespa/searchcommon/attribute/i_search_context.h>
-#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/fef/matchdatalayout.h>
#include <vespa/searchlib/query/query_term_ucs4.h>
#include <vespa/searchlib/queryeval/filter_wrapper.h>
-#include <vespa/searchlib/queryeval/orsearch.h>
#include <vespa/searchlib/queryeval/flow.h>
#include <vespa/searchlib/queryeval/flow_tuning.h>
+#include <vespa/searchlib/queryeval/orsearch.h>
#include <vespa/searchlib/queryeval/weighted_set_term_search.h>
#include <vespa/vespalib/objects/visit.h>
#include <vespa/vespalib/stllike/hash_map.hpp>
#include <vespa/vespalib/util/stringfmt.h>
-
namespace search {
namespace {
@@ -130,20 +129,11 @@ AttributeWeightedSetBlueprint::sort(queryeval::InFlow in_flow)
queryeval::FlowStats
AttributeWeightedSetBlueprint::calculate_flow_stats(uint32_t docid_limit) const
{
- struct MyAdapter {
- uint32_t docid_limit;
- MyAdapter(uint32_t docid_limit_in) noexcept : docid_limit(docid_limit_in) {}
- double estimate(const AttrHitEstimate &est) const noexcept {
- return est.is_unknown() ? 0.5 : abs_to_rel_est(est.est_hits(), docid_limit);
- }
- double cost(const AttrHitEstimate &) const noexcept { return 1.0; }
- double strict_cost(const AttrHitEstimate &est) const noexcept {
- return est.is_unknown() ? 1.0 : abs_to_rel_est(est.est_hits(), docid_limit);
- }
- };
- double est = OrFlow::estimate_of(MyAdapter(docid_limit), _estimates);
- return {est, OrFlow::cost_of(MyAdapter(docid_limit), _estimates, false),
- OrFlow::cost_of(MyAdapter(docid_limit), _estimates, true) + queryeval::flow::heap_cost(est, _estimates.size())};
+ using MyAdapter = attribute::HitEstimateFlowStatsAdapter;
+ size_t num_indirections = queryeval::flow::get_num_indirections(_attr.getBasicType(), _attr.getCollectionType());
+ double est = OrFlow::estimate_of(MyAdapter(docid_limit, num_indirections), _estimates);
+ return {est, OrFlow::cost_of(MyAdapter(docid_limit, num_indirections), _estimates, false),
+ OrFlow::cost_of(MyAdapter(docid_limit, num_indirections), _estimates, true) + queryeval::flow::heap_cost(est, _estimates.size())};
}
queryeval::SearchIterator::UP
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
index e8a58bba9dc..6389de35264 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
@@ -1,6 +1,8 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
+#include <vespa/searchcommon/attribute/basictype.h>
+#include <vespa/searchcommon/attribute/collectiontype.h>
#include <cmath>
#include <cstddef>
@@ -41,6 +43,22 @@ inline double heap_cost(double my_est, size_t num_children) {
return my_est * std::log2(std::max(size_t(1), num_children));
}
+/**
+ * Returns the number of memory indirections needed when doing lookups
+ * in an attribute with the given type.
+ */
+inline size_t get_num_indirections(const attribute::BasicType& basic_type,
+ const attribute::CollectionType& col_type) {
+ size_t res = 0;
+ if (basic_type == attribute::BasicType::STRING) {
+ res += 1;
+ }
+ if (col_type != attribute::CollectionType::SINGLE) {
+ res += 1;
+ }
+ return res;
+}
+
// Non-strict cost of lookup based matching in an attribute (not fast-search).
// Test used: IteratorBenchmark::analyze_term_search_in_attributes_non_strict
inline double lookup_cost(size_t num_indirections) {