From 0539ca0c737d39ba15346c83299592fa3835ca2e Mon Sep 17 00:00:00 2001 From: HÃ¥vard Pettersen Date: Wed, 13 Dec 2023 15:52:05 +0000 Subject: use flow to calculate relative estimates and iterator cost --- .../tests/queryeval/blueprint/blueprint_test.cpp | 5 +- .../blueprint/intermediate_blueprints_test.cpp | 70 +++++++++++++++++- .../src/vespa/searchlib/queryeval/CMakeLists.txt | 1 + .../src/vespa/searchlib/queryeval/blueprint.cpp | 2 + .../src/vespa/searchlib/queryeval/blueprint.h | 23 ++++++ searchlib/src/vespa/searchlib/queryeval/flow.cpp | 7 ++ searchlib/src/vespa/searchlib/queryeval/flow.h | 62 ++++++++++++++++ .../queryeval/intermediate_blueprints.cpp | 85 ++++++++++++++-------- .../searchlib/queryeval/intermediate_blueprints.h | 8 ++ 9 files changed, 230 insertions(+), 33 deletions(-) create mode 100644 searchlib/src/vespa/searchlib/queryeval/flow.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/flow.h (limited to 'searchlib') diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp index 2d621fa1011..20cf2008e4b 100644 --- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp @@ -22,6 +22,7 @@ class MyOr : public IntermediateBlueprint { private: public: + double calculate_cost() const final { return 1.0; } double calculate_relative_estimate() const final { return 0.5; } HitEstimate combine(const std::vector &data) const override { return max(data); @@ -773,8 +774,8 @@ TEST("requireThatDocIdLimitInjectionWorks") TEST("Control object sizes") { EXPECT_EQUAL(40u, sizeof(Blueprint::State)); - EXPECT_EQUAL(32u, sizeof(Blueprint)); - EXPECT_EQUAL(72u, sizeof(LeafBlueprint)); + EXPECT_EQUAL(40u, sizeof(Blueprint)); + EXPECT_EQUAL(80u, sizeof(LeafBlueprint)); } TEST_MAIN() { diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index 7d994ee4cd6..e24e91c2f1d 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -633,11 +634,17 @@ TEST("and with one empty child is optimized away") { struct make { make(make &&) = delete; make &operator=(make &&) = delete; + static constexpr double invalid_cost = -1.0; static constexpr uint32_t invalid_source = -1; + double cost_tag = invalid_cost; uint32_t source_tag = invalid_source; std::unique_ptr making; make(std::unique_ptr making_in) : making(std::move(making_in)) {} operator std::unique_ptr() && noexcept { return std::move(making); } + make &&cost(double leaf_cost) && { + cost_tag = leaf_cost; + return std::move(*this); + } make &&source(uint32_t source_id) && { source_tag = source_id; return std::move(*this); @@ -655,7 +662,12 @@ struct make { return std::move(*this); } make &&leaf(uint32_t estimate) && { - return std::move(*this).add(ap(MyLeafSpec(estimate).create())); + std::unique_ptr bp(MyLeafSpec(estimate).create()); + if (cost_tag != invalid_cost) { + bp->set_cost(cost_tag); + cost_tag = invalid_cost; + } + return std::move(*this).add(std::move(bp)); } make &&leafs(std::initializer_list estimates) && { for (uint32_t estimate: estimates) { @@ -1211,7 +1223,7 @@ TEST("relative estimate for RANK") { } TEST("relative estimate for ANDNOT") { - verify_relative_estimate(make::ANDNOT(), 0.2); + verify_relative_estimate(make::ANDNOT(), 0.2*0.7*0.5); } TEST("relative estimate for SB") { @@ -1232,4 +1244,58 @@ TEST("relative estimate for WEAKAND") { verify_relative_estimate(make::WEAKAND(50), 0.05); } +void verify_cost(make &&mk, double expect) { + EXPECT_EQUAL(mk.making->cost(), 1.0); + Blueprint::UP bp = std::move(mk) + .cost(1.1).leaf(200) + .cost(1.2).leaf(300) + .cost(1.3).leaf(500); + bp->setDocIdLimit(1000); + bp = Blueprint::optimize(std::move(bp)); + EXPECT_EQUAL(bp->cost(), expect); +} + +double calc_cost(std::vector> list) { + double flow = 1.0; + double cost = 0.0; + for (auto [sub_cost,pass_through]: list) { + cost += flow * sub_cost; + flow *= pass_through; + } + return cost; +} + +TEST("cost for OR") { + verify_cost(make::OR(), calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}})); +} + +TEST("cost for AND") { + verify_cost(make::AND(), calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}})); +} + +TEST("cost for RANK") { + verify_cost(make::RANK(), 1.1); // first +} + +TEST("cost for ANDNOT") { + verify_cost(make::ANDNOT(), calc_cost({{1.1, 0.2},{1.3, 0.5},{1.2, 0.7}})); +} + +TEST("cost for SB") { + InvalidSelector sel; + verify_cost(make::SB(sel), 1.3); // max +} + +TEST("cost for NEAR") { + verify_cost(make::NEAR(1), 3.0 + calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}})); +} + +TEST("cost for ONEAR") { + verify_cost(make::ONEAR(1), 3.0 + calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}})); +} + +TEST("cost for WEAKAND") { + verify_cost(make::WEAKAND(1000), calc_cost({{1.1, 0.8},{1.2, 0.7},{1.3, 0.5}})); +} + TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt index b47f336c934..5e6d31d3761 100644 --- a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt @@ -21,6 +21,7 @@ vespa_add_library(searchlib_queryeval OBJECT fake_searchable.cpp field_spec.cpp filter_wrapper.cpp + flow.cpp full_search.cpp get_weight_from_node.cpp global_filter.cpp diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index 043b006d3bd..b71a579e097 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -120,6 +120,7 @@ Blueprint::State::~State() = default; Blueprint::Blueprint() noexcept : _parent(nullptr), + _cost(1.0), _sourceId(0xffffffff), _docid_limit(0), _frozen(false) @@ -560,6 +561,7 @@ IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass) optimize_self(pass); if (pass == OptimizePass::LAST) { sort(_children); + set_cost(calculate_cost()); } maybe_eliminate_self(self, get_replacement()); } diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index 3261d380f36..66d55015f62 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -144,6 +144,22 @@ public: return (total_docs == 0) ? 0.0 : double(est) / double(total_docs); } + static double cost_of(const Children &children, auto flow) { + double cost = 0.0; + for (const auto &child: children) { + cost += flow.flow() * child->cost(); + flow.add(child->estimate()); + } + return cost; + } + + static double estimate_of(const Children &children, auto flow) { + for (const auto &child: children) { + flow.add(child->estimate()); + } + return flow.estimate(); + } + // utility that just takes maximum estimate static HitEstimate max(const std::vector &data); @@ -182,6 +198,7 @@ public: private: Blueprint *_parent; + double _cost; uint32_t _sourceId; uint32_t _docid_limit; bool _frozen; @@ -197,6 +214,8 @@ protected: _frozen = true; } + void set_cost(double value) noexcept { _cost = value; } + public: class IPredicate { public: @@ -219,6 +238,8 @@ public: Blueprint *getParent() const noexcept { return _parent; } bool has_parent() const { return (_parent != nullptr); } + double cost() const noexcept { return _cost; } + Blueprint &setSourceId(uint32_t sourceId) noexcept { _sourceId = sourceId; return *this; } uint32_t getSourceId() const noexcept { return _sourceId; } @@ -369,6 +390,7 @@ public: Blueprint::UP removeLastChild() { return removeChild(childCnt() - 1); } SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override; + virtual double calculate_cost() const = 0; virtual HitEstimate combine(const std::vector &data) const = 0; virtual FieldSpecBaseList exposeFields() const = 0; virtual void sort(Children &children) const = 0; @@ -426,6 +448,7 @@ public: ~LeafBlueprint() override = default; const State &getState() const final { return _state; } void setDocIdLimit(uint32_t limit) noexcept final; + using Blueprint::set_cost; double calculate_relative_estimate() const override; void fetchPostings(const ExecuteInfo &execInfo) override; void freeze() final; diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.cpp b/searchlib/src/vespa/searchlib/queryeval/flow.cpp new file mode 100644 index 00000000000..4730058b357 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/flow.cpp @@ -0,0 +1,7 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "flow.h" + +namespace search::queryeval { + +} diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h new file mode 100644 index 00000000000..36c0a259feb --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/flow.h @@ -0,0 +1,62 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once +#include + +namespace search::queryeval { + +// Model how boolean result decisions flow through intermediate nodes +// of different types based on relative estimates for sub-expressions + +class AndFlow { +private: + double _flow; + size_t _cnt; +public: + AndFlow(double in = 1.0) noexcept : _flow(in), _cnt(0) {} + void add(double est) noexcept { + _flow *= est; + ++_cnt; + } + double flow() const noexcept { + return _flow; + } + double estimate() const noexcept { + return (_cnt > 0) ? _flow : 0.0; + } +}; + +class OrFlow { +private: + double _flow; +public: + OrFlow(double in = 1.0) noexcept : _flow(in) {} + void add(double est) noexcept { + _flow *= (1.0 - est); + } + double flow() const noexcept { + return _flow; + } + double estimate() const noexcept { + return (1.0 - _flow); + } +}; + +class AndNotFlow { +private: + double _flow; + size_t _cnt; +public: + AndNotFlow(double in = 1.0) noexcept : _flow(in), _cnt(0) {} + void add(double est) noexcept { + _flow *= (_cnt++ == 0) ? est : (1.0 - est); + } + double flow() const noexcept { + return _flow; + } + double estimate() const noexcept { + return (_cnt > 0) ? _flow : 0.0; + } +}; + +} diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index 7d992b510b5..c43335a6fdf 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "intermediate_blueprints.h" +#include "flow.h" #include "andnotsearch.h" #include "andsearch.h" #include "orsearch.h" @@ -81,33 +82,20 @@ need_normal_features_for_children(const IntermediateBlueprint &blueprint, fef::M } } -double rel_est_first_child(const Blueprint::Children &children) { - return children.empty() ? 0.0 : children[0]->getState().relative_estimate(); -} - -double rel_est_and(const Blueprint::Children &children) { - double flow = 1.0; - for (const Blueprint::UP &child: children) { - flow *= child->getState().relative_estimate(); - } - return children.empty() ? 0.0 : flow; -} - -double rel_est_or(const Blueprint::Children &children) { - double flow = 1.0; - for (const Blueprint::UP &child: children) { - flow *= (1.0 - child->getState().relative_estimate()); - } - return (1.0 - flow); -} - } // namespace search::queryeval:: //----------------------------------------------------------------------------- double -AndNotBlueprint::calculate_relative_estimate() const { - return rel_est_first_child(get_children()); +AndNotBlueprint::calculate_cost() const +{ + return cost_of(get_children(), AndNotFlow()); +} + +double +AndNotBlueprint::calculate_relative_estimate() const +{ + return estimate_of(get_children(), AndNotFlow()); } Blueprint::HitEstimate @@ -213,9 +201,14 @@ AndNotBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) co //----------------------------------------------------------------------------- +double +AndBlueprint::calculate_cost() const { + return cost_of(get_children(), AndFlow()); +} + double AndBlueprint::calculate_relative_estimate() const { - return rel_est_and(get_children()); + return estimate_of(get_children(), AndFlow()); } Blueprint::HitEstimate @@ -321,9 +314,14 @@ OrBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate, bool u OrBlueprint::~OrBlueprint() = default; +double +OrBlueprint::calculate_cost() const { + return cost_of(get_children(), OrFlow()); +} + double OrBlueprint::calculate_relative_estimate() const { - return rel_est_or(get_children()); + return estimate_of(get_children(), OrFlow()); } Blueprint::HitEstimate @@ -417,9 +415,14 @@ OrBlueprint::calculate_cost_tier() const //----------------------------------------------------------------------------- WeakAndBlueprint::~WeakAndBlueprint() = default; +double +WeakAndBlueprint::calculate_cost() const { + return cost_of(get_children(), OrFlow()); +} + double WeakAndBlueprint::calculate_relative_estimate() const { - double child_est = rel_est_or(get_children()); + double child_est = estimate_of(get_children(), OrFlow()); double my_est = abs_to_rel_est(_n, get_docid_limit()); return std::min(my_est, child_est); } @@ -483,9 +486,14 @@ WeakAndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) c //----------------------------------------------------------------------------- +double +NearBlueprint::calculate_cost() const { + return cost_of(get_children(), AndFlow()) + childCnt() * 1.0; +} + double NearBlueprint::calculate_relative_estimate() const { - return rel_est_and(get_children()); + return estimate_of(get_children(), AndFlow()); } Blueprint::HitEstimate @@ -541,9 +549,14 @@ NearBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) cons //----------------------------------------------------------------------------- +double +ONearBlueprint::calculate_cost() const { + return cost_of(get_children(), AndFlow()) + (childCnt() * 1.0); +} + double ONearBlueprint::calculate_relative_estimate() const { - return rel_est_and(get_children()); + return estimate_of(get_children(), AndFlow()); } Blueprint::HitEstimate @@ -602,9 +615,14 @@ ONearBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) con //----------------------------------------------------------------------------- +double +RankBlueprint::calculate_cost() const { + return (childCnt() == 0) ? 1.0 : getChild(0).cost(); +} + double RankBlueprint::calculate_relative_estimate() const { - return rel_est_first_child(get_children()); + return (childCnt() == 0) ? 0.0 : getChild(0).estimate(); } Blueprint::HitEstimate @@ -699,9 +717,18 @@ SourceBlenderBlueprint::SourceBlenderBlueprint(const ISourceSelector &selector) SourceBlenderBlueprint::~SourceBlenderBlueprint() = default; +double +SourceBlenderBlueprint::calculate_cost() const { + double my_cost = 1.0; + for (const auto &child: get_children()) { + my_cost = std::max(my_cost, child->cost()); + } + return my_cost; +} + double SourceBlenderBlueprint::calculate_relative_estimate() const { - return rel_est_or(get_children()); + return estimate_of(get_children(), OrFlow()); } Blueprint::HitEstimate diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h index b9306ea307d..14672c2a5cd 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h @@ -15,6 +15,7 @@ class AndNotBlueprint : public IntermediateBlueprint { public: bool supports_termwise_children() const override { return true; } + double calculate_cost() const final; double calculate_relative_estimate() const final; HitEstimate combine(const std::vector &data) const override; FieldSpecBaseList exposeFields() const override; @@ -42,6 +43,7 @@ class AndBlueprint : public IntermediateBlueprint { public: bool supports_termwise_children() const override { return true; } + double calculate_cost() const final; double calculate_relative_estimate() const final; HitEstimate combine(const std::vector &data) const override; FieldSpecBaseList exposeFields() const override; @@ -67,6 +69,7 @@ class OrBlueprint : public IntermediateBlueprint public: ~OrBlueprint() override; bool supports_termwise_children() const override { return true; } + double calculate_cost() const final; double calculate_relative_estimate() const final; HitEstimate combine(const std::vector &data) const override; FieldSpecBaseList exposeFields() const override; @@ -94,6 +97,7 @@ private: std::vector _weights; public: + double calculate_cost() const final; double calculate_relative_estimate() const final; HitEstimate combine(const std::vector &data) const override; FieldSpecBaseList exposeFields() const override; @@ -124,6 +128,7 @@ private: uint32_t _window; public: + double calculate_cost() const final; double calculate_relative_estimate() const final; HitEstimate combine(const std::vector &data) const override; FieldSpecBaseList exposeFields() const override; @@ -147,6 +152,7 @@ private: uint32_t _window; public: + double calculate_cost() const final; double calculate_relative_estimate() const final; HitEstimate combine(const std::vector &data) const override; FieldSpecBaseList exposeFields() const override; @@ -167,6 +173,7 @@ public: class RankBlueprint final : public IntermediateBlueprint { public: + double calculate_cost() const final; double calculate_relative_estimate() const final; HitEstimate combine(const std::vector &data) const override; FieldSpecBaseList exposeFields() const override; @@ -195,6 +202,7 @@ private: public: explicit SourceBlenderBlueprint(const ISourceSelector &selector) noexcept; ~SourceBlenderBlueprint() override; + double calculate_cost() const final; double calculate_relative_estimate() const final; HitEstimate combine(const std::vector &data) const override; FieldSpecBaseList exposeFields() const override; -- cgit v1.2.3