From 51db1c06279843401604cc61f8e6d12c9cc32830 Mon Sep 17 00:00:00 2001 From: Håvard Pettersen Date: Thu, 15 Feb 2024 09:50:53 +0000 Subject: use flow for fetchPostings note that only AND/OR are wired to their respective flows here, which means only strict OR should change (all children getting full in-flow) --- .../blueprint/intermediate_blueprints_test.cpp | 20 ++++++-- .../tests/queryeval/flow/queryeval_flow_test.cpp | 58 ++++++++++++++++++++++ .../src/vespa/searchlib/queryeval/blueprint.cpp | 17 +++---- .../src/vespa/searchlib/queryeval/blueprint.h | 2 +- searchlib/src/vespa/searchlib/queryeval/flow.h | 36 +++++++++++++- .../queryeval/intermediate_blueprints.cpp | 18 ++++--- .../searchlib/queryeval/intermediate_blueprints.h | 4 +- 7 files changed, 129 insertions(+), 26 deletions(-) diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index ee7c201f093..31db731a598 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -165,20 +165,30 @@ TEST("test Or propagates updated histestimate") { bp->addChild(ap(MyLeafSpec(800).create()->setSourceId(2))); bp->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); bp->setDocIdLimit(5000); - // sort OR as non-strict to get expected order. With strict OR, - // the order would be irrelevant since we use the relative - // estimate as strict_cost for leafs. 
+ // NOTE: use non-strict OR ordering since strict OR ordering is non-deterministic optimize(bp, false); - bp->fetchPostings(ExecuteInfo::TRUE); + //--- execute info when non-strict: + bp->fetchPostings(ExecuteInfo::FALSE); EXPECT_EQUAL(4u, bp->childCnt()); for (uint32_t i = 0; i < bp->childCnt(); i++) { const auto & child = dynamic_cast(bp->getChild(i)); - EXPECT_TRUE(child.is_strict); + EXPECT_FALSE(child.is_strict); } EXPECT_EQUAL(1.0, dynamic_cast(bp->getChild(0)).hit_rate); EXPECT_APPROX(0.5, dynamic_cast(bp->getChild(1)).hit_rate, 1e-6); EXPECT_APPROX(0.5*3.0/5.0, dynamic_cast(bp->getChild(2)).hit_rate, 1e-6); EXPECT_APPROX(0.5*3.0*42.0/(5.0*50.0), dynamic_cast(bp->getChild(3)).hit_rate, 1e-6); + //--- execute info when strict: + bp->fetchPostings(ExecuteInfo::TRUE); + EXPECT_EQUAL(4u, bp->childCnt()); + for (uint32_t i = 0; i < bp->childCnt(); i++) { + const auto & child = dynamic_cast(bp->getChild(i)); + EXPECT_TRUE(child.is_strict); + } + EXPECT_EQUAL(1.0, dynamic_cast(bp->getChild(0)).hit_rate); + EXPECT_EQUAL(1.0, dynamic_cast(bp->getChild(1)).hit_rate); + EXPECT_EQUAL(1.0, dynamic_cast(bp->getChild(2)).hit_rate); + EXPECT_EQUAL(1.0, dynamic_cast(bp->getChild(3)).hit_rate); } TEST("test And Blueprint") { diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp index 7a3950dbf1c..5009a15e438 100644 --- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp +++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp @@ -126,6 +126,14 @@ void verify_flow(auto flow, const std::vector &est_list, const std::vect } } +void verify_flow_calc(FlowCalc flow_calc, const std::vector &est_list, const std::vector &expect) { + ASSERT_EQ(est_list.size() + 1, expect.size()); + for (size_t i = 0; i < est_list.size(); ++i) { + EXPECT_DOUBLE_EQ(flow_calc(est_list[i]), expect[i]); + } + EXPECT_DOUBLE_EQ(flow_calc(0.5), expect.back()); +} + TEST(FlowTest, full_and_flow) { for (bool strict: 
{false, true}) { verify_flow(AndFlow(strict), {0.4, 0.7, 0.2}, @@ -133,6 +141,8 @@ TEST(FlowTest, full_and_flow) { {0.4, 0.4, false}, {0.4*0.7, 0.4*0.7, false}, {0.4*0.7*0.2, 0.4*0.7*0.2, false}}); + verify_flow_calc(flow_calc(strict, 1.0), + {0.4, 0.7, 0.2}, {1.0, 0.4, 0.4*0.7, 0.4*0.7*0.2}); } } @@ -143,6 +153,8 @@ TEST(FlowTest, partial_and_flow) { {in*0.4, in*0.4, false}, {in*0.4*0.7, in*0.4*0.7, false}, {in*0.4*0.7*0.2, in*0.4*0.7*0.2, false}}); + verify_flow_calc(flow_calc(false, in), + {0.4, 0.7, 0.2}, {in*1.0, in*0.4, in*0.4*0.7, in*0.4*0.7*0.2}); } } @@ -152,11 +164,15 @@ TEST(FlowTest, full_or_flow) { {0.6, 1.0-0.6, false}, {0.6*0.3, 1.0-0.6*0.3, false}, {0.6*0.3*0.8, 1.0-0.6*0.3*0.8, false}}); + verify_flow_calc(flow_calc(false, 1.0), + {0.4, 0.7, 0.2}, {1.0, 0.6, 0.6*0.3, 0.6*0.3*0.8}); verify_flow(OrFlow(true), {0.4, 0.7, 0.2}, {{1.0, 0.0, true}, {1.0, 1.0-0.6, true}, {1.0, 1.0-0.6*0.3, true}, {1.0, 1.0-0.6*0.3*0.8, true}}); + verify_flow_calc(flow_calc(true, 1.0), + {0.4, 0.7, 0.2}, {1.0, 1.0, 1.0, 1.0}); } TEST(FlowTest, partial_or_flow) { @@ -166,6 +182,8 @@ TEST(FlowTest, partial_or_flow) { {in*0.6, 1.0-in*0.6, false}, {in*0.6*0.3, 1.0-in*0.6*0.3, false}, {in*0.6*0.3*0.8, 1.0-in*0.6*0.3*0.8, false}}); + verify_flow_calc(flow_calc(false, in), + {0.4, 0.7, 0.2}, {in, in*0.6, in*0.6*0.3, in*0.6*0.3*0.8}); } } @@ -176,6 +194,8 @@ TEST(FlowTest, full_and_not_flow) { {0.4, 0.4, false}, {0.4*0.3, 0.4*0.3, false}, {0.4*0.3*0.8, 0.4*0.3*0.8, false}}); + verify_flow_calc(flow_calc(strict, 1.0), + {0.4, 0.7, 0.2}, {1.0, 0.4, 0.4*0.3, 0.4*0.3*0.8}); } } @@ -186,9 +206,47 @@ TEST(FlowTest, partial_and_not_flow) { {in*0.4, in*0.4, false}, {in*0.4*0.3, in*0.4*0.3, false}, {in*0.4*0.3*0.8, in*0.4*0.3*0.8, false}}); + verify_flow_calc(flow_calc(false, in), + {0.4, 0.7, 0.2}, {in, in*0.4, in*0.4*0.3, in*0.4*0.3*0.8}); + } +} + +TEST(FlowTest, full_first_flow_calc) { + for (bool strict: {false, true}) { + verify_flow_calc(first_flow_calc(strict, 1.0), + {0.4, 0.7, 
0.2}, {1.0, 0.4, 0.4, 0.4}); + } +} + +TEST(FlowTest, partial_first_flow_calc) { + for (double in: {1.0, 0.5, 0.25}) { + verify_flow_calc(first_flow_calc(false, in), + {0.4, 0.7, 0.2}, {in, in*0.4, in*0.4, in*0.4}); + } +} + +TEST(FlowTest, full_full_flow_calc) { + for (bool strict: {false, true}) { + verify_flow_calc(full_flow_calc(strict, 1.0), + {0.4, 0.7, 0.2}, {1.0, 1.0, 1.0, 1.0}); } } +TEST(FlowTest, partial_full_flow_calc) { + for (double in: {1.0, 0.5, 0.25}) { + verify_flow_calc(full_flow_calc(false, in), + {0.4, 0.7, 0.2}, {in, in, in, in}); + } +} + +TEST(FlowTest, flow_calc_strictness_overrides_rate) { + EXPECT_EQ(flow_calc(true, 0.5)(0.5), 1.0); + EXPECT_EQ(flow_calc(true, 0.5)(0.5), 1.0); + EXPECT_EQ(flow_calc(true, 0.5)(0.5), 1.0); + EXPECT_EQ(first_flow_calc(true, 0.5)(0.5), 1.0); + EXPECT_EQ(full_flow_calc(true, 0.5)(0.5), 1.0); +} + TEST(FlowTest, flow_cost) { std::vector data = {{0.4, 1.1, 0.6}, {0.7, 1.2, 0.5}, {0.2, 1.3, 0.4}}; EXPECT_DOUBLE_EQ(ordered_cost_of(data, false), 1.1 + 0.4*1.2 + 0.4*0.7*1.3); diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index 2f69c45d418..f3539c6989a 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -481,6 +481,12 @@ IntermediateBlueprint::count_termwise_nodes(const UnpackInfo &unpack) const return termwise_nodes; } +FlowCalc +IntermediateBlueprint::make_flow_calc(bool strict, double flow) const +{ + return full_flow_calc(strict, flow); +} + IntermediateBlueprint::IndexList IntermediateBlueprint::find(const IPredicate & pred) const { @@ -538,13 +544,6 @@ IntermediateBlueprint::calculateState() const return state; } -double -IntermediateBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate) const -{ - (void) child; - return hit_rate; -} - bool IntermediateBlueprint::should_do_termwise_eval(const UnpackInfo &unpack, double match_limit) const { @@ 
-648,11 +647,11 @@ IntermediateBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const void IntermediateBlueprint::fetchPostings(const ExecuteInfo &execInfo) { - double nextHitRate = execInfo.hit_rate(); + FlowCalc flow_calc = make_flow_calc(execInfo.is_strict(), execInfo.hit_rate()); for (size_t i = 0; i < _children.size(); ++i) { Blueprint & child = *_children[i]; + double nextHitRate = flow_calc(child.estimate()); child.fetchPostings(ExecuteInfo::create(execInfo.is_strict() && inheritStrict(i), nextHitRate, execInfo)); - nextHitRate = computeNextHitRate(child, nextHitRate); } } diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index 439eff680ec..395512d84cc 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -362,7 +362,7 @@ private: bool infer_want_global_filter() const; size_t count_termwise_nodes(const UnpackInfo &unpack) const; - virtual double computeNextHitRate(const Blueprint & child, double hit_rate) const; + virtual FlowCalc make_flow_calc(bool strict, double flow) const; protected: // returns an empty collection if children have empty or diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h index cfbb28b190f..f31122166d9 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include // Model how boolean result decisions flow through intermediate nodes // of different types based on relative estimates for sub-expressions @@ -280,4 +280,38 @@ public: } }; +using FlowCalc = std::function; + +template +FlowCalc flow_calc(bool strict, double non_strict_rate) { + FLOW flow = strict ? 
FLOW(true) : FLOW(non_strict_rate); + return [flow](double est) mutable noexcept { + double next_flow = flow.flow(); + flow.add(est); + return next_flow; + }; +} + +inline FlowCalc first_flow_calc(bool strict, double flow) { + if (strict) { + flow = 1.0; + } + bool first = true; + return [flow,first](double est) mutable noexcept { + double next_flow = flow; + if (first) { + flow *= est; + first = false; + } + return next_flow; + }; +} + +inline FlowCalc full_flow_calc(bool strict, double flow) { + if (strict) { + flow = 1.0; + } + return [flow](double) noexcept { return flow; }; +} + } diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index 993639becf2..6faa4ddf147 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -300,14 +300,10 @@ AndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const return create_and_filter(get_children(), strict, constraint); } -double -AndBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate) const { - return hit_rate * child.estimate(); -} - -double -OrBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate) const { - return hit_rate * (1.0 - child.estimate()); +FlowCalc +AndBlueprint::make_flow_calc(bool strict, double flow) const +{ + return flow_calc(strict, flow); } //----------------------------------------------------------------------------- @@ -404,6 +400,12 @@ OrBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const return create_or_filter(get_children(), strict, constraint); } +FlowCalc +OrBlueprint::make_flow_calc(bool strict, double flow) const +{ + return flow_calc(strict, flow); +} + uint8_t OrBlueprint::calculate_cost_tier() const { diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h 
b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h index 1da70b4fa70..25586022535 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h @@ -56,7 +56,7 @@ public: SearchIterator::UP createFilterSearch(bool strict, FilterConstraint constraint) const override; private: - double computeNextHitRate(const Blueprint & child, double hit_rate) const override; + virtual FlowCalc make_flow_calc(bool strict, double flow) const override; }; //----------------------------------------------------------------------------- @@ -81,7 +81,7 @@ public: SearchIterator::UP createFilterSearch(bool strict, FilterConstraint constraint) const override; private: - double computeNextHitRate(const Blueprint & child, double hit_rate) const override; + FlowCalc make_flow_calc(bool strict, double flow) const override; uint8_t calculate_cost_tier() const override; }; -- cgit v1.2.3