diff options
author | Håvard Pettersen <havardpe@yahooinc.com> | 2024-02-23 09:41:04 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@yahooinc.com> | 2024-02-23 12:23:53 +0000 |
commit | fbc08038079b6079fcc12b1f54eba4d59acabe83 (patch) | |
tree | 218e277d83da7794b14ce449066df371ff4fc394 | |
parent | 10267c6347197b5e9731d0a1e9169d74608178f7 (diff) |
know the in-flow when sorting blueprints
Also added the option and tagging mechanism needed to force selected
blueprints to be strict even in a non-strict context, as well as
calculating how much time doing so saves.
13 files changed, 171 insertions, 112 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp index 5ade0a44b8a..1d7a693b1c9 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp @@ -200,7 +200,8 @@ Query::reserveHandles(const IRequestContext & requestContext, ISearchContext &co void Query::optimize(bool sort_by_cost) { - _blueprint = Blueprint::optimize_and_sort(std::move(_blueprint), true, sort_by_cost); + auto opts = Blueprint::Options::all().sort_by_cost(sort_by_cost); + _blueprint = Blueprint::optimize_and_sort(std::move(_blueprint), true, opts); LOG(debug, "optimized blueprint:\n%s\n", _blueprint->asString().c_str()); } @@ -222,7 +223,8 @@ Query::handle_global_filter(const IRequestContext & requestContext, uint32_t doc } // optimized order may change after accounting for global filter: trace.addEvent(5, "Optimize query execution plan to account for global filter"); - _blueprint = Blueprint::optimize_and_sort(std::move(_blueprint), true, sort_by_cost); + auto opts = Blueprint::Options::all().sort_by_cost(sort_by_cost); + _blueprint = Blueprint::optimize_and_sort(std::move(_blueprint), true, opts); LOG(debug, "blueprint after handle_global_filter:\n%s\n", _blueprint->asString().c_str()); // strictness may change if optimized order changed: fetchPostings(ExecuteInfo::create(true, 1.0, requestContext.getDoom(), requestContext.thread_bundle())); diff --git a/searchlib/src/tests/nearsearch/nearsearch_test.cpp b/searchlib/src/tests/nearsearch/nearsearch_test.cpp index 95701e59444..6f7cf85258b 100644 --- a/searchlib/src/tests/nearsearch/nearsearch_test.cpp +++ b/searchlib/src/tests/nearsearch/nearsearch_test.cpp @@ -229,7 +229,8 @@ Test::testNearSearch(MyQuery &query, uint32_t matchId) near_b->addChild(query.getTerm(i).make_blueprint(fieldId, i)); } bp->setDocIdLimit(1000); - bp = search::queryeval::Blueprint::optimize_and_sort(std::move(bp), true, 
true); + auto opts = search::queryeval::Blueprint::Options::all(); + bp = search::queryeval::Blueprint::optimize_and_sort(std::move(bp), true, opts); bp->fetchPostings(search::queryeval::ExecuteInfo::TRUE); search::fef::MatchData::UP md(layout.createMatchData()); search::queryeval::SearchIterator::UP near = bp->createSearch(*md, true); diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp index 2a21d66c090..1af9ee6cff7 100644 --- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp @@ -17,13 +17,15 @@ using namespace search::fef; namespace { +auto opts = Blueprint::Options::all(); + //----------------------------------------------------------------------------- class MyOr : public IntermediateBlueprint { private: - FlowCalc make_flow_calc(bool strict, double flow) const override { - return flow_calc<OrFlow>(strict, flow); + FlowCalc make_flow_calc(InFlow in_flow) const override { + return flow_calc<OrFlow>(in_flow); } public: FlowStats calculate_flow_stats(uint32_t) const final { @@ -451,7 +453,7 @@ TEST_F("testChildAndNotCollapsing", Fixture) ); TEST_DO(f.check_not_equal(*sorted, *unsorted)); unsorted->setDocIdLimit(1000); - unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, true); + unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, opts); TEST_DO(f.check_equal(*sorted, *unsorted)); } @@ -491,7 +493,7 @@ TEST_F("testChildAndCollapsing", Fixture) TEST_DO(f.check_not_equal(*sorted, *unsorted)); unsorted->setDocIdLimit(1000); - unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, true); + unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, opts); TEST_DO(f.check_equal(*sorted, *unsorted)); } @@ -530,10 +532,9 @@ TEST_F("testChildOrCollapsing", Fixture) ); TEST_DO(f.check_not_equal(*sorted, *unsorted)); unsorted->setDocIdLimit(1000); - // we sort non-strict 
here since the default costs of 1/est for - // non-strict/strict leaf iterators makes the order of iterators - // under a strict OR irrelevant. - unsorted = Blueprint::optimize_and_sort(std::move(unsorted), false, true); + // we sort non-strict here since a strict OR does not have a + // deterministic sort order. + unsorted = Blueprint::optimize_and_sort(std::move(unsorted), false, opts); TEST_DO(f.check_equal(*sorted, *unsorted)); } @@ -577,7 +578,7 @@ TEST_F("testChildSorting", Fixture) TEST_DO(f.check_not_equal(*sorted, *unsorted)); unsorted->setDocIdLimit(1000); - unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, true); + unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, opts); TEST_DO(f.check_equal(*sorted, *unsorted)); } diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index 31db731a598..f192ea93b0e 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -132,7 +132,8 @@ TEST("test AndNot Blueprint") { template <typename BP> void optimize(std::unique_ptr<BP> &ref, bool strict) { - auto optimized = Blueprint::optimize_and_sort(std::move(ref), strict, true); + auto opts = Blueprint::Options::all(); + auto optimized = Blueprint::optimize_and_sort(std::move(ref), strict, opts); ref.reset(dynamic_cast<BP*>(optimized.get())); ASSERT_TRUE(ref); optimized.release(); @@ -568,9 +569,10 @@ optimize_and_compare(Blueprint::UP top, Blueprint::UP expect, bool strict = true top->setDocIdLimit(1000); expect->setDocIdLimit(1000); TEST_DO(compare(*top, *expect, false)); - top = Blueprint::optimize_and_sort(std::move(top), strict, sort_by_cost); + auto opts = Blueprint::Options::all().sort_by_cost(sort_by_cost); + top = Blueprint::optimize_and_sort(std::move(top), strict, opts); TEST_DO(compare(*top, *expect, true)); - expect 
= Blueprint::optimize_and_sort(std::move(expect), strict, sort_by_cost); + expect = Blueprint::optimize_and_sort(std::move(expect), strict, opts); TEST_DO(compare(*expect, *top, true)); } @@ -699,11 +701,12 @@ TEST("test empty root node optimization and safeness") { //------------------------------------------------------------------------- auto expect_up = std::make_unique<EmptyBlueprint>(); - compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top1), true, true), true); - compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top2), true, true), true); - compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top3), true, true), true); - compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top4), true, true), true); - compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top5), true, true), true); + auto opts = Blueprint::Options::all(); + compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top1), true, opts), true); + compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top2), true, opts), true); + compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top3), true, opts), true); + compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top4), true, opts), true); + compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top5), true, opts), true); } TEST("and with one empty child is optimized away") { @@ -711,7 +714,8 @@ TEST("and with one empty child is optimized away") { Blueprint::UP top = ap((new SourceBlenderBlueprint(*selector))-> addChild(ap(MyLeafSpec(10).create())). addChild(addLeafs(std::make_unique<AndBlueprint>(), {{0, true}, 10, 20}))); - top = Blueprint::optimize_and_sort(std::move(top), true, true); + auto opts = Blueprint::Options::all(); + top = Blueprint::optimize_and_sort(std::move(top), true, opts); Blueprint::UP expect_up(ap((new SourceBlenderBlueprint(*selector))-> addChild(ap(MyLeafSpec(10).create())). 
addChild(std::make_unique<EmptyBlueprint>()))); @@ -888,8 +892,9 @@ TEST("require that replaced blueprints retain source id") { addChild(ap(MyLeafSpec(30).create()->setSourceId(55))))); Blueprint::UP expect2_up(ap(MyLeafSpec(30).create()->setSourceId(42))); //------------------------------------------------------------------------- - top1_up = Blueprint::optimize_and_sort(std::move(top1_up), true, true); - top2_up = Blueprint::optimize_and_sort(std::move(top2_up), true, true); + auto opts = Blueprint::Options::all(); + top1_up = Blueprint::optimize_and_sort(std::move(top1_up), true, opts); + top2_up = Blueprint::optimize_and_sort(std::move(top2_up), true, opts); compare(*expect1_up, *top1_up, true); compare(*expect2_up, *top2_up, true); EXPECT_EQUAL(13u, top1_up->getSourceId()); @@ -1204,7 +1209,8 @@ TEST("require_that_unpack_optimization_is_not_overruled_by_equiv") { TEST("require that ANDNOT without children is optimized to empty search") { Blueprint::UP top_up = std::make_unique<AndNotBlueprint>(); auto expect_up = std::make_unique<EmptyBlueprint>(); - top_up = Blueprint::optimize_and_sort(std::move(top_up), true, true); + auto opts = Blueprint::Options::all(); + top_up = Blueprint::optimize_and_sort(std::move(top_up), true, opts); compare(*expect_up, *top_up, true); } diff --git a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp index 3e8bc06bfd8..c4d34ab3565 100644 --- a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp +++ b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp @@ -49,7 +49,7 @@ concept ChildCollector = requires(T a, std::unique_ptr<Blueprint> bp) { struct DefaultBlueprint : Blueprint { FlowStats calculate_flow_stats(uint32_t) const override { abort(); } void optimize(Blueprint* &, OptimizePass) override { abort(); } - void sort(bool, bool) override { abort(); } + double sort(InFlow, const Options &) override { abort(); } const 
State &getState() const override { abort(); } void fetchPostings(const ExecuteInfo &) override { abort(); } void freeze() override { abort(); } diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp index 5009a15e438..8b8b6c1282e 100644 --- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp +++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp @@ -141,7 +141,7 @@ TEST(FlowTest, full_and_flow) { {0.4, 0.4, false}, {0.4*0.7, 0.4*0.7, false}, {0.4*0.7*0.2, 0.4*0.7*0.2, false}}); - verify_flow_calc(flow_calc<AndFlow>(strict, 1.0), + verify_flow_calc(flow_calc<AndFlow>(strict), {0.4, 0.7, 0.2}, {1.0, 0.4, 0.4*0.7, 0.4*0.7*0.2}); } } @@ -153,7 +153,7 @@ TEST(FlowTest, partial_and_flow) { {in*0.4, in*0.4, false}, {in*0.4*0.7, in*0.4*0.7, false}, {in*0.4*0.7*0.2, in*0.4*0.7*0.2, false}}); - verify_flow_calc(flow_calc<AndFlow>(false, in), + verify_flow_calc(flow_calc<AndFlow>(in), {0.4, 0.7, 0.2}, {in*1.0, in*0.4, in*0.4*0.7, in*0.4*0.7*0.2}); } } @@ -164,14 +164,14 @@ TEST(FlowTest, full_or_flow) { {0.6, 1.0-0.6, false}, {0.6*0.3, 1.0-0.6*0.3, false}, {0.6*0.3*0.8, 1.0-0.6*0.3*0.8, false}}); - verify_flow_calc(flow_calc<OrFlow>(false, 1.0), + verify_flow_calc(flow_calc<OrFlow>(1.0), {0.4, 0.7, 0.2}, {1.0, 0.6, 0.6*0.3, 0.6*0.3*0.8}); verify_flow(OrFlow(true), {0.4, 0.7, 0.2}, {{1.0, 0.0, true}, {1.0, 1.0-0.6, true}, {1.0, 1.0-0.6*0.3, true}, {1.0, 1.0-0.6*0.3*0.8, true}}); - verify_flow_calc(flow_calc<OrFlow>(true, 1.0), + verify_flow_calc(flow_calc<OrFlow>(true), {0.4, 0.7, 0.2}, {1.0, 1.0, 1.0, 1.0}); } @@ -182,7 +182,7 @@ TEST(FlowTest, partial_or_flow) { {in*0.6, 1.0-in*0.6, false}, {in*0.6*0.3, 1.0-in*0.6*0.3, false}, {in*0.6*0.3*0.8, 1.0-in*0.6*0.3*0.8, false}}); - verify_flow_calc(flow_calc<OrFlow>(false, in), + verify_flow_calc(flow_calc<OrFlow>(in), {0.4, 0.7, 0.2}, {in, in*0.6, in*0.6*0.3, in*0.6*0.3*0.8}); } } @@ -194,7 +194,7 @@ TEST(FlowTest, full_and_not_flow) { 
{0.4, 0.4, false}, {0.4*0.3, 0.4*0.3, false}, {0.4*0.3*0.8, 0.4*0.3*0.8, false}}); - verify_flow_calc(flow_calc<AndNotFlow>(strict, 1.0), + verify_flow_calc(flow_calc<AndNotFlow>(strict), {0.4, 0.7, 0.2}, {1.0, 0.4, 0.4*0.3, 0.4*0.3*0.8}); } } @@ -206,45 +206,52 @@ TEST(FlowTest, partial_and_not_flow) { {in*0.4, in*0.4, false}, {in*0.4*0.3, in*0.4*0.3, false}, {in*0.4*0.3*0.8, in*0.4*0.3*0.8, false}}); - verify_flow_calc(flow_calc<AndNotFlow>(false, in), + verify_flow_calc(flow_calc<AndNotFlow>(in), {0.4, 0.7, 0.2}, {in, in*0.4, in*0.4*0.3, in*0.4*0.3*0.8}); } } TEST(FlowTest, full_first_flow_calc) { for (bool strict: {false, true}) { - verify_flow_calc(first_flow_calc(strict, 1.0), + verify_flow_calc(first_flow_calc(strict), {0.4, 0.7, 0.2}, {1.0, 0.4, 0.4, 0.4}); } } TEST(FlowTest, partial_first_flow_calc) { for (double in: {1.0, 0.5, 0.25}) { - verify_flow_calc(first_flow_calc(false, in), + verify_flow_calc(first_flow_calc(in), {0.4, 0.7, 0.2}, {in, in*0.4, in*0.4, in*0.4}); } } TEST(FlowTest, full_full_flow_calc) { for (bool strict: {false, true}) { - verify_flow_calc(full_flow_calc(strict, 1.0), + verify_flow_calc(full_flow_calc(strict), {0.4, 0.7, 0.2}, {1.0, 1.0, 1.0, 1.0}); } } TEST(FlowTest, partial_full_flow_calc) { for (double in: {1.0, 0.5, 0.25}) { - verify_flow_calc(full_flow_calc(false, in), + verify_flow_calc(full_flow_calc(in), {0.4, 0.7, 0.2}, {in, in, in, in}); } } -TEST(FlowTest, flow_calc_strictness_overrides_rate) { - EXPECT_EQ(flow_calc<AndFlow>(true, 0.5)(0.5), 1.0); - EXPECT_EQ(flow_calc<OrFlow>(true, 0.5)(0.5), 1.0); - EXPECT_EQ(flow_calc<AndNotFlow>(true, 0.5)(0.5), 1.0); - EXPECT_EQ(first_flow_calc(true, 0.5)(0.5), 1.0); - EXPECT_EQ(full_flow_calc(true, 0.5)(0.5), 1.0); +TEST(FlowTest, in_flow_strict_vs_rate_interaction) { + EXPECT_EQ(InFlow(true).strict(), true); + EXPECT_EQ(InFlow(true).rate(), 1.0); + EXPECT_EQ(InFlow(false).strict(), false); + EXPECT_EQ(InFlow(false).rate(), 1.0); + EXPECT_EQ(InFlow(0.5).strict(), false); + 
EXPECT_EQ(InFlow(0.5).rate(), 0.5); + EXPECT_EQ(InFlow(true, 0.5).strict(), true); + EXPECT_EQ(InFlow(true, 0.5).rate(), 1.0); + EXPECT_EQ(InFlow(false, 0.5).strict(), false); + EXPECT_EQ(InFlow(false, 0.5).rate(), 0.5); + EXPECT_EQ(InFlow(-1.0).strict(), false); + EXPECT_EQ(InFlow(-1.0).rate(), 0.0); } TEST(FlowTest, flow_cost) { diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp index 6747fed888c..bdc89363b22 100644 --- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp +++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp @@ -342,7 +342,8 @@ non_strict_search(Blueprint& blueprint, MatchData& md, uint32_t docid_limit, dou BenchmarkResult benchmark_search(Blueprint::UP blueprint, uint32_t docid_limit, bool strict, double filter_hit_ratio) { - blueprint->sort(strict, true); + auto opts = Blueprint::Options::all(); + blueprint->sort(strict, opts); blueprint->fetchPostings(ExecuteInfo::createForTest(strict)); // Note: All blueprints get the same TermFieldMatchData instance. // This is OK as long as we don't do unpacking and only use 1 thread. 
diff --git a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp index c9fcb472b68..64f4fafd2d1 100644 --- a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp +++ b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp @@ -46,7 +46,8 @@ std::unique_ptr<SameElementBlueprint> make_blueprint(const std::vector<FakeResul } Blueprint::UP finalize(Blueprint::UP bp, bool strict) { - Blueprint::UP result = Blueprint::optimize_and_sort(std::move(bp), true, true); + auto opts = Blueprint::Options::all(); + Blueprint::UP result = Blueprint::optimize_and_sort(std::move(bp), true, opts); result->fetchPostings(ExecuteInfo::createForTest(strict)); result->freeze(); return result; diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index f3539c6989a..5a225328003 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -121,6 +121,7 @@ Blueprint::Blueprint() noexcept _flow_stats(0.0, 0.0, 0.0), _sourceId(0xffffffff), _docid_limit(0), + _force_strict(false), _frozen(false) { } @@ -481,12 +482,6 @@ IntermediateBlueprint::count_termwise_nodes(const UnpackInfo &unpack) const return termwise_nodes; } -FlowCalc -IntermediateBlueprint::make_flow_calc(bool strict, double flow) const -{ - return full_flow_calc(strict, flow); -} - IntermediateBlueprint::IndexList IntermediateBlueprint::find(const IPredicate & pred) const { @@ -574,13 +569,17 @@ IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass) maybe_eliminate_self(self, get_replacement()); } -void -IntermediateBlueprint::sort(bool strict, bool sort_by_cost) +double +IntermediateBlueprint::sort(InFlow in_flow, const Options &opts) { - sort(_children, strict, sort_by_cost); + auto flow_calc = make_flow_calc(in_flow); + sort(_children, in_flow.strict(), opts.sort_by_cost()); for 
(size_t i = 0; i < _children.size(); ++i) { - _children[i]->sort(strict && inheritStrict(i), sort_by_cost); + double next_rate = flow_calc(_children[i]->estimate()); + _children[i]->sort(InFlow(in_flow.strict() && inheritStrict(i), next_rate), opts); } + // TODO: better cost estimate (due to known in-flow and eagerness) + return in_flow.strict() ? strict_cost() : in_flow.rate() * cost(); } void @@ -647,7 +646,7 @@ IntermediateBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const void IntermediateBlueprint::fetchPostings(const ExecuteInfo &execInfo) { - FlowCalc flow_calc = make_flow_calc(execInfo.is_strict(), execInfo.hit_rate()); + FlowCalc flow_calc = make_flow_calc(InFlow(execInfo.is_strict(), execInfo.hit_rate())); for (size_t i = 0; i < _children.size(); ++i) { Blueprint & child = *_children[i]; double nextHitRate = flow_calc(child.estimate()); @@ -766,9 +765,11 @@ LeafBlueprint::optimize(Blueprint* &self, OptimizePass pass) maybe_eliminate_self(self, get_replacement()); } -void -LeafBlueprint::sort(bool, bool) +double +LeafBlueprint::sort(InFlow in_flow, const Options &) { + // TODO: better cost estimate (due to known in-flow and eagerness) + return in_flow.strict() ? 
strict_cost() : in_flow.rate() * cost(); } void diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index c24790ddcf1..0c08e6aedf5 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -55,6 +55,29 @@ public: enum class OptimizePass { FIRST, LAST }; + class Options { + private: + bool _sort_by_cost; + bool _allow_force_strict; + public: + constexpr Options() noexcept + : _sort_by_cost(false), + _allow_force_strict(false) {} + constexpr bool sort_by_cost() const noexcept { return _sort_by_cost; } + constexpr Options &sort_by_cost(bool value) noexcept { + _sort_by_cost = value; + return *this; + } + constexpr bool allow_force_strict() const noexcept { return _allow_force_strict; } + constexpr Options &allow_force_strict(bool value) noexcept { + _allow_force_strict = value; + return *this; + } + static constexpr Options all() noexcept { + return Options().sort_by_cost(true).allow_force_strict(true); + } + }; + struct HitEstimate { uint32_t estHits; bool empty; @@ -182,6 +205,7 @@ private: FlowStats _flow_stats; uint32_t _sourceId; uint32_t _docid_limit; + bool _force_strict; bool _frozen; protected: @@ -224,10 +248,10 @@ public: uint32_t get_docid_limit() const noexcept { return _docid_limit; } static Blueprint::UP optimize(Blueprint::UP bp); - virtual void sort(bool strict, bool sort_by_cost) = 0; - static Blueprint::UP optimize_and_sort(Blueprint::UP bp, bool strict, bool sort_by_cost) { + virtual double sort(InFlow in_flow, const Options &opts) = 0; + static Blueprint::UP optimize_and_sort(Blueprint::UP bp, InFlow in_flow, const Options &opts) { auto result = optimize(std::move(bp)); - result->sort(strict, sort_by_cost); + result->sort(in_flow, opts); return result; } virtual void optimize(Blueprint* &self, OptimizePass pass) = 0; @@ -362,7 +386,7 @@ private: bool infer_want_global_filter() const; size_t 
count_termwise_nodes(const UnpackInfo &unpack) const; - virtual FlowCalc make_flow_calc(bool strict, double flow) const = 0; + virtual FlowCalc make_flow_calc(InFlow in_flow) const = 0; protected: // returns an empty collection if children have empty or @@ -385,7 +409,7 @@ public: void setDocIdLimit(uint32_t limit) noexcept final; void optimize(Blueprint* &self, OptimizePass pass) final; - void sort(bool strict, bool sort_by_cost) override; + double sort(InFlow in_flow, const Options &opts) override; void set_global_filter(const GlobalFilter &global_filter, double estimated_hit_ratio) override; IndexList find(const IPredicate & check) const; @@ -422,7 +446,7 @@ private: State _state; protected: void optimize(Blueprint* &self, OptimizePass pass) final; - void sort(bool strict, bool sort_by_cost) override; + double sort(InFlow in_flow, const Options &opts) override; void setEstimate(HitEstimate est) { _state.estimate(est); notifyChange(); diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h index 4548baf7545..ade2516b509 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow.h @@ -11,6 +11,23 @@ namespace search::queryeval { +// Encapsulate information about strictness and in-flow in a structure +// for convenient parameter passing. We do not need an explicit value +// in the strict case since strict basically means the receiving end +// will eventually decide the actual flow. We use a rate of 1.0 for +// strict flow to indicate that the corpus is not reduced externally. +class InFlow { +private: + double _value; +public: + constexpr InFlow(bool strict, double rate) noexcept + : _value(strict ? 
-1.0 : std::max(rate, 0.0)) {} + constexpr InFlow(bool strict) noexcept : InFlow(strict, 1.0) {} + constexpr InFlow(double rate) noexcept : InFlow(false, rate) {} + constexpr bool strict() noexcept { return _value < 0.0; } + constexpr double rate() noexcept { return strict() ? 1.0 : _value; } +}; + struct FlowStats { double estimate; double cost; @@ -122,16 +139,13 @@ void sort_partial(ADAPTER adapter, T &children, size_t offset) { template <typename ADAPTER, typename T, typename F> double ordered_cost_of(ADAPTER adapter, const T &children, F flow) { - double cost = 0.0; + double total_cost = 0.0; for (const auto &child: children) { - if (flow.strict()) { - cost += adapter.strict_cost(child); - } else { - cost += flow.flow() * adapter.cost(child); - } + double child_cost = flow.strict() ? adapter.strict_cost(child) : (flow.flow() * adapter.cost(child)); + flow.update_cost(total_cost, child_cost); flow.add(adapter.estimate(child)); } - return cost; + return total_cost; } template <typename ADAPTER, typename T> @@ -188,8 +202,7 @@ private: bool _strict; bool _first; public: - AndFlow(bool strict) noexcept : _flow(1.0), _strict(strict), _first(true) {} - AndFlow(double in) noexcept : _flow(in), _strict(false), _first(true) {} + AndFlow(InFlow flow) noexcept : _flow(flow.rate()), _strict(flow.strict()), _first(true) {} void add(double est) noexcept { _flow *= est; _first = false; @@ -203,6 +216,9 @@ public: double estimate() const noexcept { return _first ? 
0.0 : _flow; } + void update_cost(double &total_cost, double child_cost) noexcept { + total_cost += child_cost; + } static void sort(auto adapter, auto &children, bool strict) { flow::sort<flow::MinAndCost>(adapter, children); if (strict && children.size() > 1) { @@ -225,8 +241,7 @@ private: bool _strict; bool _first; public: - OrFlow(bool strict) noexcept : _flow(1.0), _strict(strict), _first(true) {} - OrFlow(double in) noexcept : _flow(in), _strict(false), _first(true) {} + OrFlow(InFlow flow) noexcept : _flow(flow.rate()), _strict(flow.strict()), _first(true) {} void add(double est) noexcept { _flow *= (1.0 - est); _first = false; @@ -240,6 +255,9 @@ public: double estimate() const noexcept { return _first ? 0.0 : (1.0 - _flow); } + void update_cost(double &total_cost, double child_cost) noexcept { + total_cost += child_cost; + } static void sort(auto adapter, auto &children, bool strict) { if (!strict) { flow::sort<flow::MinOrCost>(adapter, children); @@ -256,8 +274,7 @@ private: bool _strict; bool _first; public: - AndNotFlow(bool strict) noexcept : _flow(1.0), _strict(strict), _first(true) {} - AndNotFlow(double in) noexcept : _flow(in), _strict(false), _first(true) {} + AndNotFlow(InFlow flow) noexcept : _flow(flow.rate()), _strict(flow.strict()), _first(true) {} void add(double est) noexcept { _flow *= _first ? est : (1.0 - est); _first = false; @@ -271,6 +288,9 @@ public: double estimate() const noexcept { return _first ? 0.0 : _flow; } + void update_cost(double &total_cost, double child_cost) noexcept { + total_cost += child_cost; + } static void sort(auto adapter, auto &children, bool) { flow::sort_partial<flow::MinOrCost>(adapter, children, 1); } @@ -282,21 +302,18 @@ public: using FlowCalc = std::function<double(double)>; template <typename FLOW> -FlowCalc flow_calc(bool strict, double non_strict_rate) { - FLOW flow = strict ? 
FLOW(true) : FLOW(non_strict_rate); - return [flow](double est) mutable noexcept { +FlowCalc flow_calc(InFlow in_flow) { + return [flow=FLOW(in_flow)](double est) mutable noexcept { double next_flow = flow.flow(); flow.add(est); return next_flow; }; } -inline FlowCalc first_flow_calc(bool strict, double flow) { - if (strict) { - flow = 1.0; - } +inline FlowCalc first_flow_calc(InFlow in_flow) { bool first = true; - return [flow,first](double est) mutable noexcept { + double flow = in_flow.rate(); + return [first,flow](double est) mutable noexcept { double next_flow = flow; if (first) { flow *= est; @@ -306,10 +323,8 @@ inline FlowCalc first_flow_calc(bool strict, double flow) { }; } -inline FlowCalc full_flow_calc(bool strict, double flow) { - if (strict) { - flow = 1.0; - } +inline FlowCalc full_flow_calc(InFlow in_flow) { + double flow = in_flow.rate(); return [flow](double) noexcept { return flow; }; } diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index b8bf7d40655..9d0acc50ce5 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -208,9 +208,9 @@ AndNotBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) co FlowCalc -AndNotBlueprint::make_flow_calc(bool strict, double flow) const +AndNotBlueprint::make_flow_calc(InFlow in_flow) const { - return flow_calc<AndNotFlow>(strict, flow); + return flow_calc<AndNotFlow>(in_flow); } //----------------------------------------------------------------------------- @@ -308,9 +308,9 @@ AndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const } FlowCalc -AndBlueprint::make_flow_calc(bool strict, double flow) const +AndBlueprint::make_flow_calc(InFlow in_flow) const { - return flow_calc<AndFlow>(strict, flow); + return flow_calc<AndFlow>(in_flow); } 
//----------------------------------------------------------------------------- @@ -408,9 +408,9 @@ OrBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const } FlowCalc -OrBlueprint::make_flow_calc(bool strict, double flow) const +OrBlueprint::make_flow_calc(InFlow in_flow) const { - return flow_calc<OrFlow>(strict, flow); + return flow_calc<OrFlow>(in_flow); } uint8_t @@ -426,9 +426,9 @@ OrBlueprint::calculate_cost_tier() const //----------------------------------------------------------------------------- FlowCalc -WeakAndBlueprint::make_flow_calc(bool strict, double flow) const +WeakAndBlueprint::make_flow_calc(InFlow in_flow) const { - return flow_calc<OrFlow>(strict, flow); + return flow_calc<OrFlow>(in_flow); } WeakAndBlueprint::~WeakAndBlueprint() = default; @@ -503,9 +503,9 @@ WeakAndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) c //----------------------------------------------------------------------------- FlowCalc -NearBlueprint::make_flow_calc(bool strict, double flow) const +NearBlueprint::make_flow_calc(InFlow in_flow) const { - return flow_calc<AndFlow>(strict, flow); + return flow_calc<AndFlow>(in_flow); } FlowStats @@ -574,9 +574,9 @@ NearBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) cons //----------------------------------------------------------------------------- FlowCalc -ONearBlueprint::make_flow_calc(bool strict, double flow) const +ONearBlueprint::make_flow_calc(InFlow in_flow) const { - return flow_calc<AndFlow>(strict, flow); + return flow_calc<AndFlow>(in_flow); } FlowStats @@ -735,17 +735,17 @@ RankBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) cons } FlowCalc -RankBlueprint::make_flow_calc(bool strict, double flow) const +RankBlueprint::make_flow_calc(InFlow in_flow) const { - return first_flow_calc(strict, flow); + return first_flow_calc(in_flow); } //----------------------------------------------------------------------------- FlowCalc 
-SourceBlenderBlueprint::make_flow_calc(bool strict, double flow) const +SourceBlenderBlueprint::make_flow_calc(InFlow in_flow) const { - return full_flow_calc(strict, flow); + return full_flow_calc(in_flow); } SourceBlenderBlueprint::SourceBlenderBlueprint(const ISourceSelector &selector) noexcept diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h index 0095095dfe8..028898d3f47 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h @@ -29,7 +29,7 @@ public: SearchIterator::UP createFilterSearch(bool strict, FilterConstraint constraint) const override; private: - FlowCalc make_flow_calc(bool strict, double flow) const override; + FlowCalc make_flow_calc(InFlow in_flow) const override; uint8_t calculate_cost_tier() const override { return (childCnt() > 0) ? get_children()[0]->getState().cost_tier() : State::COST_TIER_NORMAL; } @@ -57,7 +57,7 @@ public: SearchIterator::UP createFilterSearch(bool strict, FilterConstraint constraint) const override; private: - FlowCalc make_flow_calc(bool strict, double flow) const override; + FlowCalc make_flow_calc(InFlow in_flow) const override; }; //----------------------------------------------------------------------------- @@ -82,7 +82,7 @@ public: SearchIterator::UP createFilterSearch(bool strict, FilterConstraint constraint) const override; private: - FlowCalc make_flow_calc(bool strict, double flow) const override; + FlowCalc make_flow_calc(InFlow in_flow) const override; uint8_t calculate_cost_tier() const override; }; @@ -94,7 +94,7 @@ private: uint32_t _n; std::vector<uint32_t> _weights; - FlowCalc make_flow_calc(bool strict, double flow) const override; + FlowCalc make_flow_calc(InFlow in_flow) const override; public: FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> 
&data) const override; @@ -125,7 +125,7 @@ class NearBlueprint : public IntermediateBlueprint private: uint32_t _window; - FlowCalc make_flow_calc(bool strict, double flow) const override; + FlowCalc make_flow_calc(InFlow in_flow) const override; public: FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; @@ -148,7 +148,7 @@ class ONearBlueprint : public IntermediateBlueprint private: uint32_t _window; - FlowCalc make_flow_calc(bool strict, double flow) const override; + FlowCalc make_flow_calc(InFlow in_flow) const override; public: FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; @@ -186,7 +186,7 @@ public: return (childCnt() > 0) ? get_children()[0]->getState().cost_tier() : State::COST_TIER_NORMAL; } private: - FlowCalc make_flow_calc(bool strict, double flow) const override; + FlowCalc make_flow_calc(InFlow in_flow) const override; }; //----------------------------------------------------------------------------- @@ -196,7 +196,7 @@ class SourceBlenderBlueprint final : public IntermediateBlueprint private: const ISourceSelector &_selector; - FlowCalc make_flow_calc(bool strict, double flow) const override; + FlowCalc make_flow_calc(InFlow in_flow) const override; public: explicit SourceBlenderBlueprint(const ISourceSelector &selector) noexcept; ~SourceBlenderBlueprint() override; |