diff options
15 files changed, 359 insertions, 255 deletions
diff --git a/searchcore/src/tests/proton/documentmetastore/lid_allocator/lid_allocator_test.cpp b/searchcore/src/tests/proton/documentmetastore/lid_allocator/lid_allocator_test.cpp index e136e491f05..4aefa10f5f2 100644 --- a/searchcore/src/tests/proton/documentmetastore/lid_allocator/lid_allocator_test.cpp +++ b/searchcore/src/tests/proton/documentmetastore/lid_allocator/lid_allocator_test.cpp @@ -180,9 +180,10 @@ TEST_F(LidAllocatorTest, whitelist_blueprint_can_maximize_relative_estimate) activate_lids({ 1, 2, 3, 4 }, true); // the number of hits are overestimated based on the number of // documents that could be active (100 in this test fixture) - EXPECT_EQ(make_whitelist_blueprint(1000)->estimate(), 0.1); - EXPECT_EQ(make_whitelist_blueprint(200)->estimate(), 0.5); - EXPECT_EQ(make_whitelist_blueprint(5)->estimate(), 1.0); + // NOTE: optimize must be called in order to calculate the relative estimate + EXPECT_EQ(Blueprint::optimize(make_whitelist_blueprint(1000))->estimate(), 0.1); + EXPECT_EQ(Blueprint::optimize(make_whitelist_blueprint(200))->estimate(), 0.5); + EXPECT_EQ(Blueprint::optimize(make_whitelist_blueprint(5))->estimate(), 1.0); } class LidAllocatorPerformanceTest : public LidAllocatorTest, diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp index a93e8fbbddc..5ade0a44b8a 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp @@ -200,8 +200,7 @@ Query::reserveHandles(const IRequestContext & requestContext, ISearchContext &co void Query::optimize(bool sort_by_cost) { - (void) sort_by_cost; - _blueprint = Blueprint::optimize(std::move(_blueprint), sort_by_cost); + _blueprint = Blueprint::optimize_and_sort(std::move(_blueprint), true, sort_by_cost); LOG(debug, "optimized blueprint:\n%s\n", _blueprint->asString().c_str()); } @@ -223,7 +222,7 @@ Query::handle_global_filter(const IRequestContext & requestContext, uint32_t doc } // optimized order may change after accounting for global filter: trace.addEvent(5, "Optimize query execution plan to account for global filter"); - _blueprint = Blueprint::optimize(std::move(_blueprint), sort_by_cost); + _blueprint = Blueprint::optimize_and_sort(std::move(_blueprint), true, sort_by_cost); LOG(debug, "blueprint after handle_global_filter:\n%s\n", _blueprint->asString().c_str()); // strictness may change if optimized order changed: fetchPostings(ExecuteInfo::create(true, 1.0, requestContext.getDoom(), requestContext.thread_bundle())); diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp index bbd2744119a..90452f1d12b 100644 --- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp @@ -23,12 +23,15 @@ class MyOr : public IntermediateBlueprint { private: public: - double calculate_cost() const final { - return OrFlow::cost_of(get_children()); - } double calculate_relative_estimate() const final { return OrFlow::estimate_of(get_children()); } + double calculate_cost() const final { + return OrFlow::cost_of(get_children(), false); + } + double calculate_strict_cost() const final { + return OrFlow::cost_of(get_children(), true); + } HitEstimate combine(const std::vector<HitEstimate> &data) const override { return max(data); } @@ -37,7 +40,7 @@ public: return mixChildrenFields(); } - void sort(Children &children, bool) const override { + void sort(Children &children, bool, bool) const override { std::sort(children.begin(), children.end(), TieredGreaterEstimate()); } @@ -446,7 +449,7 @@ TEST_F("testChildAndNotCollapsing", Fixture) ); TEST_DO(f.check_not_equal(*sorted, *unsorted)); unsorted->setDocIdLimit(1000); - unsorted = Blueprint::optimize(std::move(unsorted), true); + unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, true); TEST_DO(f.check_equal(*sorted, *unsorted)); } @@ -486,7 +489,7 @@ TEST_F("testChildAndCollapsing", Fixture) TEST_DO(f.check_not_equal(*sorted, *unsorted)); unsorted->setDocIdLimit(1000); - unsorted = Blueprint::optimize(std::move(unsorted), true); + unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, true); TEST_DO(f.check_equal(*sorted, *unsorted)); } @@ -525,7 +528,10 @@ TEST_F("testChildOrCollapsing", Fixture) ); TEST_DO(f.check_not_equal(*sorted, *unsorted)); unsorted->setDocIdLimit(1000); - unsorted = Blueprint::optimize(std::move(unsorted), true); + // we sort non-strict here since the default costs of 1/est for + // non-strict/strict leaf iterators makes the order of iterators + // under a strict OR irrelevant. + unsorted = Blueprint::optimize_and_sort(std::move(unsorted), false, true); TEST_DO(f.check_equal(*sorted, *unsorted)); } @@ -569,7 +575,7 @@ TEST_F("testChildSorting", Fixture) TEST_DO(f.check_not_equal(*sorted, *unsorted)); unsorted->setDocIdLimit(1000); - unsorted = Blueprint::optimize(std::move(unsorted), true); + unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, true); TEST_DO(f.check_equal(*sorted, *unsorted)); } @@ -650,12 +656,13 @@ getExpectedBlueprint() " estimate: HitEstimate {\n" " empty: false\n" " estHits: 9\n" - " relative_estimate: 0.5\n" " cost_tier: 1\n" " tree_size: 2\n" " allow_termwise_eval: false\n" " }\n" - " cost: 1\n" + " relative_estimate: 0\n" + " cost: 0\n" + " strict_cost: 0\n" " sourceId: 4294967295\n" " docid_limit: 0\n" " children: std::vector {\n" @@ -671,12 +678,13 @@ getExpectedBlueprint() " estimate: HitEstimate {\n" " empty: false\n" " estHits: 9\n" - " relative_estimate: 0.5\n" " cost_tier: 1\n" " tree_size: 1\n" " allow_termwise_eval: true\n" " }\n" - " cost: 1\n" + " relative_estimate: 0\n" + " cost: 0\n" + " strict_cost: 0\n" " sourceId: 4294967295\n" " docid_limit: 0\n" " }\n" @@ -702,12 +710,13 @@ getExpectedSlimeBlueprint() { " '[type]': 'HitEstimate'," " empty: false," " estHits: 9," - " relative_estimate: 0.5," " cost_tier: 1," " tree_size: 2," " allow_termwise_eval: false" " }," - " cost: 1.0," + " relative_estimate: 0.0," + " cost: 0.0," + " strict_cost: 0.0," " sourceId: 4294967295," " docid_limit: 0," " children: {" @@ -728,12 +737,13 @@ getExpectedSlimeBlueprint() { " '[type]': 'HitEstimate'," " empty: false," " estHits: 9," - " relative_estimate: 0.5," " cost_tier: 1," " tree_size: 1," " allow_termwise_eval: true" " }," - " cost: 1.0," + " relative_estimate: 0.0," + " cost: 0.0," + " strict_cost: 0.0," " sourceId: 4294967295," " docid_limit: 0" " }" @@ -786,9 +796,9 @@ TEST("requireThatDocIdLimitInjectionWorks") } TEST("Control object sizes") { - EXPECT_EQUAL(40u, sizeof(Blueprint::State)); - EXPECT_EQUAL(40u, sizeof(Blueprint)); - EXPECT_EQUAL(80u, sizeof(LeafBlueprint)); + EXPECT_EQUAL(32u, sizeof(Blueprint::State)); + EXPECT_EQUAL(56u, sizeof(Blueprint)); + EXPECT_EQUAL(96u, sizeof(LeafBlueprint)); } TEST_MAIN() { diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index 856ac2391f8..2cf523b508b 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -77,7 +77,7 @@ void check_sort_order(IntermediateBlueprint &self, BlueprintVector children, std unordered.push_back(child.get()); } // TODO: sort by cost (requires both setDocIdLimit and optimize to be called) - self.sort(children, false); + self.sort(children, true, false); for (size_t i = 0; i < children.size(); ++i) { EXPECT_EQUAL(children[i].get(), unordered[order[i]]); } @@ -130,8 +130,8 @@ TEST("test AndNot Blueprint") { } template <typename BP> -void optimize(std::unique_ptr<BP> &ref) { - auto optimized = Blueprint::optimize(std::move(ref), true); +void optimize(std::unique_ptr<BP> &ref, bool strict) { + auto optimized = Blueprint::optimize_and_sort(std::move(ref), strict, true); ref.reset(dynamic_cast<BP*>(optimized.get())); ASSERT_TRUE(ref); optimized.release(); @@ -144,7 +144,7 @@ TEST("test And propagates updated histestimate") { bp->addChild(ap(MyLeafSpec(200).create<RememberExecuteInfo>()->setSourceId(2))); bp->addChild(ap(MyLeafSpec(2000).create<RememberExecuteInfo>()->setSourceId(2))); bp->setDocIdLimit(5000); - optimize(bp); + optimize(bp, true); bp->fetchPostings(ExecuteInfo::TRUE); EXPECT_EQUAL(3u, bp->childCnt()); for (uint32_t i = 0; i < bp->childCnt(); i++) { @@ -164,7 +164,10 @@ TEST("test Or propagates updated histestimate") { bp->addChild(ap(MyLeafSpec(800).create<RememberExecuteInfo>()->setSourceId(2))); bp->addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2))); bp->setDocIdLimit(5000); - optimize(bp); + // sort OR as non-strict to get expected order. With strict OR, + // the order would be irrelevant since we use the relative + // estimate as strict_cost for leafs. + optimize(bp, false); bp->fetchPostings(ExecuteInfo::TRUE); EXPECT_EQUAL(4u, bp->childCnt()); for (uint32_t i = 0; i < bp->childCnt(); i++) { @@ -519,16 +522,18 @@ void compare(const Blueprint &bp1, const Blueprint &bp2, bool expect_eq) { auto cmp_hook = [expect_eq](const auto &path, const auto &a, const auto &b) { if (!path.empty() && std::holds_alternative<vespalib::stringref>(path.back())) { vespalib::stringref field = std::get<vespalib::stringref>(path.back()); - if (field == "cost") { + // ignore these fields to enable comparing optimized with unoptimized trees + if (field == "relative_estimate" || field == "cost" || field == "strict_cost") { + auto check_value = [&](double value){ + if ((value > 0.0 && value < 1e-6) || (value > 0.0 && value < 1e-6)) { + fprintf(stderr, " small value at %s: %g\n", path_to_str(path).c_str(), + value); + } + }; + check_value(a.asDouble()); + check_value(b.asDouble()); return true; } - if (field == "relative_estimate") { - double a_val = a.asDouble(); - double b_val = b.asDouble(); - if (a_val != 0.0 && b_val != 0.0 && vespalib::approx_equal(a_val, b_val)) { - return true; - } - } } if (expect_eq) { fprintf(stderr, " mismatch at %s: %s vs %s\n", path_to_str(path).c_str(), @@ -548,13 +553,13 @@ void compare(const Blueprint &bp1, const Blueprint &bp2, bool expect_eq) { } void -optimize_and_compare(Blueprint::UP top, Blueprint::UP expect, bool sort_by_cost = true) { +optimize_and_compare(Blueprint::UP top, Blueprint::UP expect, bool strict = true, bool sort_by_cost = true) { top->setDocIdLimit(1000); expect->setDocIdLimit(1000); TEST_DO(compare(*top, *expect, false)); - top = Blueprint::optimize(std::move(top), sort_by_cost); + top = Blueprint::optimize_and_sort(std::move(top), strict, sort_by_cost); TEST_DO(compare(*top, *expect, true)); - expect = Blueprint::optimize(std::move(expect), sort_by_cost); + expect = Blueprint::optimize_and_sort(std::move(expect), strict, sort_by_cost); TEST_DO(compare(*expect, *top, true)); } @@ -614,7 +619,8 @@ TEST_F("test SourceBlender below OR partial optimization", SourceBlenderTestFixt expect->addChild(addLeafsWithSourceId(std::make_unique<SourceBlenderBlueprint>(f.selector_2), {{10, 1}, {20, 2}})); addLeafs(*expect, {3, 2, 1}); - optimize_and_compare(std::move(top), std::move(expect)); + // NOTE: use non-strict cost based sorting for expected order + optimize_and_compare(std::move(top), std::move(expect), false); } TEST_F("test OR replaced by source blender after full optimization", SourceBlenderTestFixture) { @@ -626,7 +632,8 @@ TEST_F("test OR replaced by source blender after full optimization", SourceBlend expect->addChild(addLeafsWithSourceId(2, std::make_unique<OrBlueprint>(), {{2000, 2}, {200, 2}, {20, 2}})); expect->addChild(addLeafsWithSourceId(1, std::make_unique<OrBlueprint>(), {{1000, 1}, {100, 1}, {10, 1}})); - optimize_and_compare(std::move(top), std::move(expect)); + // NOTE: use non-strict cost based sorting for expected order + optimize_and_compare(std::move(top), std::move(expect), false); } TEST_F("test SourceBlender below AND_NOT optimization", SourceBlenderTestFixture) { @@ -681,11 +688,11 @@ TEST("test empty root node optimization and safeness") { //------------------------------------------------------------------------- auto expect_up = std::make_unique<EmptyBlueprint>(); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top1), true)->asString()); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top2), true)->asString()); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top3), true)->asString()); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top4), true)->asString()); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top5), true)->asString()); + compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top1), true, true), true); + compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top2), true, true), true); + compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top3), true, true), true); + compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top4), true, true), true); + compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top5), true, true), true); } TEST("and with one empty child is optimized away") { @@ -693,11 +700,11 @@ TEST("and with one empty child is optimized away") { Blueprint::UP top = ap((new SourceBlenderBlueprint(*selector))-> addChild(ap(MyLeafSpec(10).create())). addChild(addLeafs(std::make_unique<AndBlueprint>(), {{0, true}, 10, 20}))); - top = Blueprint::optimize(std::move(top), true); + top = Blueprint::optimize_and_sort(std::move(top), true, true); Blueprint::UP expect_up(ap((new SourceBlenderBlueprint(*selector))-> addChild(ap(MyLeafSpec(10).create())). addChild(std::make_unique<EmptyBlueprint>()))); - EXPECT_EQUAL(expect_up->asString(), top->asString()); + compare(*expect_up, *top, true); } struct make { @@ -731,7 +738,7 @@ struct make { return std::move(*this); } make &&leaf(uint32_t estimate) && { - std::unique_ptr<LeafBlueprint> bp(MyLeafSpec(estimate).create()); + std::unique_ptr<MyLeaf> bp(MyLeafSpec(estimate).create()); if (cost_tag != invalid_cost) { bp->set_cost(cost_tag); cost_tag = invalid_cost; @@ -763,7 +770,8 @@ TEST("AND AND collapsing") { TEST("OR OR collapsing") { Blueprint::UP top = make::OR().leafs({1,3,5}).add(make::OR().leafs({2,4})); Blueprint::UP expect = make::OR().leafs({5,4,3,2,1}); - optimize_and_compare(std::move(top), std::move(expect)); + // NOTE: use non-strict cost based sorting for expected order + optimize_and_compare(std::move(top), std::move(expect), false); } TEST("AND_NOT AND_NOT collapsing") { @@ -841,7 +849,8 @@ TEST("test single child optimization") { TEST("test empty OR child optimization") { Blueprint::UP top = addLeafs(std::make_unique<OrBlueprint>(), {{0, true}, 20, {0, true}, 10, {0, true}, 0, 30, {0, true}}); Blueprint::UP expect = addLeafs(std::make_unique<OrBlueprint>(), {30, 20, 10, 0}); - optimize_and_compare(std::move(top), std::move(expect)); + // NOTE: use non-strict cost based sorting for expected order + optimize_and_compare(std::move(top), std::move(expect), false); } TEST("test empty AND_NOT child optimization") { @@ -868,10 +877,10 @@ TEST("require that replaced blueprints retain source id") { addChild(ap(MyLeafSpec(30).create()->setSourceId(55))))); Blueprint::UP expect2_up(ap(MyLeafSpec(30).create()->setSourceId(42))); //------------------------------------------------------------------------- - top1_up = Blueprint::optimize(std::move(top1_up), true); - top2_up = Blueprint::optimize(std::move(top2_up), true); - EXPECT_EQUAL(expect1_up->asString(), top1_up->asString()); - EXPECT_EQUAL(expect2_up->asString(), top2_up->asString()); + top1_up = Blueprint::optimize_and_sort(std::move(top1_up), true, true); + top2_up = Blueprint::optimize_and_sort(std::move(top2_up), true, true); + compare(*expect1_up, *top1_up, true); + compare(*expect2_up, *top2_up, true); EXPECT_EQUAL(13u, top1_up->getSourceId()); EXPECT_EQUAL(42u, top2_up->getSourceId()); } @@ -1181,45 +1190,25 @@ TEST("require_that_unpack_optimization_is_not_overruled_by_equiv") { } } -TEST("require that children of near are not optimized") { - auto top_up = ap((new NearBlueprint(10))-> - addChild(addLeafs(std::make_unique<OrBlueprint>(), {20, {0, true}})). - addChild(addLeafs(std::make_unique<OrBlueprint>(), {{0, true}, 30}))); - auto expect_up = ap((new NearBlueprint(10))-> - addChild(addLeafs(std::make_unique<OrBlueprint>(), {20, {0, true}})). - addChild(addLeafs(std::make_unique<OrBlueprint>(), {{0, true}, 30}))); - top_up = Blueprint::optimize(std::move(top_up), true); - TEST_DO(compare(*top_up, *expect_up, true)); -} - -TEST("require that children of onear are not optimized") { - auto top_up = ap((new ONearBlueprint(10))-> - addChild(addLeafs(std::make_unique<OrBlueprint>(), {20, {0, true}})). - addChild(addLeafs(std::make_unique<OrBlueprint>(), {{0, true}, 30}))); - auto expect_up = ap((new ONearBlueprint(10))-> - addChild(addLeafs(std::make_unique<OrBlueprint>(), {20, {0, true}})). - addChild(addLeafs(std::make_unique<OrBlueprint>(), {{0, true}, 30}))); - top_up = Blueprint::optimize(std::move(top_up), true); - TEST_DO(compare(*top_up, *expect_up, true)); -} - TEST("require that ANDNOT without children is optimized to empty search") { Blueprint::UP top_up = std::make_unique<AndNotBlueprint>(); auto expect_up = std::make_unique<EmptyBlueprint>(); - top_up = Blueprint::optimize(std::move(top_up), true); - EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + top_up = Blueprint::optimize_and_sort(std::move(top_up), true, true); + compare(*expect_up, *top_up, true); } TEST("require that highest cost tier sorts last for OR") { Blueprint::UP top = addLeafsWithCostTier(std::make_unique<OrBlueprint>(), {{50, 1}, {30, 3}, {20, 2}, {10, 1}}); Blueprint::UP expect = addLeafsWithCostTier(std::make_unique<OrBlueprint>(), {{50, 1}, {10, 1}, {20, 2}, {30, 3}}); - optimize_and_compare(std::move(top), std::move(expect), false); + // cost-based sorting would ignore cost tier + optimize_and_compare(std::move(top), std::move(expect), true, false); } TEST("require that highest cost tier sorts last for AND") { Blueprint::UP top = addLeafsWithCostTier(std::make_unique<AndBlueprint>(), {{10, 1}, {20, 3}, {30, 2}, {50, 1}}); Blueprint::UP expect = addLeafsWithCostTier(std::make_unique<AndBlueprint>(), {{10, 1}, {50, 1}, {30, 2}, {20, 3}}); - optimize_and_compare(std::move(top), std::move(expect), false); + // cost-based sorting would ignore cost tier + optimize_and_compare(std::move(top), std::move(expect), true, false); } template<typename BP> @@ -1292,6 +1281,7 @@ void verify_relative_estimate(make &&mk, double expect) { EXPECT_EQUAL(mk.making->estimate(), 0.0); Blueprint::UP bp = std::move(mk).leafs({200,300,950}); bp->setDocIdLimit(1000); + bp = Blueprint::optimize(std::move(bp)); EXPECT_EQUAL(bp->estimate(), expect); } @@ -1329,15 +1319,17 @@ TEST("relative estimate for WEAKAND") { verify_relative_estimate(make::WEAKAND(50), 0.05); } -void verify_cost(make &&mk, double expect) { - EXPECT_EQUAL(mk.making->cost(), 1.0); +void verify_cost(make &&mk, double expect, double expect_strict) { + EXPECT_EQUAL(mk.making->cost(), 0.0); + EXPECT_EQUAL(mk.making->strict_cost(), 0.0); Blueprint::UP bp = std::move(mk) - .cost(1.1).leaf(200) - .cost(1.2).leaf(300) - .cost(1.3).leaf(500); + .cost(1.1).leaf(200) // strict_cost: 0.2*1.1 + .cost(1.2).leaf(300) // strict_cost: 0.3*1.2 + .cost(1.3).leaf(950); // rel_est: 0.5, strict_cost: 1.3 bp->setDocIdLimit(1000); - bp = Blueprint::optimize(std::move(bp), true); + bp = Blueprint::optimize(std::move(bp)); EXPECT_EQUAL(bp->cost(), expect); + EXPECT_EQUAL(bp->strict_cost(), expect_strict); } double calc_cost(std::vector<std::pair<double,double>> list) { @@ -1351,36 +1343,48 @@ double calc_cost(std::vector<std::pair<double,double>> list) { } TEST("cost for OR") { - verify_cost(make::OR(), calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}})); + verify_cost(make::OR(), + calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}}), + calc_cost({{0.2*1.1, 0.8},{0.3*1.2, 0.7},{1.3, 0.5}})); } TEST("cost for AND") { - verify_cost(make::AND(), calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}})); + verify_cost(make::AND(), + calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}}), + calc_cost({{0.2*1.1, 0.2},{1.2, 0.3},{1.3, 0.5}})); } TEST("cost for RANK") { - verify_cost(make::RANK(), 1.1); // first + verify_cost(make::RANK(), 1.1, 0.2*1.1); // first } TEST("cost for ANDNOT") { - verify_cost(make::ANDNOT(), calc_cost({{1.1, 0.2},{1.3, 0.5},{1.2, 0.7}})); + verify_cost(make::ANDNOT(), + calc_cost({{1.1, 0.2},{1.3, 0.5},{1.2, 0.7}}), + calc_cost({{0.2*1.1, 0.2},{1.3, 0.5},{1.2, 0.7}})); } TEST("cost for SB") { InvalidSelector sel; - verify_cost(make::SB(sel), 1.3); // max + verify_cost(make::SB(sel), 1.3, 1.3); // max } TEST("cost for NEAR") { - verify_cost(make::NEAR(1), 3.0 + calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}})); + verify_cost(make::NEAR(1), + 0.2*0.3*0.5 * 3 + calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}}), + 0.2*0.3*0.5 * 3 + calc_cost({{0.2*1.1, 0.2},{1.2, 0.3},{1.3, 0.5}})); } TEST("cost for ONEAR") { - verify_cost(make::ONEAR(1), 3.0 + calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}})); + verify_cost(make::ONEAR(1), + 0.2*0.3*0.5 * 3 + calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}}), + 0.2*0.3*0.5 * 3 + calc_cost({{0.2*1.1, 0.2},{1.2, 0.3},{1.3, 0.5}})); } TEST("cost for WEAKAND") { - verify_cost(make::WEAKAND(1000), calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}})); + verify_cost(make::WEAKAND(1000), + calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}}), + calc_cost({{0.2*1.1, 0.8},{0.3*1.2, 0.7},{1.3, 0.5}})); } TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/blueprint/mysearch.h b/searchlib/src/tests/queryeval/blueprint/mysearch.h index db7dd2adae6..6eb27364c2b 100644 --- a/searchlib/src/tests/queryeval/blueprint/mysearch.h +++ b/searchlib/src/tests/queryeval/blueprint/mysearch.h @@ -104,7 +104,8 @@ public: class MyLeaf : public SimpleLeafBlueprint { using TFMDA = search::fef::TermFieldMatchDataArray; - bool _got_global_filter; + bool _got_global_filter = false; + double _cost = 1.0; public: SearchIterator::UP @@ -113,18 +114,15 @@ public: return std::make_unique<MySearch>("leaf", tfmda, strict); } - MyLeaf() - : SimpleLeafBlueprint(), _got_global_filter(false) - {} - MyLeaf(FieldSpecBaseList fields) - : SimpleLeafBlueprint(std::move(fields)), _got_global_filter(false) - {} - + MyLeaf() : SimpleLeafBlueprint() {} + MyLeaf(FieldSpecBaseList fields) : SimpleLeafBlueprint(std::move(fields)) {} + void set_cost(double value) noexcept { _cost = value; } + double calculate_cost() const override { return _cost; } + MyLeaf &estimate(uint32_t hits, bool empty = false) { setEstimate(HitEstimate(hits, empty)); return *this; } - MyLeaf &cost_tier(uint32_t value) { set_cost_tier(value); return *this; @@ -153,7 +151,7 @@ private: public: explicit MyLeafSpec(uint32_t estHits, bool empty = false) - : _fields(), _estimate(estHits, empty), _cost_tier(0), _want_global_filter(false) {} + : _fields(), _estimate(estHits, empty), _cost_tier(0), _want_global_filter(false) {} MyLeafSpec &addField(uint32_t fieldId, uint32_t handle) { _fields.add(FieldSpecBase(fieldId, handle)); diff --git a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp index 1180206279d..4fc8922b9a3 100644 --- a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp +++ b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp @@ -48,7 +48,10 @@ concept ChildCollector = requires(T a, std::unique_ptr<Blueprint> bp) { // inherit Blueprint to capture the default filter factory struct DefaultBlueprint : Blueprint { double calculate_relative_estimate() const override { abort(); } - void optimize(Blueprint* &, OptimizePass, bool) override { abort(); } + double calculate_cost() const override { abort(); } + double calculate_strict_cost() const override { abort(); } + void optimize(Blueprint* &, OptimizePass) override { abort(); } + void sort(bool, bool) override { abort(); } const State &getState() const override { abort(); } void fetchPostings(const ExecuteInfo &) override { abort(); } void freeze() override { abort(); } diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp index a9f549a0bd9..7a7abb20cdf 100644 --- a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp +++ b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp @@ -626,12 +626,13 @@ TEST(ParallelWeakAndTest, require_that_asString_on_blueprint_works) " estimate: HitEstimate {\n" " empty: false\n" " estHits: 2\n" - " relative_estimate: 0.5\n" " cost_tier: 1\n" " tree_size: 2\n" " allow_termwise_eval: false\n" " }\n" - " cost: 1\n" + " relative_estimate: 0\n" + " cost: 0\n" + " strict_cost: 0\n" " sourceId: 4294967295\n" " docid_limit: 0\n" " _weights: std::vector {\n" @@ -650,12 +651,13 @@ TEST(ParallelWeakAndTest, require_that_asString_on_blueprint_works) " estimate: HitEstimate {\n" " empty: false\n" " estHits: 2\n" - " relative_estimate: 0.5\n" " cost_tier: 1\n" " tree_size: 1\n" " allow_termwise_eval: true\n" " }\n" - " cost: 1\n" + " relative_estimate: 0\n" + " cost: 0\n" + " strict_cost: 0\n" " sourceId: 4294967295\n" " docid_limit: 0\n" " }\n" diff --git a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp index 7c535e5d3d5..c9fcb472b68 100644 --- a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp +++ b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp @@ -46,7 +46,7 @@ std::unique_ptr<SameElementBlueprint> make_blueprint(const std::vector<FakeResul } Blueprint::UP finalize(Blueprint::UP bp, bool strict) { - Blueprint::UP result = Blueprint::optimize(std::move(bp), true); + Blueprint::UP result = Blueprint::optimize_and_sort(std::move(bp), true, true); result->fetchPostings(ExecuteInfo::createForTest(strict)); result->freeze(); return result; diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index 6ca072d6dc7..2b9b45c990e 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -89,7 +89,6 @@ Blueprint::sat_sum(const std::vector<HitEstimate> &data, uint32_t docid_limit) Blueprint::State::State() noexcept : _fields(), - _relative_estimate(0.0), _estimateHits(0), _tree_size(1), _estimateEmpty(true), @@ -106,7 +105,6 @@ Blueprint::State::State(FieldSpecBase field) noexcept Blueprint::State::State(FieldSpecBaseList fields_in) noexcept : _fields(std::move(fields_in)), - _relative_estimate(0.0), _estimateHits(0), _tree_size(1), _estimateEmpty(true), @@ -120,7 +118,9 @@ Blueprint::State::~State() = default; Blueprint::Blueprint() noexcept : _parent(nullptr), - _cost(1.0), + _relative_estimate(0.0), + _cost(0.0), + _strict_cost(0.0), _sourceId(0xffffffff), _docid_limit(0), _frozen(false) @@ -130,15 +130,15 @@ Blueprint::Blueprint() noexcept Blueprint::~Blueprint() = default; Blueprint::UP -Blueprint::optimize(Blueprint::UP bp, bool sort_by_cost) { +Blueprint::optimize(Blueprint::UP bp) { Blueprint *root = bp.release(); - root->optimize(root, OptimizePass::FIRST, sort_by_cost); - root->optimize(root, OptimizePass::LAST, sort_by_cost); + root->optimize(root, OptimizePass::FIRST); + root->optimize(root, OptimizePass::LAST); return Blueprint::UP(root); } void -Blueprint::optimize_self(OptimizePass, bool) +Blueprint::optimize_self(OptimizePass) { } @@ -353,12 +353,13 @@ Blueprint::visitMembers(vespalib::ObjectVisitor &visitor) const visitor.openStruct("estimate", "HitEstimate"); visitor.visitBool("empty", state.estimate().empty); visitor.visitInt("estHits", state.estimate().estHits); - visitor.visitFloat("relative_estimate", state.relative_estimate()); visitor.visitInt("cost_tier", state.cost_tier()); visitor.visitInt("tree_size", state.tree_size()); visitor.visitBool("allow_termwise_eval", state.allow_termwise_eval()); visitor.closeStruct(); + visitor.visitFloat("relative_estimate", _relative_estimate); visitor.visitFloat("cost", _cost); + visitor.visitFloat("strict_cost", _strict_cost); visitor.visitInt("sourceId", _sourceId); visitor.visitInt("docid_limit", _docid_limit); } @@ -518,7 +519,6 @@ IntermediateBlueprint::calculateState() const { State state(exposeFields()); state.estimate(calculateEstimate()); - state.relative_estimate(calculate_relative_estimate()); state.cost_tier(calculate_cost_tier()); state.allow_termwise_eval(infer_allow_termwise_eval()); state.want_global_filter(infer_want_global_filter()); @@ -548,25 +548,33 @@ IntermediateBlueprint::should_do_termwise_eval(const UnpackInfo &unpack, double } void -IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost) +IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass) { assert(self == this); - if (should_optimize_children()) { - for (auto &child : _children) { - auto *child_ptr = child.release(); - child_ptr->optimize(child_ptr, pass, sort_by_cost); - child.reset(child_ptr); - } + for (auto &child : _children) { + auto *child_ptr = child.release(); + child_ptr->optimize(child_ptr, pass); + child.reset(child_ptr); } - optimize_self(pass, sort_by_cost); + optimize_self(pass); if (pass == OptimizePass::LAST) { - sort(_children, sort_by_cost); + set_relative_estimate(calculate_relative_estimate()); set_cost(calculate_cost()); + set_strict_cost(calculate_strict_cost()); } maybe_eliminate_self(self, get_replacement()); } void +IntermediateBlueprint::sort(bool strict, bool sort_by_cost) +{ + sort(_children, strict, sort_by_cost); + for (size_t i = 0; i < _children.size(); ++i) { + _children[i]->sort(strict && inheritStrict(i), sort_by_cost); + } +} + +void IntermediateBlueprint::set_global_filter(const GlobalFilter &global_filter, double estimated_hit_ratio) { for (auto & child : _children) { @@ -710,23 +718,32 @@ IntermediateBlueprint::calculateUnpackInfo(const fef::MatchData & md) const //----------------------------------------------------------------------------- -void -LeafBlueprint::setDocIdLimit(uint32_t limit) noexcept { - Blueprint::setDocIdLimit(limit); - _state.relative_estimate(calculate_relative_estimate()); - notifyChange(); -} - double LeafBlueprint::calculate_relative_estimate() const { double rel_est = abs_to_rel_est(_state.estimate().estHits, get_docid_limit()); if (rel_est > 0.9) { // Assume we do not really know how much we are matching when - // we claim to match 'everything' + // we claim to match 'everything'. Also assume we are not able + // to skip documents efficiently when strict. + _can_skip = false; return 0.5; + } else { + _can_skip = true; + return rel_est; } - return rel_est; +} + +double +LeafBlueprint::calculate_cost() const +{ + return 1.0; +} + +double +LeafBlueprint::calculate_strict_cost() const +{ + return _can_skip ? estimate() * cost() : cost(); } void @@ -758,14 +775,24 @@ LeafBlueprint::getRange(vespalib::string &, vespalib::string &) const { } void -LeafBlueprint::optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost) +LeafBlueprint::optimize(Blueprint* &self, OptimizePass pass) { assert(self == this); - optimize_self(pass, sort_by_cost); + optimize_self(pass); + if (pass == OptimizePass::LAST) { + set_relative_estimate(calculate_relative_estimate()); + set_cost(calculate_cost()); + set_strict_cost(calculate_strict_cost()); + } maybe_eliminate_self(self, get_replacement()); } void +LeafBlueprint::sort(bool, bool) +{ +} + +void LeafBlueprint::set_cost_tier(uint32_t value) { assert(value < 0x100); diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index d998c2e343e..e080d667dfa 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -53,7 +53,7 @@ public: using SearchIteratorUP = std::unique_ptr<SearchIterator>; enum class OptimizePass { FIRST, LAST }; - + struct HitEstimate { uint32_t estHits; bool empty; @@ -75,7 +75,6 @@ public: { private: FieldSpecBaseList _fields; - double _relative_estimate; uint32_t _estimateHits; uint32_t _tree_size : 20; bool _estimateEmpty : 1; @@ -111,9 +110,6 @@ public: return nullptr; } - void relative_estimate(double value) noexcept { _relative_estimate = value; } - double relative_estimate() const noexcept { return _relative_estimate; } - void estimate(HitEstimate est) noexcept { _estimateHits = est.estHits; _estimateEmpty = est.empty; @@ -182,7 +178,9 @@ public: private: Blueprint *_parent; + double _relative_estimate; double _cost; + double _strict_cost; uint32_t _sourceId; uint32_t _docid_limit; bool _frozen; @@ -198,7 +196,9 @@ protected: _frozen = true; } + void set_relative_estimate(double value) noexcept { _relative_estimate = value; } void set_cost(double value) noexcept { _cost = value; } + void set_strict_cost(double value) noexcept { _strict_cost = value; } public: class IPredicate { @@ -222,19 +222,22 @@ public: Blueprint *getParent() const noexcept { return _parent; } bool has_parent() const { return (_parent != nullptr); } - double cost() const noexcept { return _cost; } - Blueprint &setSourceId(uint32_t sourceId) noexcept { _sourceId = sourceId; return *this; } uint32_t getSourceId() const noexcept { return _sourceId; } virtual void setDocIdLimit(uint32_t limit) noexcept { _docid_limit = limit; } uint32_t get_docid_limit() const noexcept { return _docid_limit; } - static Blueprint::UP optimize(Blueprint::UP bp, bool sort_by_cost); - virtual void optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost) = 0; - virtual void optimize_self(OptimizePass pass, bool sort_by_cost); + static Blueprint::UP optimize(Blueprint::UP bp); + virtual void sort(bool strict, bool sort_by_cost) = 0; + static Blueprint::UP optimize_and_sort(Blueprint::UP bp, bool strict, bool sort_by_cost) { + auto result = optimize(std::move(bp)); + result->sort(strict, sort_by_cost); + return result; + } + virtual void optimize(Blueprint* &self, OptimizePass pass) = 0; + virtual void optimize_self(OptimizePass pass); virtual Blueprint::UP get_replacement(); - virtual bool should_optimize_children() const { return true; } virtual bool supports_termwise_children() const { return false; } virtual bool always_needs_unpack() const { return false; } @@ -254,9 +257,27 @@ public: const Blueprint &root() const; double hit_ratio() const { return getState().hit_ratio(_docid_limit); } - double estimate() const { return getState().relative_estimate(); } - virtual double calculate_relative_estimate() const = 0; + // The flow statistics for a blueprint is calculated during the + // LAST optimize pass (just prior to sorting). The relative + // estimate may be used to calculate the costs and the non-strict + // cost may be used to calculate the strict cost. After being + // calculated, each value is available through a simple accessor + // function. Note that these values may not be available for + // blueprints used inside complex leafs (this case will probably + // be solved using custom flow adapters that has knowledge of + // docid limit). + // + // 'estimate': relative estimate in the range [0,1] + // 'cost': per-document cost of non-strict evaluation + // 'strict_cost': per-document cost of strict evaluation + double estimate() const noexcept { return _relative_estimate; } + double cost() const noexcept { return _cost; } + double strict_cost() const noexcept { return _strict_cost; } + virtual double calculate_relative_estimate() const = 0; + virtual double calculate_cost() const = 0; + virtual double calculate_strict_cost() const = 0; + virtual void fetchPostings(const ExecuteInfo &execInfo) = 0; virtual void freeze() = 0; bool frozen() const { return _frozen; } @@ -360,7 +381,8 @@ public: void setDocIdLimit(uint32_t limit) noexcept final; - void optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost) final; + void optimize(Blueprint* &self, OptimizePass pass) final; + void sort(bool strict, bool sort_by_cost) override; void set_global_filter(const GlobalFilter &global_filter, double estimated_hit_ratio) override; IndexList find(const IPredicate & check) const; @@ -374,10 +396,9 @@ public: Blueprint::UP removeLastChild() { return removeChild(childCnt() - 1); } SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override; - virtual double calculate_cost() const = 0; virtual HitEstimate combine(const std::vector<HitEstimate> &data) const = 0; virtual FieldSpecBaseList exposeFields() const = 0; - virtual void sort(Children &children, bool sort_by_cost) const = 0; + virtual void sort(Children &children, bool strict, bool sort_by_cost) const = 0; virtual bool inheritStrict(size_t i) const = 0; virtual SearchIteratorUP createIntermediateSearch(MultiSearch::Children subSearches, @@ -396,11 +417,12 @@ class LeafBlueprint : public Blueprint { private: State _state; + mutable bool _can_skip = true; protected: - void optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost) final; + void optimize(Blueprint* &self, OptimizePass pass) final; + void sort(bool strict, bool sort_by_cost) override; void setEstimate(HitEstimate est) { _state.estimate(est); - _state.relative_estimate(calculate_relative_estimate()); notifyChange(); } void set_cost_tier(uint32_t value); @@ -431,9 +453,9 @@ protected: public: ~LeafBlueprint() override = default; const State &getState() const final { return _state; } - void setDocIdLimit(uint32_t limit) noexcept final; - using Blueprint::set_cost; double calculate_relative_estimate() const override; + double calculate_cost() const override; + double calculate_strict_cost() const override; void fetchPostings(const ExecuteInfo &execInfo) override; void freeze() final; SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override; diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h index 86ce6f8b93b..b90321581b5 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow.h @@ -13,19 +13,11 @@ namespace search::queryeval { namespace flow { // the default adapter expects the shape of std::unique_ptr<Blueprint> -// with respect to estimate, cost and (coming soon) strict_cost. +// with respect to estimate, cost and strict_cost. struct DefaultAdapter { double estimate(const auto &child) const noexcept { return child->estimate(); } double cost(const auto &child) const noexcept { return child->cost(); } - // Estimate the per-document cost of strict evaluation of this - // child. This will typically be something like (estimate() * - // cost()) for leafs with posting lists. OR will aggregate strict - // cost by calculating the minimal OR flow of strict child - // costs. AND will aggregate strict cost by calculating the - // minimal AND flow where the cost of the first child is - // substituted by its strict cost. This value is currently not - // available in Blueprints. - double strict_cost(const auto &child) const noexcept { return child->cost(); } + double strict_cost(const auto &child) const noexcept { return child->strict_cost(); } }; template <typename ADAPTER, typename T> @@ -154,8 +146,6 @@ struct FlowMixin { static double cost_of(const auto &children, bool strict) { return cost_of(flow::DefaultAdapter(), children, strict); } - // TODO: remove - static double cost_of(const auto &children) { return cost_of(children, false); } }; class AndFlow : public FlowMixin<AndFlow> { @@ -190,9 +180,8 @@ public: children[0] = std::move(the_one); } } - // TODO: add strict - static void sort(auto &children) { - sort(flow::DefaultAdapter(), children, false); + static void sort(auto &children, bool strict) { + sort(flow::DefaultAdapter(), children, strict); } }; @@ -224,9 +213,8 @@ public: flow::sort<flow::MinOrCost>(adapter, children); } } - // TODO: add strict - static void sort(auto &children) { - sort(flow::DefaultAdapter(), children, false); + static void sort(auto &children, bool strict) { + sort(flow::DefaultAdapter(), children, strict); } }; @@ -254,9 +242,8 @@ public: static void sort(auto adapter, auto &children, bool) { flow::sort_partial<flow::MinOrCost>(adapter, children, 1); } - // TODO: add strict - static void sort(auto &children) { - sort(flow::DefaultAdapter(), children, false); + static void sort(auto &children, bool strict) { + sort(flow::DefaultAdapter(), children, strict); } }; diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index e60fe3d3f85..8cabe189b0e 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -33,7 +33,7 @@ size_t lookup_create_source(std::vector<std::unique_ptr<CombineType> > &sources, } template <typename CombineType> -void optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx, bool sort_by_cost) { +void optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx) { std::vector<size_t> source_blenders; const SourceBlenderBlueprint * reference = nullptr; for (size_t i = begin_idx; i < self.childCnt(); ++i) { @@ -63,7 +63,7 @@ void optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx, boo top->addChild(std::move(sources.back())); sources.pop_back(); } - blender_up = Blueprint::optimize(std::move(blender_up), sort_by_cost); + blender_up = Blueprint::optimize(std::move(blender_up)); self.addChild(std::move(blender_up)); } } @@ -87,15 +87,21 @@ need_normal_features_for_children(const IntermediateBlueprint &blueprint, fef::M //----------------------------------------------------------------------------- double +AndNotBlueprint::calculate_relative_estimate() const +{ + return AndNotFlow::estimate_of(get_children()); +} + +double AndNotBlueprint::calculate_cost() const { - return AndNotFlow::cost_of(get_children()); + return AndNotFlow::cost_of(get_children(), false); } double -AndNotBlueprint::calculate_relative_estimate() const +AndNotBlueprint::calculate_strict_cost() const { - return AndNotFlow::estimate_of(get_children()); + return AndNotFlow::cost_of(get_children(), true); } Blueprint::HitEstimate @@ -114,7 +120,7 @@ AndNotBlueprint::exposeFields() const } void -AndNotBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost) +AndNotBlueprint::optimize_self(OptimizePass pass) { if (childCnt() == 0) { return; @@ -152,7 +158,7 @@ AndNotBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost) } } if (pass == OptimizePass::LAST) { - optimize_source_blenders<OrBlueprint>(*this, 1, sort_by_cost); + optimize_source_blenders<OrBlueprint>(*this, 1); } } @@ -166,10 +172,10 @@ AndNotBlueprint::get_replacement() } void -AndNotBlueprint::sort(Children &children, bool sort_by_cost) const +AndNotBlueprint::sort(Children &children, bool strict, bool sort_by_cost) const { if (sort_by_cost) { - AndNotFlow::sort(children); + AndNotFlow::sort(children, strict); } else { if (children.size() > 2) { std::sort(children.begin() + 1, children.end(), TieredGreaterEstimate()); @@ -213,13 +219,18 @@ AndNotBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) co //----------------------------------------------------------------------------- double +AndBlueprint::calculate_relative_estimate() const { + return AndFlow::estimate_of(get_children()); +} + +double AndBlueprint::calculate_cost() const { - return AndFlow::cost_of(get_children()); + return AndFlow::cost_of(get_children(), false); } double -AndBlueprint::calculate_relative_estimate() const { - return AndFlow::estimate_of(get_children()); +AndBlueprint::calculate_strict_cost() const { + return AndFlow::cost_of(get_children(), true); } Blueprint::HitEstimate @@ -235,7 +246,7 @@ AndBlueprint::exposeFields() const } void -AndBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost) +AndBlueprint::optimize_self(OptimizePass pass) { if (pass == OptimizePass::FIRST) { for (size_t i = 0; i < childCnt(); ++i) { @@ -248,7 +259,7 @@ AndBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost) } } if (pass == OptimizePass::LAST) { - optimize_source_blenders<AndBlueprint>(*this, 0, sort_by_cost); + optimize_source_blenders<AndBlueprint>(*this, 0); } } @@ -262,10 +273,10 @@ AndBlueprint::get_replacement() } void -AndBlueprint::sort(Children &children, bool sort_by_cost) const +AndBlueprint::sort(Children &children, bool strict, bool sort_by_cost) const { if (sort_by_cost) { - AndFlow::sort(children); + AndFlow::sort(children, strict); } else { std::sort(children.begin(), children.end(), TieredLessEstimate()); } @@ -322,13 +333,18 @@ OrBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate) const OrBlueprint::~OrBlueprint() = default; double +OrBlueprint::calculate_relative_estimate() const { + return OrFlow::estimate_of(get_children()); +} + +double OrBlueprint::calculate_cost() const { - return OrFlow::cost_of(get_children()); + return OrFlow::cost_of(get_children(), false); } double -OrBlueprint::calculate_relative_estimate() const { - return OrFlow::estimate_of(get_children()); +OrBlueprint::calculate_strict_cost() const { + return OrFlow::cost_of(get_children(), true); } Blueprint::HitEstimate @@ -344,7 +360,7 @@ OrBlueprint::exposeFields() const } void -OrBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost) +OrBlueprint::optimize_self(OptimizePass pass) { if (pass == OptimizePass::FIRST) { for (size_t i = 0; (childCnt() > 1) && (i < childCnt()); ++i) { @@ -359,7 +375,7 @@ OrBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost) } } if (pass == OptimizePass::LAST) { - optimize_source_blenders<OrBlueprint>(*this, 0, sort_by_cost); + optimize_source_blenders<OrBlueprint>(*this, 0); } } @@ -373,10 +389,10 @@ OrBlueprint::get_replacement() } void -OrBlueprint::sort(Children &children, bool sort_by_cost) const +OrBlueprint::sort(Children &children, bool strict, bool sort_by_cost) const { if (sort_by_cost) { - OrFlow::sort(children); + OrFlow::sort(children, strict); } else { std::sort(children.begin(), children.end(), TieredGreaterEstimate()); } @@ -427,17 +443,22 @@ OrBlueprint::calculate_cost_tier() const WeakAndBlueprint::~WeakAndBlueprint() = default; double -WeakAndBlueprint::calculate_cost() const { - return OrFlow::cost_of(get_children()); -} - -double WeakAndBlueprint::calculate_relative_estimate() const { double child_est = OrFlow::estimate_of(get_children()); double my_est = abs_to_rel_est(_n, get_docid_limit()); return std::min(my_est, child_est); } +double +WeakAndBlueprint::calculate_cost() const { + return OrFlow::cost_of(get_children(), false); +} + +double +WeakAndBlueprint::calculate_strict_cost() const { + return OrFlow::cost_of(get_children(), true); +} + Blueprint::HitEstimate WeakAndBlueprint::combine(const std::vector<HitEstimate> &data) const { @@ -456,7 +477,7 @@ WeakAndBlueprint::exposeFields() const } void -WeakAndBlueprint::sort(Children &, bool) const +WeakAndBlueprint::sort(Children &, bool, bool) const { // order needs to stay the same as _weights } @@ -498,13 +519,18 @@ WeakAndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) c //----------------------------------------------------------------------------- double +NearBlueprint::calculate_relative_estimate() const { + return AndFlow::estimate_of(get_children()); +} + +double NearBlueprint::calculate_cost() const { - return AndFlow::cost_of(get_children()) + childCnt() * 1.0; + return AndFlow::cost_of(get_children(), false) + childCnt() * estimate(); } double -NearBlueprint::calculate_relative_estimate() const { - return AndFlow::estimate_of(get_children()); +NearBlueprint::calculate_strict_cost() const { + return AndFlow::cost_of(get_children(), true) + childCnt() * estimate(); } Blueprint::HitEstimate @@ -520,10 +546,10 @@ NearBlueprint::exposeFields() const } void -NearBlueprint::sort(Children &children, bool sort_by_cost) const +NearBlueprint::sort(Children &children, bool strict, bool sort_by_cost) const { if (sort_by_cost) { - AndFlow::sort(children); + AndFlow::sort(children, strict); } else { std::sort(children.begin(), children.end(), TieredLessEstimate()); } @@ -565,13 +591,18 @@ NearBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) cons //----------------------------------------------------------------------------- double +ONearBlueprint::calculate_relative_estimate() const { + return AndFlow::estimate_of(get_children()); +} + +double ONearBlueprint::calculate_cost() const { - return AndFlow::cost_of(get_children()) + (childCnt() * 1.0); + return AndFlow::cost_of(get_children(), false) + childCnt() * estimate(); } double -ONearBlueprint::calculate_relative_estimate() const { - return AndFlow::estimate_of(get_children()); +ONearBlueprint::calculate_strict_cost() const { + return AndFlow::cost_of(get_children(), true) + childCnt() * estimate(); } Blueprint::HitEstimate @@ -587,7 +618,7 @@ ONearBlueprint::exposeFields() const } void -ONearBlueprint::sort(Children &, bool) const +ONearBlueprint::sort(Children &, bool, bool) const { // ordered near cannot sort children here } @@ -630,13 +661,18 @@ ONearBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) con //----------------------------------------------------------------------------- double +RankBlueprint::calculate_relative_estimate() const { + return (childCnt() == 0) ? 0.0 : getChild(0).estimate(); +} + +double RankBlueprint::calculate_cost() const { - return (childCnt() == 0) ? 1.0 : getChild(0).cost(); + return (childCnt() == 0) ? 0.0 : getChild(0).cost(); } double -RankBlueprint::calculate_relative_estimate() const { - return (childCnt() == 0) ? 0.0 : getChild(0).estimate(); +RankBlueprint::calculate_strict_cost() const { + return (childCnt() == 0) ? 0.0 : getChild(0).strict_cost(); } Blueprint::HitEstimate @@ -655,7 +691,7 @@ RankBlueprint::exposeFields() const } void -RankBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost) +RankBlueprint::optimize_self(OptimizePass pass) { if (pass == OptimizePass::FIRST) { for (size_t i = 1; i < childCnt(); ++i) { @@ -665,7 +701,7 @@ RankBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost) } } if (pass == OptimizePass::LAST) { - optimize_source_blenders<OrBlueprint>(*this, 1, sort_by_cost); + optimize_source_blenders<OrBlueprint>(*this, 1); } } @@ -679,7 +715,7 @@ RankBlueprint::get_replacement() } void -RankBlueprint::sort(Children &, bool) const +RankBlueprint::sort(Children &, bool, bool) const { } @@ -731,8 +767,13 @@ SourceBlenderBlueprint::SourceBlenderBlueprint(const ISourceSelector &selector) SourceBlenderBlueprint::~SourceBlenderBlueprint() = default; double +SourceBlenderBlueprint::calculate_relative_estimate() const { + return OrFlow::estimate_of(get_children()); +} + +double SourceBlenderBlueprint::calculate_cost() const { - double my_cost = 1.0; + double my_cost = 0.0; for (const auto &child: get_children()) { my_cost = std::max(my_cost, child->cost()); } @@ -740,8 +781,12 @@ SourceBlenderBlueprint::calculate_cost() const { } double -SourceBlenderBlueprint::calculate_relative_estimate() const { - return OrFlow::estimate_of(get_children()); +SourceBlenderBlueprint::calculate_strict_cost() const { + double my_cost = 0.0; + for (const auto &child: get_children()) { + my_cost = std::max(my_cost, child->strict_cost()); + } + return my_cost; } Blueprint::HitEstimate @@ -757,7 +802,7 @@ SourceBlenderBlueprint::exposeFields() const } void -SourceBlenderBlueprint::sort(Children &, bool) const +SourceBlenderBlueprint::sort(Children &, bool, bool) const { } diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h index 620280e979b..368cbd35c69 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h @@ -15,14 +15,15 @@ class AndNotBlueprint : public IntermediateBlueprint { public: bool supports_termwise_children() const override { return true; } - double calculate_cost() const final; double calculate_relative_estimate() const final; + double calculate_cost() const final; + double calculate_strict_cost() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void optimize_self(OptimizePass pass, bool sort_by_cost) override; + void optimize_self(OptimizePass pass) override; AndNotBlueprint * asAndNot() noexcept final { return this; } Blueprint::UP get_replacement() override; - void sort(Children &children, bool sort_by_cost) const override; + void sort(Children &children, bool strict, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; SearchIterator::UP createIntermediateSearch(MultiSearch::Children subSearches, @@ -43,14 +44,15 @@ class AndBlueprint : public IntermediateBlueprint { public: bool supports_termwise_children() const override { return true; } - double calculate_cost() const final; double calculate_relative_estimate() const final; + double calculate_cost() const final; + double calculate_strict_cost() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void optimize_self(OptimizePass pass, bool sort_by_cost) override; + void optimize_self(OptimizePass pass) override; AndBlueprint * asAnd() noexcept final { return this; } Blueprint::UP get_replacement() override; - void sort(Children &children, bool sort_by_cost) const override; + void sort(Children &children, bool strict, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; SearchIterator::UP createIntermediateSearch(MultiSearch::Children subSearches, @@ -69,14 +71,15 @@ class OrBlueprint : public IntermediateBlueprint public: ~OrBlueprint() override; bool supports_termwise_children() const override { return true; } - double calculate_cost() const final; double calculate_relative_estimate() const final; + double calculate_cost() const final; + double calculate_strict_cost() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void optimize_self(OptimizePass pass, bool sort_by_cost) override; + void optimize_self(OptimizePass pass) override; OrBlueprint * asOr() noexcept final { return this; } Blueprint::UP get_replacement() override; - void sort(Children &children, bool sort_by_cost) const override; + void sort(Children &children, bool strict, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; SearchIterator::UP createIntermediateSearch(MultiSearch::Children subSearches, @@ -97,11 +100,12 @@ private: std::vector<uint32_t> _weights; public: - double calculate_cost() const final; double calculate_relative_estimate() const final; + double calculate_cost() const final; + double calculate_strict_cost() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void sort(Children &children, bool sort_by_cost) const override; + void sort(Children &children, bool strict, bool sort_on_cost) const override; bool inheritStrict(size_t i) const override; bool always_needs_unpack() const override; WeakAndBlueprint * asWeakAnd() noexcept final { return this; } @@ -128,12 +132,12 @@ private: uint32_t _window; public: - double calculate_cost() const final; double calculate_relative_estimate() const final; + double calculate_cost() const final; + double calculate_strict_cost() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - bool should_optimize_children() const override { return false; } - void sort(Children &children, bool sort_by_cost) const override; + void sort(Children &children, bool strict, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override; SearchIterator::UP @@ -152,12 +156,12 @@ private: uint32_t _window; public: - double calculate_cost() const final; double calculate_relative_estimate() const final; + double calculate_cost() const final; + double calculate_strict_cost() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - bool should_optimize_children() const override { return false; } - void sort(Children &children, bool sort_by_cost) const override; + void sort(Children &children, bool strict, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override; SearchIterator::UP @@ -173,13 +177,14 @@ public: class RankBlueprint final : public IntermediateBlueprint { public: - double calculate_cost() const final; double calculate_relative_estimate() const final; + double calculate_cost() const final; + double calculate_strict_cost() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void optimize_self(OptimizePass pass, bool sort_by_cost) override; + void optimize_self(OptimizePass pass) override; Blueprint::UP get_replacement() override; - void sort(Children &children, bool sort_by_cost) const override; + void sort(Children &children, bool strict, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; bool isRank() const noexcept final { return true; } SearchIterator::UP @@ -202,11 +207,12 @@ private: public: explicit SourceBlenderBlueprint(const ISourceSelector &selector) noexcept; ~SourceBlenderBlueprint() override; - double calculate_cost() const final; double calculate_relative_estimate() const final; + double calculate_cost() const final; + double calculate_strict_cost() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void sort(Children &children, bool sort_by_cost) const override; + void sort(Children &children, bool strict, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; SearchIterator::UP createIntermediateSearch(MultiSearch::Children subSearches, diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp index 500e9fe4dbb..96181377282 100644 --- a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp @@ -45,7 +45,7 @@ SameElementBlueprint::addTerm(Blueprint::UP term) } void -SameElementBlueprint::optimize_self(OptimizePass pass, bool) +SameElementBlueprint::optimize_self(OptimizePass pass) { if (pass == OptimizePass::LAST) { std::sort(_terms.begin(), _terms.end(), diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h index 06c20339e81..6a988e67149 100644 --- a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h @@ -34,7 +34,7 @@ public: // used by create visitor void addTerm(Blueprint::UP term); - void optimize_self(OptimizePass pass, bool sort_by_cost) override; + void optimize_self(OptimizePass pass) override; void fetchPostings(const ExecuteInfo &execInfo) override; std::unique_ptr<SameElementSearch> create_same_element_search(search::fef::TermFieldMatchData& tfmd, bool strict) const; |