diff options
author | Håvard Pettersen <havardpe@yahooinc.com> | 2023-11-20 15:10:24 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@yahooinc.com> | 2023-11-20 16:04:07 +0000 |
commit | 2681132ed95ff889f82786f2a8c95da85aee5da1 (patch) | |
tree | aa32b184289b05730ca47bac834d6c32fa6b38b6 | |
parent | 195f000f27001b74721649905a9a1e48fb9f665c (diff) |
perform blueprint optimization in multiple passes
8 files changed, 90 insertions, 64 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index b150f8db7c5..e14469dc949 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -119,7 +119,9 @@ TEST("test And propagates updated histestimate") { bp.addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2))); bp.addChild(ap(MyLeafSpec(200).create<RememberExecuteInfo>()->setSourceId(2))); bp.addChild(ap(MyLeafSpec(2000).create<RememberExecuteInfo>()->setSourceId(2))); - bp.optimize_self(); + bp.optimize_self(Blueprint::OptimizePass::FIRST); + bp.optimize_self(Blueprint::OptimizePass::SECOND); + bp.optimize_self(Blueprint::OptimizePass::LAST); bp.setDocIdLimit(5000); bp.fetchPostings(ExecuteInfo::TRUE); EXPECT_EQUAL(3u, bp.childCnt()); @@ -139,7 +141,9 @@ TEST("test Or propagates updated histestimate") { bp.addChild(ap(MyLeafSpec(2000).create<RememberExecuteInfo>()->setSourceId(2))); bp.addChild(ap(MyLeafSpec(800).create<RememberExecuteInfo>()->setSourceId(2))); bp.addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2))); - bp.optimize_self(); + bp.optimize_self(Blueprint::OptimizePass::FIRST); + bp.optimize_self(Blueprint::OptimizePass::SECOND); + bp.optimize_self(Blueprint::OptimizePass::LAST); bp.setDocIdLimit(5000); bp.fetchPostings(ExecuteInfo::TRUE); EXPECT_EQUAL(4u, bp.childCnt()); diff --git a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp index 71033ed7d06..5933122d7a2 100644 --- a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp +++ b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp @@ -47,7 +47,7 @@ concept ChildCollector = requires(T a, std::unique_ptr<Blueprint> bp) { // inherit Blueprint to capture the default filter factory struct DefaultBlueprint : Blueprint { - void optimize(Blueprint* &) override { abort(); } + void optimize(Blueprint* &, OptimizePass) override { abort(); } const State &getState() const override { abort(); } void fetchPostings(const ExecuteInfo &) override { abort(); } void freeze() override { abort(); } diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index aba80faaa8a..5eaa2dc40ab 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -129,12 +129,14 @@ Blueprint::~Blueprint() = default; Blueprint::UP Blueprint::optimize(Blueprint::UP bp) { Blueprint *root = bp.release(); - root->optimize(root); + root->optimize(root, OptimizePass::FIRST); + root->optimize(root, OptimizePass::SECOND); + root->optimize(root, OptimizePass::LAST); return Blueprint::UP(root); } void -Blueprint::optimize_self() +Blueprint::optimize_self(OptimizePass) { } @@ -539,19 +541,23 @@ IntermediateBlueprint::should_do_termwise_eval(const UnpackInfo &unpack, double } void -IntermediateBlueprint::optimize(Blueprint* &self) +IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass) { assert(self == this); if (should_optimize_children()) { for (auto &child : _children) { auto *child_ptr = child.release(); - child_ptr->optimize(child_ptr); + child_ptr->optimize(child_ptr, pass); child.reset(child_ptr); } } - optimize_self(); - sort(_children); - maybe_eliminate_self(self, get_replacement()); + optimize_self(pass); + if (pass == OptimizePass::LAST) { + sort(_children); + } + if (pass == OptimizePass::FIRST) { + maybe_eliminate_self(self, get_replacement()); + } } void @@ -725,11 +731,13 @@ LeafBlueprint::getRange(vespalib::string &, vespalib::string &) const { } void -LeafBlueprint::optimize(Blueprint* &self) +LeafBlueprint::optimize(Blueprint* &self, OptimizePass pass) { assert(self == this); - optimize_self(); - maybe_eliminate_self(self, get_replacement()); + optimize_self(pass); + if (pass == OptimizePass::FIRST) { + maybe_eliminate_self(self, get_replacement()); + } } void diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index da6050f075d..a6455c78921 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -45,6 +45,8 @@ public: using Children = std::vector<Blueprint::UP>; using SearchIteratorUP = std::unique_ptr<SearchIterator>; + enum class OptimizePass { FIRST, SECOND, LAST }; + struct HitEstimate { uint32_t estHits; bool empty; @@ -206,8 +208,8 @@ public: uint32_t get_docid_limit() const noexcept { return _docid_limit; } static Blueprint::UP optimize(Blueprint::UP bp); - virtual void optimize(Blueprint* &self) = 0; - virtual void optimize_self(); + virtual void optimize(Blueprint* &self, OptimizePass pass) = 0; + virtual void optimize_self(OptimizePass pass); virtual Blueprint::UP get_replacement(); virtual bool should_optimize_children() const { return true; } @@ -326,7 +328,7 @@ public: void setDocIdLimit(uint32_t limit) noexcept final; - void optimize(Blueprint* &self) final; + void optimize(Blueprint* &self, OptimizePass pass) final; void set_global_filter(const GlobalFilter &global_filter, double estimated_hit_ratio) override; IndexList find(const IPredicate & check) const; @@ -361,7 +363,7 @@ class LeafBlueprint : public Blueprint private: State _state; protected: - void optimize(Blueprint* &self) final; + void optimize(Blueprint* &self, OptimizePass pass) final; void setEstimate(HitEstimate est) { _state.estimate(est); notifyChange(); diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index 790d61b3731..6739843f822 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -103,25 +103,27 @@ AndNotBlueprint::exposeFields() const } void -AndNotBlueprint::optimize_self() +AndNotBlueprint::optimize_self(OptimizePass pass) { if (childCnt() == 0) { return; } - if (getChild(0).isAndNot()) { - auto *child = static_cast<AndNotBlueprint *>(&getChild(0)); - while (child->childCnt() > 1) { - addChild(child->removeChild(1)); + if (pass == OptimizePass::FIRST) { + if (getChild(0).isAndNot()) { + auto *child = static_cast<AndNotBlueprint *>(&getChild(0)); + while (child->childCnt() > 1) { + addChild(child->removeChild(1)); + } + insertChild(1, child->removeChild(0)); + removeChild(0); } - insertChild(1, child->removeChild(0)); - removeChild(0); - } - for (size_t i = 1; i < childCnt(); ++i) { - if (getChild(i).getState().estimate().empty) { - removeChild(i--); + for (size_t i = 1; i < childCnt(); ++i) { + if (getChild(i).getState().estimate().empty) { + removeChild(i--); + } } } - if ( !(getParent() && getParent()->isAndNot()) ) { + if (pass == OptimizePass::LAST) { optimize_source_blenders<OrBlueprint>(*this, 1); } } @@ -191,18 +193,20 @@ AndBlueprint::exposeFields() const } void -AndBlueprint::optimize_self() -{ - for (size_t i = 0; i < childCnt(); ++i) { - if (getChild(i).isAnd()) { - auto *child = static_cast<AndBlueprint *>(&getChild(i)); - while (child->childCnt() > 0) { - addChild(child->removeChild(0)); +AndBlueprint::optimize_self(OptimizePass pass) +{ + if (pass == OptimizePass::FIRST) { + for (size_t i = 0; i < childCnt(); ++i) { + if (getChild(i).isAnd()) { + auto *child = static_cast<AndBlueprint *>(&getChild(i)); + while (child->childCnt() > 0) { + addChild(child->removeChild(0)); + } + removeChild(i--); } - removeChild(i--); } } - if ( !(getParent() && getParent()->isAnd()) ) { + if (pass == OptimizePass::LAST) { optimize_source_blenders<AndBlueprint>(*this, 0); } } @@ -291,20 +295,22 @@ OrBlueprint::exposeFields() const } void -OrBlueprint::optimize_self() -{ - for (size_t i = 0; (childCnt() > 1) && (i < childCnt()); ++i) { - if (getChild(i).isOr()) { - auto *child = static_cast<OrBlueprint *>(&getChild(i)); - while (child->childCnt() > 0) { - addChild(child->removeChild(0)); +OrBlueprint::optimize_self(OptimizePass pass) +{ + if (pass == OptimizePass::FIRST) { + for (size_t i = 0; (childCnt() > 1) && (i < childCnt()); ++i) { + if (getChild(i).isOr()) { + auto *child = static_cast<OrBlueprint *>(&getChild(i)); + while (child->childCnt() > 0) { + addChild(child->removeChild(0)); + } + removeChild(i--); + } else if (getChild(i).getState().estimate().empty) { + removeChild(i--); } - removeChild(i--); - } else if (getChild(i).getState().estimate().empty) { - removeChild(i--); } } - if ( !(getParent() && getParent()->isOr()) ) { + if (pass == OptimizePass::LAST) { optimize_source_blenders<OrBlueprint>(*this, 0); } } @@ -542,14 +548,18 @@ RankBlueprint::exposeFields() const } void -RankBlueprint::optimize_self() +RankBlueprint::optimize_self(OptimizePass pass) { - for (size_t i = 1; i < childCnt(); ++i) { - if (getChild(i).getState().estimate().empty) { - removeChild(i--); + if (pass == OptimizePass::FIRST) { + for (size_t i = 1; i < childCnt(); ++i) { + if (getChild(i).getState().estimate().empty) { + removeChild(i--); + } } } - optimize_source_blenders<OrBlueprint>(*this, 1); + if (pass == OptimizePass::LAST) { + optimize_source_blenders<OrBlueprint>(*this, 1); + } } Blueprint::UP diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h index 409a9e0fe95..31c9ac52576 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h @@ -17,7 +17,7 @@ public: bool supports_termwise_children() const override { return true; } HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void optimize_self() override; + void optimize_self(OptimizePass pass) override; bool isAndNot() const override { return true; } Blueprint::UP get_replacement() override; void sort(Children &children) const override; @@ -40,7 +40,7 @@ public: bool supports_termwise_children() const override { return true; } HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void optimize_self() override; + void optimize_self(OptimizePass pass) override; bool isAnd() const override { return true; } Blueprint::UP get_replacement() override; void sort(Children &children) const override; @@ -64,7 +64,7 @@ public: bool supports_termwise_children() const override { return true; } HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void optimize_self() override; + void optimize_self(OptimizePass pass) override; bool isOr() const override { return true; } Blueprint::UP get_replacement() override; void sort(Children &children) const override; @@ -158,7 +158,7 @@ class RankBlueprint final : public IntermediateBlueprint public: HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void optimize_self() override; + void optimize_self(OptimizePass pass) override; Blueprint::UP get_replacement() override; void sort(Children &children) const override; bool inheritStrict(size_t i) const override; diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp index 6b2faac847e..5c795679d48 100644 --- a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp @@ -45,12 +45,14 @@ SameElementBlueprint::addTerm(Blueprint::UP term) } void -SameElementBlueprint::optimize_self() +SameElementBlueprint::optimize_self(OptimizePass pass) { - std::sort(_terms.begin(), _terms.end(), - [](const auto &a, const auto &b) { - return (a->getState().estimate() < b->getState().estimate()); - }); + if (pass == OptimizePass::LAST) { + std::sort(_terms.begin(), _terms.end(), + [](const auto &a, const auto &b) { + return (a->getState().estimate() < b->getState().estimate()); + }); + } } void diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h index 240d064c5cc..cc6331375cf 100644 --- a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h @@ -34,7 +34,7 @@ public: // used by create visitor void addTerm(Blueprint::UP term); - void optimize_self() override; + void optimize_self(OptimizePass pass) override; void fetchPostings(const ExecuteInfo &execInfo) override; std::unique_ptr<SameElementSearch> create_same_element_search(search::fef::TermFieldMatchData& tfmd, bool strict) const; |