From 86e70f14ce88fb8f7a4e7889d2309bc13337d02e Mon Sep 17 00:00:00 2001 From: HÃ¥vard Pettersen Date: Mon, 27 Nov 2023 11:10:58 +0000 Subject: collapse co-nested and/andnot in first optimize pass do not mention specific optimize passes in test collapse second optimize pass into first --- .../blueprint/intermediate_blueprints_test.cpp | 170 ++++++++++++++++----- .../src/vespa/searchlib/queryeval/blueprint.cpp | 5 +- .../src/vespa/searchlib/queryeval/blueprint.h | 7 +- .../queryeval/intermediate_blueprints.cpp | 28 ++-- .../vespa/searchlib/queryeval/leaf_blueprints.h | 1 + 5 files changed, 157 insertions(+), 54 deletions(-) diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index 1d9f088d170..dd88684dfee 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -114,48 +114,52 @@ TEST("test AndNot Blueprint") { // createSearch tested by iterator unit test } +template +void optimize(std::unique_ptr &ref) { + auto optimized = Blueprint::optimize(std::move(ref)); + ref.reset(dynamic_cast(optimized.get())); + ASSERT_TRUE(ref); + optimized.release(); +} + TEST("test And propagates updated histestimate") { - AndBlueprint bp; - bp.setSourceId(2); - bp.addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); - bp.addChild(ap(MyLeafSpec(200).create()->setSourceId(2))); - bp.addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); - bp.optimize_self(Blueprint::OptimizePass::FIRST); - bp.optimize_self(Blueprint::OptimizePass::SECOND); - bp.optimize_self(Blueprint::OptimizePass::LAST); - bp.setDocIdLimit(5000); - bp.fetchPostings(ExecuteInfo::TRUE); - EXPECT_EQUAL(3u, bp.childCnt()); - for (uint32_t i = 0; i < bp.childCnt(); i++) { - const auto & child = dynamic_cast(bp.getChild(i)); + auto bp = std::make_unique(); + bp->setSourceId(2); + bp->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + bp->addChild(ap(MyLeafSpec(200).create()->setSourceId(2))); + bp->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); + optimize(bp); + bp->setDocIdLimit(5000); + bp->fetchPostings(ExecuteInfo::TRUE); + EXPECT_EQUAL(3u, bp->childCnt()); + for (uint32_t i = 0; i < bp->childCnt(); i++) { + const auto & child = dynamic_cast(bp->getChild(i)); EXPECT_EQUAL((i == 0), child.executeInfo.isStrict()); } - EXPECT_EQUAL(1.0f, dynamic_cast(bp.getChild(0)).executeInfo.hitRate()); - EXPECT_EQUAL(1.0f/250, dynamic_cast(bp.getChild(1)).executeInfo.hitRate()); - EXPECT_EQUAL(1.0f/(250*25), dynamic_cast(bp.getChild(2)).executeInfo.hitRate()); + EXPECT_EQUAL(1.0f, dynamic_cast(bp->getChild(0)).executeInfo.hitRate()); + EXPECT_EQUAL(1.0f/250, dynamic_cast(bp->getChild(1)).executeInfo.hitRate()); + EXPECT_EQUAL(1.0f/(250*25), dynamic_cast(bp->getChild(2)).executeInfo.hitRate()); } TEST("test Or propagates updated histestimate") { - OrBlueprint bp; - bp.setSourceId(2); - bp.addChild(ap(MyLeafSpec(5000).create()->setSourceId(2))); - bp.addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); - bp.addChild(ap(MyLeafSpec(800).create()->setSourceId(2))); - bp.addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); - bp.optimize_self(Blueprint::OptimizePass::FIRST); - bp.optimize_self(Blueprint::OptimizePass::SECOND); - bp.optimize_self(Blueprint::OptimizePass::LAST); - bp.setDocIdLimit(5000); - bp.fetchPostings(ExecuteInfo::TRUE); - EXPECT_EQUAL(4u, bp.childCnt()); - for (uint32_t i = 0; i < bp.childCnt(); i++) { - const auto & child = dynamic_cast(bp.getChild(i)); + auto bp = std::make_unique(); + bp->setSourceId(2); + bp->addChild(ap(MyLeafSpec(5000).create()->setSourceId(2))); + bp->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); + bp->addChild(ap(MyLeafSpec(800).create()->setSourceId(2))); + bp->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + optimize(bp); + bp->setDocIdLimit(5000); + bp->fetchPostings(ExecuteInfo::TRUE); + EXPECT_EQUAL(4u, bp->childCnt()); + for (uint32_t i = 0; i < bp->childCnt(); i++) { + const auto & child = dynamic_cast(bp->getChild(i)); EXPECT_TRUE(child.executeInfo.isStrict()); } - EXPECT_EQUAL(1.0f, dynamic_cast(bp.getChild(0)).executeInfo.hitRate()); - EXPECT_EQUAL(1.0f, dynamic_cast(bp.getChild(1)).executeInfo.hitRate()); - EXPECT_EQUAL(3.0f/5.0f, dynamic_cast(bp.getChild(2)).executeInfo.hitRate()); - EXPECT_EQUAL(3.0f*42.0f/(5.0f*50.0f), dynamic_cast(bp.getChild(3)).executeInfo.hitRate()); + EXPECT_EQUAL(1.0f, dynamic_cast(bp->getChild(0)).executeInfo.hitRate()); + EXPECT_EQUAL(1.0f, dynamic_cast(bp->getChild(1)).executeInfo.hitRate()); + EXPECT_EQUAL(3.0f/5.0f, dynamic_cast(bp->getChild(2)).executeInfo.hitRate()); + EXPECT_EQUAL(3.0f*42.0f/(5.0f*50.0f), dynamic_cast(bp->getChild(3)).executeInfo.hitRate()); } TEST("test And Blueprint") { @@ -469,6 +473,7 @@ struct SourceBlenderTestFixture { InvalidSelector selector_1; // the one InvalidSelector selector_2; // not the one void addChildrenForSBTest(IntermediateBlueprint & parent); + void addChildrenForSimpleSBTest(IntermediateBlueprint & parent); }; void @@ -488,7 +493,13 @@ void SourceBlenderTestFixture::addChildrenForSBTest(IntermediateBlueprint & pare parent.addChild(addLeafsWithSourceId(std::make_unique(selector_1), {{2000, 2}, {1000, 1}})); } -TEST_F("test SourceBlender below AND optimization", SourceBlenderTestFixture) { +void SourceBlenderTestFixture::addChildrenForSimpleSBTest(IntermediateBlueprint & parent) { + parent.addChild(addLeafsWithSourceId(std::make_unique(selector_1), {{200, 2}, {100, 1}, {300, 3}})); + parent.addChild(addLeafsWithSourceId(std::make_unique(selector_1), {{20, 2}, {10, 1}, {30, 3}})); + parent.addChild(addLeafsWithSourceId(std::make_unique(selector_1), {{2000, 2}, {1000, 1}})); +} + +TEST_F("test SourceBlender below AND partial optimization", SourceBlenderTestFixture) { auto top = std::make_unique(); f.addChildrenForSBTest(*top); @@ -505,7 +516,19 @@ TEST_F("test SourceBlender below AND optimization", SourceBlenderTestFixture) { optimize_and_compare(std::move(top), std::move(expect)); } -TEST_F("test SourceBlender below OR optimization", SourceBlenderTestFixture) { +TEST_F("test AND replaced by source blender after full optimization", SourceBlenderTestFixture) { + auto top = std::make_unique(); + f.addChildrenForSimpleSBTest(*top); + + auto expect = std::make_unique(f.selector_1); + expect->addChild(addLeafsWithSourceId(3, std::make_unique(), {{30, 3}, {300, 3}})); + expect->addChild(addLeafsWithSourceId(2, std::make_unique(), {{20, 2}, {200, 2}, {2000, 2}})); + expect->addChild(addLeafsWithSourceId(1, std::make_unique(), {{10, 1}, {100, 1}, {1000, 1}})); + + optimize_and_compare(std::move(top), std::move(expect)); +} + +TEST_F("test SourceBlender below OR partial optimization", SourceBlenderTestFixture) { auto top = std::make_unique(); f.addChildrenForSBTest(*top); //------------------------------------------------------------------------- @@ -521,6 +544,18 @@ TEST_F("test SourceBlender below OR optimization", SourceBlenderTestFixture) { optimize_and_compare(std::move(top), std::move(expect)); } +TEST_F("test OR replaced by source blender after full optimization", SourceBlenderTestFixture) { + auto top = std::make_unique(); + f.addChildrenForSimpleSBTest(*top); + + auto expect = std::make_unique(f.selector_1); + expect->addChild(addLeafsWithSourceId(3, std::make_unique(), {{300, 3}, {30, 3}})); + expect->addChild(addLeafsWithSourceId(2, std::make_unique(), {{2000, 2}, {200, 2}, {20, 2}})); + expect->addChild(addLeafsWithSourceId(1, std::make_unique(), {{1000, 1}, {100, 1}, {10, 1}})); + + optimize_and_compare(std::move(top), std::move(expect)); +} + TEST_F("test SourceBlender below AND_NOT optimization", SourceBlenderTestFixture) { auto top = std::make_unique(); top->addChild(addLeafsWithSourceId(std::make_unique(f.selector_1), {{42, 1}})); @@ -592,14 +627,69 @@ TEST("and with one empty child is optimized away") { EXPECT_EQUAL(expect_up->asString(), top->asString()); } +TEST("AND AND collapsing") { + auto top = std::make_unique(); + addLeafs(*top, {1,3,5}); + auto sub_and = std::make_unique(); + addLeafs(*sub_and, {2,4}); + top->addChild(std::move(sub_and)); + auto expect = std::make_unique(); + addLeafs(*expect, {1,2,3,4,5}); + optimize_and_compare(std::move(top), std::move(expect)); +} + +TEST("OR OR collapsing") { + auto top = std::make_unique(); + addLeafs(*top, {1,3,5}); + auto sub_and = std::make_unique(); + addLeafs(*sub_and, {2,4}); + top->addChild(std::move(sub_and)); + auto expect = std::make_unique(); + addLeafs(*expect, {5,4,3,2,1}); + optimize_and_compare(std::move(top), std::move(expect)); +} + +TEST("AND_NOT AND_NOT collapsing") { + auto top = std::make_unique(); + auto sub_and_not = std::make_unique(); + addLeafs(*sub_and_not, {1,3,5}); + top->addChild(std::move(sub_and_not)); + addLeafs(*top, {2,4}); + auto expect = std::make_unique(); + addLeafs(*expect, {1,5,4,3,2}); + optimize_and_compare(std::move(top), std::move(expect)); +} + +TEST("AND_NOT AND AND_NOT collapsing") { + auto top = std::make_unique(); + auto sub_and = std::make_unique(); + auto sub_and_not = std::make_unique(); + addLeafs(*sub_and_not, {1,5,6}); + sub_and->addChild(std::move(sub_and_not)); + addLeafs(*sub_and, {3,2}); + sub_and_not = std::make_unique(); + addLeafs(*sub_and_not, {4,8,9}); + sub_and->addChild(std::move(sub_and_not)); + top->addChild(std::move(sub_and)); + addLeafs(*top, {7}); + //------------------------------------------------------------------------- + auto expect = std::make_unique(); + auto sub_expect = std::make_unique(); + addLeafs(*sub_expect, {1,2,3,4}); + expect->addChild(std::move(sub_expect)); + addLeafs(*expect, {9,8,7,6,5}); + optimize_and_compare(std::move(top), std::move(expect)); +} + TEST("test single child optimization") { InvalidSelector selector; //------------------------------------------------------------------------- Blueprint::UP top = ap((new AndNotBlueprint())-> - addChild(ap((new AndBlueprint())-> - addChild(ap((new OrBlueprint())-> - addChild(ap((new SourceBlenderBlueprint(selector))-> - addChild(addLeafs(std::make_unique(), {42}))))))))); + addChild(ap((new AndBlueprint())-> + addChild(ap((new RankBlueprint())-> + addChild(ap((new OrBlueprint())-> + addChild(ap((new SourceBlenderBlueprint(selector))-> + addChild(addLeafs(std::make_unique(), {42}))))))))))); //------------------------------------------------------------------------- Blueprint::UP expect = addLeafs(std::make_unique(selector), {42}); //------------------------------------------------------------------------- diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index a8e2e77623f..639805e116e 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -36,8 +36,8 @@ void maybe_eliminate_self(Blueprint* &self, Blueprint::UP replacement) { self->setSourceId(discard->getSourceId()); discard->setParent(nullptr); } - // replace with empty blueprint if empty - if (self->getState().estimate().empty) { + // replace with empty blueprint if empty, skip if already empty blueprint + if ((self->as_empty() == nullptr) && self->getState().estimate().empty) { Blueprint::UP discard(self); self = new EmptyBlueprint(discard->getState().fields()); self->setParent(discard->getParent()); @@ -130,7 +130,6 @@ Blueprint::UP Blueprint::optimize(Blueprint::UP bp) { Blueprint *root = bp.release(); root->optimize(root, OptimizePass::FIRST); - root->optimize(root, OptimizePass::SECOND); root->optimize(root, OptimizePass::LAST); return Blueprint::UP(root); } diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index 81d225661d0..a02cb7dd17f 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -32,6 +32,7 @@ class SourceBlenderBlueprint; class AndBlueprint; class AndNotBlueprint; class OrBlueprint; +class EmptyBlueprint; /** * A Blueprint is an intermediate representation of a search. More @@ -50,7 +51,7 @@ public: using Children = std::vector; using SearchIteratorUP = std::unique_ptr; - enum class OptimizePass { FIRST, SECOND, LAST }; + enum class OptimizePass { FIRST, LAST }; struct HitEstimate { uint32_t estHits; @@ -270,6 +271,9 @@ public: virtual bool isRank() const noexcept { return false; } virtual const attribute::ISearchContext *get_attribute_search_context() const noexcept { return nullptr; } + // to avoid replacing an empty blueprint with another empty blueprint + virtual EmptyBlueprint *as_empty() noexcept { return nullptr; } + // For document summaries with matched-elements-only set. virtual std::unique_ptr create_matching_elements_search(const MatchingElementsFields &fields) const; }; @@ -347,6 +351,7 @@ public: IntermediateBlueprint & insertChild(size_t n, Blueprint::UP child); IntermediateBlueprint &addChild(Blueprint::UP child); Blueprint::UP removeChild(size_t n); + Blueprint::UP removeLastChild() { return removeChild(childCnt() - 1); } SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override; virtual HitEstimate combine(const std::vector &data) const = 0; diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index b315965b5f4..4d0656b421c 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -52,7 +52,7 @@ void optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx) { source_blenders.pop_back(); SourceBlenderBlueprint * blender = blender_up->asSourceBlender(); while (blender->childCnt() > 0) { - Blueprint::UP child_up = blender->removeChild(blender->childCnt() - 1); + Blueprint::UP child_up = blender->removeLastChild(); size_t source_idx = lookup_create_source(sources, child_up->getSourceId(), self.get_docid_limit()); sources[source_idx]->addChild(std::move(child_up)); } @@ -107,14 +107,24 @@ AndNotBlueprint::optimize_self(OptimizePass pass) return; } if (pass == OptimizePass::FIRST) { - AndNotBlueprint * child = getChild(0).asAndNot(); - if (child != nullptr) { + if (auto *child = getChild(0).asAndNot()) { while (child->childCnt() > 1) { - addChild(child->removeChild(1)); + addChild(child->removeLastChild()); } insertChild(1, child->removeChild(0)); removeChild(0); } + if (auto *child = getChild(0).asAnd()) { + for (size_t i = 0; i < child->childCnt(); ++i) { + if (auto *grand_child = child->getChild(i).asAndNot()) { + while (grand_child->childCnt() > 1) { + addChild(grand_child->removeLastChild()); + } + child->addChild(grand_child->removeChild(0)); + child->removeChild(i--); + } + } + } for (size_t i = 1; i < childCnt(); ++i) { if (getChild(i).getState().estimate().empty) { removeChild(i--); @@ -195,10 +205,9 @@ AndBlueprint::optimize_self(OptimizePass pass) { if (pass == OptimizePass::FIRST) { for (size_t i = 0; i < childCnt(); ++i) { - AndBlueprint * child = getChild(i).asAnd(); - if (child != nullptr) { + if (auto *child = getChild(i).asAnd()) { while (child->childCnt() > 0) { - addChild(child->removeChild(0)); + addChild(child->removeLastChild()); } removeChild(i--); } @@ -297,10 +306,9 @@ OrBlueprint::optimize_self(OptimizePass pass) { if (pass == OptimizePass::FIRST) { for (size_t i = 0; (childCnt() > 1) && (i < childCnt()); ++i) { - OrBlueprint * child = getChild(i).asOr(); - if (child != nullptr) { + if (auto *child = getChild(i).asOr()) { while (child->childCnt() > 0) { - addChild(child->removeChild(0)); + addChild(child->removeLastChild()); } removeChild(i--); } else if (getChild(i).getState().estimate().empty) { diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h index a95ca0efc72..a1c23649dd6 100644 --- a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h @@ -20,6 +20,7 @@ public: EmptyBlueprint(FieldSpecBase field) : SimpleLeafBlueprint(field) {} EmptyBlueprint() = default; SearchIterator::UP createFilterSearch(bool strict, FilterConstraint constraint) const override; + EmptyBlueprint *as_empty() noexcept final override { return this; } }; class AlwaysTrueBlueprint : public SimpleLeafBlueprint -- cgit v1.2.3