aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2023-11-28 17:05:42 +0100
committerGitHub <noreply@github.com>2023-11-28 17:05:42 +0100
commit6359834bc232523a4d8686e16f411fa148a649c2 (patch)
treef5a34c257ee1428d8916889c4272e0b8c018535d
parentd4f030fca74bb4795a993b30716c0a1ce68f6bfe (diff)
parent86e70f14ce88fb8f7a4e7889d2309bc13337d02e (diff)
Merge pull request #29485 from vespa-engine/havardpe/andnot-and-andnot-collapsingv8.265.12
collapse co-nested and/andnot in first optimize pass
-rw-r--r--searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp170
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.cpp5
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.h7
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp28
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h1
5 files changed, 157 insertions, 54 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
index 1d9f088d170..dd88684dfee 100644
--- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
@@ -114,48 +114,52 @@ TEST("test AndNot Blueprint") {
// createSearch tested by iterator unit test
}
+template <typename BP>
+void optimize(std::unique_ptr<BP> &ref) {
+ auto optimized = Blueprint::optimize(std::move(ref));
+ ref.reset(dynamic_cast<BP*>(optimized.get()));
+ ASSERT_TRUE(ref);
+ optimized.release();
+}
+
TEST("test And propagates updated histestimate") {
- AndBlueprint bp;
- bp.setSourceId(2);
- bp.addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2)));
- bp.addChild(ap(MyLeafSpec(200).create<RememberExecuteInfo>()->setSourceId(2)));
- bp.addChild(ap(MyLeafSpec(2000).create<RememberExecuteInfo>()->setSourceId(2)));
- bp.optimize_self(Blueprint::OptimizePass::FIRST);
- bp.optimize_self(Blueprint::OptimizePass::SECOND);
- bp.optimize_self(Blueprint::OptimizePass::LAST);
- bp.setDocIdLimit(5000);
- bp.fetchPostings(ExecuteInfo::TRUE);
- EXPECT_EQUAL(3u, bp.childCnt());
- for (uint32_t i = 0; i < bp.childCnt(); i++) {
- const auto & child = dynamic_cast<const RememberExecuteInfo &>(bp.getChild(i));
+ auto bp = std::make_unique<AndBlueprint>();
+ bp->setSourceId(2);
+ bp->addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2)));
+ bp->addChild(ap(MyLeafSpec(200).create<RememberExecuteInfo>()->setSourceId(2)));
+ bp->addChild(ap(MyLeafSpec(2000).create<RememberExecuteInfo>()->setSourceId(2)));
+ optimize(bp);
+ bp->setDocIdLimit(5000);
+ bp->fetchPostings(ExecuteInfo::TRUE);
+ EXPECT_EQUAL(3u, bp->childCnt());
+ for (uint32_t i = 0; i < bp->childCnt(); i++) {
+ const auto & child = dynamic_cast<const RememberExecuteInfo &>(bp->getChild(i));
EXPECT_EQUAL((i == 0), child.executeInfo.isStrict());
}
- EXPECT_EQUAL(1.0f, dynamic_cast<const RememberExecuteInfo &>(bp.getChild(0)).executeInfo.hitRate());
- EXPECT_EQUAL(1.0f/250, dynamic_cast<const RememberExecuteInfo &>(bp.getChild(1)).executeInfo.hitRate());
- EXPECT_EQUAL(1.0f/(250*25), dynamic_cast<const RememberExecuteInfo &>(bp.getChild(2)).executeInfo.hitRate());
+ EXPECT_EQUAL(1.0f, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(0)).executeInfo.hitRate());
+ EXPECT_EQUAL(1.0f/250, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(1)).executeInfo.hitRate());
+ EXPECT_EQUAL(1.0f/(250*25), dynamic_cast<const RememberExecuteInfo &>(bp->getChild(2)).executeInfo.hitRate());
}
TEST("test Or propagates updated histestimate") {
- OrBlueprint bp;
- bp.setSourceId(2);
- bp.addChild(ap(MyLeafSpec(5000).create<RememberExecuteInfo>()->setSourceId(2)));
- bp.addChild(ap(MyLeafSpec(2000).create<RememberExecuteInfo>()->setSourceId(2)));
- bp.addChild(ap(MyLeafSpec(800).create<RememberExecuteInfo>()->setSourceId(2)));
- bp.addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2)));
- bp.optimize_self(Blueprint::OptimizePass::FIRST);
- bp.optimize_self(Blueprint::OptimizePass::SECOND);
- bp.optimize_self(Blueprint::OptimizePass::LAST);
- bp.setDocIdLimit(5000);
- bp.fetchPostings(ExecuteInfo::TRUE);
- EXPECT_EQUAL(4u, bp.childCnt());
- for (uint32_t i = 0; i < bp.childCnt(); i++) {
- const auto & child = dynamic_cast<const RememberExecuteInfo &>(bp.getChild(i));
+ auto bp = std::make_unique<OrBlueprint>();
+ bp->setSourceId(2);
+ bp->addChild(ap(MyLeafSpec(5000).create<RememberExecuteInfo>()->setSourceId(2)));
+ bp->addChild(ap(MyLeafSpec(2000).create<RememberExecuteInfo>()->setSourceId(2)));
+ bp->addChild(ap(MyLeafSpec(800).create<RememberExecuteInfo>()->setSourceId(2)));
+ bp->addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2)));
+ optimize(bp);
+ bp->setDocIdLimit(5000);
+ bp->fetchPostings(ExecuteInfo::TRUE);
+ EXPECT_EQUAL(4u, bp->childCnt());
+ for (uint32_t i = 0; i < bp->childCnt(); i++) {
+ const auto & child = dynamic_cast<const RememberExecuteInfo &>(bp->getChild(i));
EXPECT_TRUE(child.executeInfo.isStrict());
}
- EXPECT_EQUAL(1.0f, dynamic_cast<const RememberExecuteInfo &>(bp.getChild(0)).executeInfo.hitRate());
- EXPECT_EQUAL(1.0f, dynamic_cast<const RememberExecuteInfo &>(bp.getChild(1)).executeInfo.hitRate());
- EXPECT_EQUAL(3.0f/5.0f, dynamic_cast<const RememberExecuteInfo &>(bp.getChild(2)).executeInfo.hitRate());
- EXPECT_EQUAL(3.0f*42.0f/(5.0f*50.0f), dynamic_cast<const RememberExecuteInfo &>(bp.getChild(3)).executeInfo.hitRate());
+ EXPECT_EQUAL(1.0f, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(0)).executeInfo.hitRate());
+ EXPECT_EQUAL(1.0f, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(1)).executeInfo.hitRate());
+ EXPECT_EQUAL(3.0f/5.0f, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(2)).executeInfo.hitRate());
+ EXPECT_EQUAL(3.0f*42.0f/(5.0f*50.0f), dynamic_cast<const RememberExecuteInfo &>(bp->getChild(3)).executeInfo.hitRate());
}
TEST("test And Blueprint") {
@@ -469,6 +473,7 @@ struct SourceBlenderTestFixture {
InvalidSelector selector_1; // the one
InvalidSelector selector_2; // not the one
void addChildrenForSBTest(IntermediateBlueprint & parent);
+ void addChildrenForSimpleSBTest(IntermediateBlueprint & parent);
};
void
@@ -488,7 +493,13 @@ void SourceBlenderTestFixture::addChildrenForSBTest(IntermediateBlueprint & pare
parent.addChild(addLeafsWithSourceId(std::make_unique<SourceBlenderBlueprint>(selector_1), {{2000, 2}, {1000, 1}}));
}
-TEST_F("test SourceBlender below AND optimization", SourceBlenderTestFixture) {
+void SourceBlenderTestFixture::addChildrenForSimpleSBTest(IntermediateBlueprint & parent) {
+ parent.addChild(addLeafsWithSourceId(std::make_unique<SourceBlenderBlueprint>(selector_1), {{200, 2}, {100, 1}, {300, 3}}));
+ parent.addChild(addLeafsWithSourceId(std::make_unique<SourceBlenderBlueprint>(selector_1), {{20, 2}, {10, 1}, {30, 3}}));
+ parent.addChild(addLeafsWithSourceId(std::make_unique<SourceBlenderBlueprint>(selector_1), {{2000, 2}, {1000, 1}}));
+}
+
+TEST_F("test SourceBlender below AND partial optimization", SourceBlenderTestFixture) {
auto top = std::make_unique<AndBlueprint>();
f.addChildrenForSBTest(*top);
@@ -505,7 +516,19 @@ TEST_F("test SourceBlender below AND optimization", SourceBlenderTestFixture) {
optimize_and_compare(std::move(top), std::move(expect));
}
-TEST_F("test SourceBlender below OR optimization", SourceBlenderTestFixture) {
+TEST_F("test AND replaced by source blender after full optimization", SourceBlenderTestFixture) {
+ auto top = std::make_unique<AndBlueprint>();
+ f.addChildrenForSimpleSBTest(*top);
+
+ auto expect = std::make_unique<SourceBlenderBlueprint>(f.selector_1);
+ expect->addChild(addLeafsWithSourceId(3, std::make_unique<AndBlueprint>(), {{30, 3}, {300, 3}}));
+ expect->addChild(addLeafsWithSourceId(2, std::make_unique<AndBlueprint>(), {{20, 2}, {200, 2}, {2000, 2}}));
+ expect->addChild(addLeafsWithSourceId(1, std::make_unique<AndBlueprint>(), {{10, 1}, {100, 1}, {1000, 1}}));
+
+ optimize_and_compare(std::move(top), std::move(expect));
+}
+
+TEST_F("test SourceBlender below OR partial optimization", SourceBlenderTestFixture) {
auto top = std::make_unique<OrBlueprint>();
f.addChildrenForSBTest(*top);
//-------------------------------------------------------------------------
@@ -521,6 +544,18 @@ TEST_F("test SourceBlender below OR optimization", SourceBlenderTestFixture) {
optimize_and_compare(std::move(top), std::move(expect));
}
+TEST_F("test OR replaced by source blender after full optimization", SourceBlenderTestFixture) {
+ auto top = std::make_unique<OrBlueprint>();
+ f.addChildrenForSimpleSBTest(*top);
+
+ auto expect = std::make_unique<SourceBlenderBlueprint>(f.selector_1);
+ expect->addChild(addLeafsWithSourceId(3, std::make_unique<OrBlueprint>(), {{300, 3}, {30, 3}}));
+ expect->addChild(addLeafsWithSourceId(2, std::make_unique<OrBlueprint>(), {{2000, 2}, {200, 2}, {20, 2}}));
+ expect->addChild(addLeafsWithSourceId(1, std::make_unique<OrBlueprint>(), {{1000, 1}, {100, 1}, {10, 1}}));
+
+ optimize_and_compare(std::move(top), std::move(expect));
+}
+
TEST_F("test SourceBlender below AND_NOT optimization", SourceBlenderTestFixture) {
auto top = std::make_unique<AndNotBlueprint>();
top->addChild(addLeafsWithSourceId(std::make_unique<SourceBlenderBlueprint>(f.selector_1), {{42, 1}}));
@@ -592,14 +627,69 @@ TEST("and with one empty child is optimized away") {
EXPECT_EQUAL(expect_up->asString(), top->asString());
}
+TEST("AND AND collapsing") {
+ auto top = std::make_unique<AndBlueprint>();
+ addLeafs(*top, {1,3,5});
+ auto sub_and = std::make_unique<AndBlueprint>();
+ addLeafs(*sub_and, {2,4});
+ top->addChild(std::move(sub_and));
+ auto expect = std::make_unique<AndBlueprint>();
+ addLeafs(*expect, {1,2,3,4,5});
+ optimize_and_compare(std::move(top), std::move(expect));
+}
+
+TEST("OR OR collapsing") {
+ auto top = std::make_unique<OrBlueprint>();
+ addLeafs(*top, {1,3,5});
+ auto sub_and = std::make_unique<OrBlueprint>();
+ addLeafs(*sub_and, {2,4});
+ top->addChild(std::move(sub_and));
+ auto expect = std::make_unique<OrBlueprint>();
+ addLeafs(*expect, {5,4,3,2,1});
+ optimize_and_compare(std::move(top), std::move(expect));
+}
+
+TEST("AND_NOT AND_NOT collapsing") {
+ auto top = std::make_unique<AndNotBlueprint>();
+ auto sub_and_not = std::make_unique<AndNotBlueprint>();
+ addLeafs(*sub_and_not, {1,3,5});
+ top->addChild(std::move(sub_and_not));
+ addLeafs(*top, {2,4});
+ auto expect = std::make_unique<AndNotBlueprint>();
+ addLeafs(*expect, {1,5,4,3,2});
+ optimize_and_compare(std::move(top), std::move(expect));
+}
+
+TEST("AND_NOT AND AND_NOT collapsing") {
+ auto top = std::make_unique<AndNotBlueprint>();
+ auto sub_and = std::make_unique<AndBlueprint>();
+ auto sub_and_not = std::make_unique<AndNotBlueprint>();
+ addLeafs(*sub_and_not, {1,5,6});
+ sub_and->addChild(std::move(sub_and_not));
+ addLeafs(*sub_and, {3,2});
+ sub_and_not = std::make_unique<AndNotBlueprint>();
+ addLeafs(*sub_and_not, {4,8,9});
+ sub_and->addChild(std::move(sub_and_not));
+ top->addChild(std::move(sub_and));
+ addLeafs(*top, {7});
+ //-------------------------------------------------------------------------
+ auto expect = std::make_unique<AndNotBlueprint>();
+ auto sub_expect = std::make_unique<AndBlueprint>();
+ addLeafs(*sub_expect, {1,2,3,4});
+ expect->addChild(std::move(sub_expect));
+ addLeafs(*expect, {9,8,7,6,5});
+ optimize_and_compare(std::move(top), std::move(expect));
+}
+
TEST("test single child optimization") {
InvalidSelector selector;
//-------------------------------------------------------------------------
Blueprint::UP top = ap((new AndNotBlueprint())->
- addChild(ap((new AndBlueprint())->
- addChild(ap((new OrBlueprint())->
- addChild(ap((new SourceBlenderBlueprint(selector))->
- addChild(addLeafs(std::make_unique<RankBlueprint>(), {42})))))))));
+ addChild(ap((new AndBlueprint())->
+ addChild(ap((new RankBlueprint())->
+ addChild(ap((new OrBlueprint())->
+ addChild(ap((new SourceBlenderBlueprint(selector))->
+ addChild(addLeafs(std::make_unique<RankBlueprint>(), {42})))))))))));
//-------------------------------------------------------------------------
Blueprint::UP expect = addLeafs(std::make_unique<SourceBlenderBlueprint>(selector), {42});
//-------------------------------------------------------------------------
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
index a8e2e77623f..639805e116e 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
@@ -36,8 +36,8 @@ void maybe_eliminate_self(Blueprint* &self, Blueprint::UP replacement) {
self->setSourceId(discard->getSourceId());
discard->setParent(nullptr);
}
- // replace with empty blueprint if empty
- if (self->getState().estimate().empty) {
+ // replace with empty blueprint if empty, skip if already empty blueprint
+ if ((self->as_empty() == nullptr) && self->getState().estimate().empty) {
Blueprint::UP discard(self);
self = new EmptyBlueprint(discard->getState().fields());
self->setParent(discard->getParent());
@@ -130,7 +130,6 @@ Blueprint::UP
Blueprint::optimize(Blueprint::UP bp) {
Blueprint *root = bp.release();
root->optimize(root, OptimizePass::FIRST);
- root->optimize(root, OptimizePass::SECOND);
root->optimize(root, OptimizePass::LAST);
return Blueprint::UP(root);
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
index 81d225661d0..a02cb7dd17f 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
@@ -32,6 +32,7 @@ class SourceBlenderBlueprint;
class AndBlueprint;
class AndNotBlueprint;
class OrBlueprint;
+class EmptyBlueprint;
/**
* A Blueprint is an intermediate representation of a search. More
@@ -50,7 +51,7 @@ public:
using Children = std::vector<Blueprint::UP>;
using SearchIteratorUP = std::unique_ptr<SearchIterator>;
- enum class OptimizePass { FIRST, SECOND, LAST };
+ enum class OptimizePass { FIRST, LAST };
struct HitEstimate {
uint32_t estHits;
@@ -270,6 +271,9 @@ public:
virtual bool isRank() const noexcept { return false; }
virtual const attribute::ISearchContext *get_attribute_search_context() const noexcept { return nullptr; }
+ // to avoid replacing an empty blueprint with another empty blueprint
+ virtual EmptyBlueprint *as_empty() noexcept { return nullptr; }
+
// For document summaries with matched-elements-only set.
virtual std::unique_ptr<MatchingElementsSearch> create_matching_elements_search(const MatchingElementsFields &fields) const;
};
@@ -347,6 +351,7 @@ public:
IntermediateBlueprint & insertChild(size_t n, Blueprint::UP child);
IntermediateBlueprint &addChild(Blueprint::UP child);
Blueprint::UP removeChild(size_t n);
+ Blueprint::UP removeLastChild() { return removeChild(childCnt() - 1); }
SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override;
virtual HitEstimate combine(const std::vector<HitEstimate> &data) const = 0;
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
index b315965b5f4..4d0656b421c 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
@@ -52,7 +52,7 @@ void optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx) {
source_blenders.pop_back();
SourceBlenderBlueprint * blender = blender_up->asSourceBlender();
while (blender->childCnt() > 0) {
- Blueprint::UP child_up = blender->removeChild(blender->childCnt() - 1);
+ Blueprint::UP child_up = blender->removeLastChild();
size_t source_idx = lookup_create_source(sources, child_up->getSourceId(), self.get_docid_limit());
sources[source_idx]->addChild(std::move(child_up));
}
@@ -107,14 +107,24 @@ AndNotBlueprint::optimize_self(OptimizePass pass)
return;
}
if (pass == OptimizePass::FIRST) {
- AndNotBlueprint * child = getChild(0).asAndNot();
- if (child != nullptr) {
+ if (auto *child = getChild(0).asAndNot()) {
while (child->childCnt() > 1) {
- addChild(child->removeChild(1));
+ addChild(child->removeLastChild());
}
insertChild(1, child->removeChild(0));
removeChild(0);
}
+ if (auto *child = getChild(0).asAnd()) {
+ for (size_t i = 0; i < child->childCnt(); ++i) {
+ if (auto *grand_child = child->getChild(i).asAndNot()) {
+ while (grand_child->childCnt() > 1) {
+ addChild(grand_child->removeLastChild());
+ }
+ child->addChild(grand_child->removeChild(0));
+ child->removeChild(i--);
+ }
+ }
+ }
for (size_t i = 1; i < childCnt(); ++i) {
if (getChild(i).getState().estimate().empty) {
removeChild(i--);
@@ -195,10 +205,9 @@ AndBlueprint::optimize_self(OptimizePass pass)
{
if (pass == OptimizePass::FIRST) {
for (size_t i = 0; i < childCnt(); ++i) {
- AndBlueprint * child = getChild(i).asAnd();
- if (child != nullptr) {
+ if (auto *child = getChild(i).asAnd()) {
while (child->childCnt() > 0) {
- addChild(child->removeChild(0));
+ addChild(child->removeLastChild());
}
removeChild(i--);
}
@@ -297,10 +306,9 @@ OrBlueprint::optimize_self(OptimizePass pass)
{
if (pass == OptimizePass::FIRST) {
for (size_t i = 0; (childCnt() > 1) && (i < childCnt()); ++i) {
- OrBlueprint * child = getChild(i).asOr();
- if (child != nullptr) {
+ if (auto *child = getChild(i).asOr()) {
while (child->childCnt() > 0) {
- addChild(child->removeChild(0));
+ addChild(child->removeLastChild());
}
removeChild(i--);
} else if (getChild(i).getState().estimate().empty) {
diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h
index a95ca0efc72..a1c23649dd6 100644
--- a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h
+++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h
@@ -20,6 +20,7 @@ public:
EmptyBlueprint(FieldSpecBase field) : SimpleLeafBlueprint(field) {}
EmptyBlueprint() = default;
SearchIterator::UP createFilterSearch(bool strict, FilterConstraint constraint) const override;
+ EmptyBlueprint *as_empty() noexcept final override { return this; }
};
class AlwaysTrueBlueprint : public SimpleLeafBlueprint