aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Håvard Pettersen <havardpe@yahooinc.com> 2024-02-15 09:50:53 +0000
committer: Håvard Pettersen <havardpe@yahooinc.com> 2024-02-16 11:52:14 +0000
commit: 51db1c06279843401604cc61f8e6d12c9cc32830 (patch)
tree: 5e915a302ed8f7cd680bf8e04477eb7a5c2618c9
parent: ed564d0a528d5a88585b2c5e3e56ec21b7f12a12 (diff)
use flow for fetchPostings
note that only AND/OR are wired to their respective flows here, which means only strict OR should change (all children getting full in-flow)
-rw-r--r-- searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp 20
-rw-r--r-- searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp 58
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/blueprint.cpp 17
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/blueprint.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/flow.h 36
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp 18
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h 4
7 files changed, 129 insertions, 26 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
index ee7c201f093..31db731a598 100644
--- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
@@ -165,20 +165,30 @@ TEST("test Or propagates updated histestimate") {
bp->addChild(ap(MyLeafSpec(800).create<RememberExecuteInfo>()->setSourceId(2)));
bp->addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2)));
bp->setDocIdLimit(5000);
- // sort OR as non-strict to get expected order. With strict OR,
- // the order would be irrelevant since we use the relative
- // estimate as strict_cost for leafs.
+ // NOTE: use non-strict OR ordering since strict OR ordering is non-deterministic
optimize(bp, false);
- bp->fetchPostings(ExecuteInfo::TRUE);
+ //--- execute info when non-strict:
+ bp->fetchPostings(ExecuteInfo::FALSE);
EXPECT_EQUAL(4u, bp->childCnt());
for (uint32_t i = 0; i < bp->childCnt(); i++) {
const auto & child = dynamic_cast<const RememberExecuteInfo &>(bp->getChild(i));
- EXPECT_TRUE(child.is_strict);
+ EXPECT_FALSE(child.is_strict);
}
EXPECT_EQUAL(1.0, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(0)).hit_rate);
EXPECT_APPROX(0.5, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(1)).hit_rate, 1e-6);
EXPECT_APPROX(0.5*3.0/5.0, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(2)).hit_rate, 1e-6);
EXPECT_APPROX(0.5*3.0*42.0/(5.0*50.0), dynamic_cast<const RememberExecuteInfo &>(bp->getChild(3)).hit_rate, 1e-6);
+ //--- execute info when strict:
+ bp->fetchPostings(ExecuteInfo::TRUE);
+ EXPECT_EQUAL(4u, bp->childCnt());
+ for (uint32_t i = 0; i < bp->childCnt(); i++) {
+ const auto & child = dynamic_cast<const RememberExecuteInfo &>(bp->getChild(i));
+ EXPECT_TRUE(child.is_strict);
+ }
+ EXPECT_EQUAL(1.0, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(0)).hit_rate);
+ EXPECT_EQUAL(1.0, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(1)).hit_rate);
+ EXPECT_EQUAL(1.0, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(2)).hit_rate);
+ EXPECT_EQUAL(1.0, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(3)).hit_rate);
}
TEST("test And Blueprint") {
diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
index 7a3950dbf1c..5009a15e438 100644
--- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
+++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
@@ -126,6 +126,14 @@ void verify_flow(auto flow, const std::vector<double> &est_list, const std::vect
}
}
+void verify_flow_calc(FlowCalc flow_calc, const std::vector<double> &est_list, const std::vector<double> &expect) { // checks the flow returned for each estimate, plus the flow left after the last one
+ ASSERT_EQ(est_list.size() + 1, expect.size()); // one expectation per estimate and one trailing value
+ for (size_t i = 0; i < est_list.size(); ++i) {
+ EXPECT_DOUBLE_EQ(flow_calc(est_list[i]), expect[i]); // calls are order dependent: the calculator is invoked once per estimate, in list order
+ }
+ EXPECT_DOUBLE_EQ(flow_calc(0.5), expect.back()); // extra probe call; its result is the flow after all listed estimates
+}
+
TEST(FlowTest, full_and_flow) {
for (bool strict: {false, true}) {
verify_flow(AndFlow(strict), {0.4, 0.7, 0.2},
@@ -133,6 +141,8 @@ TEST(FlowTest, full_and_flow) {
{0.4, 0.4, false},
{0.4*0.7, 0.4*0.7, false},
{0.4*0.7*0.2, 0.4*0.7*0.2, false}});
+ verify_flow_calc(flow_calc<AndFlow>(strict, 1.0),
+ {0.4, 0.7, 0.2}, {1.0, 0.4, 0.4*0.7, 0.4*0.7*0.2});
}
}
@@ -143,6 +153,8 @@ TEST(FlowTest, partial_and_flow) {
{in*0.4, in*0.4, false},
{in*0.4*0.7, in*0.4*0.7, false},
{in*0.4*0.7*0.2, in*0.4*0.7*0.2, false}});
+ verify_flow_calc(flow_calc<AndFlow>(false, in),
+ {0.4, 0.7, 0.2}, {in*1.0, in*0.4, in*0.4*0.7, in*0.4*0.7*0.2});
}
}
@@ -152,11 +164,15 @@ TEST(FlowTest, full_or_flow) {
{0.6, 1.0-0.6, false},
{0.6*0.3, 1.0-0.6*0.3, false},
{0.6*0.3*0.8, 1.0-0.6*0.3*0.8, false}});
+ verify_flow_calc(flow_calc<OrFlow>(false, 1.0),
+ {0.4, 0.7, 0.2}, {1.0, 0.6, 0.6*0.3, 0.6*0.3*0.8});
verify_flow(OrFlow(true), {0.4, 0.7, 0.2},
{{1.0, 0.0, true},
{1.0, 1.0-0.6, true},
{1.0, 1.0-0.6*0.3, true},
{1.0, 1.0-0.6*0.3*0.8, true}});
+ verify_flow_calc(flow_calc<OrFlow>(true, 1.0),
+ {0.4, 0.7, 0.2}, {1.0, 1.0, 1.0, 1.0});
}
TEST(FlowTest, partial_or_flow) {
@@ -166,6 +182,8 @@ TEST(FlowTest, partial_or_flow) {
{in*0.6, 1.0-in*0.6, false},
{in*0.6*0.3, 1.0-in*0.6*0.3, false},
{in*0.6*0.3*0.8, 1.0-in*0.6*0.3*0.8, false}});
+ verify_flow_calc(flow_calc<OrFlow>(false, in),
+ {0.4, 0.7, 0.2}, {in, in*0.6, in*0.6*0.3, in*0.6*0.3*0.8});
}
}
@@ -176,6 +194,8 @@ TEST(FlowTest, full_and_not_flow) {
{0.4, 0.4, false},
{0.4*0.3, 0.4*0.3, false},
{0.4*0.3*0.8, 0.4*0.3*0.8, false}});
+ verify_flow_calc(flow_calc<AndNotFlow>(strict, 1.0),
+ {0.4, 0.7, 0.2}, {1.0, 0.4, 0.4*0.3, 0.4*0.3*0.8});
}
}
@@ -186,9 +206,47 @@ TEST(FlowTest, partial_and_not_flow) {
{in*0.4, in*0.4, false},
{in*0.4*0.3, in*0.4*0.3, false},
{in*0.4*0.3*0.8, in*0.4*0.3*0.8, false}});
+ verify_flow_calc(flow_calc<AndNotFlow>(false, in),
+ {0.4, 0.7, 0.2}, {in, in*0.4, in*0.4*0.3, in*0.4*0.3*0.8});
+ }
+}
+
+TEST(FlowTest, full_first_flow_calc) { // in-flow 1.0: only the first estimate (0.4) reduces the flow, for both strictness values
+ for (bool strict: {false, true}) {
+ verify_flow_calc(first_flow_calc(strict, 1.0),
+ {0.4, 0.7, 0.2}, {1.0, 0.4, 0.4, 0.4});
+ }
+}
+
+TEST(FlowTest, partial_first_flow_calc) { // non-strict: results scale with the in-flow, and only the first estimate is applied
+ for (double in: {1.0, 0.5, 0.25}) {
+ verify_flow_calc(first_flow_calc(false, in),
+ {0.4, 0.7, 0.2}, {in, in*0.4, in*0.4, in*0.4});
+ }
+}
+
+TEST(FlowTest, full_full_flow_calc) { // in-flow 1.0: every call reports 1.0 regardless of the estimates
+ for (bool strict: {false, true}) {
+ verify_flow_calc(full_flow_calc(strict, 1.0),
+ {0.4, 0.7, 0.2}, {1.0, 1.0, 1.0, 1.0});
}
}
+TEST(FlowTest, partial_full_flow_calc) { // non-strict: every call reports the unmodified in-flow
+ for (double in: {1.0, 0.5, 0.25}) {
+ verify_flow_calc(full_flow_calc(false, in),
+ {0.4, 0.7, 0.2}, {in, in, in, in});
+ }
+}
+
+TEST(FlowTest, flow_calc_strictness_overrides_rate) { // strict=true: the first call yields 1.0 even though a rate of 0.5 was passed
+ EXPECT_EQ(flow_calc<AndFlow>(true, 0.5)(0.5), 1.0);
+ EXPECT_EQ(flow_calc<OrFlow>(true, 0.5)(0.5), 1.0);
+ EXPECT_EQ(flow_calc<AndNotFlow>(true, 0.5)(0.5), 1.0);
+ EXPECT_EQ(first_flow_calc(true, 0.5)(0.5), 1.0);
+ EXPECT_EQ(full_flow_calc(true, 0.5)(0.5), 1.0);
+}
+
TEST(FlowTest, flow_cost) {
std::vector<FlowStats> data = {{0.4, 1.1, 0.6}, {0.7, 1.2, 0.5}, {0.2, 1.3, 0.4}};
EXPECT_DOUBLE_EQ(ordered_cost_of<AndFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.7*1.3);
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
index 2f69c45d418..f3539c6989a 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
@@ -481,6 +481,12 @@ IntermediateBlueprint::count_termwise_nodes(const UnpackInfo &unpack) const
return termwise_nodes;
}
+FlowCalc
+IntermediateBlueprint::make_flow_calc(bool strict, double flow) const // base implementation of the virtual flow-calculator factory
+{
+ return full_flow_calc(strict, flow); // full_flow_calc ignores child estimates, so every child is given the same flow
+}
+
IntermediateBlueprint::IndexList
IntermediateBlueprint::find(const IPredicate & pred) const
{
@@ -538,13 +544,6 @@ IntermediateBlueprint::calculateState() const
return state;
}
-double
-IntermediateBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate) const
-{
- (void) child;
- return hit_rate;
-}
-
bool
IntermediateBlueprint::should_do_termwise_eval(const UnpackInfo &unpack, double match_limit) const
{
@@ -648,11 +647,11 @@ IntermediateBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const
void
IntermediateBlueprint::fetchPostings(const ExecuteInfo &execInfo)
{
- double nextHitRate = execInfo.hit_rate();
+ FlowCalc flow_calc = make_flow_calc(execInfo.is_strict(), execInfo.hit_rate()); // node-specific calculator, seeded with this node's strictness and hit rate
for (size_t i = 0; i < _children.size(); ++i) {
Blueprint & child = *_children[i];
+ double nextHitRate = flow_calc(child.estimate()); // hit rate passed to this child's ExecuteInfo; calls are made in child order
child.fetchPostings(ExecuteInfo::create(execInfo.is_strict() && inheritStrict(i), nextHitRate, execInfo)); // child is strict only if this node is strict and inheritStrict(i) holds
- nextHitRate = computeNextHitRate(child, nextHitRate);
}
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
index 439eff680ec..395512d84cc 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
@@ -362,7 +362,7 @@ private:
bool infer_want_global_filter() const;
size_t count_termwise_nodes(const UnpackInfo &unpack) const;
- virtual double computeNextHitRate(const Blueprint & child, double hit_rate) const;
+ virtual FlowCalc make_flow_calc(bool strict, double flow) const; // hook that builds the calculator fetchPostings uses to derive each child's hit rate
protected:
// returns an empty collection if children have empty or
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h
index cfbb28b190f..f31122166d9 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.h
@@ -4,7 +4,7 @@
#include <vespa/vespalib/util/small_vector.h>
#include <cstddef>
#include <algorithm>
-#include <cmath>
+#include <functional>
// Model how boolean result decisions flow through intermediate nodes
// of different types based on relative estimates for sub-expressions
@@ -280,4 +280,38 @@ public:
}
};
+using FlowCalc = std::function<double(double)>; // callable: takes a child's estimate, returns the flow (hit rate) to hand to that child
+
+template <typename FLOW>
+FlowCalc flow_calc(bool strict, double non_strict_rate) { // adapts a FLOW object into a FlowCalc
+ FLOW flow = strict ? FLOW(true) : FLOW(non_strict_rate); // when strict, non_strict_rate is not used at all
+ return [flow](double est) mutable noexcept { // closure holds its own copy of flow and mutates it on every call
+ double next_flow = flow.flow(); // read the flow before this estimate is added
+ flow.add(est); // update state so the next call sees the flow after this child
+ return next_flow;
+ };
+}
+
+inline FlowCalc first_flow_calc(bool strict, double flow) { // calculator where only the first estimate reduces the flow
+ if (strict) {
+ flow = 1.0; // strict replaces the supplied flow with 1.0
+ }
+ bool first = true;
+ return [flow,first](double est) mutable noexcept { // closure owns copies of flow and first, updated across calls
+ double next_flow = flow; // value returned is the flow before any update made by this call
+ if (first) {
+ flow *= est; // applied once; later estimates leave the flow unchanged
+ first = false;
+ }
+ return next_flow;
+ };
+}
+
+inline FlowCalc full_flow_calc(bool strict, double flow) { // calculator that returns the same flow on every call
+ if (strict) {
+ flow = 1.0; // strict replaces the supplied flow with 1.0
+ }
+ return [flow](double) noexcept { return flow; }; // the estimate argument is ignored
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
index 993639becf2..6faa4ddf147 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
@@ -300,14 +300,10 @@ AndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const
return create_and_filter(get_children(), strict, constraint);
}
-double
-AndBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate) const {
- return hit_rate * child.estimate();
-}
-
-double
-OrBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate) const {
- return hit_rate * (1.0 - child.estimate());
+FlowCalc
+AndBlueprint::make_flow_calc(bool strict, double flow) const // AND override of the flow-calculator factory
+{
+ return flow_calc<AndFlow>(strict, flow); // delegates to the AndFlow model; flow is passed as the non-strict rate
}
//-----------------------------------------------------------------------------
@@ -404,6 +400,12 @@ OrBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const
return create_or_filter(get_children(), strict, constraint);
}
+FlowCalc
+OrBlueprint::make_flow_calc(bool strict, double flow) const // OR override of the flow-calculator factory
+{
+ return flow_calc<OrFlow>(strict, flow); // delegates to the OrFlow model; flow is passed as the non-strict rate
+}
+
uint8_t
OrBlueprint::calculate_cost_tier() const
{
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
index 1da70b4fa70..25586022535 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
@@ -56,7 +56,7 @@ public:
SearchIterator::UP
createFilterSearch(bool strict, FilterConstraint constraint) const override;
private:
- double computeNextHitRate(const Blueprint & child, double hit_rate) const override;
+ FlowCalc make_flow_calc(bool strict, double flow) const override; // AND-specific flow calculator (defined with flow_calc<AndFlow>)
};
//-----------------------------------------------------------------------------
@@ -81,7 +81,7 @@ public:
SearchIterator::UP
createFilterSearch(bool strict, FilterConstraint constraint) const override;
private:
- double computeNextHitRate(const Blueprint & child, double hit_rate) const override;
+ FlowCalc make_flow_calc(bool strict, double flow) const override; // OR-specific flow calculator (defined with flow_calc<OrFlow>)
uint8_t calculate_cost_tier() const override;
};