summaryrefslogtreecommitdiffstats
path: root/searchlib/src
diff options
context:
space:
mode:
authorHåvard Pettersen <havardpe@yahooinc.com>2024-02-02 14:00:53 +0000
committerHåvard Pettersen <havardpe@yahooinc.com>2024-02-05 11:03:13 +0000
commit021d823d55362441e11c81224583bb9d46618a35 (patch)
treecbbbe136bb7a2d790e2fc2a02a1d9026bbd84e9b /searchlib/src
parent3da723264949a92244565b962c2a4f773203d1e8 (diff)
adjust strict OR flow
A simple heap-based strict OR is not able to benefit from short circuiting of the per-document evaluation. An 'optimal' strict OR would need more book-keeping and would probably be slower than the simple implementation in most normal cases.
Diffstat (limited to 'searchlib/src')
-rw-r--r--searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp4
-rw-r--r--searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp25
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/flow.h19
3 files changed, 18 insertions, 30 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
index 241d6c67e0a..aed6f6e60f2 100644
--- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
@@ -1345,7 +1345,7 @@ double calc_cost(std::vector<std::pair<double,double>> list) {
TEST("cost for OR") {
verify_cost(make::OR(),
calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}}),
- calc_cost({{0.2*1.1, 0.8},{0.3*1.2, 0.7},{1.3, 0.5}}));
+ 0.2*1.1 + 0.3*1.2 + 1.3);
}
TEST("cost for AND") {
@@ -1384,7 +1384,7 @@ TEST("cost for ONEAR") {
TEST("cost for WEAKAND") {
verify_cost(make::WEAKAND(1000),
calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}}),
- calc_cost({{0.2*1.1, 0.8},{0.3*1.2, 0.7},{1.3, 0.5}}));
+ 0.2*1.1 + 0.3*1.2 + 1.3);
}
TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
index 9a9adeac2bc..ec3e0066134 100644
--- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
+++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
@@ -130,10 +130,6 @@ TEST(FlowTest, or_ordering_is_strict_weak) {
verify_ordering_is_strict_weak<flow::MinOrCost>();
}
-TEST(FlowTest, strict_or_ordering_is_strict_weak) {
- verify_ordering_is_strict_weak<flow::MinOrStrictCost>();
-}
-
struct ExpectFlow {
double flow;
double est;
@@ -166,13 +162,16 @@ TEST(FlowTest, basic_and_flow) {
TEST(FlowTest, basic_or_flow) {
for (double in: {1.0, 0.5, 0.25}) {
- for (bool strict: {false, true}) {
- verify_flow(OrFlow(in, strict), {0.4, 0.7, 0.2},
- {{in, 0.0, strict},
- {in*0.6, 1.0-in*0.6, strict},
- {in*0.6*0.3, 1.0-in*0.6*0.3, strict},
- {in*0.6*0.3*0.8, 1.0-in*0.6*0.3*0.8, strict}});
- }
+ verify_flow(OrFlow(in, false), {0.4, 0.7, 0.2},
+ {{in, 0.0, false},
+ {in*0.6, 1.0-in*0.6, false},
+ {in*0.6*0.3, 1.0-in*0.6*0.3, false},
+ {in*0.6*0.3*0.8, 1.0-in*0.6*0.3*0.8, false}});
+ verify_flow(OrFlow(in, true), {0.4, 0.7, 0.2},
+ {{in, 0.0, true},
+ {in, 1.0-in*0.6, true},
+ {in, 1.0-in*0.6*0.3, true},
+ {in, 1.0-in*0.6*0.3*0.8, true}});
}
}
@@ -193,7 +192,7 @@ TEST(FlowTest, flow_cost) {
EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.7*1.3);
EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.7*1.3);
EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, false), 1.1 + 0.6*1.2 + 0.6*0.3*1.3);
- EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, true), 0.6 + 0.6*0.5 + 0.6*0.3*0.4);
+ EXPECT_DOUBLE_EQ(Item::ordered_cost_of<OrFlow>(data, true), 0.6 + 0.5 + 0.4);
EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndNotFlow>(data, false), 1.1 + 0.4*1.2 + 0.4*0.3*1.3);
EXPECT_DOUBLE_EQ(Item::ordered_cost_of<AndNotFlow>(data, true), 0.6 + 0.4*1.2 + 0.4*0.3*1.3);
}
@@ -232,7 +231,7 @@ TEST(FlowTest, optimal_or_flow) {
EXPECT_EQ(Item::ordered_cost_of<OrFlow>(data, strict), min_cost);
auto check = [&](const std::vector<Item> &my_data) noexcept {
double my_cost = Item::ordered_cost_of<OrFlow>(my_data, strict);
- EXPECT_LE(min_cost, my_cost);
+ EXPECT_LE(min_cost, my_cost + 1e-9);
max_cost = std::max(max_cost, my_cost);
};
each_perm(data, check);
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h
index 01a5922d864..8b354b7b9a9 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.h
@@ -67,16 +67,6 @@ struct MinOrCost {
}
};
-template <typename ADAPTER>
-struct MinOrStrictCost {
- // sort children to minimize total cost of strict OR flow
- [[no_unique_address]] ADAPTER adapter;
- MinOrStrictCost(ADAPTER adapter_in) noexcept : adapter(adapter_in) {}
- bool operator () (const auto &a, const auto &b) const noexcept {
- return adapter.estimate(a) * adapter.strict_cost(b) > adapter.estimate(b) * adapter.strict_cost(a);
- }
-};
-
template <typename ADAPTER, typename T, typename F>
double estimate_of(ADAPTER adapter, const T &children, F flow) {
for (const auto &child: children) {
@@ -195,18 +185,19 @@ public:
class OrFlow : public FlowMixin<OrFlow>{
private:
+ double _full;
double _flow;
bool _strict;
bool _first;
public:
OrFlow(double in, bool strict) noexcept
- : _flow(in), _strict(strict), _first(true) {}
+ : _full(in), _flow(in), _strict(strict), _first(true) {}
void add(double est) noexcept {
_flow *= (1.0 - est);
_first = false;
}
double flow() const noexcept {
- return _flow;
+ return _strict ? _full : _flow;
}
bool strict() const noexcept {
return _strict;
@@ -215,9 +206,7 @@ public:
return _first ? 0.0 : (1.0 - _flow);
}
static void sort(auto adapter, auto &children, bool strict) {
- if (strict) {
- flow::sort<flow::MinOrStrictCost>(adapter, children);
- } else {
+ if (!strict) {
flow::sort<flow::MinOrCost>(adapter, children);
}
}