summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Håvard Pettersen <havardpe@yahooinc.com> 2024-02-23 09:41:04 +0000
committer Håvard Pettersen <havardpe@yahooinc.com> 2024-02-23 12:23:53 +0000
commit fbc08038079b6079fcc12b1f54eba4d59acabe83 (patch)
tree 218e277d83da7794b14ce449066df371ff4fc394 /searchlib
parent 10267c6347197b5e9731d0a1e9169d74608178f7 (diff)
Know the in-flow when sorting blueprints
Also added the option and tagging mechanism needed to force selected blueprints to be strict even in a non-strict context, as well as calculating how much time this saves you.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/nearsearch/nearsearch_test.cpp 3
-rw-r--r-- searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp 19
-rw-r--r-- searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp 30
-rw-r--r-- searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp 2
-rw-r--r-- searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp 41
-rw-r--r-- searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp 3
-rw-r--r-- searchlib/src/tests/queryeval/same_element/same_element_test.cpp 3
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/blueprint.cpp 27
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/blueprint.h 36
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/flow.h 65
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp 32
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h 16
12 files changed, 167 insertions, 110 deletions
diff --git a/searchlib/src/tests/nearsearch/nearsearch_test.cpp b/searchlib/src/tests/nearsearch/nearsearch_test.cpp
index 95701e59444..6f7cf85258b 100644
--- a/searchlib/src/tests/nearsearch/nearsearch_test.cpp
+++ b/searchlib/src/tests/nearsearch/nearsearch_test.cpp
@@ -229,7 +229,8 @@ Test::testNearSearch(MyQuery &query, uint32_t matchId)
near_b->addChild(query.getTerm(i).make_blueprint(fieldId, i));
}
bp->setDocIdLimit(1000);
- bp = search::queryeval::Blueprint::optimize_and_sort(std::move(bp), true, true);
+ auto opts = search::queryeval::Blueprint::Options::all();
+ bp = search::queryeval::Blueprint::optimize_and_sort(std::move(bp), true, opts);
bp->fetchPostings(search::queryeval::ExecuteInfo::TRUE);
search::fef::MatchData::UP md(layout.createMatchData());
search::queryeval::SearchIterator::UP near = bp->createSearch(*md, true);
diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
index 2a21d66c090..1af9ee6cff7 100644
--- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
@@ -17,13 +17,15 @@ using namespace search::fef;
namespace {
+auto opts = Blueprint::Options::all();
+
//-----------------------------------------------------------------------------
class MyOr : public IntermediateBlueprint
{
private:
- FlowCalc make_flow_calc(bool strict, double flow) const override {
- return flow_calc<OrFlow>(strict, flow);
+ FlowCalc make_flow_calc(InFlow in_flow) const override {
+ return flow_calc<OrFlow>(in_flow);
}
public:
FlowStats calculate_flow_stats(uint32_t) const final {
@@ -451,7 +453,7 @@ TEST_F("testChildAndNotCollapsing", Fixture)
);
TEST_DO(f.check_not_equal(*sorted, *unsorted));
unsorted->setDocIdLimit(1000);
- unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, true);
+ unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, opts);
TEST_DO(f.check_equal(*sorted, *unsorted));
}
@@ -491,7 +493,7 @@ TEST_F("testChildAndCollapsing", Fixture)
TEST_DO(f.check_not_equal(*sorted, *unsorted));
unsorted->setDocIdLimit(1000);
- unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, true);
+ unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, opts);
TEST_DO(f.check_equal(*sorted, *unsorted));
}
@@ -530,10 +532,9 @@ TEST_F("testChildOrCollapsing", Fixture)
);
TEST_DO(f.check_not_equal(*sorted, *unsorted));
unsorted->setDocIdLimit(1000);
- // we sort non-strict here since the default costs of 1/est for
- // non-strict/strict leaf iterators makes the order of iterators
- // under a strict OR irrelevant.
- unsorted = Blueprint::optimize_and_sort(std::move(unsorted), false, true);
+ // we sort non-strict here since a strict OR does not have a
+ // deterministic sort order.
+ unsorted = Blueprint::optimize_and_sort(std::move(unsorted), false, opts);
TEST_DO(f.check_equal(*sorted, *unsorted));
}
@@ -577,7 +578,7 @@ TEST_F("testChildSorting", Fixture)
TEST_DO(f.check_not_equal(*sorted, *unsorted));
unsorted->setDocIdLimit(1000);
- unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, true);
+ unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, opts);
TEST_DO(f.check_equal(*sorted, *unsorted));
}
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
index 31db731a598..f192ea93b0e 100644
--- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
@@ -132,7 +132,8 @@ TEST("test AndNot Blueprint") {
template <typename BP>
void optimize(std::unique_ptr<BP> &ref, bool strict) {
- auto optimized = Blueprint::optimize_and_sort(std::move(ref), strict, true);
+ auto opts = Blueprint::Options::all();
+ auto optimized = Blueprint::optimize_and_sort(std::move(ref), strict, opts);
ref.reset(dynamic_cast<BP*>(optimized.get()));
ASSERT_TRUE(ref);
optimized.release();
@@ -568,9 +569,10 @@ optimize_and_compare(Blueprint::UP top, Blueprint::UP expect, bool strict = true
top->setDocIdLimit(1000);
expect->setDocIdLimit(1000);
TEST_DO(compare(*top, *expect, false));
- top = Blueprint::optimize_and_sort(std::move(top), strict, sort_by_cost);
+ auto opts = Blueprint::Options::all().sort_by_cost(sort_by_cost);
+ top = Blueprint::optimize_and_sort(std::move(top), strict, opts);
TEST_DO(compare(*top, *expect, true));
- expect = Blueprint::optimize_and_sort(std::move(expect), strict, sort_by_cost);
+ expect = Blueprint::optimize_and_sort(std::move(expect), strict, opts);
TEST_DO(compare(*expect, *top, true));
}
@@ -699,11 +701,12 @@ TEST("test empty root node optimization and safeness") {
//-------------------------------------------------------------------------
auto expect_up = std::make_unique<EmptyBlueprint>();
- compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top1), true, true), true);
- compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top2), true, true), true);
- compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top3), true, true), true);
- compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top4), true, true), true);
- compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top5), true, true), true);
+ auto opts = Blueprint::Options::all();
+ compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top1), true, opts), true);
+ compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top2), true, opts), true);
+ compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top3), true, opts), true);
+ compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top4), true, opts), true);
+ compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top5), true, opts), true);
}
TEST("and with one empty child is optimized away") {
@@ -711,7 +714,8 @@ TEST("and with one empty child is optimized away") {
Blueprint::UP top = ap((new SourceBlenderBlueprint(*selector))->
addChild(ap(MyLeafSpec(10).create())).
addChild(addLeafs(std::make_unique<AndBlueprint>(), {{0, true}, 10, 20})));
- top = Blueprint::optimize_and_sort(std::move(top), true, true);
+ auto opts = Blueprint::Options::all();
+ top = Blueprint::optimize_and_sort(std::move(top), true, opts);
Blueprint::UP expect_up(ap((new SourceBlenderBlueprint(*selector))->
addChild(ap(MyLeafSpec(10).create())).
addChild(std::make_unique<EmptyBlueprint>())));
@@ -888,8 +892,9 @@ TEST("require that replaced blueprints retain source id") {
addChild(ap(MyLeafSpec(30).create()->setSourceId(55)))));
Blueprint::UP expect2_up(ap(MyLeafSpec(30).create()->setSourceId(42)));
//-------------------------------------------------------------------------
- top1_up = Blueprint::optimize_and_sort(std::move(top1_up), true, true);
- top2_up = Blueprint::optimize_and_sort(std::move(top2_up), true, true);
+ auto opts = Blueprint::Options::all();
+ top1_up = Blueprint::optimize_and_sort(std::move(top1_up), true, opts);
+ top2_up = Blueprint::optimize_and_sort(std::move(top2_up), true, opts);
compare(*expect1_up, *top1_up, true);
compare(*expect2_up, *top2_up, true);
EXPECT_EQUAL(13u, top1_up->getSourceId());
@@ -1204,7 +1209,8 @@ TEST("require_that_unpack_optimization_is_not_overruled_by_equiv") {
TEST("require that ANDNOT without children is optimized to empty search") {
Blueprint::UP top_up = std::make_unique<AndNotBlueprint>();
auto expect_up = std::make_unique<EmptyBlueprint>();
- top_up = Blueprint::optimize_and_sort(std::move(top_up), true, true);
+ auto opts = Blueprint::Options::all();
+ top_up = Blueprint::optimize_and_sort(std::move(top_up), true, opts);
compare(*expect_up, *top_up, true);
}
diff --git a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp
index 3e8bc06bfd8..c4d34ab3565 100644
--- a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp
+++ b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp
@@ -49,7 +49,7 @@ concept ChildCollector = requires(T a, std::unique_ptr<Blueprint> bp) {
struct DefaultBlueprint : Blueprint {
FlowStats calculate_flow_stats(uint32_t) const override { abort(); }
void optimize(Blueprint* &, OptimizePass) override { abort(); }
- void sort(bool, bool) override { abort(); }
+ double sort(InFlow, const Options &) override { abort(); }
const State &getState() const override { abort(); }
void fetchPostings(const ExecuteInfo &) override { abort(); }
void freeze() override { abort(); }
diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
index 5009a15e438..8b8b6c1282e 100644
--- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
+++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
@@ -141,7 +141,7 @@ TEST(FlowTest, full_and_flow) {
{0.4, 0.4, false},
{0.4*0.7, 0.4*0.7, false},
{0.4*0.7*0.2, 0.4*0.7*0.2, false}});
- verify_flow_calc(flow_calc<AndFlow>(strict, 1.0),
+ verify_flow_calc(flow_calc<AndFlow>(strict),
{0.4, 0.7, 0.2}, {1.0, 0.4, 0.4*0.7, 0.4*0.7*0.2});
}
}
@@ -153,7 +153,7 @@ TEST(FlowTest, partial_and_flow) {
{in*0.4, in*0.4, false},
{in*0.4*0.7, in*0.4*0.7, false},
{in*0.4*0.7*0.2, in*0.4*0.7*0.2, false}});
- verify_flow_calc(flow_calc<AndFlow>(false, in),
+ verify_flow_calc(flow_calc<AndFlow>(in),
{0.4, 0.7, 0.2}, {in*1.0, in*0.4, in*0.4*0.7, in*0.4*0.7*0.2});
}
}
@@ -164,14 +164,14 @@ TEST(FlowTest, full_or_flow) {
{0.6, 1.0-0.6, false},
{0.6*0.3, 1.0-0.6*0.3, false},
{0.6*0.3*0.8, 1.0-0.6*0.3*0.8, false}});
- verify_flow_calc(flow_calc<OrFlow>(false, 1.0),
+ verify_flow_calc(flow_calc<OrFlow>(1.0),
{0.4, 0.7, 0.2}, {1.0, 0.6, 0.6*0.3, 0.6*0.3*0.8});
verify_flow(OrFlow(true), {0.4, 0.7, 0.2},
{{1.0, 0.0, true},
{1.0, 1.0-0.6, true},
{1.0, 1.0-0.6*0.3, true},
{1.0, 1.0-0.6*0.3*0.8, true}});
- verify_flow_calc(flow_calc<OrFlow>(true, 1.0),
+ verify_flow_calc(flow_calc<OrFlow>(true),
{0.4, 0.7, 0.2}, {1.0, 1.0, 1.0, 1.0});
}
@@ -182,7 +182,7 @@ TEST(FlowTest, partial_or_flow) {
{in*0.6, 1.0-in*0.6, false},
{in*0.6*0.3, 1.0-in*0.6*0.3, false},
{in*0.6*0.3*0.8, 1.0-in*0.6*0.3*0.8, false}});
- verify_flow_calc(flow_calc<OrFlow>(false, in),
+ verify_flow_calc(flow_calc<OrFlow>(in),
{0.4, 0.7, 0.2}, {in, in*0.6, in*0.6*0.3, in*0.6*0.3*0.8});
}
}
@@ -194,7 +194,7 @@ TEST(FlowTest, full_and_not_flow) {
{0.4, 0.4, false},
{0.4*0.3, 0.4*0.3, false},
{0.4*0.3*0.8, 0.4*0.3*0.8, false}});
- verify_flow_calc(flow_calc<AndNotFlow>(strict, 1.0),
+ verify_flow_calc(flow_calc<AndNotFlow>(strict),
{0.4, 0.7, 0.2}, {1.0, 0.4, 0.4*0.3, 0.4*0.3*0.8});
}
}
@@ -206,45 +206,52 @@ TEST(FlowTest, partial_and_not_flow) {
{in*0.4, in*0.4, false},
{in*0.4*0.3, in*0.4*0.3, false},
{in*0.4*0.3*0.8, in*0.4*0.3*0.8, false}});
- verify_flow_calc(flow_calc<AndNotFlow>(false, in),
+ verify_flow_calc(flow_calc<AndNotFlow>(in),
{0.4, 0.7, 0.2}, {in, in*0.4, in*0.4*0.3, in*0.4*0.3*0.8});
}
}
TEST(FlowTest, full_first_flow_calc) {
for (bool strict: {false, true}) {
- verify_flow_calc(first_flow_calc(strict, 1.0),
+ verify_flow_calc(first_flow_calc(strict),
{0.4, 0.7, 0.2}, {1.0, 0.4, 0.4, 0.4});
}
}
TEST(FlowTest, partial_first_flow_calc) {
for (double in: {1.0, 0.5, 0.25}) {
- verify_flow_calc(first_flow_calc(false, in),
+ verify_flow_calc(first_flow_calc(in),
{0.4, 0.7, 0.2}, {in, in*0.4, in*0.4, in*0.4});
}
}
TEST(FlowTest, full_full_flow_calc) {
for (bool strict: {false, true}) {
- verify_flow_calc(full_flow_calc(strict, 1.0),
+ verify_flow_calc(full_flow_calc(strict),
{0.4, 0.7, 0.2}, {1.0, 1.0, 1.0, 1.0});
}
}
TEST(FlowTest, partial_full_flow_calc) {
for (double in: {1.0, 0.5, 0.25}) {
- verify_flow_calc(full_flow_calc(false, in),
+ verify_flow_calc(full_flow_calc(in),
{0.4, 0.7, 0.2}, {in, in, in, in});
}
}
-TEST(FlowTest, flow_calc_strictness_overrides_rate) {
- EXPECT_EQ(flow_calc<AndFlow>(true, 0.5)(0.5), 1.0);
- EXPECT_EQ(flow_calc<OrFlow>(true, 0.5)(0.5), 1.0);
- EXPECT_EQ(flow_calc<AndNotFlow>(true, 0.5)(0.5), 1.0);
- EXPECT_EQ(first_flow_calc(true, 0.5)(0.5), 1.0);
- EXPECT_EQ(full_flow_calc(true, 0.5)(0.5), 1.0);
+TEST(FlowTest, in_flow_strict_vs_rate_interaction) {
+ EXPECT_EQ(InFlow(true).strict(), true);
+ EXPECT_EQ(InFlow(true).rate(), 1.0);
+ EXPECT_EQ(InFlow(false).strict(), false);
+ EXPECT_EQ(InFlow(false).rate(), 1.0);
+ EXPECT_EQ(InFlow(0.5).strict(), false);
+ EXPECT_EQ(InFlow(0.5).rate(), 0.5);
+ EXPECT_EQ(InFlow(true, 0.5).strict(), true);
+ EXPECT_EQ(InFlow(true, 0.5).rate(), 1.0);
+ EXPECT_EQ(InFlow(false, 0.5).strict(), false);
+ EXPECT_EQ(InFlow(false, 0.5).rate(), 0.5);
+ EXPECT_EQ(InFlow(-1.0).strict(), false);
+ EXPECT_EQ(InFlow(-1.0).rate(), 0.0);
}
TEST(FlowTest, flow_cost) {
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
index 6747fed888c..bdc89363b22 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
@@ -342,7 +342,8 @@ non_strict_search(Blueprint& blueprint, MatchData& md, uint32_t docid_limit, dou
BenchmarkResult
benchmark_search(Blueprint::UP blueprint, uint32_t docid_limit, bool strict, double filter_hit_ratio)
{
- blueprint->sort(strict, true);
+ auto opts = Blueprint::Options::all();
+ blueprint->sort(strict, opts);
blueprint->fetchPostings(ExecuteInfo::createForTest(strict));
// Note: All blueprints get the same TermFieldMatchData instance.
// This is OK as long as we don't do unpacking and only use 1 thread.
diff --git a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp
index c9fcb472b68..64f4fafd2d1 100644
--- a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp
+++ b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp
@@ -46,7 +46,8 @@ std::unique_ptr<SameElementBlueprint> make_blueprint(const std::vector<FakeResul
}
Blueprint::UP finalize(Blueprint::UP bp, bool strict) {
- Blueprint::UP result = Blueprint::optimize_and_sort(std::move(bp), true, true);
+ auto opts = Blueprint::Options::all();
+ Blueprint::UP result = Blueprint::optimize_and_sort(std::move(bp), true, opts);
result->fetchPostings(ExecuteInfo::createForTest(strict));
result->freeze();
return result;
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
index f3539c6989a..5a225328003 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
@@ -121,6 +121,7 @@ Blueprint::Blueprint() noexcept
_flow_stats(0.0, 0.0, 0.0),
_sourceId(0xffffffff),
_docid_limit(0),
+ _force_strict(false),
_frozen(false)
{
}
@@ -481,12 +482,6 @@ IntermediateBlueprint::count_termwise_nodes(const UnpackInfo &unpack) const
return termwise_nodes;
}
-FlowCalc
-IntermediateBlueprint::make_flow_calc(bool strict, double flow) const
-{
- return full_flow_calc(strict, flow);
-}
-
IntermediateBlueprint::IndexList
IntermediateBlueprint::find(const IPredicate & pred) const
{
@@ -574,13 +569,17 @@ IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass)
maybe_eliminate_self(self, get_replacement());
}
-void
-IntermediateBlueprint::sort(bool strict, bool sort_by_cost)
+double
+IntermediateBlueprint::sort(InFlow in_flow, const Options &opts)
{
- sort(_children, strict, sort_by_cost);
+ auto flow_calc = make_flow_calc(in_flow);
+ sort(_children, in_flow.strict(), opts.sort_by_cost());
for (size_t i = 0; i < _children.size(); ++i) {
- _children[i]->sort(strict && inheritStrict(i), sort_by_cost);
+ double next_rate = flow_calc(_children[i]->estimate());
+ _children[i]->sort(InFlow(in_flow.strict() && inheritStrict(i), next_rate), opts);
}
+ // TODO: better cost estimate (due to known in-flow and eagerness)
+ return in_flow.strict() ? strict_cost() : in_flow.rate() * cost();
}
void
@@ -647,7 +646,7 @@ IntermediateBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const
void
IntermediateBlueprint::fetchPostings(const ExecuteInfo &execInfo)
{
- FlowCalc flow_calc = make_flow_calc(execInfo.is_strict(), execInfo.hit_rate());
+ FlowCalc flow_calc = make_flow_calc(InFlow(execInfo.is_strict(), execInfo.hit_rate()));
for (size_t i = 0; i < _children.size(); ++i) {
Blueprint & child = *_children[i];
double nextHitRate = flow_calc(child.estimate());
@@ -766,9 +765,11 @@ LeafBlueprint::optimize(Blueprint* &self, OptimizePass pass)
maybe_eliminate_self(self, get_replacement());
}
-void
-LeafBlueprint::sort(bool, bool)
+double
+LeafBlueprint::sort(InFlow in_flow, const Options &)
{
+ // TODO: better cost estimate (due to known in-flow and eagerness)
+ return in_flow.strict() ? strict_cost() : in_flow.rate() * cost();
}
void
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
index c24790ddcf1..0c08e6aedf5 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
@@ -55,6 +55,29 @@ public:
enum class OptimizePass { FIRST, LAST };
+ class Options { // bundle of boolean flags handed to sort() / optimize_and_sort()
+ private:
+ bool _sort_by_cost; // read through sort_by_cost() when sorting children
+ bool _allow_force_strict; // NOTE(review): not read in the visible hunks; presumably gates the _force_strict tag - confirm
+ public:
+ constexpr Options() noexcept // default-constructed options have both flags off
+ : _sort_by_cost(false),
+ _allow_force_strict(false) {}
+ constexpr bool sort_by_cost() const noexcept { return _sort_by_cost; } // getter
+ constexpr Options &sort_by_cost(bool value) noexcept { // setter; returns *this so calls can be chained
+ _sort_by_cost = value;
+ return *this;
+ }
+ constexpr bool allow_force_strict() const noexcept { return _allow_force_strict; } // getter
+ constexpr Options &allow_force_strict(bool value) noexcept { // setter; returns *this so calls can be chained
+ _allow_force_strict = value;
+ return *this;
+ }
+ static constexpr Options all() noexcept { // convenience factory: every flag enabled
+ return Options().sort_by_cost(true).allow_force_strict(true);
+ }
+ };
+
struct HitEstimate {
uint32_t estHits;
bool empty;
@@ -182,6 +205,7 @@ private:
FlowStats _flow_stats;
uint32_t _sourceId;
uint32_t _docid_limit;
+ bool _force_strict;
bool _frozen;
protected:
@@ -224,10 +248,10 @@ public:
uint32_t get_docid_limit() const noexcept { return _docid_limit; }
static Blueprint::UP optimize(Blueprint::UP bp);
- virtual void sort(bool strict, bool sort_by_cost) = 0;
- static Blueprint::UP optimize_and_sort(Blueprint::UP bp, bool strict, bool sort_by_cost) {
+ virtual double sort(InFlow in_flow, const Options &opts) = 0;
+ static Blueprint::UP optimize_and_sort(Blueprint::UP bp, InFlow in_flow, const Options &opts) {
auto result = optimize(std::move(bp));
- result->sort(strict, sort_by_cost);
+ result->sort(in_flow, opts);
return result;
}
virtual void optimize(Blueprint* &self, OptimizePass pass) = 0;
@@ -362,7 +386,7 @@ private:
bool infer_want_global_filter() const;
size_t count_termwise_nodes(const UnpackInfo &unpack) const;
- virtual FlowCalc make_flow_calc(bool strict, double flow) const = 0;
+ virtual FlowCalc make_flow_calc(InFlow in_flow) const = 0;
protected:
// returns an empty collection if children have empty or
@@ -385,7 +409,7 @@ public:
void setDocIdLimit(uint32_t limit) noexcept final;
void optimize(Blueprint* &self, OptimizePass pass) final;
- void sort(bool strict, bool sort_by_cost) override;
+ double sort(InFlow in_flow, const Options &opts) override;
void set_global_filter(const GlobalFilter &global_filter, double estimated_hit_ratio) override;
IndexList find(const IPredicate & check) const;
@@ -422,7 +446,7 @@ private:
State _state;
protected:
void optimize(Blueprint* &self, OptimizePass pass) final;
- void sort(bool strict, bool sort_by_cost) override;
+ double sort(InFlow in_flow, const Options &opts) override;
void setEstimate(HitEstimate est) {
_state.estimate(est);
notifyChange();
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h
index 4548baf7545..ade2516b509 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.h
@@ -11,6 +11,23 @@
namespace search::queryeval {
+// Encapsulates strictness and in-flow (rate) in a single value for
+// convenient parameter passing. No explicit rate is kept in the
+// strict case, since strict basically means the receiving end will
+// eventually decide the actual flow; rate() then reports 1.0 to
+// indicate that the corpus is not reduced externally.
+class InFlow { // NOTE(review): strict()/rate() are not const-qualified, so they cannot be called through a const InFlow
+private:
+ double _value; // -1.0 encodes strict; otherwise the non-strict rate, floored at 0.0
+public:
+ constexpr InFlow(bool strict, double rate) noexcept // rate is ignored when strict is true
+ : _value(strict ? -1.0 : std::max(rate, 0.0)) {}
+ constexpr InFlow(bool strict) noexcept : InFlow(strict, 1.0) {} // non-strict variant gets rate 1.0
+ constexpr InFlow(double rate) noexcept : InFlow(false, rate) {} // rate-only form is always non-strict
+ constexpr bool strict() noexcept { return _value < 0.0; } // true iff the negative sentinel is stored
+ constexpr double rate() noexcept { return strict() ? 1.0 : _value; } // strict always reports 1.0
+};
+
struct FlowStats {
double estimate;
double cost;
@@ -122,16 +139,13 @@ void sort_partial(ADAPTER adapter, T &children, size_t offset) {
template <typename ADAPTER, typename T, typename F>
double ordered_cost_of(ADAPTER adapter, const T &children, F flow) {
- double cost = 0.0;
+ double total_cost = 0.0;
for (const auto &child: children) {
- if (flow.strict()) {
- cost += adapter.strict_cost(child);
- } else {
- cost += flow.flow() * adapter.cost(child);
- }
+ double child_cost = flow.strict() ? adapter.strict_cost(child) : (flow.flow() * adapter.cost(child));
+ flow.update_cost(total_cost, child_cost);
flow.add(adapter.estimate(child));
}
- return cost;
+ return total_cost;
}
template <typename ADAPTER, typename T>
@@ -188,8 +202,7 @@ private:
bool _strict;
bool _first;
public:
- AndFlow(bool strict) noexcept : _flow(1.0), _strict(strict), _first(true) {}
- AndFlow(double in) noexcept : _flow(in), _strict(false), _first(true) {}
+ AndFlow(InFlow flow) noexcept : _flow(flow.rate()), _strict(flow.strict()), _first(true) {}
void add(double est) noexcept {
_flow *= est;
_first = false;
@@ -203,6 +216,9 @@ public:
double estimate() const noexcept {
return _first ? 0.0 : _flow;
}
+ void update_cost(double &total_cost, double child_cost) noexcept { // invoked by ordered_cost_of() once per child
+ total_cost += child_cost; // child costs are simply summed
+ }
static void sort(auto adapter, auto &children, bool strict) {
flow::sort<flow::MinAndCost>(adapter, children);
if (strict && children.size() > 1) {
@@ -225,8 +241,7 @@ private:
bool _strict;
bool _first;
public:
- OrFlow(bool strict) noexcept : _flow(1.0), _strict(strict), _first(true) {}
- OrFlow(double in) noexcept : _flow(in), _strict(false), _first(true) {}
+ OrFlow(InFlow flow) noexcept : _flow(flow.rate()), _strict(flow.strict()), _first(true) {}
void add(double est) noexcept {
_flow *= (1.0 - est);
_first = false;
@@ -240,6 +255,9 @@ public:
double estimate() const noexcept {
return _first ? 0.0 : (1.0 - _flow);
}
+ void update_cost(double &total_cost, double child_cost) noexcept { // invoked by ordered_cost_of() once per child
+ total_cost += child_cost; // child costs are simply summed
+ }
static void sort(auto adapter, auto &children, bool strict) {
if (!strict) {
flow::sort<flow::MinOrCost>(adapter, children);
@@ -256,8 +274,7 @@ private:
bool _strict;
bool _first;
public:
- AndNotFlow(bool strict) noexcept : _flow(1.0), _strict(strict), _first(true) {}
- AndNotFlow(double in) noexcept : _flow(in), _strict(false), _first(true) {}
+ AndNotFlow(InFlow flow) noexcept : _flow(flow.rate()), _strict(flow.strict()), _first(true) {}
void add(double est) noexcept {
_flow *= _first ? est : (1.0 - est);
_first = false;
@@ -271,6 +288,9 @@ public:
double estimate() const noexcept {
return _first ? 0.0 : _flow;
}
+ void update_cost(double &total_cost, double child_cost) noexcept { // invoked by ordered_cost_of() once per child
+ total_cost += child_cost; // child costs are simply summed
+ }
static void sort(auto adapter, auto &children, bool) {
flow::sort_partial<flow::MinOrCost>(adapter, children, 1);
}
@@ -282,21 +302,18 @@ public:
using FlowCalc = std::function<double(double)>;
template <typename FLOW>
-FlowCalc flow_calc(bool strict, double non_strict_rate) {
- FLOW flow = strict ? FLOW(true) : FLOW(non_strict_rate);
- return [flow](double est) mutable noexcept {
+FlowCalc flow_calc(InFlow in_flow) {
+ return [flow=FLOW(in_flow)](double est) mutable noexcept {
double next_flow = flow.flow();
flow.add(est);
return next_flow;
};
}
-inline FlowCalc first_flow_calc(bool strict, double flow) {
- if (strict) {
- flow = 1.0;
- }
+inline FlowCalc first_flow_calc(InFlow in_flow) {
bool first = true;
- return [flow,first](double est) mutable noexcept {
+ double flow = in_flow.rate();
+ return [first,flow](double est) mutable noexcept {
double next_flow = flow;
if (first) {
flow *= est;
@@ -306,10 +323,8 @@ inline FlowCalc first_flow_calc(bool strict, double flow) {
};
}
-inline FlowCalc full_flow_calc(bool strict, double flow) {
- if (strict) {
- flow = 1.0;
- }
+inline FlowCalc full_flow_calc(InFlow in_flow) {
+ double flow = in_flow.rate();
return [flow](double) noexcept { return flow; };
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
index b8bf7d40655..9d0acc50ce5 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
@@ -208,9 +208,9 @@ AndNotBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) co
FlowCalc
-AndNotBlueprint::make_flow_calc(bool strict, double flow) const
+AndNotBlueprint::make_flow_calc(InFlow in_flow) const
{
- return flow_calc<AndNotFlow>(strict, flow);
+ return flow_calc<AndNotFlow>(in_flow);
}
//-----------------------------------------------------------------------------
@@ -308,9 +308,9 @@ AndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const
}
FlowCalc
-AndBlueprint::make_flow_calc(bool strict, double flow) const
+AndBlueprint::make_flow_calc(InFlow in_flow) const
{
- return flow_calc<AndFlow>(strict, flow);
+ return flow_calc<AndFlow>(in_flow);
}
//-----------------------------------------------------------------------------
@@ -408,9 +408,9 @@ OrBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const
}
FlowCalc
-OrBlueprint::make_flow_calc(bool strict, double flow) const
+OrBlueprint::make_flow_calc(InFlow in_flow) const
{
- return flow_calc<OrFlow>(strict, flow);
+ return flow_calc<OrFlow>(in_flow);
}
uint8_t
@@ -426,9 +426,9 @@ OrBlueprint::calculate_cost_tier() const
//-----------------------------------------------------------------------------
FlowCalc
-WeakAndBlueprint::make_flow_calc(bool strict, double flow) const
+WeakAndBlueprint::make_flow_calc(InFlow in_flow) const
{
- return flow_calc<OrFlow>(strict, flow);
+ return flow_calc<OrFlow>(in_flow);
}
WeakAndBlueprint::~WeakAndBlueprint() = default;
@@ -503,9 +503,9 @@ WeakAndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) c
//-----------------------------------------------------------------------------
FlowCalc
-NearBlueprint::make_flow_calc(bool strict, double flow) const
+NearBlueprint::make_flow_calc(InFlow in_flow) const
{
- return flow_calc<AndFlow>(strict, flow);
+ return flow_calc<AndFlow>(in_flow);
}
FlowStats
@@ -574,9 +574,9 @@ NearBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) cons
//-----------------------------------------------------------------------------
FlowCalc
-ONearBlueprint::make_flow_calc(bool strict, double flow) const
+ONearBlueprint::make_flow_calc(InFlow in_flow) const
{
- return flow_calc<AndFlow>(strict, flow);
+ return flow_calc<AndFlow>(in_flow);
}
FlowStats
@@ -735,17 +735,17 @@ RankBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) cons
}
FlowCalc
-RankBlueprint::make_flow_calc(bool strict, double flow) const
+RankBlueprint::make_flow_calc(InFlow in_flow) const
{
- return first_flow_calc(strict, flow);
+ return first_flow_calc(in_flow);
}
//-----------------------------------------------------------------------------
FlowCalc
-SourceBlenderBlueprint::make_flow_calc(bool strict, double flow) const
+SourceBlenderBlueprint::make_flow_calc(InFlow in_flow) const
{
- return full_flow_calc(strict, flow);
+ return full_flow_calc(in_flow);
}
SourceBlenderBlueprint::SourceBlenderBlueprint(const ISourceSelector &selector) noexcept
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
index 0095095dfe8..028898d3f47 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
@@ -29,7 +29,7 @@ public:
SearchIterator::UP
createFilterSearch(bool strict, FilterConstraint constraint) const override;
private:
- FlowCalc make_flow_calc(bool strict, double flow) const override;
+ FlowCalc make_flow_calc(InFlow in_flow) const override;
uint8_t calculate_cost_tier() const override {
return (childCnt() > 0) ? get_children()[0]->getState().cost_tier() : State::COST_TIER_NORMAL;
}
@@ -57,7 +57,7 @@ public:
SearchIterator::UP
createFilterSearch(bool strict, FilterConstraint constraint) const override;
private:
- FlowCalc make_flow_calc(bool strict, double flow) const override;
+ FlowCalc make_flow_calc(InFlow in_flow) const override;
};
//-----------------------------------------------------------------------------
@@ -82,7 +82,7 @@ public:
SearchIterator::UP
createFilterSearch(bool strict, FilterConstraint constraint) const override;
private:
- FlowCalc make_flow_calc(bool strict, double flow) const override;
+ FlowCalc make_flow_calc(InFlow in_flow) const override;
uint8_t calculate_cost_tier() const override;
};
@@ -94,7 +94,7 @@ private:
uint32_t _n;
std::vector<uint32_t> _weights;
- FlowCalc make_flow_calc(bool strict, double flow) const override;
+ FlowCalc make_flow_calc(InFlow in_flow) const override;
public:
FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
@@ -125,7 +125,7 @@ class NearBlueprint : public IntermediateBlueprint
private:
uint32_t _window;
- FlowCalc make_flow_calc(bool strict, double flow) const override;
+ FlowCalc make_flow_calc(InFlow in_flow) const override;
public:
FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
@@ -148,7 +148,7 @@ class ONearBlueprint : public IntermediateBlueprint
private:
uint32_t _window;
- FlowCalc make_flow_calc(bool strict, double flow) const override;
+ FlowCalc make_flow_calc(InFlow in_flow) const override;
public:
FlowStats calculate_flow_stats(uint32_t docid_limit) const final;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
@@ -186,7 +186,7 @@ public:
return (childCnt() > 0) ? get_children()[0]->getState().cost_tier() : State::COST_TIER_NORMAL;
}
private:
- FlowCalc make_flow_calc(bool strict, double flow) const override;
+ FlowCalc make_flow_calc(InFlow in_flow) const override;
};
//-----------------------------------------------------------------------------
@@ -196,7 +196,7 @@ class SourceBlenderBlueprint final : public IntermediateBlueprint
private:
const ISourceSelector &_selector;
- FlowCalc make_flow_calc(bool strict, double flow) const override;
+ FlowCalc make_flow_calc(InFlow in_flow) const override;
public:
explicit SourceBlenderBlueprint(const ISourceSelector &selector) noexcept;
~SourceBlenderBlueprint() override;