diff options
Diffstat (limited to 'searchlib')
17 files changed, 83 insertions, 38 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp index bb79ad85cc0..6ec1ffd460e 100644 --- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp @@ -822,6 +822,36 @@ TEST("Options binding and nesting") { check_opts(false, false, false); } +TEST("self strict resolving during sort") { + uint32_t docs = 1000; + uint32_t hits = 250; + auto leaf = std::unique_ptr<Blueprint>(MyLeafSpec(hits).create()); + leaf->setDocIdLimit(docs); + leaf->update_flow_stats(docs); + EXPECT_EQUAL(leaf->estimate(), 0.25); + EXPECT_EQUAL(leaf->cost(), 1.0); + EXPECT_EQUAL(leaf->strict_cost(), 0.25); + EXPECT_EQUAL(leaf->strict(), false); // starting value + { // do not allow force strict + auto guard = Blueprint::bind_opts(make_opts(true, false, false)); + leaf->sort(true); + EXPECT_EQUAL(leaf->strict(), true); + leaf->sort(false); + EXPECT_EQUAL(leaf->strict(), false); + } + { // allow force strict + auto guard = Blueprint::bind_opts(make_opts(true, true, false)); + leaf->sort(true); + EXPECT_EQUAL(leaf->strict(), true); + leaf->sort(false); + EXPECT_EQUAL(leaf->strict(), true); + leaf->sort(0.30); + EXPECT_EQUAL(leaf->strict(), true); + leaf->sort(0.20); + EXPECT_EQUAL(leaf->strict(), false); + } +} + TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); diff --git a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp index d40248336cb..16e78f77eec 100644 --- a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp +++ b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp @@ -66,7 +66,7 @@ struct LeafProxy : SimpleLeafBlueprint { return child->calculate_flow_stats(my_docid_limit); } void sort(InFlow in_flow) override { - strict(in_flow.strict()); + resolve_strict(in_flow); child->sort(in_flow); } SearchIteratorUP createLeafSearch(const TermFieldMatchDataArray &) const override { abort(); } diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp index ed23aaad226..70c52f1d6c2 100644 --- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp +++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp @@ -479,7 +479,7 @@ TEST(FlowTest, strict_and_with_allow_force_strict_partitioning) { size_t a = 0; for (size_t b = 0; b < data.size(); ++b) { double est = data[b].estimate; - if (flow::should_force_strict(est, data[b].cost, data[b].strict_cost, rate)) { + if (flow::should_force_strict(data[b], rate)) { auto pos = data.begin() + b; std::rotate(data.begin() + a, pos, pos + 1); ++a; @@ -551,7 +551,7 @@ TEST(FlowTest, strict_and_with_allow_force_strict_assume_strict_then_partition_a if (i == 0) { return data[i].strict_cost < data[i].cost; } else { - return flow::should_force_strict(data[i].estimate, data[i].cost, data[i].strict_cost, est); + return flow::should_force_strict(data[i], est); } }; for (size_t i = 0; i < last; ++i) { diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 6d79f7bcb26..aebfda8fffd 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -288,7 +288,7 @@ public: bool should_use() const { return _should_use; } void sort(queryeval::InFlow in_flow) override { - strict(in_flow.strict()); + resolve_strict(in_flow); } queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override { using OrFlow = search::queryeval::OrFlow; @@ -375,7 +375,7 @@ public: const common::Location &location() const { return _location; } void sort(queryeval::InFlow in_flow) override { - strict(in_flow.strict()); + resolve_strict(in_flow); } queryeval::FlowStats calculate_flow_stats(uint32_t) const override { @@ -508,7 +508,7 @@ public: } void sort(queryeval::InFlow in_flow) override { - strict(in_flow.strict()); + resolve_strict(in_flow); } queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override { diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp index ce9163bbddb..2ad67c85429 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp @@ -124,7 +124,7 @@ AttributeWeightedSetBlueprint::addToken(std::unique_ptr<ISearchContext> context, void AttributeWeightedSetBlueprint::sort(queryeval::InFlow in_flow) { - strict(in_flow.strict()); + resolve_strict(in_flow); } queryeval::FlowStats diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h index 62c06c99bd4..1759ca71432 100644 --- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h +++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h @@ -73,7 +73,7 @@ public: } void sort(queryeval::InFlow in_flow) override { - strict(in_flow.strict()); + resolve_strict(in_flow); } queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override { diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index 73f99d28eda..43339b68999 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -129,6 +129,18 @@ Blueprint::Blueprint() noexcept Blueprint::~Blueprint() = default; void +Blueprint::resolve_strict(InFlow &in_flow) noexcept +{ + if (!in_flow.strict() && opt_allow_force_strict()) { + auto stats = FlowStats::from(flow::DefaultAdapter(), this); + if (flow::should_force_strict(stats, in_flow.rate())) { + in_flow.force_strict(); + } + } + _strict = in_flow.strict(); +} + +void Blueprint::each_node_post_order(const std::function<void(Blueprint&)> &f) { f(*this); @@ -605,7 +617,7 @@ IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass) void IntermediateBlueprint::sort(InFlow in_flow) { - strict(in_flow.strict()); // authorative strict tag (->fetchPostings,->createSearch,->createFilterSearch) + resolve_strict(in_flow); if (!opt_keep_order()) [[likely]] { sort(_children, in_flow.strict(), opt_sort_by_cost()); } @@ -826,7 +838,7 @@ LeafBlueprint::set_tree_size(uint32_t value) void SimpleLeafBlueprint::sort(InFlow in_flow) { - strict(in_flow.strict()); // authorative strict tag (->fetchPostings,->createSearch,->createFilterSearch) + resolve_strict(in_flow); } } diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index d57fbeae3b1..1501289c590 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -254,11 +254,11 @@ protected: _frozen = true; } - // Should be called by sort; sort is responsible for propagating - // strict tagging throughout the blueprint tree. Note that calling - // this directly breaks some tests using leaf proxy decorators - // that are not able to forward the non-virtual call. - void strict(bool value) noexcept { _strict = value; } + // Call this first inside sort implementations to handle 2 things: + // + // (1) force in_flow to be strict if allowed and better. + // (2) tag blueprint with the strictness of the in_flow. + void resolve_strict(InFlow &in_flow) noexcept; public: class IPredicate { diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp index 3be1009279b..08f09ac50b1 100644 --- a/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp @@ -40,11 +40,12 @@ DotProductBlueprint::addTerm(Blueprint::UP term, int32_t weight, HitEstimate & e } void -DotProductBlueprint::sort(InFlow) +DotProductBlueprint::sort(InFlow in_flow) { - strict(true); + in_flow.force_strict(); + resolve_strict(in_flow); for (auto &term: _terms) { - term->sort(true); + term->sort(in_flow); } } diff --git a/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp index 8b5e76085d2..e9a960f03e4 100644 --- a/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp @@ -56,7 +56,7 @@ EquivBlueprint::~EquivBlueprint() = default; void EquivBlueprint::sort(InFlow in_flow) { - strict(in_flow.strict()); + resolve_strict(in_flow); auto flow = OrFlow(in_flow); for (auto &term: _terms) { term->sort(InFlow(flow.strict(), flow.flow())); diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h index 98f57a4182d..bc76856dac1 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow.h @@ -25,8 +25,9 @@ public: : _value(strict ? -1.0 : std::max(rate, 0.0)) {} constexpr InFlow(bool strict) noexcept : InFlow(strict, 1.0) {} constexpr InFlow(double rate) noexcept : InFlow(false, rate) {} - constexpr bool strict() noexcept { return _value < 0.0; } - constexpr double rate() noexcept { return strict() ? 1.0 : _value; } + constexpr void force_strict() noexcept { _value = -1.0; } + constexpr bool strict() const noexcept { return _value < 0.0; } + constexpr double rate() const noexcept { return strict() ? 1.0 : _value; } }; struct FlowStats { @@ -122,13 +123,13 @@ struct MinOrCost { }; // estimate the cost of evaluating a strict child in a non-strict context -inline double forced_strict_cost(double estimate, double strict_cost, double rate) { - return 0.2 * (rate - estimate) + strict_cost; +inline double forced_strict_cost(const FlowStats &stats, double rate) { + return 0.2 * (rate - stats.estimate) + stats.strict_cost; } // would it be faster to force a non-strict child to be strict -inline bool should_force_strict(double estimate, double cost, double strict_cost, double rate) { - return forced_strict_cost(estimate, strict_cost, rate) < (cost * rate); +inline bool should_force_strict(const FlowStats &stats, double rate) { + return forced_strict_cost(stats, rate) < (stats.cost * rate); } // estimate the absolute cost of evaluating a child with a specific in flow @@ -139,7 +140,7 @@ inline double min_child_cost(InFlow in_flow, const FlowStats &stats, bool allow_ if (!allow_force_strict) { return stats.cost * in_flow.rate(); } - return std::min(forced_strict_cost(stats.estimate, stats.strict_cost, in_flow.rate()), + return std::min(forced_strict_cost(stats, in_flow.rate()), stats.cost * in_flow.rate()); } @@ -186,7 +187,7 @@ template <typename ADAPTER, typename T, typename F> double ordered_cost_of(ADAPTER adapter, const T &children, F flow, bool allow_force_strict) { double total_cost = 0.0; for (const auto &child: children) { - FlowStats stats(adapter.estimate(child), adapter.cost(child), adapter.strict_cost(child)); + auto stats = FlowStats::from(adapter, child); double child_cost = min_child_cost(InFlow(flow.strict(), flow.flow()), stats, allow_force_strict); flow.update_cost(total_cost, child_cost); flow.add(stats.estimate); @@ -224,7 +225,7 @@ auto select_forced_strict_and_child(auto adapter, const auto &children, size_t f for (size_t idx = first; idx < children.size(); ++idx) { auto child = FlowStats::from(adapter, children[idx]); double child_abs_cost = est * child.cost; - double forced_cost = forced_strict_cost(child.estimate, child.strict_cost, est); + double forced_cost = forced_strict_cost(child, est); double my_diff = (forced_cost + child.estimate * cost) - (cost + child_abs_cost); if (my_diff < best_diff) { best_diff = my_diff; @@ -252,7 +253,7 @@ auto select_forced_strict_and_child_with_order(auto adapter, const auto &childre for (size_t idx = first; idx < children.size(); ++idx) { auto child = FlowStats::from(adapter, children[idx]); double child_abs_cost = est * child.cost; - double forced_cost = forced_strict_cost(child.estimate, child.strict_cost, est); + double forced_cost = forced_strict_cost(child, est); double my_diff = (forced_cost + child.estimate * cost) - (cost + child_abs_cost); size_t target = first; while (target > 0) { @@ -262,7 +263,7 @@ auto select_forced_strict_and_child_with_order(auto adapter, const auto &childre // do not move past someone with lower estimate break; } - if (!should_force_strict(other.estimate, other.cost, other.strict_cost, (est_until(candidate) * child.estimate))) { + if (!should_force_strict(other, (est_until(candidate) * child.estimate))) { // no not move past someone who will lose strictness break; } diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp index 63697a3405a..da1e29875be 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp @@ -128,7 +128,7 @@ NearestNeighborBlueprint::perform_top_k(const search::tensor::NearestNeighborInd void NearestNeighborBlueprint::sort(InFlow in_flow) { - strict(in_flow.strict()); + resolve_strict(in_flow); } std::unique_ptr<SearchIterator> diff --git a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp index a3c309d6169..16bb7198477 100644 --- a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp @@ -282,7 +282,7 @@ PredicateBlueprint::fetchPostings(const ExecuteInfo &) { void PredicateBlueprint::sort(InFlow in_flow) { - strict(in_flow.strict()); + resolve_strict(in_flow); } SearchIterator::UP diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp index a531df52d15..44cb75aace2 100644 --- a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp @@ -47,7 +47,7 @@ SameElementBlueprint::addTerm(Blueprint::UP term) void SameElementBlueprint::sort(InFlow in_flow) { - strict(in_flow.strict()); + resolve_strict(in_flow); auto flow = AndFlow(in_flow); for (auto &term: _terms) { term->sort(InFlow(flow.strict(), flow.flow())); diff --git a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp index 621fb101d5d..d29129f1120 100644 --- a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp @@ -48,7 +48,7 @@ SimplePhraseBlueprint::addTerm(Blueprint::UP term) void SimplePhraseBlueprint::sort(InFlow in_flow) { - strict(in_flow.strict()); + resolve_strict(in_flow); for (auto &term: _terms) { term->sort(in_flow); } diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp index c0beaf19d70..4c55496822b 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp @@ -69,7 +69,7 @@ ParallelWeakAndBlueprint::addTerm(Blueprint::UP term, int32_t weight, HitEstimat void ParallelWeakAndBlueprint::sort(InFlow in_flow) { - strict(in_flow.strict()); + resolve_strict(in_flow); auto flow = OrFlow(in_flow); for (auto &term: _terms) { term->sort(InFlow(flow.strict(), flow.flow())); diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp index 49c48718420..25c82834814 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp @@ -94,11 +94,12 @@ WeightedSetTermBlueprint::addTerm(Blueprint::UP term, int32_t weight, HitEstimat } void -WeightedSetTermBlueprint::sort(InFlow) +WeightedSetTermBlueprint::sort(InFlow in_flow) { - strict(true); + in_flow.force_strict(); + resolve_strict(in_flow); for (auto &term: _terms) { - term->sort(true); + term->sort(in_flow); } } |