summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorHåvard Pettersen <havardpe@yahooinc.com>2023-12-13 15:52:05 +0000
committerHåvard Pettersen <havardpe@yahooinc.com>2023-12-14 10:41:43 +0000
commit0539ca0c737d39ba15346c83299592fa3835ca2e (patch)
tree6d458828191f85367c45fab2a990f53c021a9cef /searchlib
parent7de020a6ff76db5adfaf66145b604f998013c5f4 (diff)
use flow to calculate relative estimates and iterator cost
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp5
-rw-r--r--searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp70
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.h23
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/flow.cpp7
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/flow.h62
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp85
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h8
9 files changed, 230 insertions, 33 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
index 2d621fa1011..20cf2008e4b 100644
--- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
@@ -22,6 +22,7 @@ class MyOr : public IntermediateBlueprint
{
private:
public:
+ double calculate_cost() const final { return 1.0; }
double calculate_relative_estimate() const final { return 0.5; }
HitEstimate combine(const std::vector<HitEstimate> &data) const override {
return max(data);
@@ -773,8 +774,8 @@ TEST("requireThatDocIdLimitInjectionWorks")
TEST("Control object sizes") {
EXPECT_EQUAL(40u, sizeof(Blueprint::State));
- EXPECT_EQUAL(32u, sizeof(Blueprint));
- EXPECT_EQUAL(72u, sizeof(LeafBlueprint));
+ EXPECT_EQUAL(40u, sizeof(Blueprint));
+ EXPECT_EQUAL(80u, sizeof(LeafBlueprint));
}
TEST_MAIN() {
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
index 7d994ee4cd6..e24e91c2f1d 100644
--- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
@@ -4,6 +4,7 @@
#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/searchlib/queryeval/isourceselector.h>
#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/searchlib/queryeval/flow.h>
#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
#include <vespa/searchlib/queryeval/leaf_blueprints.h>
#include <vespa/searchlib/queryeval/equiv_blueprint.h>
@@ -633,11 +634,17 @@ TEST("and with one empty child is optimized away") {
struct make {
make(make &&) = delete;
make &operator=(make &&) = delete;
+ static constexpr double invalid_cost = -1.0;
static constexpr uint32_t invalid_source = -1;
+ double cost_tag = invalid_cost;
uint32_t source_tag = invalid_source;
std::unique_ptr<IntermediateBlueprint> making;
make(std::unique_ptr<IntermediateBlueprint> making_in) : making(std::move(making_in)) {}
operator std::unique_ptr<Blueprint>() && noexcept { return std::move(making); }
+ make &&cost(double leaf_cost) && {
+ cost_tag = leaf_cost;
+ return std::move(*this);
+ }
make &&source(uint32_t source_id) && {
source_tag = source_id;
return std::move(*this);
@@ -655,7 +662,12 @@ struct make {
return std::move(*this);
}
make &&leaf(uint32_t estimate) && {
- return std::move(*this).add(ap(MyLeafSpec(estimate).create()));
+ std::unique_ptr<LeafBlueprint> bp(MyLeafSpec(estimate).create());
+ if (cost_tag != invalid_cost) {
+ bp->set_cost(cost_tag);
+ cost_tag = invalid_cost;
+ }
+ return std::move(*this).add(std::move(bp));
}
make &&leafs(std::initializer_list<uint32_t> estimates) && {
for (uint32_t estimate: estimates) {
@@ -1211,7 +1223,7 @@ TEST("relative estimate for RANK") {
}
TEST("relative estimate for ANDNOT") {
- verify_relative_estimate(make::ANDNOT(), 0.2);
+ verify_relative_estimate(make::ANDNOT(), 0.2*0.7*0.5);
}
TEST("relative estimate for SB") {
@@ -1232,4 +1244,58 @@ TEST("relative estimate for WEAKAND") {
verify_relative_estimate(make::WEAKAND(50), 0.05);
}
+void verify_cost(make &&mk, double expect) {
+ EXPECT_EQUAL(mk.making->cost(), 1.0);
+ Blueprint::UP bp = std::move(mk)
+ .cost(1.1).leaf(200)
+ .cost(1.2).leaf(300)
+ .cost(1.3).leaf(500);
+ bp->setDocIdLimit(1000);
+ bp = Blueprint::optimize(std::move(bp));
+ EXPECT_EQUAL(bp->cost(), expect);
+}
+
+double calc_cost(std::vector<std::pair<double,double>> list) {
+ double flow = 1.0;
+ double cost = 0.0;
+ for (auto [sub_cost,pass_through]: list) {
+ cost += flow * sub_cost;
+ flow *= pass_through;
+ }
+ return cost;
+}
+
+TEST("cost for OR") {
+ verify_cost(make::OR(), calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}}));
+}
+
+TEST("cost for AND") {
+ verify_cost(make::AND(), calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}}));
+}
+
+TEST("cost for RANK") {
+ verify_cost(make::RANK(), 1.1); // first
+}
+
+TEST("cost for ANDNOT") {
+ verify_cost(make::ANDNOT(), calc_cost({{1.1, 0.2},{1.3, 0.5},{1.2, 0.7}}));
+}
+
+TEST("cost for SB") {
+ InvalidSelector sel;
+ verify_cost(make::SB(sel), 1.3); // max
+}
+
+TEST("cost for NEAR") {
+ verify_cost(make::NEAR(1), 3.0 + calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}}));
+}
+
+TEST("cost for ONEAR") {
+ verify_cost(make::ONEAR(1), 3.0 + calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}}));
+}
+
+TEST("cost for WEAKAND") {
+ verify_cost(make::WEAKAND(1000), calc_cost({{1.1, 0.8},{1.2, 0.7},{1.3, 0.5}}));
+}
+
TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
index b47f336c934..5e6d31d3761 100644
--- a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
@@ -21,6 +21,7 @@ vespa_add_library(searchlib_queryeval OBJECT
fake_searchable.cpp
field_spec.cpp
filter_wrapper.cpp
+ flow.cpp
full_search.cpp
get_weight_from_node.cpp
global_filter.cpp
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
index 043b006d3bd..b71a579e097 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
@@ -120,6 +120,7 @@ Blueprint::State::~State() = default;
Blueprint::Blueprint() noexcept
: _parent(nullptr),
+ _cost(1.0),
_sourceId(0xffffffff),
_docid_limit(0),
_frozen(false)
@@ -560,6 +561,7 @@ IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass)
optimize_self(pass);
if (pass == OptimizePass::LAST) {
sort(_children);
+ set_cost(calculate_cost());
}
maybe_eliminate_self(self, get_replacement());
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
index 3261d380f36..66d55015f62 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
@@ -144,6 +144,22 @@ public:
return (total_docs == 0) ? 0.0 : double(est) / double(total_docs);
}
+ static double cost_of(const Children &children, auto flow) {
+ double cost = 0.0;
+ for (const auto &child: children) {
+ cost += flow.flow() * child->cost();
+ flow.add(child->estimate());
+ }
+ return cost;
+ }
+
+ static double estimate_of(const Children &children, auto flow) {
+ for (const auto &child: children) {
+ flow.add(child->estimate());
+ }
+ return flow.estimate();
+ }
+
// utility that just takes maximum estimate
static HitEstimate max(const std::vector<HitEstimate> &data);
@@ -182,6 +198,7 @@ public:
private:
Blueprint *_parent;
+ double _cost;
uint32_t _sourceId;
uint32_t _docid_limit;
bool _frozen;
@@ -197,6 +214,8 @@ protected:
_frozen = true;
}
+ void set_cost(double value) noexcept { _cost = value; }
+
public:
class IPredicate {
public:
@@ -219,6 +238,8 @@ public:
Blueprint *getParent() const noexcept { return _parent; }
bool has_parent() const { return (_parent != nullptr); }
+ double cost() const noexcept { return _cost; }
+
Blueprint &setSourceId(uint32_t sourceId) noexcept { _sourceId = sourceId; return *this; }
uint32_t getSourceId() const noexcept { return _sourceId; }
@@ -369,6 +390,7 @@ public:
Blueprint::UP removeLastChild() { return removeChild(childCnt() - 1); }
SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override;
+ virtual double calculate_cost() const = 0;
virtual HitEstimate combine(const std::vector<HitEstimate> &data) const = 0;
virtual FieldSpecBaseList exposeFields() const = 0;
virtual void sort(Children &children) const = 0;
@@ -426,6 +448,7 @@ public:
~LeafBlueprint() override = default;
const State &getState() const final { return _state; }
void setDocIdLimit(uint32_t limit) noexcept final;
+ using Blueprint::set_cost;
double calculate_relative_estimate() const override;
void fetchPostings(const ExecuteInfo &execInfo) override;
void freeze() final;
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.cpp b/searchlib/src/vespa/searchlib/queryeval/flow.cpp
new file mode 100644
index 00000000000..4730058b357
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.cpp
@@ -0,0 +1,7 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "flow.h"
+
+namespace search::queryeval {
+
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h
new file mode 100644
index 00000000000..36c0a259feb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/flow.h
@@ -0,0 +1,62 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+#include <cstddef>
+
+namespace search::queryeval {
+
+// Model how boolean result decisions flow through intermediate nodes
+// of different types based on relative estimates for sub-expressions
+
+class AndFlow {
+private:
+ double _flow;
+ size_t _cnt;
+public:
+ AndFlow(double in = 1.0) noexcept : _flow(in), _cnt(0) {}
+ void add(double est) noexcept {
+ _flow *= est;
+ ++_cnt;
+ }
+ double flow() const noexcept {
+ return _flow;
+ }
+ double estimate() const noexcept {
+ return (_cnt > 0) ? _flow : 0.0;
+ }
+};
+
+class OrFlow {
+private:
+ double _flow;
+public:
+ OrFlow(double in = 1.0) noexcept : _flow(in) {}
+ void add(double est) noexcept {
+ _flow *= (1.0 - est);
+ }
+ double flow() const noexcept {
+ return _flow;
+ }
+ double estimate() const noexcept {
+ return (1.0 - _flow);
+ }
+};
+
+class AndNotFlow {
+private:
+ double _flow;
+ size_t _cnt;
+public:
+ AndNotFlow(double in = 1.0) noexcept : _flow(in), _cnt(0) {}
+ void add(double est) noexcept {
+ _flow *= (_cnt++ == 0) ? est : (1.0 - est);
+ }
+ double flow() const noexcept {
+ return _flow;
+ }
+ double estimate() const noexcept {
+ return (_cnt > 0) ? _flow : 0.0;
+ }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
index 7d992b510b5..c43335a6fdf 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
@@ -1,6 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "intermediate_blueprints.h"
+#include "flow.h"
#include "andnotsearch.h"
#include "andsearch.h"
#include "orsearch.h"
@@ -81,33 +82,20 @@ need_normal_features_for_children(const IntermediateBlueprint &blueprint, fef::M
}
}
-double rel_est_first_child(const Blueprint::Children &children) {
- return children.empty() ? 0.0 : children[0]->getState().relative_estimate();
-}
-
-double rel_est_and(const Blueprint::Children &children) {
- double flow = 1.0;
- for (const Blueprint::UP &child: children) {
- flow *= child->getState().relative_estimate();
- }
- return children.empty() ? 0.0 : flow;
-}
-
-double rel_est_or(const Blueprint::Children &children) {
- double flow = 1.0;
- for (const Blueprint::UP &child: children) {
- flow *= (1.0 - child->getState().relative_estimate());
- }
- return (1.0 - flow);
-}
-
} // namespace search::queryeval::<unnamed>
//-----------------------------------------------------------------------------
double
-AndNotBlueprint::calculate_relative_estimate() const {
- return rel_est_first_child(get_children());
+AndNotBlueprint::calculate_cost() const
+{
+ return cost_of(get_children(), AndNotFlow());
+}
+
+double
+AndNotBlueprint::calculate_relative_estimate() const
+{
+ return estimate_of(get_children(), AndNotFlow());
}
Blueprint::HitEstimate
@@ -214,8 +202,13 @@ AndNotBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) co
//-----------------------------------------------------------------------------
double
+AndBlueprint::calculate_cost() const {
+ return cost_of(get_children(), AndFlow());
+}
+
+double
AndBlueprint::calculate_relative_estimate() const {
- return rel_est_and(get_children());
+ return estimate_of(get_children(), AndFlow());
}
Blueprint::HitEstimate
@@ -322,8 +315,13 @@ OrBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate, bool u
OrBlueprint::~OrBlueprint() = default;
double
+OrBlueprint::calculate_cost() const {
+ return cost_of(get_children(), OrFlow());
+}
+
+double
OrBlueprint::calculate_relative_estimate() const {
- return rel_est_or(get_children());
+ return estimate_of(get_children(), OrFlow());
}
Blueprint::HitEstimate
@@ -418,8 +416,13 @@ OrBlueprint::calculate_cost_tier() const
WeakAndBlueprint::~WeakAndBlueprint() = default;
double
+WeakAndBlueprint::calculate_cost() const {
+ return cost_of(get_children(), OrFlow());
+}
+
+double
WeakAndBlueprint::calculate_relative_estimate() const {
- double child_est = rel_est_or(get_children());
+ double child_est = estimate_of(get_children(), OrFlow());
double my_est = abs_to_rel_est(_n, get_docid_limit());
return std::min(my_est, child_est);
}
@@ -484,8 +487,13 @@ WeakAndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) c
//-----------------------------------------------------------------------------
double
+NearBlueprint::calculate_cost() const {
+ return cost_of(get_children(), AndFlow()) + childCnt() * 1.0;
+}
+
+double
NearBlueprint::calculate_relative_estimate() const {
- return rel_est_and(get_children());
+ return estimate_of(get_children(), AndFlow());
}
Blueprint::HitEstimate
@@ -542,8 +550,13 @@ NearBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) cons
//-----------------------------------------------------------------------------
double
+ONearBlueprint::calculate_cost() const {
+ return cost_of(get_children(), AndFlow()) + (childCnt() * 1.0);
+}
+
+double
ONearBlueprint::calculate_relative_estimate() const {
- return rel_est_and(get_children());
+ return estimate_of(get_children(), AndFlow());
}
Blueprint::HitEstimate
@@ -603,8 +616,13 @@ ONearBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) con
//-----------------------------------------------------------------------------
double
+RankBlueprint::calculate_cost() const {
+ return (childCnt() == 0) ? 1.0 : getChild(0).cost();
+}
+
+double
RankBlueprint::calculate_relative_estimate() const {
- return rel_est_first_child(get_children());
+ return (childCnt() == 0) ? 0.0 : getChild(0).estimate();
}
Blueprint::HitEstimate
@@ -700,8 +718,17 @@ SourceBlenderBlueprint::SourceBlenderBlueprint(const ISourceSelector &selector)
SourceBlenderBlueprint::~SourceBlenderBlueprint() = default;
double
+SourceBlenderBlueprint::calculate_cost() const {
+ double my_cost = 1.0;
+ for (const auto &child: get_children()) {
+ my_cost = std::max(my_cost, child->cost());
+ }
+ return my_cost;
+}
+
+double
SourceBlenderBlueprint::calculate_relative_estimate() const {
- return rel_est_or(get_children());
+ return estimate_of(get_children(), OrFlow());
}
Blueprint::HitEstimate
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
index b9306ea307d..14672c2a5cd 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
@@ -15,6 +15,7 @@ class AndNotBlueprint : public IntermediateBlueprint
{
public:
bool supports_termwise_children() const override { return true; }
+ double calculate_cost() const final;
double calculate_relative_estimate() const final;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
@@ -42,6 +43,7 @@ class AndBlueprint : public IntermediateBlueprint
{
public:
bool supports_termwise_children() const override { return true; }
+ double calculate_cost() const final;
double calculate_relative_estimate() const final;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
@@ -67,6 +69,7 @@ class OrBlueprint : public IntermediateBlueprint
public:
~OrBlueprint() override;
bool supports_termwise_children() const override { return true; }
+ double calculate_cost() const final;
double calculate_relative_estimate() const final;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
@@ -94,6 +97,7 @@ private:
std::vector<uint32_t> _weights;
public:
+ double calculate_cost() const final;
double calculate_relative_estimate() const final;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
@@ -124,6 +128,7 @@ private:
uint32_t _window;
public:
+ double calculate_cost() const final;
double calculate_relative_estimate() const final;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
@@ -147,6 +152,7 @@ private:
uint32_t _window;
public:
+ double calculate_cost() const final;
double calculate_relative_estimate() const final;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
@@ -167,6 +173,7 @@ public:
class RankBlueprint final : public IntermediateBlueprint
{
public:
+ double calculate_cost() const final;
double calculate_relative_estimate() const final;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
@@ -195,6 +202,7 @@ private:
public:
explicit SourceBlenderBlueprint(const ISourceSelector &selector) noexcept;
~SourceBlenderBlueprint() override;
+ double calculate_cost() const final;
double calculate_relative_estimate() const final;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;