summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2021-10-01 14:58:10 +0200
committerGitHub <noreply@github.com>2021-10-01 14:58:10 +0200
commit0f3a8f3e24b2bbd932ee112615ed8a0798a37175 (patch)
tree6d46f4a0b292df10ef1a0fc37d1a161a6cdaa0c3
parente7e38302b5b962c501f98c38878d496a20846e64 (diff)
parent0fad251ccd7241f9e4b905935398891ef1800d72 (diff)
Merge pull request #19398 from vespa-engine/havardpe/estimate-or-as-saturated-sum
use saturated sum as hit estimate for OR
-rw-r--r--searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp16
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.cpp27
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.h8
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp7
4 files changed, 48 insertions, 10 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
index eb6e49747a1..802aee8dee0 100644
--- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
@@ -1453,4 +1453,20 @@ TEST("require that intermediate cost tier is minimum cost tier of children") {
EXPECT_EQUAL(bp2->getState().cost_tier(), 2u);
}
+void verify_or_est(const std::vector<Blueprint::HitEstimate> &child_estimates, Blueprint::HitEstimate expect) {
+ OrBlueprint my_or;
+ my_or.setDocIdLimit(32);
+ auto my_est = my_or.combine(child_estimates);
+ EXPECT_EQUAL(my_est.empty, expect.empty);
+ EXPECT_EQUAL(my_est.estHits, expect.estHits);
+}
+
+TEST("require that OR blueprint use saturated sum as estimate") {
+ TEST_DO(verify_or_est({{0, true},{0, true},{0, true}}, {0, true}));
+ TEST_DO(verify_or_est({{0, true},{0, false},{0, true}}, {0, false}));
+ TEST_DO(verify_or_est({{4, false},{6, false},{5, false}}, {15, false}));
+ TEST_DO(verify_or_est({{5, false},{20, false},{10, false}}, {32, false}));
+ TEST_DO(verify_or_est({{100, false},{300, false},{200, false}}, {300, false}));
+}
+
TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
index 897d36150d9..c68bccc451b 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
@@ -27,12 +27,11 @@ namespace search::queryeval {
void maybe_eliminate_self(Blueprint* &self, Blueprint::UP replacement) {
// replace with replacement
if (replacement) {
- Blueprint *tmp = replacement.release();
- tmp->setParent(self->getParent());
- tmp->setSourceId(self->getSourceId());
- self->setParent(0);
- replacement.reset(self);
- self = tmp;
+ Blueprint::UP discard(self);
+ self = replacement.release();
+ self->setParent(discard->getParent());
+ self->setSourceId(discard->getSourceId());
+ discard->setParent(nullptr);
}
// replace with empty blueprint if empty
if (self->getState().estimate().empty) {
@@ -40,6 +39,8 @@ void maybe_eliminate_self(Blueprint* &self, Blueprint::UP replacement) {
self = new EmptyBlueprint(discard->getState().fields());
self->setParent(discard->getParent());
self->setSourceId(discard->getSourceId());
+ self->setDocIdLimit(discard->get_docid_limit());
+ discard->setParent(nullptr);
}
}
@@ -69,6 +70,20 @@ Blueprint::min(const std::vector<HitEstimate> &data)
return est;
}
+Blueprint::HitEstimate
+Blueprint::sat_sum(const std::vector<HitEstimate> &data, uint32_t docid_limit)
+{
+ uint64_t sum = 0;
+ bool empty = true;
+ uint32_t limit = docid_limit;
+ for (const auto &est: data) {
+ sum += est.estHits;
+ empty = (empty && est.empty);
+ limit = std::max(limit, est.estHits);
+ }
+ return { uint32_t(std::min(sum, uint64_t(limit))), empty };
+}
+
Blueprint::State::State(const FieldSpecBaseList &fields_in)
: _fields(fields_in),
_estimate(),
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
index b75c903234f..38b02c4d927 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
@@ -101,7 +101,7 @@ public:
double hit_ratio(uint32_t docid_limit) const {
uint32_t total_hits = _estimate.estHits;
uint32_t total_docs = std::max(total_hits, docid_limit);
- return double(total_hits) / double(total_docs);
+ return (total_docs == 0) ? 0.0 : double(total_hits) / double(total_docs);
}
void tree_size(uint32_t value) { _tree_size = value; }
uint32_t tree_size() const { return _tree_size; }
@@ -119,6 +119,12 @@ public:
// utility that just takes minium estimate
static HitEstimate min(const std::vector<HitEstimate> &data);
+ // utility that calculates saturated sum
+ //
+ // upper limit for estimate: docid_limit
+ // lower limit for docid_limit: max child estimate
+ static HitEstimate sat_sum(const std::vector<HitEstimate> &data, uint32_t docid_limit);
+
// utility to get the greater estimate to sort first, higher tiers last
struct TieredGreaterEstimate {
bool operator () (Blueprint * const &a, Blueprint * const &b) const {
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
index 561c532430b..eb16025f175 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
@@ -19,7 +19,7 @@ namespace search::queryeval {
namespace {
template <typename CombineType>
-size_t lookup_create_source(std::vector<std::unique_ptr<CombineType> > &sources, uint32_t child_source) {
+size_t lookup_create_source(std::vector<std::unique_ptr<CombineType> > &sources, uint32_t child_source, uint32_t docid_limit) {
for (size_t i = 0; i < sources.size(); ++i) {
if (sources[i]->getSourceId() == child_source) {
return i;
@@ -27,6 +27,7 @@ size_t lookup_create_source(std::vector<std::unique_ptr<CombineType> > &sources,
}
sources.push_back(std::unique_ptr<CombineType>(new CombineType()));
sources.back()->setSourceId(child_source);
+ sources.back()->setDocIdLimit(docid_limit);
return (sources.size() - 1);
}
@@ -53,7 +54,7 @@ void optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx) {
auto *blender = static_cast<SourceBlenderBlueprint *>(blender_up.get());
while (blender->childCnt() > 0) {
Blueprint::UP child_up = blender->removeChild(blender->childCnt() - 1);
- size_t source_idx = lookup_create_source(sources, child_up->getSourceId());
+ size_t source_idx = lookup_create_source(sources, child_up->getSourceId(), self.get_docid_limit());
sources[source_idx]->addChild(std::move(child_up));
}
}
@@ -298,7 +299,7 @@ AndBlueprint::computeNextHitRate(const Blueprint & child, double hitRate) const
Blueprint::HitEstimate
OrBlueprint::combine(const std::vector<HitEstimate> &data) const
{
- return max(data);
+ return sat_sum(data, get_docid_limit());
}
FieldSpecBaseList