diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-10-01 14:58:10 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-10-01 14:58:10 +0200 |
commit | 0f3a8f3e24b2bbd932ee112615ed8a0798a37175 (patch) | |
tree | 6d46f4a0b292df10ef1a0fc37d1a161a6cdaa0c3 | |
parent | e7e38302b5b962c501f98c38878d496a20846e64 (diff) | |
parent | 0fad251ccd7241f9e4b905935398891ef1800d72 (diff) |
Merge pull request #19398 from vespa-engine/havardpe/estimate-or-as-saturated-sum
use saturated sum as hit estimate for OR
4 files changed, 48 insertions, 10 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index eb6e49747a1..802aee8dee0 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -1453,4 +1453,20 @@ TEST("require that intermediate cost tier is minimum cost tier of children") { EXPECT_EQUAL(bp2->getState().cost_tier(), 2u); } +void verify_or_est(const std::vector<Blueprint::HitEstimate> &child_estimates, Blueprint::HitEstimate expect) { + OrBlueprint my_or; + my_or.setDocIdLimit(32); + auto my_est = my_or.combine(child_estimates); + EXPECT_EQUAL(my_est.empty, expect.empty); + EXPECT_EQUAL(my_est.estHits, expect.estHits); +} + +TEST("require that OR blueprint use saturated sum as estimate") { + TEST_DO(verify_or_est({{0, true},{0, true},{0, true}}, {0, true})); + TEST_DO(verify_or_est({{0, true},{0, false},{0, true}}, {0, false})); + TEST_DO(verify_or_est({{4, false},{6, false},{5, false}}, {15, false})); + TEST_DO(verify_or_est({{5, false},{20, false},{10, false}}, {32, false})); + TEST_DO(verify_or_est({{100, false},{300, false},{200, false}}, {300, false})); +} + TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index 897d36150d9..c68bccc451b 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -27,12 +27,11 @@ namespace search::queryeval { void maybe_eliminate_self(Blueprint* &self, Blueprint::UP replacement) { // replace with replacement if (replacement) { - Blueprint *tmp = replacement.release(); - tmp->setParent(self->getParent()); - tmp->setSourceId(self->getSourceId()); - self->setParent(0); - replacement.reset(self); - self = tmp; + Blueprint::UP discard(self); + self = replacement.release(); + self->setParent(discard->getParent()); + self->setSourceId(discard->getSourceId()); + discard->setParent(nullptr); } // replace with empty blueprint if empty if (self->getState().estimate().empty) { @@ -40,6 +39,8 @@ void maybe_eliminate_self(Blueprint* &self, Blueprint::UP replacement) { self = new EmptyBlueprint(discard->getState().fields()); self->setParent(discard->getParent()); self->setSourceId(discard->getSourceId()); + self->setDocIdLimit(discard->get_docid_limit()); + discard->setParent(nullptr); } } @@ -69,6 +70,20 @@ Blueprint::min(const std::vector<HitEstimate> &data) return est; } +Blueprint::HitEstimate +Blueprint::sat_sum(const std::vector<HitEstimate> &data, uint32_t docid_limit) +{ + uint64_t sum = 0; + bool empty = true; + uint32_t limit = docid_limit; + for (const auto &est: data) { + sum += est.estHits; + empty = (empty && est.empty); + limit = std::max(limit, est.estHits); + } + return { uint32_t(std::min(sum, uint64_t(limit))), empty }; +} + Blueprint::State::State(const FieldSpecBaseList &fields_in) : _fields(fields_in), _estimate(), diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index b75c903234f..38b02c4d927 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -101,7 +101,7 @@ public: double hit_ratio(uint32_t docid_limit) const { uint32_t total_hits = _estimate.estHits; uint32_t total_docs = std::max(total_hits, docid_limit); - return double(total_hits) / double(total_docs); + return (total_docs == 0) ? 0.0 : double(total_hits) / double(total_docs); } void tree_size(uint32_t value) { _tree_size = value; } uint32_t tree_size() const { return _tree_size; } @@ -119,6 +119,12 @@ public: // utility that just takes minium estimate static HitEstimate min(const std::vector<HitEstimate> &data); + // utility that calculates saturated sum + // + // upper limit for estimate: docid_limit + // lower limit for docid_limit: max child estimate + static HitEstimate sat_sum(const std::vector<HitEstimate> &data, uint32_t docid_limit); + // utility to get the greater estimate to sort first, higher tiers last struct TieredGreaterEstimate { bool operator () (Blueprint * const &a, Blueprint * const &b) const { diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index 561c532430b..eb16025f175 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -19,7 +19,7 @@ namespace search::queryeval { namespace { template <typename CombineType> -size_t lookup_create_source(std::vector<std::unique_ptr<CombineType> > &sources, uint32_t child_source) { +size_t lookup_create_source(std::vector<std::unique_ptr<CombineType> > &sources, uint32_t child_source, uint32_t docid_limit) { for (size_t i = 0; i < sources.size(); ++i) { if (sources[i]->getSourceId() == child_source) { return i; @@ -27,6 +27,7 @@ size_t lookup_create_source(std::vector<std::unique_ptr<CombineType> > &sources, } sources.push_back(std::unique_ptr<CombineType>(new CombineType())); sources.back()->setSourceId(child_source); + sources.back()->setDocIdLimit(docid_limit); return (sources.size() - 1); } @@ -53,7 +54,7 @@ void optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx) { auto *blender = static_cast<SourceBlenderBlueprint *>(blender_up.get()); while (blender->childCnt() > 0) { Blueprint::UP child_up = blender->removeChild(blender->childCnt() - 1); - size_t source_idx = lookup_create_source(sources, child_up->getSourceId()); + size_t source_idx = lookup_create_source(sources, child_up->getSourceId(), self.get_docid_limit()); sources[source_idx]->addChild(std::move(child_up)); } } @@ -298,7 +299,7 @@ AndBlueprint::computeNextHitRate(const Blueprint & child, double hitRate) const Blueprint::HitEstimate OrBlueprint::combine(const std::vector<HitEstimate> &data) const { - return max(data); + return sat_sum(data, get_docid_limit()); } FieldSpecBaseList |