diff options
author | Håvard Pettersen <havardpe@yahooinc.com> | 2022-09-14 15:30:25 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@yahooinc.com> | 2022-09-15 09:46:18 +0000 |
commit | 8ed8658a8073da8e1be630907995df24c28ab7a5 (patch) | |
tree | e0c0d5a81ff951882af175f103e50b57e3756a00 /searchlib/src/vespa | |
parent | 2921bb13d900fbc25aa7d0d248b94e0d87c970df (diff) |
multi-threaded global filter creation
Diffstat (limited to 'searchlib/src/vespa')
3 files changed, 65 insertions, 3 deletions
diff --git a/searchlib/src/vespa/searchlib/queryeval/global_filter.cpp b/searchlib/src/vespa/searchlib/queryeval/global_filter.cpp index 2aff91974a7..04e4d154821 100644 --- a/searchlib/src/vespa/searchlib/queryeval/global_filter.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/global_filter.cpp @@ -1,7 +1,14 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "global_filter.h" +#include "blueprint.h" #include <vespa/vespalib/util/require.h> +#include <vespa/vespalib/util/thread_bundle.h> +#include <vespa/searchlib/common/bitvector.h> +#include <cassert> + +using vespalib::Runnable; +using vespalib::ThreadBundle; namespace search::queryeval { @@ -49,6 +56,27 @@ struct MultiBitVectorFilter : public GlobalFilter { } }; +std::unique_ptr<BitVector> make_part(Blueprint &blueprint, uint32_t begin, uint32_t end) { + bool strict = true; + auto constraint = Blueprint::FilterConstraint::UPPER_BOUND; + auto filter_iterator = blueprint.createFilterSearch(strict, constraint); + filter_iterator->initRange(begin, end); + auto result = filter_iterator->get_hits(begin); + // count bits in parallel and cache the results for later + result->countTrueBits(); + return result; +} + +struct MakePart : Runnable { + Blueprint &blueprint; + uint32_t begin; + uint32_t end; + std::unique_ptr<BitVector> result; + MakePart(Blueprint &blueprint_in, uint32_t begin_in, uint32_t end_in) noexcept + : blueprint(blueprint_in), begin(begin_in), end(end_in), result() {} + void run() override { result = make_part(blueprint, begin, end); } +}; + } GlobalFilter::GlobalFilter() = default; @@ -83,9 +111,10 @@ GlobalFilter::create(std::unique_ptr<BitVector> vector) std::shared_ptr<GlobalFilter> GlobalFilter::create(std::vector<std::unique_ptr<BitVector>> vectors) { - uint32_t total_size = 0; + uint32_t total_size = 1; uint32_t total_count = 0; std::vector<uint32_t> splits; + splits.reserve(vectors.size()); for (size_t i = 0; i < vectors.size(); ++i) { bool last = ((i + 1) == vectors.size()); total_count += vectors[i]->countTrueBits(); @@ -100,4 +129,32 @@ GlobalFilter::create(std::vector<std::unique_ptr<BitVector>> vectors) total_size, total_count); } +std::shared_ptr<GlobalFilter> +GlobalFilter::create(Blueprint &blueprint, uint32_t docid_limit, ThreadBundle &thread_bundle) +{ + uint32_t num_threads = thread_bundle.size(); + std::vector<MakePart> parts; + parts.reserve(num_threads); + uint32_t docid = 1; + uint32_t per_thread = (docid_limit - docid) / num_threads; + uint32_t rest_docs = (docid_limit - docid) % num_threads; + while (docid < docid_limit) { + uint32_t part_size = per_thread + (parts.size() < rest_docs); + parts.emplace_back(blueprint, docid, docid + part_size); + docid += part_size; + } + assert(parts.size() <= num_threads); + assert((docid == docid_limit) || parts.empty()); + thread_bundle.run(parts); + if (parts.size() == 1) { + return create(std::move(parts[0].result)); + } + std::vector<std::unique_ptr<BitVector>> vectors; + vectors.reserve(parts.size()); + for (MakePart &part: parts) { + vectors.push_back(std::move(part.result)); + } + return create(std::move(vectors)); +} + } diff --git a/searchlib/src/vespa/searchlib/queryeval/global_filter.h b/searchlib/src/vespa/searchlib/queryeval/global_filter.h index e93864db2c8..e162bccdbc5 100644 --- a/searchlib/src/vespa/searchlib/queryeval/global_filter.h +++ b/searchlib/src/vespa/searchlib/queryeval/global_filter.h @@ -3,11 +3,15 @@ #pragma once #include <memory> -#include <vespa/searchlib/common/bitvector.h> #include <vector> +namespace vespalib { class ThreadBundle; } +namespace search { class BitVector; } + namespace search::queryeval { +class Blueprint; + /** * Hold ownership of a global filter that can be taken into account by * adaptive query operators. The owned 'bitvector' should be a @@ -35,6 +39,7 @@ public: static std::shared_ptr<GlobalFilter> create(std::vector<uint32_t> docids, uint32_t size); static std::shared_ptr<GlobalFilter> create(std::unique_ptr<BitVector> vector); static std::shared_ptr<GlobalFilter> create(std::vector<std::unique_ptr<BitVector>> vectors); + static std::shared_ptr<GlobalFilter> create(Blueprint &blueprint, uint32_t docid_limit, vespalib::ThreadBundle &thread_bundle); }; } // namespace diff --git a/searchlib/src/vespa/searchlib/queryeval/simpleresult.h b/searchlib/src/vespa/searchlib/queryeval/simpleresult.h index a7bb30ec487..4a8b7a3429f 100644 --- a/searchlib/src/vespa/searchlib/queryeval/simpleresult.h +++ b/searchlib/src/vespa/searchlib/queryeval/simpleresult.h @@ -20,7 +20,7 @@ public: /** * Create an empty result **/ - SimpleResult() : _hits() {} + SimpleResult() noexcept : _hits() {} /** * Create a result with the given hits. |