diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-02-01 16:15:59 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-01 16:15:59 +0100 |
commit | 4386ce0ea433d534198d90c6c68c9d0a34c4fff0 (patch) | |
tree | c6ec6065da17b883548ac46c6dcac813d2bb3f04 /searchlib | |
parent | 819479c878b9d2b5dbb9eb3b31a51211aff5b467 (diff) | |
parent | f665524fa7e95327efbac679f4294b1a64d7fdd9 (diff) |
Merge pull request #30128 from vespa-engine/havardpe/strict-heap-or-cleanup
minor adjustments of strict heap OR
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp | 20 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/queryeval/orlikesearch.h | 49 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/queryeval/orsearch.cpp | 4 |
3 files changed, 40 insertions, 33 deletions
diff --git a/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp b/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp index 543441565a2..c27302e818f 100644 --- a/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp +++ b/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp @@ -22,8 +22,9 @@ using TMD = search::fef::TermFieldMatchData; using vespalib::make_string_short::fmt; using Impl = OrSearch::StrictImpl; -double budget = 0.25; -size_t bench_docs = 1000; +double budget = 5.0; +size_t bench_docs = 10'000'000; +bool bench_mode = false; constexpr uint32_t default_seed = 5489u; std::mt19937 gen(default_seed); @@ -279,6 +280,10 @@ TEST(OrSpeed, or_seek_unpack) { } TEST(OrSpeed, bm_array_vs_bitvector) { + if (!bench_mode) { + fprintf(stdout, "[ SKIPPING ] run with 'bench' parameter to activate\n"); + return; + } for (size_t one_of: {16, 32, 64}) { double target = 1.0 / one_of; size_t hits = target * bench_docs; @@ -294,8 +299,12 @@ TEST(OrSpeed, bm_array_vs_bitvector) { } TEST(OrSpeed, bm_strict_or) { - for (double target: {0.001, 0.01, 0.1, 1.0, 10.0}) { - for (size_t child_cnt: {2, 5, 10, 100, 1000}) { + if (!bench_mode) { + fprintf(stdout, "[ SKIPPING ] run with 'bench' parameter to activate\n"); + return; + } + for (double target: {0.001, 0.01, 0.1, 0.5, 1.0, 10.0}) { + for (size_t child_cnt: {2, 3, 4, 5, 10, 100, 250, 500, 1000}) { for (bool optimize: {false, true}) { OrSetup setup(bench_docs); size_t part = setup.per_child(target, child_cnt); @@ -317,9 +326,8 @@ TEST(OrSpeed, bm_strict_or) { int main(int argc, char **argv) { if (argc > 1 && (argv[1] == std::string("bench"))) { - budget = 5.0; - bench_docs = 10'000'000; fprintf(stderr, "running in benchmarking mode\n"); + bench_mode = true; ++argv; --argc; } diff --git a/searchlib/src/vespa/searchlib/queryeval/orlikesearch.h b/searchlib/src/vespa/searchlib/queryeval/orlikesearch.h index 906f51d5bec..a15a87c2d03 100644 --- a/searchlib/src/vespa/searchlib/queryeval/orlikesearch.h +++ b/searchlib/src/vespa/searchlib/queryeval/orlikesearch.h @@ -67,37 +67,33 @@ private: }; template <typename Unpack, typename HEAP, typename ref_t> -class StrictHeapOrSearch : public OrSearch +class StrictHeapOrSearch final : public OrSearch { private: struct Less { const uint32_t *child_docid; - explicit Less(const std::vector<uint32_t> &cd) : child_docid(cd.data()) {} - bool operator()(const ref_t &a, const ref_t &b) const { + constexpr explicit Less(const std::vector<uint32_t> &cd) noexcept : child_docid(cd.data()) {} + constexpr bool operator()(const ref_t &a, const ref_t &b) const noexcept { return (child_docid[a] < child_docid[b]); } }; + std::vector<ref_t> _data; std::vector<uint32_t> _child_docid; - std::vector<ref_t> _data_space; - ref_t *_data_begin; - ref_t *_data_end; Unpack _unpacker; void init_data() { - _data_space.resize(getChildren().size()); + _data.resize(getChildren().size()); for (size_t i = 0; i < getChildren().size(); ++i) { - _data_space[i] = i; + _data[i] = i; } - _data_begin = _data_space.data(); - _data_end = _data_begin + _data_space.size(); } - void onRemove(size_t index) final { + void onRemove(size_t index) override { _unpacker.onRemove(index); _child_docid.erase(_child_docid.begin() + index); init_data(); } - void onInsert(size_t index) final { + void onInsert(size_t index) override { _unpacker.onInsert(index); _child_docid.insert(_child_docid.begin() + index, getChildren()[index]->getDocId()); init_data(); @@ -106,46 +102,47 @@ private: getChildren()[child]->doSeek(docid); _child_docid[child] = getChildren()[child]->getDocId(); } + ref_t *data_begin() noexcept { return _data.data(); } + ref_t *data_pos(size_t offset) noexcept { return _data.data() + offset; } + ref_t *data_end() noexcept { return _data.data() + _data.size(); } public: StrictHeapOrSearch(Children children, const Unpack &unpacker) : OrSearch(std::move(children)), + _data(), _child_docid(getChildren().size()), - _data_space(), - _data_begin(nullptr), - _data_end(nullptr), _unpacker(unpacker) { HEAP::require_left_heap(); init_data(); } - void initRange(uint32_t begin, uint32_t end) final { + void initRange(uint32_t begin, uint32_t end) override { OrSearch::initRange(begin, end); for (size_t i = 0; i < getChildren().size(); ++i) { _child_docid[i] = getChildren()[i]->getDocId(); } - for (size_t i = 2; i <= _data_space.size(); ++i) { - HEAP::push(_data_begin, _data_begin + i, Less(_child_docid)); + for (size_t i = 2; i <= _data.size(); ++i) { + HEAP::push(data_begin(), data_pos(i), Less(_child_docid)); } } - void doSeek(uint32_t docid) final { - while (_child_docid[HEAP::front(_data_begin, _data_end)] < docid) { - seek_child(HEAP::front(_data_begin, _data_end), docid); - HEAP::adjust(_data_begin, _data_end, Less(_child_docid)); + void doSeek(uint32_t docid) override { + while (_child_docid[HEAP::front(data_begin(), data_end())] < docid) { + seek_child(HEAP::front(data_begin(), data_end()), docid); + HEAP::adjust(data_begin(), data_end(), Less(_child_docid)); } - setDocId(_child_docid[HEAP::front(_data_begin, _data_end)]); + setDocId(_child_docid[HEAP::front(data_begin(), data_end())]); } - void doUnpack(uint32_t docid) final { + void doUnpack(uint32_t docid) override { _unpacker.each([&](ref_t child) { if (__builtin_expect(_child_docid[child] == docid, false)) { getChildren()[child]->doUnpack(docid); } }, getChildren().size()); } - bool needUnpack(size_t index) const final { + bool needUnpack(size_t index) const override { return _unpacker.needUnpack(index); } - Trinary is_strict() const final { return Trinary::True; } + Trinary is_strict() const override { return Trinary::True; } }; } diff --git a/searchlib/src/vespa/searchlib/queryeval/orsearch.cpp b/searchlib/src/vespa/searchlib/queryeval/orsearch.cpp index f7ab9af072f..29ec8632612 100644 --- a/searchlib/src/vespa/searchlib/queryeval/orsearch.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/orsearch.cpp @@ -75,8 +75,10 @@ private: template <typename Unpack> SearchIterator::UP create_strict_or(std::vector<SearchIterator::UP> children, const Unpack &unpack, OrSearch::StrictImpl strict_impl) { if (strict_impl == OrSearch::StrictImpl::HEAP) { - if (children.size() <= 0xff) { + if (children.size() <= 0x70) { return std::make_unique<StrictHeapOrSearch<Unpack,LeftArrayHeap,uint8_t>>(std::move(children), unpack); + } else if (children.size() <= 0xff) { + return std::make_unique<StrictHeapOrSearch<Unpack,LeftHeap,uint8_t>>(std::move(children), unpack); } else if (children.size() <= 0xffff) { return std::make_unique<StrictHeapOrSearch<Unpack,LeftHeap,uint16_t>>(std::move(children), unpack); } else { |