aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2024-02-01 16:15:59 +0100
committerGitHub <noreply@github.com>2024-02-01 16:15:59 +0100
commit4386ce0ea433d534198d90c6c68c9d0a34c4fff0 (patch)
treec6ec6065da17b883548ac46c6dcac813d2bb3f04 /searchlib
parent819479c878b9d2b5dbb9eb3b31a51211aff5b467 (diff)
parentf665524fa7e95327efbac679f4294b1a64d7fdd9 (diff)
Merge pull request #30128 from vespa-engine/havardpe/strict-heap-or-cleanup
minor adjustments of strict heap OR
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp20
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/orlikesearch.h49
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/orsearch.cpp4
3 files changed, 40 insertions, 33 deletions
diff --git a/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp b/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp
index 543441565a2..c27302e818f 100644
--- a/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp
+++ b/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp
@@ -22,8 +22,9 @@ using TMD = search::fef::TermFieldMatchData;
using vespalib::make_string_short::fmt;
using Impl = OrSearch::StrictImpl;
-double budget = 0.25;
-size_t bench_docs = 1000;
+double budget = 5.0;
+size_t bench_docs = 10'000'000;
+bool bench_mode = false;
constexpr uint32_t default_seed = 5489u;
std::mt19937 gen(default_seed);
@@ -279,6 +280,10 @@ TEST(OrSpeed, or_seek_unpack) {
}
TEST(OrSpeed, bm_array_vs_bitvector) {
+ if (!bench_mode) {
+ fprintf(stdout, "[ SKIPPING ] run with 'bench' parameter to activate\n");
+ return;
+ }
for (size_t one_of: {16, 32, 64}) {
double target = 1.0 / one_of;
size_t hits = target * bench_docs;
@@ -294,8 +299,12 @@ TEST(OrSpeed, bm_array_vs_bitvector) {
}
TEST(OrSpeed, bm_strict_or) {
- for (double target: {0.001, 0.01, 0.1, 1.0, 10.0}) {
- for (size_t child_cnt: {2, 5, 10, 100, 1000}) {
+ if (!bench_mode) {
+ fprintf(stdout, "[ SKIPPING ] run with 'bench' parameter to activate\n");
+ return;
+ }
+ for (double target: {0.001, 0.01, 0.1, 0.5, 1.0, 10.0}) {
+ for (size_t child_cnt: {2, 3, 4, 5, 10, 100, 250, 500, 1000}) {
for (bool optimize: {false, true}) {
OrSetup setup(bench_docs);
size_t part = setup.per_child(target, child_cnt);
@@ -317,9 +326,8 @@ TEST(OrSpeed, bm_strict_or) {
int main(int argc, char **argv) {
if (argc > 1 && (argv[1] == std::string("bench"))) {
- budget = 5.0;
- bench_docs = 10'000'000;
fprintf(stderr, "running in benchmarking mode\n");
+ bench_mode = true;
++argv;
--argc;
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/orlikesearch.h b/searchlib/src/vespa/searchlib/queryeval/orlikesearch.h
index 906f51d5bec..a15a87c2d03 100644
--- a/searchlib/src/vespa/searchlib/queryeval/orlikesearch.h
+++ b/searchlib/src/vespa/searchlib/queryeval/orlikesearch.h
@@ -67,37 +67,33 @@ private:
};
template <typename Unpack, typename HEAP, typename ref_t>
-class StrictHeapOrSearch : public OrSearch
+class StrictHeapOrSearch final : public OrSearch
{
private:
struct Less {
const uint32_t *child_docid;
- explicit Less(const std::vector<uint32_t> &cd) : child_docid(cd.data()) {}
- bool operator()(const ref_t &a, const ref_t &b) const {
+ constexpr explicit Less(const std::vector<uint32_t> &cd) noexcept : child_docid(cd.data()) {}
+ constexpr bool operator()(const ref_t &a, const ref_t &b) const noexcept {
return (child_docid[a] < child_docid[b]);
}
};
+ std::vector<ref_t> _data;
std::vector<uint32_t> _child_docid;
- std::vector<ref_t> _data_space;
- ref_t *_data_begin;
- ref_t *_data_end;
Unpack _unpacker;
void init_data() {
- _data_space.resize(getChildren().size());
+ _data.resize(getChildren().size());
for (size_t i = 0; i < getChildren().size(); ++i) {
- _data_space[i] = i;
+ _data[i] = i;
}
- _data_begin = _data_space.data();
- _data_end = _data_begin + _data_space.size();
}
- void onRemove(size_t index) final {
+ void onRemove(size_t index) override {
_unpacker.onRemove(index);
_child_docid.erase(_child_docid.begin() + index);
init_data();
}
- void onInsert(size_t index) final {
+ void onInsert(size_t index) override {
_unpacker.onInsert(index);
_child_docid.insert(_child_docid.begin() + index, getChildren()[index]->getDocId());
init_data();
@@ -106,46 +102,47 @@ private:
getChildren()[child]->doSeek(docid);
_child_docid[child] = getChildren()[child]->getDocId();
}
+ ref_t *data_begin() noexcept { return _data.data(); }
+ ref_t *data_pos(size_t offset) noexcept { return _data.data() + offset; }
+ ref_t *data_end() noexcept { return _data.data() + _data.size(); }
public:
StrictHeapOrSearch(Children children, const Unpack &unpacker)
: OrSearch(std::move(children)),
+ _data(),
_child_docid(getChildren().size()),
- _data_space(),
- _data_begin(nullptr),
- _data_end(nullptr),
_unpacker(unpacker)
{
HEAP::require_left_heap();
init_data();
}
- void initRange(uint32_t begin, uint32_t end) final {
+ void initRange(uint32_t begin, uint32_t end) override {
OrSearch::initRange(begin, end);
for (size_t i = 0; i < getChildren().size(); ++i) {
_child_docid[i] = getChildren()[i]->getDocId();
}
- for (size_t i = 2; i <= _data_space.size(); ++i) {
- HEAP::push(_data_begin, _data_begin + i, Less(_child_docid));
+ for (size_t i = 2; i <= _data.size(); ++i) {
+ HEAP::push(data_begin(), data_pos(i), Less(_child_docid));
}
}
- void doSeek(uint32_t docid) final {
- while (_child_docid[HEAP::front(_data_begin, _data_end)] < docid) {
- seek_child(HEAP::front(_data_begin, _data_end), docid);
- HEAP::adjust(_data_begin, _data_end, Less(_child_docid));
+ void doSeek(uint32_t docid) override {
+ while (_child_docid[HEAP::front(data_begin(), data_end())] < docid) {
+ seek_child(HEAP::front(data_begin(), data_end()), docid);
+ HEAP::adjust(data_begin(), data_end(), Less(_child_docid));
}
- setDocId(_child_docid[HEAP::front(_data_begin, _data_end)]);
+ setDocId(_child_docid[HEAP::front(data_begin(), data_end())]);
}
- void doUnpack(uint32_t docid) final {
+ void doUnpack(uint32_t docid) override {
_unpacker.each([&](ref_t child) {
if (__builtin_expect(_child_docid[child] == docid, false)) {
getChildren()[child]->doUnpack(docid);
}
}, getChildren().size());
}
- bool needUnpack(size_t index) const final {
+ bool needUnpack(size_t index) const override {
return _unpacker.needUnpack(index);
}
- Trinary is_strict() const final { return Trinary::True; }
+ Trinary is_strict() const override { return Trinary::True; }
};
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/orsearch.cpp b/searchlib/src/vespa/searchlib/queryeval/orsearch.cpp
index f7ab9af072f..29ec8632612 100644
--- a/searchlib/src/vespa/searchlib/queryeval/orsearch.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/orsearch.cpp
@@ -75,8 +75,10 @@ private:
template <typename Unpack>
SearchIterator::UP create_strict_or(std::vector<SearchIterator::UP> children, const Unpack &unpack, OrSearch::StrictImpl strict_impl) {
if (strict_impl == OrSearch::StrictImpl::HEAP) {
- if (children.size() <= 0xff) {
+ if (children.size() <= 0x70) {
return std::make_unique<StrictHeapOrSearch<Unpack,LeftArrayHeap,uint8_t>>(std::move(children), unpack);
+ } else if (children.size() <= 0xff) {
+ return std::make_unique<StrictHeapOrSearch<Unpack,LeftHeap,uint8_t>>(std::move(children), unpack);
} else if (children.size() <= 0xffff) {
return std::make_unique<StrictHeapOrSearch<Unpack,LeftHeap,uint16_t>>(std::move(children), unpack);
} else {