diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-11-01 21:03:03 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-01 21:03:03 +0100 |
commit | 4a4d0e54738157630a4ee2688d1bdfa95b354513 (patch) | |
tree | da67e9dc56167aff2e1efddcc3c27134b6bddcdf | |
parent | ac46419c3733bd40bc41869cbf23f5a367b22a60 (diff) | |
parent | b09afaeea370deefe5e50178c4c3a61b43fba078 (diff) |
Merge pull request #29188 from vespa-engine/geirst/tune-performance-of-range-search
Adjust threshold for when to use array vs bitvector in posting list m…
-rw-r--r-- | searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp | 30 |
1 files changed, 29 insertions, 1 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp index 964101ed3a6..e4047b57341 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp @@ -82,10 +82,38 @@ template <typename DataT> void PostingListSearchContextT<DataT>::fetchPostings(const queryeval::ExecuteInfo & execInfo) { + // The following constant is derived after running parts of + // the range search performance test with 10M documents on an Apple M1 Pro with 32 GB memory. + // This code was compiled with two different strategies: + // 1) 'always array merging' + // 2) 'always bitvector merging' + // https://github.com/vespa-engine/system-test/tree/master/tests/performance/range_search + // + // The following 33 test cases were used: + // range_hits_ratio=[1, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50], values_in_range=[1, 100, 10000], fast_search=true, filter_hits_ratio=0. + // + // The baseline performance is given by values_in_range=1, as this uses a single posting list. + // The total cost of posting list merging is the difference in avg query latency (ms) between the baseline and the case in question. + // Based on perf analysis we observe that the cost of iterating the posting list entries and inserting them into + // either an array or bitvector is equal. + // The differences however are: + // 1) Merging sorted array segments (one per posting list) into one large sorted array. + // 2) Allocating the memory needed for the bitvector. + // + // The cost of the two strategies is modeled as: + // 1) estimated_hits_in_range * X + // 2) docIdLimit * Y + // + // Based on the performance results we calculate average values for X and Y: + // 1) X = Array merging cost per hit = 32 ns + // 2) Y = Memory allocation cost per document = 0.08 ns + // + // The threshold for when to use array merging is therefore 0.0025 (0.08 / 32). + constexpr float threshold_for_using_array = 0.0025; if (!_merger.merge_done() && _uniqueValues >= 2u && this->_dictionary.get_has_btree_dictionary()) { if (execInfo.isStrict() || use_posting_lists_when_non_strict(execInfo)) { size_t sum = estimated_hits_in_range(); - if (sum < _docIdLimit / 64) { + if (sum < (_docIdLimit * threshold_for_using_array)) { _merger.reserveArray(_uniqueValues, sum); fillArray(); } else { |