diff options
author | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2020-04-25 14:04:04 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-04-25 14:04:04 +0200 |
commit | bcc30b625534f8034575debb5e057c4e72abef93 (patch) | |
tree | 9aa6dc600d6ed385c170e8eff459a03fbd1e6833 | |
parent | b406f648a6924104d4c5e9e29688ced5e3e2a9ae (diff) | |
parent | ff556e3da8ca8c79112bf36315c42c8800c6b25d (diff) |
Merge pull request #13062 from vespa-engine/revert-13055-vekterli/optimize-btree-db-find-parents-with-used-bits-aggregation
Revert "Optimize B-tree bucket DB lookup with used-bits aggregation"
3 files changed, 12 insertions, 79 deletions
diff --git a/storage/src/tests/distributor/bucketdatabasetest.cpp b/storage/src/tests/distributor/bucketdatabasetest.cpp index 56e996abf99..1d5355df92f 100644 --- a/storage/src/tests/distributor/bucketdatabasetest.cpp +++ b/storage/src/tests/distributor/bucketdatabasetest.cpp @@ -707,37 +707,4 @@ TEST_P(BucketDatabaseTest, DISABLED_benchmark_const_iteration) { db().toString(false).c_str(), elapsed); } -TEST_P(BucketDatabaseTest, DISABLED_benchmark_find_parents) { - constexpr uint32_t superbuckets = 1u << 16u; - constexpr uint32_t sub_buckets = 14; - constexpr uint32_t n_buckets = superbuckets * sub_buckets; - - std::vector<uint64_t> bucket_keys; - bucket_keys.reserve(n_buckets); - - for (uint32_t sb = 0; sb < superbuckets; ++sb) { - for (uint64_t i = 0; i < sub_buckets; ++i) { - document::BucketId bucket(48, (i << 32ULL) | sb); // TODO eval with different bit counts - bucket_keys.emplace_back(bucket.toKey()); - } - } - fprintf(stderr, "Inserting %zu buckets into DB\n", bucket_keys.size()); - std::sort(bucket_keys.begin(), bucket_keys.end()); - for (uint64_t k : bucket_keys) { - db().update(BucketDatabase::Entry(BucketId(BucketId::keyToBucketId(k)), BI3(0, 1, 2))); - } - - fprintf(stderr, "Invoking getParents() %zu times\n", bucket_keys.size()); - auto elapsed = vespalib::BenchmarkTimer::benchmark([&] { - std::vector<BucketDatabase::Entry> entries; - for (uint64_t k : bucket_keys) { - db().getParents(BucketId(BucketId::keyToBucketId(k)), entries); - assert(entries.size() == 1); - entries.clear(); - } - }, 30); - fprintf(stderr, "Looking up all buckets in %s takes %g seconds\n", - db().toString(false).c_str(), elapsed); -} - } diff --git a/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp b/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp index 2adb3049853..85f5c8c5be9 100644 --- a/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp +++ b/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp @@ -194,27 +194,13 @@ void BTreeBucketDatabase::remove(const BucketId& bucket) { * This allows us to reuse a single iterator and to continue seeking forwards * from its current position. * - * To speed up the process of converging on the target bucket without needing - * to check many unrelated subtrees, we let the underlying B-tree automatically - * aggregate the min/max range of the used-bits of all contained bucket keys. - * If we e.g. know that the minimum number of used bits in the DB is 16, we can - * immediately seek to this level in the tree instead of working our way down - * one bit at a time. By definition, no parents can exist above this level. - * This is a very important optimization, as bucket trees are usually very well - * balanced due to randomized distribution of data (combined with a cluster-wide - * minimum tree level imposed by distribution bits). It is common that the minimum - * number of used bits == max number of used bits, i.e. a totally even split. - * This means that for a system without inconsistently split buckets (i.e. no - * parents) we're highly likely to converge on the target bucket in a single seek. - * * Algorithm: * * Core invariant: every subsequent iterator seek performed in this algorithm * is for a key that is strictly higher than the one the iterator is currently at. * * 1. Lbound seek to the lowest key that is known to exclude all already visited - * parents. On the first iteration we use a bit count equal to the minimum number - * of key used-bits in the entire DB, allowing us to potentially skip most subtrees. + * parents. On the first iteration this is zero, i.e. the first in-order node. * 2. If the current node's key is greater than that of the requested bucket's key, * we've either descended to--or beyond--it in its own subtree or we've entered * a disjoint subtree. Since we know that all parents must sort before any given @@ -243,13 +229,14 @@ void BTreeBucketDatabase::remove(const BucketId& bucket) { * 6. Iff iterator is still valid, go to step 2 * * This algorithm is able to skip through large parts of the tree in a sparsely populated - * tree, but the number of seeks will trend towards O(b - min_bits) as with the legacy - * implementation when a tree is densely populated (where `b` is the used-bits count of the - * most specific node in the tree for the target bucket, and min_bits is the minimum number - * of used-bits for any key in the database). This because all logical inner nodes in the tree - * will have subtrees under them. Even in the worst case we should be more efficient than the - * legacy Judy-based implementation since we've cut any dense search space in half for each - * invocation of seek() on the iterator. + * tree, but the number of seeks will trend towards O(b) as with the legacy implementation + * when a tree is densely populated. This because all logical inner nodes in the tree will + * have subtrees under them. Even in the worst case we should be more efficient than the + * legacy implementation since we've cut any dense search space in half for each invocation + * of seek() on the iterator + * + * TODO use min-aggregation to infer used bit range in the tree. This should allow us to + * skip scanning through many disjoint subtrees. */ BTreeBucketDatabase::BTree::ConstIterator BTreeBucketDatabase::find_parents_internal(const BTree::FrozenView& frozen_view, @@ -258,15 +245,9 @@ BTreeBucketDatabase::find_parents_internal(const BTree::FrozenView& frozen_view, { const uint64_t bucket_key = bucket.toKey(); auto iter = frozen_view.begin(); - if (!iter.valid()) { - return iter; - } - assert(iter.getAggregated().getMin() >= static_cast<int32_t>(BucketId::minNumBits)); - assert(iter.getAggregated().getMax() <= static_cast<int32_t>(BucketId::maxNumBits)); - // Start at the lowest possible tree level no parents can exist above, - // descending towards the bucket itself. + // Start at the root level, descending towards the bucket itself. // Try skipping as many levels of the tree as possible as we go. - uint32_t bits = iter.getAggregated().getMin(); + uint32_t bits = 1; while (iter.valid() && (iter.getKey() < bucket_key)) { auto candidate = BucketId(BucketId::keyToBucketId(iter.getKey())); if (candidate.contains(bucket)) { diff --git a/storage/src/vespa/storage/bucketdb/btree_bucket_database.h b/storage/src/vespa/storage/bucketdb/btree_bucket_database.h index 75736e59092..8898b0c395a 100644 --- a/storage/src/vespa/storage/bucketdb/btree_bucket_database.h +++ b/storage/src/vespa/storage/bucketdb/btree_bucket_database.h @@ -4,8 +4,6 @@ #include "bucketdatabase.h" #include <vespa/vespalib/btree/btree.h> -#include <vespa/vespalib/btree/minmaxaggregated.h> -#include <vespa/vespalib/btree/minmaxaggrcalc.h> #include <vespa/vespalib/datastore/array_store.h> namespace storage { @@ -25,21 +23,8 @@ namespace storage { */ // TODO create and use a new DB interface with better bulk loading, snapshot and iteration support class BTreeBucketDatabase : public BucketDatabase { - - struct KeyUsedBitsMinMaxAggrCalc : search::btree::MinMaxAggrCalc { - constexpr static bool aggregate_over_values() { return false; } - constexpr static int32_t getVal(uint64_t key) noexcept { - static_assert(document::BucketId::CountBits == 6u); - return static_cast<int32_t>(key & 0b11'1111U); // 6 LSB of key contains used-bits - } - }; - // Mapping from u64: bucket key -> <MSB u32: gc timestamp, LSB u32: ArrayStore ref> - using BTree = search::btree::BTree<uint64_t, uint64_t, - search::btree::MinMaxAggregated, - std::less<>, - search::btree::BTreeDefaultTraits, - KeyUsedBitsMinMaxAggrCalc>; + using BTree = search::btree::BTree<uint64_t, uint64_t>; using ReplicaStore = search::datastore::ArrayStore<BucketCopy>; using GenerationHandler = vespalib::GenerationHandler; |