Diffstat (limited to 'storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp')
-rw-r--r--  storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp  527
1 file changed, 66 insertions(+), 461 deletions(-)
diff --git a/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp b/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp
index 42bd3a247bb..9634d6d0953 100644
--- a/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp
+++ b/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp
@@ -1,22 +1,10 @@
// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "btree_bucket_database.h"
-#include <vespa/vespalib/btree/btreebuilder.h>
-#include <vespa/vespalib/btree/btreenodeallocator.hpp>
-#include <vespa/vespalib/btree/btreenode.hpp>
-#include <vespa/vespalib/btree/btreenodestore.hpp>
-#include <vespa/vespalib/btree/btreeiterator.hpp>
-#include <vespa/vespalib/btree/btreeroot.hpp>
-#include <vespa/vespalib/btree/btreebuilder.hpp>
-#include <vespa/vespalib/btree/btree.hpp>
-#include <vespa/vespalib/btree/btreestore.hpp>
+#include "generic_btree_bucket_database.hpp"
#include <vespa/vespalib/datastore/array_store.hpp>
#include <iostream>
-// TODO remove once this impl uses the generic bucket B-tree code!
-#include "generic_btree_bucket_database.h"
-#include <vespa/vespalib/datastore/datastore.h>
-
/*
* Buckets in our tree are represented by their 64-bit numeric key, in what's known as
* "reversed bit order with appended used-bits" form. I.e. a bucket ID (16, 0xcafe), which
@@ -74,232 +62,53 @@ uint64_t value_from(uint32_t gc_timestamp, EntryRef ref) {
return ((uint64_t(gc_timestamp) << 32u) | ref.ref());
}
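The packed value word above keeps the GC timestamp in the upper 32 bits and the replica array's EntryRef in the lower 32. A minimal round-trip sketch (hypothetical helper names; the real unpacking is done by the gc_timestamp_from_value()/entry_ref_from_value() helpers used throughout this file):

#include <cstdint>

// Illustrative pack/unpack of the (gc_timestamp, EntryRef) word.
uint64_t pack_value(uint32_t gc_timestamp, uint32_t raw_entry_ref) {
    return (uint64_t(gc_timestamp) << 32u) | raw_entry_ref;
}
uint32_t unpack_gc_timestamp(uint64_t value) { return uint32_t(value >> 32u); }
uint32_t unpack_raw_entry_ref(uint64_t value) { return uint32_t(value); }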
-// TODO dedupe and unify common code
-uint8_t
-getMinDiffBits(uint16_t minBits, const document::BucketId& a, const document::BucketId& b) {
- for (uint32_t i = minBits; i <= std::min(a.getUsedBits(), b.getUsedBits()); i++) {
- document::BucketId a1(i, a.getRawId());
- document::BucketId b1(i, b.getRawId());
- if (b1.getId() != a1.getId()) {
- return i;
- }
- }
- return minBits;
}
-uint8_t next_parent_bit_seek_level(uint8_t minBits, const document::BucketId& a, const document::BucketId& b) {
- const uint8_t min_used = std::min(a.getUsedBits(), b.getUsedBits());
- assert(min_used >= minBits); // Always monotonically descending towards leaves
- for (uint32_t i = minBits; i <= min_used; i++) {
- document::BucketId a1(i, a.getRawId());
- document::BucketId b1(i, b.getRawId());
- if (b1.getId() != a1.getId()) {
- return i;
- }
- }
- // The bit prefix is equal, which means that one node is a parent of the other. In this
- // case we have to force the seek to continue from the next level in the tree.
- return std::max(min_used, minBits) + 1;
-}
+struct BTreeBucketDatabase::ReplicaValueTraits {
+ using ValueType = Entry;
+ using ConstValueRef = ConstEntryRef;
+ using DataStoreType = vespalib::datastore::ArrayStore<BucketCopy>;
-// TODO getMinDiffBits is hoisted from lockablemap.cpp, could probably be rewritten in terms of xor and MSB bit scan instr
-/*
- * 63 -------- ... -> 0
- * a: 1101111111 ... 0010
- * b: 1101110010 ... 0011
- * a ^ b: 0000001101 ... 0001
- * ^- diff bit = 57
- *
- * 63 - vespalib::Optimized::msbIdx(a ^ b) ==> 6
- *
- * what if a == b? special case? not a problem if we can prove this never happens.
- */
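The removed sketch maps directly onto a leading-zero count: for a 64-bit word, 63 - msbIdx(x) is exactly countl_zero(x). A hedged illustration, assuming C++20 <bit> and keys already in the reversed-bit form described at the top of this file (so the first split level sits at the MSB); the helper name is hypothetical:

#include <bit>
#include <cstdint>
#include <optional>

// First level at which two reversed-form keys diverge: the length of their
// shared leading prefix, plus one. Returns nullopt for a == b -- the
// "special case" the removed comment worries about.
std::optional<uint8_t> first_diverging_level(uint64_t a_key, uint64_t b_key) {
    const uint64_t diff = a_key ^ b_key;
    if (diff == 0) {
        return std::nullopt;
    }
    return uint8_t(std::countl_zero(diff) + 1);
}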
+ static ValueType make_invalid_value() {
+ return Entry::createInvalid();
+ }
+ static uint64_t wrap_and_store_value(DataStoreType& store, const Entry& entry) noexcept {
+ auto replicas_ref = store.add(entry.getBucketInfo().getRawNodes());
+ return value_from(entry.getBucketInfo().getLastGarbageCollectionTime(), replicas_ref);
+ }
+ static void remove_by_wrapped_value(DataStoreType& store, uint64_t value) noexcept {
+ store.remove(entry_ref_from_value(value));
+ }
+ static ValueType unwrap_from_key_value(const DataStoreType& store, uint64_t key, uint64_t value) {
+ const auto replicas_ref = store.get(entry_ref_from_value(value));
+ const auto bucket = BucketId(BucketId::keyToBucketId(key));
+ return entry_from_replica_array_ref(bucket, gc_timestamp_from_value(value), replicas_ref);
+ }
+ static ConstValueRef unwrap_const_ref_from_key_value(const DataStoreType& store, uint64_t key, uint64_t value) {
+ const auto replicas_ref = store.get(entry_ref_from_value(value));
+ const auto bucket = BucketId(BucketId::keyToBucketId(key));
+ return const_entry_ref_from_replica_array_ref(bucket, gc_timestamp_from_value(value), replicas_ref);
+ }
+};
-}
+template class bucketdb::GenericBTreeBucketDatabase<BTreeBucketDatabase::ReplicaValueTraits>;
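ReplicaValueTraits is the whole contract the generic tree programs against: values are wrapped into a 64-bit word (with their replicas stashed in the ArrayStore) on write, and unwrapped on read. A hedged sketch of the call pattern these hooks support, mirroring the removed update() logic further down; Tree is a placeholder, not the GenericBTreeBucketDatabase source:

// Sketch only: how an update can be expressed purely via the traits hooks.
template <typename Traits, typename Tree>
void generic_update(typename Traits::DataStoreType& store, Tree& tree,
                    uint64_t key, const typename Traits::ValueType& value) {
    const uint64_t wrapped = Traits::wrap_and_store_value(store, value);
    auto iter = tree.lowerBound(key);
    if (iter.valid() && iter.getKey() == key) {
        Traits::remove_by_wrapped_value(store, iter.getData()); // free old replicas
        iter.writeData(wrapped);  // in-place swap, no tree structure change
    } else {
        tree.insert(iter, key, wrapped);
    }
}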
BTreeBucketDatabase::BTreeBucketDatabase()
- : _tree(),
- _store(make_default_array_store_config<ReplicaStore>()),
- _generation_handler()
+ : _impl(std::make_unique<ImplType>(make_default_array_store_config<ReplicaValueTraits::DataStoreType>()))
{
}
BTreeBucketDatabase::~BTreeBucketDatabase() = default;
-void BTreeBucketDatabase::commit_tree_changes() {
- // TODO break up and refactor
- // TODO verify semantics and usage
- // TODO make BTree wrapping API which abstracts away all this stuff via reader/writer interfaces
- _tree.getAllocator().freeze();
-
- auto current_gen = _generation_handler.getCurrentGeneration();
- _store.transferHoldLists(current_gen);
- _tree.getAllocator().transferHoldLists(current_gen);
-
- _generation_handler.incGeneration();
-
- auto used_gen = _generation_handler.getFirstUsedGeneration();
- _store.trimHoldLists(used_gen);
- _tree.getAllocator().trimHoldLists(used_gen);
-}
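commit_tree_changes() was the writer half of a generation-based (RCU-style) reclamation scheme: freeze the allocator, park retired nodes on hold lists tagged with the current generation, bump the generation, then trim everything no reader can still observe. The reader half, per the removed ReadGuardImpl members further down, is roughly:

// Reader side of the generation scheme (calls as they appear in this diff):
auto guard = _generation_handler.takeGuard(); // pins the first-used generation
auto view  = _tree.getFrozenView();           // immutable snapshot of the tree
// ... read from `view`; trimHoldLists() cannot reclaim nodes this snapshot
// still references while `guard` is alive.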
-
-Entry BTreeBucketDatabase::entry_from_value(uint64_t bucket_key, uint64_t value) const {
- const auto replicas_ref = _store.get(entry_ref_from_value(value));
- const auto bucket = BucketId(BucketId::keyToBucketId(bucket_key));
- return entry_from_replica_array_ref(bucket, gc_timestamp_from_value(value), replicas_ref);
-}
-
-Entry BTreeBucketDatabase::entry_from_iterator(const BTree::ConstIterator& iter) const {
- if (!iter.valid()) {
- return Entry::createInvalid();
- }
- const auto value = iter.getData();
- std::atomic_thread_fence(std::memory_order_acquire);
- return entry_from_value(iter.getKey(), value);
-}
-
-ConstEntryRef BTreeBucketDatabase::const_entry_ref_from_iterator(const BTree::ConstIterator& iter) const {
- if (!iter.valid()) {
- return ConstEntryRef::createInvalid();
- }
- const auto value = iter.getData();
- std::atomic_thread_fence(std::memory_order_acquire);
- const auto replicas_ref = _store.get(entry_ref_from_value(value));
- const auto bucket = BucketId(BucketId::keyToBucketId(iter.getKey()));
- return const_entry_ref_from_replica_array_ref(bucket, gc_timestamp_from_value(value), replicas_ref);
-}
-
-BucketId BTreeBucketDatabase::bucket_from_valid_iterator(const BTree::ConstIterator& iter) const {
- return BucketId(BucketId::keyToBucketId(iter.getKey()));
-}
-
Entry BTreeBucketDatabase::get(const BucketId& bucket) const {
- return entry_from_iterator(_tree.find(bucket.toKey()));
+ return _impl->get(bucket);
}
void BTreeBucketDatabase::remove(const BucketId& bucket) {
- auto iter = _tree.find(bucket.toKey());
- if (!iter.valid()) {
- return;
- }
- const auto value = iter.getData();
- _store.remove(entry_ref_from_value(value));
- _tree.remove(iter);
- commit_tree_changes();
+ _impl->remove(bucket);
}
-/*
- * Finding the complete set of parents of a given bucket is not obvious how to
- * do efficiently, as we only know that the parents are ordered before their
- * children, but we do not a-priori know if any exist at all. The Judy DB impl
- * does O(b) explicit point lookups (where b is the number of used bits in the
- * bucket), starting at the leaf bit and working towards the root. To avoid
- * having to re-create iterators and perform a full tree search every time, we
- * turn this on its head and start from the root, progressing towards the leaf.
- * This allows us to reuse a single iterator and to continue seeking forwards
- * from its current position.
- *
- * To speed up the process of converging on the target bucket without needing
- * to check many unrelated subtrees, we let the underlying B-tree automatically
- * aggregate the min/max range of the used-bits of all contained bucket keys.
- * If we e.g. know that the minimum number of used bits in the DB is 16, we can
- * immediately seek to this level in the tree instead of working our way down
- * one bit at a time. By definition, no parents can exist above this level.
- * This is a very important optimization, as bucket trees are usually very well
- * balanced due to randomized distribution of data (combined with a cluster-wide
- * minimum tree level imposed by distribution bits). It is common that the minimum
- * number of used bits == max number of used bits, i.e. a totally even split.
- * This means that for a system without inconsistently split buckets (i.e. no
- * parents) we're highly likely to converge on the target bucket in a single seek.
- *
- * Algorithm:
- *
- * Core invariant: every subsequent iterator seek performed in this algorithm
- * is for a key that is strictly higher than the one the iterator is currently at.
- *
- * 1. Lbound seek to the lowest key that is known to exclude all already visited
- * parents. On the first iteration we use a bit count equal to the minimum number
- * of key used-bits in the entire DB, allowing us to potentially skip most subtrees.
- * 2. If the current node's key is greater than that of the requested bucket's key,
- * we've either descended to--or beyond--it in its own subtree or we've entered
- * a disjoint subtree. Since we know that all parents must sort before any given
- * child bucket, no more parents may be found at this point. Algorithm terminates.
- * 3. As the main body of the loop is entered, we know one of following must hold:
- * 3.1 The current node is an explicitly present parent of our bucket.
- * 3.2 The current node is contained in a left subtree branch of a parent that
- * does not have a bucket explicitly present in the tree. It cannot be in
- * a right subtree of any parent, as that would imply the node is ordered
- * _after_ our own bucket in an in-order traversal, which would contradict
- * the check in step 2 above.
- * 4. If the current node contains the requested bucket, we're at a parent
- * node of the bucket; add it to the result set.
- * If this is _not_ the case, we're in a different subtree. Example: the
- * requested bucket has a key whose MSB is 1 but the first bucket in the
- * tree has a key with an MSB of 0. Either way we need to update our search
- * key to home in on the target subtree where more parents may be found;
- * 5. Update the seek key to find the next possible parent. To ensure this key is
- * strictly greater than the iterator's current key we find the largest shared
- * prefix of bits in common between the current node's key and the requested
- * bucket's key. The prefix length + 1 is then the depth in the tree at which the
- * two subtrees branch off and diverge.
- * The new key is then the MSB prefix length + 1 requested bucket's key with a
- * matching number of used-bits set. Forward lbound-seek the iterator to this key.
- * `--> TODO elaborate on prefix semantics when they are equal wrt. min used bits
- * 6. Iff iterator is still valid, go to step 2
- *
- * This algorithm is able to skip through large parts of the tree in a sparsely populated
- * tree, but the number of seeks will trend towards O(b - min_bits) as with the legacy
- * implementation when a tree is densely populated (where `b` is the used-bits count of the
- * most specific node in the tree for the target bucket, and min_bits is the minimum number
- * of used-bits for any key in the database). This because all logical inner nodes in the tree
- * will have subtrees under them. Even in the worst case we should be more efficient than the
- * legacy Judy-based implementation since we've cut any dense search space in half for each
- * invocation of seek() on the iterator.
- */
-BTreeBucketDatabase::BTree::ConstIterator
-BTreeBucketDatabase::find_parents_internal(const BTree::FrozenView& frozen_view,
- const document::BucketId& bucket,
- std::vector<Entry>& entries) const
-{
- const uint64_t bucket_key = bucket.toKey();
- if (frozen_view.empty()) {
- return frozen_view.begin(); // Will be invalid.
- }
- const auto min_db_bits = frozen_view.getAggregated().getMin();
- assert(min_db_bits >= static_cast<int32_t>(BucketId::minNumBits));
- assert(min_db_bits <= static_cast<int32_t>(BucketId::maxNumBits));
- // Start at the lowest possible tree level no parents can exist above,
- // descending towards the bucket itself.
- // Note: important to use getId() rather than getRawId(), as min_db_bits may be
- // greater than the used bits of the queried bucket. If we used the raw ID, we'd
- // end up looking at undefined bits.
- const auto first_key = BucketId(min_db_bits, bucket.getId()).toKey();
- auto iter = frozen_view.lowerBound(first_key);
- // Try skipping as many levels of the tree as possible as we go.
- uint32_t bits = min_db_bits;
- while (iter.valid() && (iter.getKey() < bucket_key)) {
- auto candidate = BucketId(BucketId::keyToBucketId(iter.getKey()));
- if (candidate.contains(bucket)) {
- assert(candidate.getUsedBits() >= bits);
- entries.emplace_back(entry_from_iterator(iter));
- }
- bits = next_parent_bit_seek_level(bits, candidate, bucket);
- const auto parent_key = BucketId(bits, bucket.getRawId()).toKey();
- assert(parent_key > iter.getKey());
- iter.seek(parent_key);
- }
- return iter;
-}
-
-void BTreeBucketDatabase::find_parents_and_self_internal(const BTree::FrozenView& frozen_view,
- const document::BucketId& bucket,
- std::vector<Entry>& entries) const
-{
- auto iter = find_parents_internal(frozen_view, bucket, entries);
- if (iter.valid() && iter.getKey() == bucket.toKey()) {
- entries.emplace_back(entry_from_iterator(iter));
- }
-}
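The algorithm comment above is easier to follow against a toy model. The sketch below (hypothetical names; std::map stands in for the frozen B-tree view, and there is no DB-wide min-used-bits aggregation) preserves the one property doing all the work: with path bits at the top of the key and used-bits appended in the low 6 bits, parents sort strictly before their children.

#include <algorithm>
#include <cstdint>
#include <map>
#include <vector>

struct ToyBucket {
    uint8_t  used; // number of meaningful path bits, 1..58
    uint64_t path; // MSB-first path from the root, `used` bits wide
};

uint64_t to_key(const ToyBucket& b) {
    return (b.path << (64u - b.used)) | b.used; // path on top, used-bits in LSBs
}

bool contains(const ToyBucket& parent, const ToyBucket& child) {
    return parent.used <= child.used
        && (child.path >> (child.used - parent.used)) == parent.path;
}

// Mirrors next_parent_bit_seek_level(): first level where the paths diverge,
// or one past the shorter path when it is a strict prefix of the longer one.
uint8_t next_seek_level(uint8_t min_bits, const ToyBucket& a, const ToyBucket& b) {
    const uint8_t min_used = std::min(a.used, b.used);
    for (uint8_t i = min_bits; i <= min_used; ++i) {
        if ((a.path >> (a.used - i)) != (b.path >> (b.used - i))) {
            return i;
        }
    }
    return min_used + 1;
}

std::vector<ToyBucket> find_parents(const std::map<uint64_t, ToyBucket>& db,
                                    const ToyBucket& target) {
    std::vector<ToyBucket> out;
    const uint64_t target_key = to_key(target);
    uint8_t bits = 1; // the real code starts at the DB-wide minimum used-bits
    auto iter = db.lower_bound(to_key({bits, target.path >> (target.used - bits)}));
    while (iter != db.end() && iter->first < target_key) { // step 2 terminates us
        const ToyBucket& candidate = iter->second;
        if (contains(candidate, target)) {                  // step 4: a parent
            out.push_back(candidate);
        }
        bits = next_seek_level(bits, candidate, target);    // step 5: next level
        iter = db.lower_bound(to_key({bits, target.path >> (target.used - bits)}));
    }
    return out;
}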
+using bucketdb::ByValue;
/*
* Note: due to legacy API reasons, iff the requested bucket itself exists in the
@@ -309,212 +118,53 @@ void BTreeBucketDatabase::find_parents_and_self_internal(const BTree::FrozenView
void BTreeBucketDatabase::getParents(const BucketId& bucket,
std::vector<Entry>& entries) const
{
- auto view = _tree.getFrozenView();
- find_parents_and_self_internal(view, bucket, entries);
+ _impl->find_parents_and_self<ByValue>(bucket, [&entries]([[maybe_unused]] uint64_t key, Entry entry){
+ entries.emplace_back(std::move(entry));
+ });
}
void BTreeBucketDatabase::getAll(const BucketId& bucket,
std::vector<Entry>& entries) const
{
- auto view = _tree.getFrozenView();
- auto iter = find_parents_internal(view, bucket, entries);
- // `iter` is already pointing at, or beyond, one of the bucket's subtrees.
- for (; iter.valid(); ++iter) {
- auto candidate = BucketId(BucketId::keyToBucketId(iter.getKey()));
- if (bucket.contains(candidate)) {
- entries.emplace_back(entry_from_iterator(iter));
- } else {
- break;
- }
- }
+ _impl->find_parents_self_and_children<ByValue>(bucket, [&entries]([[maybe_unused]] uint64_t key, Entry entry){
+ entries.emplace_back(std::move(entry));
+ });
}
void BTreeBucketDatabase::update(const Entry& newEntry) {
assert(newEntry.valid());
- auto replicas_ref = _store.add(newEntry.getBucketInfo().getRawNodes());
- const auto new_value = value_from(newEntry.getBucketInfo().getLastGarbageCollectionTime(), replicas_ref);
- const auto bucket_key = newEntry.getBucketId().toKey();
- auto iter = _tree.lowerBound(bucket_key);
- if (iter.valid() && (iter.getKey() == bucket_key)) {
- _store.remove(entry_ref_from_value(iter.getData()));
- // In-place update of value; does not require tree structure modification
- std::atomic_thread_fence(std::memory_order_release); // Must ensure visibility when new array ref is observed
- iter.writeData(new_value);
- } else {
- _tree.insert(iter, bucket_key, new_value);
- }
- commit_tree_changes(); // TODO does publishing a new root imply an implicit memory fence?
+ _impl->update(newEntry.getBucketId(), newEntry);
}
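The removed in-place branch paired a release fence here with the acquire fences in the removed entry_from_iterator()/const_entry_ref_from_iterator() readers: the replica array must be fully written before its packed ref becomes observable. A self-contained analogue of that pairing (illustrative, not Vespa source):

#include <atomic>
#include <cstdint>

uint64_t payload;               // stands in for the replica array contents
std::atomic<uint64_t> slot{0};  // stands in for the tree's 64-bit value word

void writer(uint64_t new_payload, uint64_t new_ref) {
    payload = new_payload;                            // write the array first
    std::atomic_thread_fence(std::memory_order_release);
    slot.store(new_ref, std::memory_order_relaxed);   // then expose the ref
}

uint64_t reader() {
    const uint64_t ref = slot.load(std::memory_order_relaxed);
    std::atomic_thread_fence(std::memory_order_acquire);
    return (ref != 0) ? payload : 0;                  // deref only after fence
}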
// TODO need snapshot read with guarding
// FIXME semantics of for-each in judy and bit tree DBs differ, former expects lbound, latter ubound..!
// FIXME but bit-tree code says "lowerBound" in impl and "after" in declaration???
void BTreeBucketDatabase::forEach(EntryProcessor& proc, const BucketId& after) const {
- for (auto iter = _tree.upperBound(after.toKey()); iter.valid(); ++iter) {
- if (!proc.process(const_entry_ref_from_iterator(iter))) {
+ for (auto iter = _impl->upper_bound(after.toKey()); iter.valid(); ++iter) {
+ if (!proc.process(_impl->const_value_ref_from_valid_iterator(iter))) {
break;
}
}
}
-struct BTreeBuilderMerger final : BucketDatabase::Merger {
- BTreeBucketDatabase& _db;
- BTreeBucketDatabase::BTree::Builder& _builder;
- uint64_t _current_key;
- uint64_t _current_value;
- Entry _cached_entry;
- bool _valid_cached_entry;
-
- BTreeBuilderMerger(BTreeBucketDatabase& db,
- BTreeBucketDatabase::BTree::Builder& builder)
- : _db(db),
- _builder(builder),
- _current_key(0),
- _current_value(0),
- _cached_entry(),
- _valid_cached_entry(false)
- {}
- ~BTreeBuilderMerger() override = default;
-
- uint64_t bucket_key() const noexcept override {
- return _current_key;
- }
- BucketId bucket_id() const noexcept override {
- return BucketId(BucketId::keyToBucketId(_current_key));
- }
- Entry& current_entry() override {
- if (!_valid_cached_entry) {
- _cached_entry = _db.entry_from_value(_current_key, _current_value);
- _valid_cached_entry = true;
- }
- return _cached_entry;
- }
- void insert_before_current(const Entry& e) override {
- const uint64_t bucket_key = e.getBucketId().toKey();
- assert(bucket_key < _current_key);
-
- auto replicas_ref = _db._store.add(e.getBucketInfo().getRawNodes());
- const auto new_value = value_from(e.getBucketInfo().getLastGarbageCollectionTime(), replicas_ref);
-
- _builder.insert(bucket_key, new_value);
- }
-
- void update_iteration_state(uint64_t key, uint64_t value) {
- _current_key = key;
- _current_value = value;
- _valid_cached_entry = false;
- }
-};
-
-struct BTreeTrailingInserter final : BucketDatabase::TrailingInserter {
- BTreeBucketDatabase& _db;
- BTreeBucketDatabase::BTree::Builder& _builder;
-
- BTreeTrailingInserter(BTreeBucketDatabase& db,
- BTreeBucketDatabase::BTree::Builder& builder)
- : _db(db),
- _builder(builder)
- {}
-
- ~BTreeTrailingInserter() override = default;
-
- void insert_at_end(const Entry& e) override {
- const uint64_t bucket_key = e.getBucketId().toKey();
- const auto replicas_ref = _db._store.add(e.getBucketInfo().getRawNodes());
- const auto new_value = value_from(e.getBucketInfo().getLastGarbageCollectionTime(), replicas_ref);
- _builder.insert(bucket_key, new_value);
- }
-};
-
-// TODO lbound arg?
void BTreeBucketDatabase::merge(MergingProcessor& proc) {
- BTreeBucketDatabase::BTree::Builder builder(_tree.getAllocator());
- BTreeBuilderMerger merger(*this, builder);
-
- // TODO for_each instead?
- for (auto iter = _tree.begin(); iter.valid(); ++iter) {
- const uint64_t key = iter.getKey();
- const uint64_t value = iter.getData();
- merger.update_iteration_state(key, value);
-
- auto result = proc.merge(merger);
-
- if (result == MergingProcessor::Result::KeepUnchanged) {
- builder.insert(key, value); // Reuse array store ref with no changes
- } else if (result == MergingProcessor::Result::Update) {
- assert(merger._valid_cached_entry); // Must actually have been touched
- assert(merger._cached_entry.valid());
- _store.remove(entry_ref_from_value(value));
- auto new_replicas_ref = _store.add(merger._cached_entry.getBucketInfo().getRawNodes());
- const auto new_value = value_from(merger._cached_entry.getBucketInfo().getLastGarbageCollectionTime(), new_replicas_ref);
- builder.insert(key, new_value);
- } else if (result == MergingProcessor::Result::Skip) {
- _store.remove(entry_ref_from_value(value));
- } else {
- abort();
- }
- }
- BTreeTrailingInserter inserter(*this, builder);
- proc.insert_remaining_at_end(inserter);
-
- _tree.assign(builder);
- commit_tree_changes();
+ _impl->merge(proc);
}
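For reference, the caller-side contract the removed builder machinery serviced (and which _impl->merge() must still honor): return KeepUnchanged to reuse the value word, Update to re-store a mutated current_entry(), or Skip to drop the bucket. A hypothetical processor, assuming the interfaces exactly as used above:

// Hypothetical example: drop buckets that have never been GC'd, keep the rest.
struct DropNeverGcd final : BucketDatabase::MergingProcessor {
    Result merge(BucketDatabase::Merger& m) override {
        const auto& info = m.current_entry().getBucketInfo();
        return (info.getLastGarbageCollectionTime() == 0)
            ? Result::Skip           // entry removed, replicas freed
            : Result::KeepUnchanged; // value word reused verbatim
    }
    void insert_remaining_at_end(BucketDatabase::TrailingInserter&) override {
        // nothing to append in this example
    }
};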
Entry BTreeBucketDatabase::upperBound(const BucketId& bucket) const {
- return entry_from_iterator(_tree.upperBound(bucket.toKey()));
+ return _impl->entry_from_iterator(_impl->upper_bound(bucket.toKey()));
}
uint64_t BTreeBucketDatabase::size() const {
- return _tree.size();
+ return _impl->size();
}
void BTreeBucketDatabase::clear() {
- _tree.clear();
- commit_tree_changes();
+ _impl->clear();
}
-/*
- * Returns the bucket ID which, based on the buckets already existing in the DB,
- * is the most specific location in the tree in which it should reside. This may
- * or may not be a bucket that already exists.
- *
- * Example: if there is a single bucket (1, 1) in the tree, a query for (1, 1) or
- * (1, 3) will return (1, 1) as that is the most specific leaf in that subtree.
- * A query for (1, 0) will return (1, 0) even though this doesn't currently exist,
- * as there is no existing bucket that can contain the queried bucket. It is up to
- * the caller to create this bucket according to its needs.
- *
- * Usually this function will be called with an ID whose used-bits is at max (58), in
- * order to find a leaf bucket to route an incoming document operation to.
- *
- * TODO rename this function, it's very much _not_ obvious what an "appropriate" bucket is..!
- * TODO this should be possible to do concurrently
- */
BucketId BTreeBucketDatabase::getAppropriateBucket(uint16_t minBits, const BucketId& bid) {
- // The bucket tree is ordered in such a way that it represents a
- // natural in-order traversal of all buckets, with inner nodes being
- // visited before leaf nodes. This means that a lower bound seek will
- // never return a parent of a seeked bucket. The iterator will be pointing
- // to a bucket that is either the actual bucket given as the argument to
- // lowerBound() or the next in-order bucket (or end() if none exists).
- auto iter = _tree.lowerBound(bid.toKey());
- if (iter.valid()) {
- // Find the first level in the tree where the paths through the bucket tree
- // diverge for the target bucket and the current bucket.
- minBits = getMinDiffBits(minBits, bucket_from_valid_iterator(iter), bid);
- }
- // TODO is it better to copy original iterator and do begin() on the copy?
- auto first_iter = _tree.begin();
- // Original iterator might be in a different subtree than that of our
- // target bucket. If possible, rewind one node to discover any parent or
- // leftmost sibling of our node. If there's no such node, we'll still
- // discover the greatest equal bit prefix.
- if (iter != first_iter) {
- --iter;
- minBits = getMinDiffBits(minBits, bucket_from_valid_iterator(iter), bid);
- }
- return BucketId(minBits, bid.getRawId());
+ return _impl->getAppropriateBucket(minBits, bid);
}
/*
@@ -527,24 +177,7 @@ BucketId BTreeBucketDatabase::getAppropriateBucket(uint16_t minBits, const Bucke
*/
// TODO rename/clarify to indicate this is child _subtrees_, not explicit child _buckets_!
uint32_t BTreeBucketDatabase::childCount(const BucketId& bucket) const {
- assert(bucket.getUsedBits() < BucketId::maxNumBits);
- BucketId lhs_bucket(bucket.getUsedBits() + 1, bucket.getId());
- BucketId rhs_bucket(bucket.getUsedBits() + 1, (1ULL << bucket.getUsedBits()) | bucket.getId());
-
- auto iter = _tree.lowerBound(lhs_bucket.toKey());
- if (!iter.valid()) {
- return 0;
- }
- if (lhs_bucket.contains(bucket_from_valid_iterator(iter))) {
- iter.seek(rhs_bucket.toKey());
- if (!iter.valid()) {
- return 1; // lhs subtree only
- }
- return (rhs_bucket.contains(bucket_from_valid_iterator(iter)) ? 2 : 1);
- } else if (rhs_bucket.contains(bucket_from_valid_iterator(iter))) {
- return 1; // rhs subtree only
- }
- return 0;
+ return _impl->child_subtree_count(bucket);
}
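A worked example of the probe the removed code performed, assuming BucketId location bits grow from the LSB end: a bucket's two possible child subtrees differ only in the bit appended at position usedBits.

// Illustrative only: for bucket (2, 0b01) the probes are
const document::BucketId bucket(2, 0b01);
const document::BucketId lhs(3, 0b001);               // "0" child subtree
const document::BucketId rhs(3, (1ULL << 2) | 0b01);  // "1" child: 0b101
// childCount(bucket) is 0, 1, or 2 depending on whether any bucket in the
// DB falls under the lhs and/or rhs subtree roots.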
void BTreeBucketDatabase::print(std::ostream& out, bool verbose,
@@ -556,15 +189,22 @@ void BTreeBucketDatabase::print(std::ostream& out, bool verbose,
}
vespalib::MemoryUsage BTreeBucketDatabase::memory_usage() const noexcept {
- auto mem_usage = _tree.getMemoryUsage();
- mem_usage.merge(_store.getMemoryUsage());
- return mem_usage;
+ return _impl->memory_usage();
}
+class BTreeBucketDatabase::ReadGuardImpl final : public bucketdb::ReadGuard<Entry> {
+ ImplType::ReadSnapshot _snapshot;
+public:
+ explicit ReadGuardImpl(const BTreeBucketDatabase& db);
+ ~ReadGuardImpl() override;
+
+ void find_parents_and_self(const document::BucketId& bucket,
+ std::vector<Entry>& entries) const override;
+ [[nodiscard]] uint64_t generation() const noexcept override;
+};
+
BTreeBucketDatabase::ReadGuardImpl::ReadGuardImpl(const BTreeBucketDatabase& db)
- : _db(&db),
- _guard(_db->_generation_handler.takeGuard()),
- _frozen_view(_db->_tree.getFrozenView())
+ : _snapshot(*db._impl)
{}
BTreeBucketDatabase::ReadGuardImpl::~ReadGuardImpl() = default;
@@ -572,52 +212,17 @@ BTreeBucketDatabase::ReadGuardImpl::~ReadGuardImpl() = default;
void BTreeBucketDatabase::ReadGuardImpl::find_parents_and_self(const document::BucketId& bucket,
std::vector<Entry>& entries) const
{
- _db->find_parents_and_self_internal(_frozen_view, bucket, entries);
+ _snapshot.find_parents_and_self<ByValue>(bucket, [&entries]([[maybe_unused]] uint64_t key, Entry entry){
+ entries.emplace_back(std::move(entry));
+ });
}
uint64_t BTreeBucketDatabase::ReadGuardImpl::generation() const noexcept {
- return _guard.getGeneration();
+ return _snapshot.generation();
}
-// TODO replace existing distributor DB code with generic impl.
-// This is to ensure the generic implementation compiles with an ArrayStore backing in
-// the meantime.
-struct BTreeBucketDatabase2 {
- struct ReplicaValueTraits {
- using ValueType = Entry;
- using ConstValueRef = ConstEntryRef;
- using DataStoreType = vespalib::datastore::ArrayStore<BucketCopy>;
-
- static ValueType make_invalid_value() {
- return Entry::createInvalid();
- }
- static uint64_t wrap_and_store_value(DataStoreType& store, const Entry& entry) noexcept {
- auto replicas_ref = store.add(entry.getBucketInfo().getRawNodes());
- return value_from(entry.getBucketInfo().getLastGarbageCollectionTime(), replicas_ref);
- }
- static void remove_by_wrapped_value(DataStoreType& store, uint64_t value) noexcept {
- store.remove(entry_ref_from_value(value));
- }
- static ValueType unwrap_from_key_value(const DataStoreType& store, uint64_t key, uint64_t value) {
- const auto replicas_ref = store.get(entry_ref_from_value(value));
- const auto bucket = BucketId(BucketId::keyToBucketId(key));
- return entry_from_replica_array_ref(bucket, gc_timestamp_from_value(value), replicas_ref);
- }
- static ConstValueRef unwrap_const_ref_from_key_value(const DataStoreType& store, uint64_t key, uint64_t value) {
- const auto replicas_ref = store.get(entry_ref_from_value(value));
- const auto bucket = BucketId(BucketId::keyToBucketId(key));
- return const_entry_ref_from_replica_array_ref(bucket, gc_timestamp_from_value(value), replicas_ref);
- }
- };
-
- using BTreeImpl = bucketdb::GenericBTreeBucketDatabase<ReplicaValueTraits>;
- BTreeImpl _impl;
-
- BTreeBucketDatabase2()
- : _impl(make_default_array_store_config<ReplicaValueTraits::DataStoreType>())
- {}
-};
-
-template class bucketdb::GenericBTreeBucketDatabase<BTreeBucketDatabase2::ReplicaValueTraits>;
+std::unique_ptr<bucketdb::ReadGuard<Entry>> BTreeBucketDatabase::acquire_read_guard() const {
+ return std::make_unique<ReadGuardImpl>(*this);
+}
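A hedged usage sketch of the new guard API (names as introduced in this diff; the bucket value is arbitrary):

// Snapshot reads that do not block, and are not blocked by, writers:
std::vector<BucketDatabase::Entry> entries;
auto guard = db.acquire_read_guard();
guard->find_parents_and_self(document::BucketId(16, 0xcafe), entries);
// `entries` reflects one consistent generation of the database.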
}