aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@verizonmedia.com>2020-11-02 13:08:30 +0100
committerGitHub <noreply@github.com>2020-11-02 13:08:30 +0100
commitc3c3c80cc34eff69f3ca0282da08bee2caa733cc (patch)
tree3fea78233d652e0c5b7a8878d22e0cd67b0367b2
parent262465b73640d676a7340950cf120c272e3ce4ad (diff)
parent7fe9b99240dc82a0416608c1f92c6ab50eea5dc4 (diff)
Merge pull request #15116 from vespa-engine/vekterli/add-striped-btree-bucket-db-impl
Add striped content node B-tree bucket database impl
-rw-r--r--storage/src/tests/bucketdb/lockablemaptest.cpp77
-rw-r--r--storage/src/tests/common/teststorageapp.cpp5
-rw-r--r--storage/src/vespa/storage/bucketdb/CMakeLists.txt1
-rw-r--r--storage/src/vespa/storage/bucketdb/abstract_bucket_map.h12
-rw-r--r--storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp13
-rw-r--r--storage/src/vespa/storage/bucketdb/btree_bucket_database.h2
-rw-r--r--storage/src/vespa/storage/bucketdb/btree_lockable_map.h6
-rw-r--r--storage/src/vespa/storage/bucketdb/btree_lockable_map.hpp17
-rw-r--r--storage/src/vespa/storage/bucketdb/bucketdatabase.h4
-rw-r--r--storage/src/vespa/storage/bucketdb/const_iterator.h25
-rw-r--r--storage/src/vespa/storage/bucketdb/generic_btree_bucket_database.h4
-rw-r--r--storage/src/vespa/storage/bucketdb/generic_btree_bucket_database.hpp35
-rw-r--r--storage/src/vespa/storage/bucketdb/read_guard.h6
-rw-r--r--storage/src/vespa/storage/bucketdb/storbucketdb.cpp17
-rw-r--r--storage/src/vespa/storage/bucketdb/storbucketdb.h8
-rw-r--r--storage/src/vespa/storage/bucketdb/striped_btree_lockable_map.cpp8
-rw-r--r--storage/src/vespa/storage/bucketdb/striped_btree_lockable_map.h90
-rw-r--r--storage/src/vespa/storage/bucketdb/striped_btree_lockable_map.hpp313
-rw-r--r--storage/src/vespa/storage/common/content_bucket_db_options.h18
-rw-r--r--storage/src/vespa/storage/common/content_bucket_space.cpp5
-rw-r--r--storage/src/vespa/storage/common/content_bucket_space.h2
-rw-r--r--storage/src/vespa/storage/common/content_bucket_space_repo.cpp6
-rw-r--r--storage/src/vespa/storage/common/content_bucket_space_repo.h2
-rw-r--r--storage/src/vespa/storage/config/stor-server.def9
-rw-r--r--storage/src/vespa/storage/frameworkimpl/component/servicelayercomponentregisterimpl.cpp4
-rw-r--r--storage/src/vespa/storage/frameworkimpl/component/servicelayercomponentregisterimpl.h2
-rw-r--r--storage/src/vespa/storage/storageserver/servicelayernodecontext.cpp4
-rw-r--r--storage/src/vespa/storage/storageserver/servicelayernodecontext.h2
-rw-r--r--storageserver/src/vespa/storageserver/app/servicelayerprocess.cpp12
29 files changed, 612 insertions, 97 deletions
diff --git a/storage/src/tests/bucketdb/lockablemaptest.cpp b/storage/src/tests/bucketdb/lockablemaptest.cpp
index f5e68dd3de1..7f5069f96b2 100644
--- a/storage/src/tests/bucketdb/lockablemaptest.cpp
+++ b/storage/src/tests/bucketdb/lockablemaptest.cpp
@@ -2,6 +2,7 @@
#include <vespa/vespalib/util/document_runnable.h>
#include <vespa/storage/bucketdb/btree_lockable_map.hpp>
+#include <vespa/storage/bucketdb/striped_btree_lockable_map.hpp>
#include <vespa/vespalib/gtest/gtest.h>
#include <gmock/gmock.h>
@@ -9,6 +10,8 @@
LOG_SETUP(".lockable_map_test");
// FIXME these old tests may have the least obvious semantics and worst naming in the entire storage module
+// FIXME the non-bucket ID based tests only "accidentally" work with the striped DB implementation
+// since they just all happen to look like zero-buckets with count-bits above the minimum threshold.
using namespace ::testing;
using document::BucketId;
@@ -55,8 +58,7 @@ struct LockableMapTest : ::testing::Test {
using Map = MapT;
};
-// TODO add striped B-tree DB to this type set once ready
-using MapTypes = ::testing::Types<bucketdb::BTreeLockableMap<A>>;
+using MapTypes = ::testing::Types<bucketdb::BTreeLockableMap<A>, bucketdb::StripedBTreeLockableMap<A>>;
VESPA_GTEST_TYPED_TEST_SUITE(LockableMapTest, MapTypes);
// Disable warnings emitted by gtest generated files when using typed tests
@@ -102,46 +104,6 @@ TYPED_TEST(LockableMapTest, simple_usage) {
EXPECT_TRUE(map.empty());
}
-TYPED_TEST(LockableMapTest, comparison) {
- TypeParam map1;
- TypeParam map2;
- bool preExisted;
-
- // Check empty state is correct
- EXPECT_EQ(map1, map2);
- EXPECT_FALSE(map1 < map2);
- EXPECT_FALSE(map1 != map2);
-
- // Check that different lengths are ok
- map1.insert(4, A(1, 2, 3), "foo", preExisted);
- EXPECT_FALSE(map1 == map2);
- EXPECT_FALSE(map1 < map2);
- EXPECT_LT(map2, map1);
- EXPECT_NE(map1, map2);
-
- // Check that equal elements are ok
- map2.insert(4, A(1, 2, 3), "foo", preExisted);
- EXPECT_EQ(map1, map2);
- EXPECT_FALSE(map1 < map2);
- EXPECT_FALSE(map1 != map2);
-
- // Check that non-equal values are ok
- map1.insert(6, A(1, 2, 6), "foo", preExisted);
- map2.insert(6, A(1, 2, 3), "foo", preExisted);
- EXPECT_FALSE(map1 == map2);
- EXPECT_FALSE(map1 < map2);
- EXPECT_LT(map2, map1);
- EXPECT_NE(map1, map2);
-
- // Check that non-equal keys are ok
- map1.erase(6, "foo");
- map1.insert(7, A(1, 2, 3), "foo", preExisted);
- EXPECT_FALSE(map1 == map2);
- EXPECT_FALSE(map1 < map2);
- EXPECT_LT(map2, map1);
- EXPECT_NE(map1, map2);
-}
-
namespace {
template <typename Map>
@@ -300,6 +262,29 @@ TYPED_TEST(LockableMapTest, iterating) {
}
}
+TYPED_TEST(LockableMapTest, explicit_iterator_is_key_ordered) {
+ TypeParam map;
+ bool preExisted;
+ map.insert(16, A(16, 0, 0), "foo", preExisted);
+ map.insert(18, A(18, 0, 0), "foo", preExisted);
+ map.insert(11, A(11, 0, 0), "foo", preExisted);
+ map.insert(14, A(14, 0, 0), "foo", preExisted);
+ map.insert(20, A(20, 0, 0), "foo", preExisted);
+
+ std::string expected("11 - A(11, 0, 0)\n"
+ "14 - A(14, 0, 0)\n"
+ "16 - A(16, 0, 0)\n"
+ "18 - A(18, 0, 0)\n"
+ "20 - A(20, 0, 0)\n");
+ ConstProcessor<TypeParam> cproc;
+
+ auto guard = map.acquire_read_guard();
+ for (auto iter = guard->create_iterator(); iter->valid(); iter->next()) {
+ cproc(iter->key(), iter->value());
+ }
+ EXPECT_EQ(expected, cproc.toString());
+}
+
TYPED_TEST(LockableMapTest, chunked_iteration_is_transparent_across_chunk_sizes) {
TypeParam map;
bool preExisted;
@@ -721,16 +706,16 @@ TYPED_TEST(LockableMapTest, find_all_inconsistently_split_6) {
TYPED_TEST(LockableMapTest, find_all_inconsistent_below_16_bits) {
TypeParam map;
- document::BucketId id1(1, 0x1); // contains id2-id3
- document::BucketId id2(3, 0x1);
- document::BucketId id3(4, 0xD);
+ document::BucketId id1(8, 0b0000'0000'0001); // contains id2-id3
+ document::BucketId id2(10, 0b0011'0000'0001);
+ document::BucketId id3(11, 0b0101'0000'0001);
bool preExisted;
map.insert(id1.stripUnused().toKey(), A(1,2,3), "foo", preExisted);
map.insert(id2.stripUnused().toKey(), A(2,3,4), "foo", preExisted);
map.insert(id3.stripUnused().toKey(), A(3,4,5), "foo", preExisted);
- document::BucketId id(3, 0x5);
+ document::BucketId id(10, 0b0001'0000'0001);
auto results = map.getAll(id, "foo");
diff --git a/storage/src/tests/common/teststorageapp.cpp b/storage/src/tests/common/teststorageapp.cpp
index 2fc33bc1360..14f1e78f5ca 100644
--- a/storage/src/tests/common/teststorageapp.cpp
+++ b/storage/src/tests/common/teststorageapp.cpp
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "teststorageapp.h"
+#include <vespa/storage/common/content_bucket_db_options.h>
#include <vespa/storage/config/config-stor-server.h>
#include <vespa/config-stor-distribution.h>
#include <vespa/config-load-type.h>
@@ -135,7 +136,7 @@ namespace {
}
TestServiceLayerApp::TestServiceLayerApp(vespalib::stringref configId)
- : TestStorageApp(std::make_unique<ServiceLayerComponentRegisterImpl>(true), // TODO remove B-tree flag once default
+ : TestStorageApp(std::make_unique<ServiceLayerComponentRegisterImpl>(ContentBucketDbOptions()),
lib::NodeType::STORAGE, getIndexFromConfig(configId), configId),
_compReg(dynamic_cast<ServiceLayerComponentRegisterImpl&>(TestStorageApp::getComponentRegister())),
_persistenceProvider(),
@@ -148,7 +149,7 @@ TestServiceLayerApp::TestServiceLayerApp(vespalib::stringref configId)
TestServiceLayerApp::TestServiceLayerApp(NodeIndex index,
vespalib::stringref configId)
- : TestStorageApp(std::make_unique<ServiceLayerComponentRegisterImpl>(true), // TODO remove B-tree flag once default
+ : TestStorageApp(std::make_unique<ServiceLayerComponentRegisterImpl>(ContentBucketDbOptions()),
lib::NodeType::STORAGE, index, configId),
_compReg(dynamic_cast<ServiceLayerComponentRegisterImpl&>(TestStorageApp::getComponentRegister())),
_persistenceProvider(),
diff --git a/storage/src/vespa/storage/bucketdb/CMakeLists.txt b/storage/src/vespa/storage/bucketdb/CMakeLists.txt
index 23974cf2522..20510d895e7 100644
--- a/storage/src/vespa/storage/bucketdb/CMakeLists.txt
+++ b/storage/src/vespa/storage/bucketdb/CMakeLists.txt
@@ -10,6 +10,7 @@ vespa_add_library(storage_bucketdb OBJECT
bucketmanagermetrics.cpp
generic_btree_bucket_database.cpp
storbucketdb.cpp
+ striped_btree_lockable_map.cpp
DEPENDS
)
vespa_generate_config(storage_bucketdb stor-bucketdb.def)
diff --git a/storage/src/vespa/storage/bucketdb/abstract_bucket_map.h b/storage/src/vespa/storage/bucketdb/abstract_bucket_map.h
index eedef2ec0ff..bf46f7c3992 100644
--- a/storage/src/vespa/storage/bucketdb/abstract_bucket_map.h
+++ b/storage/src/vespa/storage/bucketdb/abstract_bucket_map.h
@@ -177,12 +177,8 @@ public:
do_for_each_mutable_unordered(std::move(func), clientId);
}
- void for_each(std::function<Decision(uint64_t, const ValueT&)> func,
- const char* clientId,
- const key_type& first = 0,
- const key_type& last = UINT64_MAX)
- {
- do_for_each(std::move(func), clientId, first, last);
+ void for_each(std::function<Decision(uint64_t, const ValueT&)> func, const char* clientId) {
+ do_for_each(std::move(func), clientId);
}
std::unique_ptr<bucketdb::ReadGuard<ValueT>> acquire_read_guard() const {
@@ -206,9 +202,7 @@ private:
virtual void do_for_each_mutable_unordered(std::function<Decision(uint64_t, ValueT&)> func,
const char* clientId) = 0;
virtual void do_for_each(std::function<Decision(uint64_t, const ValueT&)> func,
- const char* clientId,
- const key_type& first,
- const key_type& last) = 0;
+ const char* clientId) = 0;
virtual std::unique_ptr<bucketdb::ReadGuard<ValueT>> do_acquire_read_guard() const = 0;
};
diff --git a/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp b/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp
index 602cdbdc5d5..d547830d765 100644
--- a/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp
+++ b/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp
@@ -195,7 +195,9 @@ vespalib::MemoryUsage BTreeBucketDatabase::memory_usage() const noexcept {
return _impl->memory_usage();
}
-class BTreeBucketDatabase::ReadGuardImpl final : public bucketdb::ReadGuard<Entry> {
+class BTreeBucketDatabase::ReadGuardImpl final
+ : public bucketdb::ReadGuard<Entry, ConstEntryRef>
+{
ImplType::ReadSnapshot _snapshot;
public:
explicit ReadGuardImpl(const BTreeBucketDatabase& db);
@@ -204,6 +206,7 @@ public:
std::vector<Entry> find_parents_and_self(const document::BucketId& bucket) const override;
std::vector<Entry> find_parents_self_and_children(const document::BucketId& bucket) const override;
void for_each(std::function<void(uint64_t, const Entry&)> func) const override;
+ std::unique_ptr<bucketdb::ConstIterator<ConstEntryRef>> create_iterator() const override;
[[nodiscard]] uint64_t generation() const noexcept override;
};
@@ -235,11 +238,17 @@ void BTreeBucketDatabase::ReadGuardImpl::for_each(std::function<void(uint64_t, c
_snapshot.for_each<ByValue>(std::move(func));
}
+std::unique_ptr<bucketdb::ConstIterator<ConstEntryRef>>
+BTreeBucketDatabase::ReadGuardImpl::create_iterator() const {
+ return _snapshot.create_iterator(); // TODO test
+}
+
uint64_t BTreeBucketDatabase::ReadGuardImpl::generation() const noexcept {
return _snapshot.generation();
}
-std::unique_ptr<bucketdb::ReadGuard<Entry>> BTreeBucketDatabase::acquire_read_guard() const {
+std::unique_ptr<bucketdb::ReadGuard<Entry, ConstEntryRef>>
+BTreeBucketDatabase::acquire_read_guard() const {
return std::make_unique<ReadGuardImpl>(*this);
}
diff --git a/storage/src/vespa/storage/bucketdb/btree_bucket_database.h b/storage/src/vespa/storage/bucketdb/btree_bucket_database.h
index 122c6eeb0fb..2821e360792 100644
--- a/storage/src/vespa/storage/bucketdb/btree_bucket_database.h
+++ b/storage/src/vespa/storage/bucketdb/btree_bucket_database.h
@@ -58,7 +58,7 @@ private:
class ReadGuardImpl;
friend class ReadGuardImpl;
public:
- std::unique_ptr<bucketdb::ReadGuard<Entry>> acquire_read_guard() const override;
+ std::unique_ptr<bucketdb::ReadGuard<Entry, ConstEntryRef>> acquire_read_guard() const override;
vespalib::MemoryUsage memory_usage() const noexcept override;
};
diff --git a/storage/src/vespa/storage/bucketdb/btree_lockable_map.h b/storage/src/vespa/storage/bucketdb/btree_lockable_map.h
index 6e42a721732..5667c75c5d7 100644
--- a/storage/src/vespa/storage/bucketdb/btree_lockable_map.h
+++ b/storage/src/vespa/storage/bucketdb/btree_lockable_map.h
@@ -73,6 +73,8 @@ public:
void showLockClients(vespalib::asciistream & out) const override;
private:
+ template <typename T1> friend class StripedBTreeLockableMap;
+
struct hasher {
size_t operator () (const LockId & lid) const { return lid.hash(); }
};
@@ -121,9 +123,7 @@ private:
const char* clientId) override;
void do_for_each(std::function<Decision(uint64_t, const mapped_type&)> func,
- const char* clientId,
- const key_type& first,
- const key_type& last) override;
+ const char* clientId) override;
void do_for_each_chunked(std::function<Decision(uint64_t, const mapped_type&)> func,
const char* client_id,
diff --git a/storage/src/vespa/storage/bucketdb/btree_lockable_map.hpp b/storage/src/vespa/storage/bucketdb/btree_lockable_map.hpp
index d291cfd371a..c8597915e44 100644
--- a/storage/src/vespa/storage/bucketdb/btree_lockable_map.hpp
+++ b/storage/src/vespa/storage/bucketdb/btree_lockable_map.hpp
@@ -1,4 +1,6 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
#include "btree_lockable_map.h"
#include "generic_btree_bucket_database.hpp"
#include <vespa/vespalib/btree/btreebuilder.h>
@@ -293,15 +295,13 @@ void BTreeLockableMap<T>::do_for_each_mutable_unordered(std::function<Decision(u
template <typename T>
void BTreeLockableMap<T>::do_for_each(std::function<Decision(uint64_t, const mapped_type&)> func,
- const char* clientId,
- const key_type& first,
- const key_type& last)
+ const char* clientId)
{
- key_type key = first;
+ key_type key = 0;
mapped_type val;
std::unique_lock guard(_lock);
while (true) {
- if (findNextKey(key, val, clientId, guard) || key > last) {
+ if (findNextKey(key, val, clientId, guard)) {
return;
}
Decision d(func(key, val));
@@ -363,6 +363,7 @@ public:
std::vector<T> find_parents_and_self(const document::BucketId& bucket) const override;
std::vector<T> find_parents_self_and_children(const document::BucketId& bucket) const override;
void for_each(std::function<void(uint64_t, const T&)> func) const override;
+ std::unique_ptr<ConstIterator<const T&>> create_iterator() const override;
[[nodiscard]] uint64_t generation() const noexcept override;
};
@@ -404,6 +405,12 @@ void BTreeLockableMap<T>::ReadGuardImpl::for_each(std::function<void(uint64_t, c
}
template <typename T>
+std::unique_ptr<ConstIterator<const T&>>
+BTreeLockableMap<T>::ReadGuardImpl::create_iterator() const {
+ return _snapshot.template create_iterator(); // TODO test
+}
+
+template <typename T>
uint64_t BTreeLockableMap<T>::ReadGuardImpl::generation() const noexcept {
return _snapshot.generation();
}
diff --git a/storage/src/vespa/storage/bucketdb/bucketdatabase.h b/storage/src/vespa/storage/bucketdb/bucketdatabase.h
index 1ee165a739c..cd94a698358 100644
--- a/storage/src/vespa/storage/bucketdb/bucketdatabase.h
+++ b/storage/src/vespa/storage/bucketdb/bucketdatabase.h
@@ -141,10 +141,10 @@ public:
virtual uint32_t childCount(const document::BucketId&) const = 0;
- using ReadGuard = bucketdb::ReadGuard<Entry>;
+ using ReadGuard = bucketdb::ReadGuard<Entry, ConstEntryRef>;
virtual std::unique_ptr<ReadGuard> acquire_read_guard() const {
- return std::unique_ptr<bucketdb::ReadGuard<Entry>>();
+ return std::unique_ptr<bucketdb::ReadGuard<Entry, ConstEntryRef>>();
}
[[nodiscard]] virtual vespalib::MemoryUsage memory_usage() const noexcept = 0;
diff --git a/storage/src/vespa/storage/bucketdb/const_iterator.h b/storage/src/vespa/storage/bucketdb/const_iterator.h
new file mode 100644
index 00000000000..ae251754c58
--- /dev/null
+++ b/storage/src/vespa/storage/bucketdb/const_iterator.h
@@ -0,0 +1,25 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <cstdint>
+
+namespace storage::bucketdb {
+
+/**
+ * Iterator interface for iterating over entries in a bucket database.
+ * Implementations shall guarantee that iteration happens in natural key order.
+ *
+ * Key type is in the standard u64 "reversed bucket ID bits" form.
+ * Value type is always a (possibly wrapped) const reference to the entry itself.
+ */
+template <typename ConstRefT>
+class ConstIterator {
+public:
+ virtual ~ConstIterator() = default;
+ virtual void next() noexcept = 0;
+ [[nodiscard]] virtual bool valid() const noexcept = 0;
+ [[nodiscard]] virtual uint64_t key() const noexcept = 0;
+ [[nodiscard]] virtual ConstRefT value() const = 0;
+};
+
+}
diff --git a/storage/src/vespa/storage/bucketdb/generic_btree_bucket_database.h b/storage/src/vespa/storage/bucketdb/generic_btree_bucket_database.h
index 9534b583cd5..ea6d47f26bb 100644
--- a/storage/src/vespa/storage/bucketdb/generic_btree_bucket_database.h
+++ b/storage/src/vespa/storage/bucketdb/generic_btree_bucket_database.h
@@ -1,6 +1,7 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
+#include "const_iterator.h"
#include "db_merger.h"
#include <vespa/document/bucket/bucketid.h>
#include <vespa/vespalib/btree/btree.h>
@@ -120,6 +121,8 @@ public:
const GenericBTreeBucketDatabase* _db;
vespalib::GenerationHandler::Guard _guard;
typename BTree::FrozenView _frozen_view;
+
+ class ConstIteratorImpl;
public:
explicit ReadSnapshot(const GenericBTreeBucketDatabase& db);
~ReadSnapshot();
@@ -133,6 +136,7 @@ public:
void find_parents_self_and_children(const document::BucketId& bucket, Func func) const;
template <typename IterValueExtractor, typename Func>
void for_each(Func func) const;
+ std::unique_ptr<ConstIterator<ConstValueRef>> create_iterator() const;
[[nodiscard]] uint64_t generation() const noexcept;
};
private:
diff --git a/storage/src/vespa/storage/bucketdb/generic_btree_bucket_database.hpp b/storage/src/vespa/storage/bucketdb/generic_btree_bucket_database.hpp
index e53fad84db2..839efae4c1b 100644
--- a/storage/src/vespa/storage/bucketdb/generic_btree_bucket_database.hpp
+++ b/storage/src/vespa/storage/bucketdb/generic_btree_bucket_database.hpp
@@ -565,6 +565,41 @@ void GenericBTreeBucketDatabase<DataStoreTraitsT>::ReadSnapshot::for_each(Func f
}
template <typename DataStoreTraitsT>
+class GenericBTreeBucketDatabase<DataStoreTraitsT>::ReadSnapshot::ConstIteratorImpl
+ : public ConstIterator<typename DataStoreTraitsT::ConstValueRef>
+{
+ const typename GenericBTreeBucketDatabase<DataStoreTraitsT>::ReadSnapshot& _snapshot;
+ typename BTree::ConstIterator _iter;
+public:
+ using ConstValueRef = typename DataStoreTraitsT::ConstValueRef;
+
+ explicit ConstIteratorImpl(const GenericBTreeBucketDatabase<DataStoreTraitsT>::ReadSnapshot& snapshot)
+ : _snapshot(snapshot),
+ _iter(_snapshot._frozen_view.begin())
+ {}
+ ~ConstIteratorImpl() override = default;
+
+ void next() noexcept override {
+ ++_iter;
+ }
+ bool valid() const noexcept override {
+ return _iter.valid();
+ }
+ uint64_t key() const noexcept override {
+ return _iter.getKey();
+ }
+ ConstValueRef value() const override {
+ return ByConstRef::apply(*_snapshot._db, _iter);
+ }
+};
+
+template <typename DataStoreTraitsT>
+std::unique_ptr<ConstIterator<typename DataStoreTraitsT::ConstValueRef>>
+GenericBTreeBucketDatabase<DataStoreTraitsT>::ReadSnapshot::create_iterator() const {
+ return std::make_unique<ConstIteratorImpl>(*this);
+}
+
+template <typename DataStoreTraitsT>
uint64_t GenericBTreeBucketDatabase<DataStoreTraitsT>::ReadSnapshot::generation() const noexcept {
return _guard.getGeneration();
}
diff --git a/storage/src/vespa/storage/bucketdb/read_guard.h b/storage/src/vespa/storage/bucketdb/read_guard.h
index bacb0a38e2f..68a0c8578a8 100644
--- a/storage/src/vespa/storage/bucketdb/read_guard.h
+++ b/storage/src/vespa/storage/bucketdb/read_guard.h
@@ -1,12 +1,15 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
+#include "const_iterator.h"
#include <vespa/document/bucket/bucketid.h>
#include <functional>
+#include <memory>
#include <vector>
namespace storage::bucketdb {
+
/*
* Read guard for accessing the bucket tree of an underlying bucket database
* in a thread-safe, read-only manner.
@@ -26,7 +29,7 @@ namespace storage::bucketdb {
* memory to be retained by the backing DB until released.
*/
-template <typename ValueT>
+template <typename ValueT, typename ConstRefT = const ValueT&>
class ReadGuard {
public:
ReadGuard() = default;
@@ -40,6 +43,7 @@ public:
virtual std::vector<ValueT> find_parents_and_self(const document::BucketId& bucket) const = 0;
virtual std::vector<ValueT> find_parents_self_and_children(const document::BucketId& bucket) const = 0;
virtual void for_each(std::function<void(uint64_t, const ValueT&)> func) const = 0;
+ virtual std::unique_ptr<ConstIterator<ConstRefT>> create_iterator() const = 0;
// If the underlying guard represents a snapshot, returns its monotonically
// increasing generation. Otherwise returns 0.
[[nodiscard]] virtual uint64_t generation() const noexcept = 0;
diff --git a/storage/src/vespa/storage/bucketdb/storbucketdb.cpp b/storage/src/vespa/storage/bucketdb/storbucketdb.cpp
index b6a5a2320b1..63e00d3804c 100644
--- a/storage/src/vespa/storage/bucketdb/storbucketdb.cpp
+++ b/storage/src/vespa/storage/bucketdb/storbucketdb.cpp
@@ -2,6 +2,8 @@
#include "storbucketdb.h"
#include "btree_lockable_map.h"
+#include "striped_btree_lockable_map.h"
+#include <vespa/storage/common/content_bucket_db_options.h>
#include <vespa/log/log.h>
LOG_SETUP(".storage.bucketdb.stor_bucket_db");
@@ -42,12 +44,17 @@ std::unique_ptr<AbstractBucketMap<StorageBucketInfo>> make_btree_db_impl() {
return std::make_unique<BTreeLockableMap<StorageBucketInfo>>();
}
+std::unique_ptr<AbstractBucketMap<StorageBucketInfo>> make_striped_btree_db_impl(uint8_t n_stripes) {
+ return std::make_unique<StripedBTreeLockableMap<StorageBucketInfo>>(n_stripes);
+}
+
}
} // bucketdb
-StorBucketDatabase::StorBucketDatabase([[maybe_unused]] bool use_btree_db)
- : _impl(bucketdb::make_btree_db_impl())
+StorBucketDatabase::StorBucketDatabase(const ContentBucketDbOptions& opts)
+ : _impl((opts.n_stripe_bits > 0) ? bucketdb::make_striped_btree_db_impl(opts.n_stripe_bits)
+ : bucketdb::make_btree_db_impl())
{}
void
@@ -123,11 +130,9 @@ void StorBucketDatabase::for_each_mutable_unordered(
void StorBucketDatabase::for_each(
std::function<Decision(uint64_t, const bucketdb::StorageBucketInfo&)> func,
- const char* clientId,
- const key_type& first,
- const key_type& last)
+ const char* clientId)
{
- _impl->for_each(std::move(func), clientId, first, last);
+ _impl->for_each(std::move(func), clientId);
}
std::unique_ptr<bucketdb::ReadGuard<StorBucketDatabase::Entry>>
diff --git a/storage/src/vespa/storage/bucketdb/storbucketdb.h b/storage/src/vespa/storage/bucketdb/storbucketdb.h
index 61d8522ae9d..cf6401d9478 100644
--- a/storage/src/vespa/storage/bucketdb/storbucketdb.h
+++ b/storage/src/vespa/storage/bucketdb/storbucketdb.h
@@ -10,6 +10,8 @@
namespace storage {
+struct ContentBucketDbOptions;
+
class StorBucketDatabase {
std::unique_ptr<bucketdb::AbstractBucketMap<bucketdb::StorageBucketInfo>> _impl;
public:
@@ -25,7 +27,7 @@ public:
CREATE_IF_NONEXISTING = 1
};
- explicit StorBucketDatabase(bool use_btree_db = false);
+ explicit StorBucketDatabase(const ContentBucketDbOptions&);
void insert(const document::BucketId&, const bucketdb::StorageBucketInfo&,
const char* clientId);
@@ -64,9 +66,7 @@ public:
const char* clientId);
void for_each(std::function<Decision(uint64_t, const bucketdb::StorageBucketInfo&)> func,
- const char* clientId,
- const key_type& first = key_type(),
- const key_type& last = key_type() - 1);
+ const char* clientId);
[[nodiscard]] std::unique_ptr<bucketdb::ReadGuard<Entry>> acquire_read_guard() const;
diff --git a/storage/src/vespa/storage/bucketdb/striped_btree_lockable_map.cpp b/storage/src/vespa/storage/bucketdb/striped_btree_lockable_map.cpp
new file mode 100644
index 00000000000..90f7ec5ae7b
--- /dev/null
+++ b/storage/src/vespa/storage/bucketdb/striped_btree_lockable_map.cpp
@@ -0,0 +1,8 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "striped_btree_lockable_map.hpp"
+
+namespace storage::bucketdb {
+
+template class StripedBTreeLockableMap<StorageBucketInfo>; // Forced instantiation.
+
+}
diff --git a/storage/src/vespa/storage/bucketdb/striped_btree_lockable_map.h b/storage/src/vespa/storage/bucketdb/striped_btree_lockable_map.h
new file mode 100644
index 00000000000..b01f06c2b3f
--- /dev/null
+++ b/storage/src/vespa/storage/bucketdb/striped_btree_lockable_map.h
@@ -0,0 +1,90 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "btree_lockable_map.h"
+#include <memory>
+#include <vector>
+
+namespace storage::bucketdb {
+
+/**
+ * Bucket database implementation that stripes all superbuckets across
+ * a set of disjoint sub-DBs. All locking is handled by the individual
+ * sub-DBs, meaning that accessing one does not cause contention for
+ * readers/writers accessing another.
+ * Ordered iteration is transparently provided by the const for_each method
+ * and by read guards.
+ */
+template <typename T>
+class StripedBTreeLockableMap final : public AbstractBucketMap<T> {
+public:
+ using ParentType = AbstractBucketMap<T>;
+ using WrappedEntry = typename ParentType::WrappedEntry;
+ using key_type = typename ParentType::key_type;
+ using mapped_type = typename ParentType::mapped_type;
+ using LockId = typename ParentType::LockId;
+ using EntryMap = typename ParentType::EntryMap;
+ using Decision = typename ParentType::Decision;
+ using BucketId = document::BucketId;
+
+ constexpr static uint8_t MaxStripeBits = 8;
+private:
+ using StripedDBType = BTreeLockableMap<T>;
+ uint8_t _n_stripe_bits;
+ size_t _n_stripes;
+ std::vector<std::unique_ptr<StripedDBType>> _stripes;
+public:
+ explicit StripedBTreeLockableMap(uint8_t n_stripe_bits = 4);
+ ~StripedBTreeLockableMap() override;
+
+ size_t size() const noexcept override;
+ size_t getMemoryUsage() const noexcept override;
+ vespalib::MemoryUsage detailed_memory_usage() const noexcept override;
+ bool empty() const noexcept override;
+
+ WrappedEntry get(const key_type& key, const char* clientId, bool createIfNonExisting) override;
+ WrappedEntry get(const key_type& key, const char* clientId) {
+ return get(key, clientId, false);
+ }
+ bool erase(const key_type& key, const char* clientId, bool has_lock) override;
+ void insert(const key_type& key, const mapped_type& value,
+ const char* client_id, bool has_lock, bool& pre_existed) override;
+
+ bool erase(const key_type& key, const char* client_id) {
+ return erase(key, client_id, false);
+ }
+ void insert(const key_type& key, const mapped_type& value,
+ const char* client_id, bool& pre_existed) {
+ return insert(key, value, client_id, false, pre_existed);
+ }
+ void clear();
+ void print(std::ostream& out, bool verbose, const std::string& indent) const override;
+ EntryMap getContained(const BucketId& bucketId, const char* clientId) override;
+ EntryMap getAll(const BucketId& bucketId, const char* clientId) override;
+ bool isConsistent(const WrappedEntry& entry) override;
+ void showLockClients(vespalib::asciistream & out) const override;
+
+private:
+ class ReadGuardImpl;
+
+ void unlock(const key_type& key) override;
+
+ void do_for_each_mutable_unordered(std::function<Decision(uint64_t, mapped_type&)> func,
+ const char* client_id) override;
+
+ void do_for_each(std::function<Decision(uint64_t, const mapped_type&)> func,
+ const char* client_id) override;
+
+ void do_for_each_chunked(std::function<Decision(uint64_t, const mapped_type&)> func,
+ const char* client_id,
+ vespalib::duration yield_time,
+ uint32_t chunk_size) override;
+
+ std::unique_ptr<ReadGuard<T>> do_acquire_read_guard() const override;
+
+ [[nodiscard]] size_t stripe_of(key_type) const noexcept;
+ [[nodiscard]] StripedDBType& db_for(key_type) noexcept;
+ [[nodiscard]] const StripedDBType& db_for(key_type) const noexcept;
+};
+
+}
diff --git a/storage/src/vespa/storage/bucketdb/striped_btree_lockable_map.hpp b/storage/src/vespa/storage/bucketdb/striped_btree_lockable_map.hpp
new file mode 100644
index 00000000000..e4f6efa30a3
--- /dev/null
+++ b/storage/src/vespa/storage/bucketdb/striped_btree_lockable_map.hpp
@@ -0,0 +1,313 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "striped_btree_lockable_map.h"
+#include "btree_lockable_map.hpp"
+#include <algorithm>
+#include <cassert>
+#include <queue>
+
+namespace storage::bucketdb {
+
+namespace {
+
+constexpr uint8_t used_bits_of(uint64_t key) noexcept {
+ return static_cast<uint8_t>(key & 0b11'1111ULL);
+}
+
+}
+
+template <typename T>
+StripedBTreeLockableMap<T>::StripedBTreeLockableMap(uint8_t n_stripe_bits)
+ : _n_stripe_bits(n_stripe_bits),
+ _n_stripes(1ULL << _n_stripe_bits),
+ _stripes()
+{
+ assert(_n_stripe_bits > 0);
+ assert(_n_stripe_bits <= MaxStripeBits);
+ _stripes.reserve(_n_stripes);
+ for (size_t i = 0; i < _n_stripes; ++i) {
+ // TODO reduce initial sub-DB data store memory usage based on number of stripes
+ _stripes.emplace_back(std::make_unique<BTreeLockableMap<T>>());
+ }
+}
+
+template <typename T>
+StripedBTreeLockableMap<T>::~StripedBTreeLockableMap() = default;
+
+template <typename T>
+size_t StripedBTreeLockableMap<T>::stripe_of(key_type key) const noexcept {
+ assert(used_bits_of(key) >= _n_stripe_bits);
+ // Since bucket keys have count-bits at the LSB positions, we want to look at the MSBs instead.
+ return (key >> (64 - _n_stripe_bits));
+}
+
+template <typename T>
+typename StripedBTreeLockableMap<T>::StripedDBType&
+StripedBTreeLockableMap<T>::db_for(key_type key) noexcept {
+ return *_stripes[stripe_of(key)];
+}
+
+template <typename T>
+const typename StripedBTreeLockableMap<T>::StripedDBType&
+StripedBTreeLockableMap<T>::db_for(key_type key) const noexcept {
+ return *_stripes[stripe_of(key)];
+}
+
+template <typename T>
+size_t StripedBTreeLockableMap<T>::size() const noexcept {
+ size_t sz = 0;
+ for (auto& s : _stripes) {
+ sz += s->size();
+ }
+ return sz;
+}
+
+template <typename T>
+size_t StripedBTreeLockableMap<T>::getMemoryUsage() const noexcept {
+ size_t mem_usage = 0;
+ for (auto& s : _stripes) {
+ mem_usage += s->getMemoryUsage();
+ }
+ return mem_usage;
+}
+
+template <typename T>
+vespalib::MemoryUsage StripedBTreeLockableMap<T>::detailed_memory_usage() const noexcept {
+ vespalib::MemoryUsage mem_usage;
+ for (auto& s : _stripes) {
+ mem_usage.merge(s->detailed_memory_usage());
+ }
+ return mem_usage;
+}
+
+template <typename T>
+bool StripedBTreeLockableMap<T>::empty() const noexcept {
+ return std::all_of(_stripes.begin(), _stripes.end(), [](auto& s){ return s->empty(); });
+}
+
+template <typename T>
+typename StripedBTreeLockableMap<T>::WrappedEntry
+StripedBTreeLockableMap<T>::get(const key_type& key, const char* clientId, bool createIfNonExisting) {
+ return db_for(key).get(key, clientId, createIfNonExisting);
+}
+
+template <typename T>
+bool StripedBTreeLockableMap<T>::erase(const key_type& key, const char* client_id, bool has_lock) {
+ return db_for(key).erase(key, client_id, has_lock);
+}
+
+template <typename T>
+void StripedBTreeLockableMap<T>::insert(const key_type& key, const mapped_type& value,
+ const char* clientId, bool has_lock, bool& pre_existed)
+{
+ db_for(key).insert(key, value, clientId, has_lock, pre_existed);
+}
+
+template <typename T>
+void StripedBTreeLockableMap<T>::clear() {
+ for (auto& s : _stripes) {
+ s->clear();
+ }
+}
+
+template <typename T>
+void StripedBTreeLockableMap<T>::print(std::ostream& out, bool verbose,
+ const std::string& indent) const
+{
+ // TODO more wrapped printing?
+ for (auto& s : _stripes) {
+ s->print(out, verbose, indent);
+ }
+}
+
+template <typename T>
+typename StripedBTreeLockableMap<T>::EntryMap
+StripedBTreeLockableMap<T>::getContained(const BucketId& bucket, const char* clientId) {
+ return db_for(bucket.toKey()).getContained(bucket, clientId);
+}
+
+template <typename T>
+typename StripedBTreeLockableMap<T>::EntryMap
+StripedBTreeLockableMap<T>::getAll(const BucketId& bucket, const char* clientId) {
+ return db_for(bucket.toKey()).getAll(bucket, clientId);
+}
+
+template <typename T>
+bool StripedBTreeLockableMap<T>::isConsistent(const StripedBTreeLockableMap::WrappedEntry& entry) {
+ return db_for(entry.getKey()).isConsistent(entry);
+}
+
+template <typename T>
+void StripedBTreeLockableMap<T>::showLockClients(vespalib::asciistream& out) const {
+ for (auto& s : _stripes) {
+ s->showLockClients(out);
+ }
+}
+
+template <typename T>
+void StripedBTreeLockableMap<T>::do_for_each_mutable_unordered(std::function<Decision(uint64_t, mapped_type&)> func,
+ const char* client_id)
+{
+ // This is by definition unordered in terms of bucket keys
+ for (auto& stripe : _stripes) {
+ // TODO pass functor by ref instead
+ stripe->for_each_mutable_unordered(func, client_id);
+ }
+}
+
+template <typename T>
+void StripedBTreeLockableMap<T>::unlock(const key_type& key) {
+ db_for(key).unlock(key);
+}
+
+template <typename T>
+void StripedBTreeLockableMap<T>::do_for_each(std::function<Decision(uint64_t, const mapped_type&)> func,
+ [[maybe_unused]] const char* client_id)
+{
+ auto guard = do_acquire_read_guard();
+ for (auto iter = guard->create_iterator(); iter->valid(); iter->next()) {
+ if (func(iter->key(), iter->value()) != Decision::CONTINUE) {
+ break;
+ }
+ }
+}
+
+template <typename T>
+void StripedBTreeLockableMap<T>::do_for_each_chunked(std::function<Decision(uint64_t, const mapped_type&)> func,
+ const char* client_id,
+ [[maybe_unused]] vespalib::duration yield_time,
+ [[maybe_unused]] uint32_t chunk_size)
+{
+ do_for_each(std::move(func), client_id);
+}
+
+template <typename T>
+class StripedBTreeLockableMap<T>::ReadGuardImpl final
+ : public bucketdb::ReadGuard<T, const T&>
+{
+ const StripedBTreeLockableMap<T>& _db;
+ // There is a 1-1 relationship between DB stripes and guards.
+ // This is essential to be able to choose the correct guard.
+ std::vector<std::unique_ptr<bucketdb::ReadGuard<T, const T&>>> _stripe_guards;
+public:
+ explicit ReadGuardImpl(const StripedBTreeLockableMap<T>& db);
+ ~ReadGuardImpl() override;
+
+ std::vector<T> find_parents_and_self(const document::BucketId& bucket) const override;
+ std::vector<T> find_parents_self_and_children(const document::BucketId& bucket) const override;
+ void for_each(std::function<void(uint64_t, const T&)> func) const override;
+ std::unique_ptr<ConstIterator<const T&>> create_iterator() const override;
+ [[nodiscard]] uint64_t generation() const noexcept override { return 0; /*TODO*/ }
+};
+
+template <typename T>
+StripedBTreeLockableMap<T>::ReadGuardImpl::ReadGuardImpl(const StripedBTreeLockableMap<T>& db)
+ : _db(db),
+ _stripe_guards()
+{
+ _stripe_guards.reserve(_db._stripes.size());
+ for (auto& s : _db._stripes) {
+ _stripe_guards.emplace_back(s->acquire_read_guard());
+ }
+}
+
+template <typename T>
+StripedBTreeLockableMap<T>::ReadGuardImpl::~ReadGuardImpl() = default;
+
+template <typename T>
+std::vector<T>
+StripedBTreeLockableMap<T>::ReadGuardImpl::find_parents_and_self(const document::BucketId& bucket) const {
+ return _stripe_guards[_db.stripe_of(bucket.toKey())]->find_parents_and_self(bucket);
+}
+
+template <typename T>
+std::vector<T>
+StripedBTreeLockableMap<T>::ReadGuardImpl::find_parents_self_and_children(const document::BucketId& bucket) const {
+ return _stripe_guards[_db.stripe_of(bucket.toKey())]->find_parents_self_and_children(bucket);
+}
+
+template <typename T>
+void StripedBTreeLockableMap<T>::ReadGuardImpl::for_each(std::function<void(uint64_t, const T&)> func) const {
+ for (auto iter = create_iterator(); iter->valid(); iter->next()) {
+ func(iter->key(), iter->value());
+ }
+}
+
+template <typename T>
+std::unique_ptr<ReadGuard<T>> StripedBTreeLockableMap<T>::do_acquire_read_guard() const {
+ return std::make_unique<ReadGuardImpl>(*this);
+}
+
+namespace {
+
+template <typename T> using Iter = ConstIterator<const T&>;
+template <typename T> using KeyAndIterPtr = std::pair<uint64_t, Iter<T>*>;
+
+template <typename T>
+struct CompareFirstGreater {
+ bool operator()(const KeyAndIterPtr<T>& lhs, const KeyAndIterPtr<T>& rhs) const noexcept {
+ return (lhs.first > rhs.first);
+ }
+};
+
+template <typename T>
+class ConstIteratorImpl final : public ConstIterator<const T&> {
+ using PriorityQueueType = std::priority_queue<KeyAndIterPtr<T>,
+ std::vector<KeyAndIterPtr<T>>,
+ CompareFirstGreater<T>>;
+ // This is pretty heavy weight, but this iterator is only used for full DB sweeps
+ // used by background maintenance operations, not by any realtime traffic.
+ std::vector<std::unique_ptr<Iter<T>>> _iters;
+ PriorityQueueType _iter_queue;
+public:
+ // Precondition: all iterators must be initially valid
+ explicit ConstIteratorImpl(std::vector<std::unique_ptr<Iter<T>>> iters)
+ : _iters(std::move(iters))
+ {
+ for (auto& iter : _iters) {
+ _iter_queue.push(std::make_pair(iter->key(), iter.get()));
+ }
+ }
+
+ void next() noexcept override {
+ assert(!_iter_queue.empty());
+ auto* top_iter = _iter_queue.top().second;
+ top_iter->next();
+ _iter_queue.pop();
+ if (top_iter->valid()) {
+ _iter_queue.push(std::make_pair(top_iter->key(), top_iter));
+ }
+ }
+
+ bool valid() const noexcept override {
+ return !_iter_queue.empty();
+ }
+
+ uint64_t key() const noexcept override {
+ return _iter_queue.top().first;
+ }
+
+ const T& value() const override {
+ return _iter_queue.top().second->value();
+ }
+};
+
+}
+
+template <typename T>
+std::unique_ptr<ConstIterator<const T&>>
+StripedBTreeLockableMap<T>::ReadGuardImpl::create_iterator() const {
+ std::vector<std::unique_ptr<Iter<T>>> iters;
+ iters.reserve(_db._n_stripes);
+ for (auto& g : _stripe_guards) {
+ auto iter = g->create_iterator();
+ if (!iter->valid()) {
+ continue;
+ }
+ iters.emplace_back(std::move(iter));
+ }
+ return std::make_unique<ConstIteratorImpl<T>>(std::move(iters));
+}
+
+}
diff --git a/storage/src/vespa/storage/common/content_bucket_db_options.h b/storage/src/vespa/storage/common/content_bucket_db_options.h
new file mode 100644
index 00000000000..60c5f04b8c6
--- /dev/null
+++ b/storage/src/vespa/storage/common/content_bucket_db_options.h
@@ -0,0 +1,18 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <cstdint>
+
+namespace storage {
+
+/**
+ * Type-safe encapsulation of any options that can be passed from config
+ * to the content node bucket database implementation.
+ */
+struct ContentBucketDbOptions {
+ // The number of DB stripes created will be 2^n (within implementation limits)
+ // TODO expose max limit here?
+ uint8_t n_stripe_bits = 0;
+};
+
+}
diff --git a/storage/src/vespa/storage/common/content_bucket_space.cpp b/storage/src/vespa/storage/common/content_bucket_space.cpp
index e293c4bc336..da2f78e2a6a 100644
--- a/storage/src/vespa/storage/common/content_bucket_space.cpp
+++ b/storage/src/vespa/storage/common/content_bucket_space.cpp
@@ -4,9 +4,10 @@
namespace storage {
-ContentBucketSpace::ContentBucketSpace(document::BucketSpace bucketSpace, bool use_btree_db)
+ContentBucketSpace::ContentBucketSpace(document::BucketSpace bucketSpace,
+ const ContentBucketDbOptions& db_opts)
: _bucketSpace(bucketSpace),
- _bucketDatabase(use_btree_db),
+ _bucketDatabase(db_opts),
_lock(),
_clusterState(),
_distribution(),
diff --git a/storage/src/vespa/storage/common/content_bucket_space.h b/storage/src/vespa/storage/common/content_bucket_space.h
index bf001f8b6f2..98f287a3154 100644
--- a/storage/src/vespa/storage/common/content_bucket_space.h
+++ b/storage/src/vespa/storage/common/content_bucket_space.h
@@ -26,7 +26,7 @@ private:
public:
using UP = std::unique_ptr<ContentBucketSpace>;
- explicit ContentBucketSpace(document::BucketSpace bucketSpace, bool use_btree_db = false);
+ explicit ContentBucketSpace(document::BucketSpace bucketSpace, const ContentBucketDbOptions& db_opts);
document::BucketSpace bucketSpace() const noexcept { return _bucketSpace; }
StorBucketDatabase &bucketDatabase() { return _bucketDatabase; }
diff --git a/storage/src/vespa/storage/common/content_bucket_space_repo.cpp b/storage/src/vespa/storage/common/content_bucket_space_repo.cpp
index 2cc03e4d1e5..a76ad9576e0 100644
--- a/storage/src/vespa/storage/common/content_bucket_space_repo.cpp
+++ b/storage/src/vespa/storage/common/content_bucket_space_repo.cpp
@@ -7,13 +7,13 @@ using document::BucketSpace;
namespace storage {
-ContentBucketSpaceRepo::ContentBucketSpaceRepo(bool use_btree_db)
+ContentBucketSpaceRepo::ContentBucketSpaceRepo(const ContentBucketDbOptions& db_opts)
: _map()
{
_map.emplace(document::FixedBucketSpaces::default_space(),
- std::make_unique<ContentBucketSpace>(document::FixedBucketSpaces::default_space(), use_btree_db));
+ std::make_unique<ContentBucketSpace>(document::FixedBucketSpaces::default_space(), db_opts));
_map.emplace(document::FixedBucketSpaces::global_space(),
- std::make_unique<ContentBucketSpace>(document::FixedBucketSpaces::global_space(), use_btree_db));
+ std::make_unique<ContentBucketSpace>(document::FixedBucketSpaces::global_space(), db_opts));
}
ContentBucketSpace &
diff --git a/storage/src/vespa/storage/common/content_bucket_space_repo.h b/storage/src/vespa/storage/common/content_bucket_space_repo.h
index 1c858e47bcd..01698eb6e01 100644
--- a/storage/src/vespa/storage/common/content_bucket_space_repo.h
+++ b/storage/src/vespa/storage/common/content_bucket_space_repo.h
@@ -19,7 +19,7 @@ private:
BucketSpaceMap _map;
public:
- explicit ContentBucketSpaceRepo(bool use_btree_db = false);
+ explicit ContentBucketSpaceRepo(const ContentBucketDbOptions&);
ContentBucketSpace &get(document::BucketSpace bucketSpace) const;
BucketSpaceMap::const_iterator begin() const { return _map.begin(); }
BucketSpaceMap::const_iterator end() const { return _map.end(); }
diff --git a/storage/src/vespa/storage/config/stor-server.def b/storage/src/vespa/storage/config/stor-server.def
index cd940079a70..54160bc53fe 100644
--- a/storage/src/vespa/storage/config/stor-server.def
+++ b/storage/src/vespa/storage/config/stor-server.def
@@ -88,3 +88,12 @@ simulated_bucket_request_latency_msec int default=0
## If set, content node processes will use a B-tree backed bucket database implementation
## instead of the legacy Judy-based implementation.
use_content_node_btree_bucket_db bool default=true restart
+
+## If non-zero, the bucket DB will be striped into 2^bits sub-databases, each handling
+## a disjoint subset of the node's buckets, in order to reduce locking contention.
+## Max value is unspecified, but will be clamped internally.
+## WARNING:
+## Setting this to a non-zero value requires the minimum split bit level in the cluster
+## to be enforced, so only set this value if you know exactly what you're doing!
+content_node_bucket_db_stripe_bits int default=0
+
diff --git a/storage/src/vespa/storage/frameworkimpl/component/servicelayercomponentregisterimpl.cpp b/storage/src/vespa/storage/frameworkimpl/component/servicelayercomponentregisterimpl.cpp
index 7a342c1df25..e2a06ee640b 100644
--- a/storage/src/vespa/storage/frameworkimpl/component/servicelayercomponentregisterimpl.cpp
+++ b/storage/src/vespa/storage/frameworkimpl/component/servicelayercomponentregisterimpl.cpp
@@ -9,8 +9,8 @@ namespace storage {
using vespalib::IllegalStateException;
-ServiceLayerComponentRegisterImpl::ServiceLayerComponentRegisterImpl(bool use_btree_db)
- : _bucketSpaceRepo(use_btree_db)
+ServiceLayerComponentRegisterImpl::ServiceLayerComponentRegisterImpl(const ContentBucketDbOptions& db_opts)
+ : _bucketSpaceRepo(db_opts)
{ }
void
diff --git a/storage/src/vespa/storage/frameworkimpl/component/servicelayercomponentregisterimpl.h b/storage/src/vespa/storage/frameworkimpl/component/servicelayercomponentregisterimpl.h
index 967adf8f787..a77b8061842 100644
--- a/storage/src/vespa/storage/frameworkimpl/component/servicelayercomponentregisterimpl.h
+++ b/storage/src/vespa/storage/frameworkimpl/component/servicelayercomponentregisterimpl.h
@@ -26,7 +26,7 @@ class ServiceLayerComponentRegisterImpl
public:
typedef std::unique_ptr<ServiceLayerComponentRegisterImpl> UP;
- ServiceLayerComponentRegisterImpl(bool use_btree_db = false);
+ explicit ServiceLayerComponentRegisterImpl(const ContentBucketDbOptions&);
ContentBucketSpaceRepo& getBucketSpaceRepo() { return _bucketSpaceRepo; }
MinimumUsedBitsTracker& getMinUsedBitsTracker() {
diff --git a/storage/src/vespa/storage/storageserver/servicelayernodecontext.cpp b/storage/src/vespa/storage/storageserver/servicelayernodecontext.cpp
index bbfc9afc7aa..4e4e18a7f01 100644
--- a/storage/src/vespa/storage/storageserver/servicelayernodecontext.cpp
+++ b/storage/src/vespa/storage/storageserver/servicelayernodecontext.cpp
@@ -4,8 +4,8 @@
namespace storage {
-ServiceLayerNodeContext::ServiceLayerNodeContext(framework::Clock::UP clock, bool use_btree_db)
- : StorageNodeContext(std::make_unique<ServiceLayerComponentRegisterImpl>(use_btree_db),
+ServiceLayerNodeContext::ServiceLayerNodeContext(framework::Clock::UP clock, const ContentBucketDbOptions& db_opts)
+ : StorageNodeContext(std::make_unique<ServiceLayerComponentRegisterImpl>(db_opts),
std::move(clock)),
_componentRegister(dynamic_cast<ComponentRegister&>(StorageNodeContext::getComponentRegister()))
{
diff --git a/storage/src/vespa/storage/storageserver/servicelayernodecontext.h b/storage/src/vespa/storage/storageserver/servicelayernodecontext.h
index 0d7ac5bff69..0516c9e3bda 100644
--- a/storage/src/vespa/storage/storageserver/servicelayernodecontext.h
+++ b/storage/src/vespa/storage/storageserver/servicelayernodecontext.h
@@ -29,7 +29,7 @@ struct ServiceLayerNodeContext : public StorageNodeContext {
* You can provide your own clock implementation. Useful in testing where
* you want to fake the clock.
*/
- ServiceLayerNodeContext(framework::Clock::UP clock, bool use_btree_db);
+ ServiceLayerNodeContext(framework::Clock::UP clock, const ContentBucketDbOptions& db_opts);
/**
* Get the actual component register. Available as the actual type as the
diff --git a/storageserver/src/vespa/storageserver/app/servicelayerprocess.cpp b/storageserver/src/vespa/storageserver/app/servicelayerprocess.cpp
index bdb53ce6a60..7c7816f33ac 100644
--- a/storageserver/src/vespa/storageserver/app/servicelayerprocess.cpp
+++ b/storageserver/src/vespa/storageserver/app/servicelayerprocess.cpp
@@ -2,6 +2,7 @@
#include "servicelayerprocess.h"
#include <vespa/config/helper/configgetter.hpp>
+#include <vespa/storage/common/content_bucket_db_options.h>
#include <vespa/storage/common/i_storage_chain_builder.h>
#include <vespa/storage/config/config-stor-server.h>
#include <vespa/storage/storageserver/servicelayernode.h>
@@ -14,11 +15,16 @@ namespace storage {
namespace {
-bool configured_to_use_btree_db(const config::ConfigUri& config_uri) {
+ContentBucketDbOptions bucket_db_options_from_config(const config::ConfigUri& config_uri) {
using vespa::config::content::core::StorServerConfig;
auto server_config = config::ConfigGetter<StorServerConfig>::getConfig(
config_uri.getConfigId(), config_uri.getContext());
- return server_config->useContentNodeBtreeBucketDb;
+ // For now, limit to max 8 bits, i.e. 256 sub DBs.
+ // 0 bits (the default value) disables striping entirely.
+ auto n_stripe_bits = std::min(std::max(server_config->contentNodeBucketDbStripeBits, 0), 8);
+ ContentBucketDbOptions opts;
+ opts.n_stripe_bits = n_stripe_bits;
+ return opts;
}
}
@@ -29,7 +35,7 @@ ServiceLayerProcess::ServiceLayerProcess(const config::ConfigUri& configUri)
_node(),
_storage_chain_builder(),
_context(std::make_unique<framework::defaultimplementation::RealClock>(),
- configured_to_use_btree_db(configUri))
+ bucket_db_options_from_config(configUri))
{
}