diff options
author | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2019-08-19 15:31:27 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-08-19 15:31:27 +0200 |
commit | aefe9e9f70d0c62b5799b1d72839189bf6494f52 (patch) | |
tree | 9cf3ee4967006d1c9980d8708a4ff6ee4bc0629e /storage/src | |
parent | 7a3e27db59a235b568fe75b09ef959b31f23685a (diff) | |
parent | 0bb83cdc7c83cb3c7932778eae3da633181eded5 (diff) |
Merge pull request #10325 from vespa-engine/vekterli/add-minimal-bucket-db-read-snapshot-guard-interface
Add minimal bucket DB read snapshot guard interface
Diffstat (limited to 'storage/src')
7 files changed, 123 insertions, 34 deletions
diff --git a/storage/src/tests/distributor/bucketdatabasetest.cpp b/storage/src/tests/distributor/bucketdatabasetest.cpp index 6225865b153..1d5355df92f 100644 --- a/storage/src/tests/distributor/bucketdatabasetest.cpp +++ b/storage/src/tests/distributor/bucketdatabasetest.cpp @@ -170,8 +170,15 @@ BucketDatabaseTest::doFindParents(const std::vector<document::BucketId>& ids, } std::vector<BucketDatabase::Entry> entries; + // TODO remove in favor of only read guard once legacy DB usage has been ported over db().getParents(searchId, entries); + std::vector<BucketDatabase::Entry> checked_entries; + db().acquire_read_guard()->find_parents_and_self(searchId, checked_entries); + if (entries != checked_entries) { + return "Mismatch between results from getParents() and ReadGuard!"; + } + std::ostringstream ost; for (uint32_t i = 0; i < ids.size(); ++i) { if (std::find(entries.begin(), entries.end(), diff --git a/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp b/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp index ef198cb1d3f..a901bbdd96b 100644 --- a/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp +++ b/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp @@ -42,14 +42,18 @@ using document::BucketId; BTreeBucketDatabase::BTreeBucketDatabase() : _tree(), - _store(ReplicaStore::optimizedConfigForHugePage(1023, vespalib::alloc::MemoryAllocator::HUGEPAGE_SIZE, - 4 * 1024, 8 * 1024, 0.2)), + _store(make_default_array_store_config()), _generation_handler() { } BTreeBucketDatabase::~BTreeBucketDatabase() = default; +search::datastore::ArrayStoreConfig BTreeBucketDatabase::make_default_array_store_config() { + return ReplicaStore::optimizedConfigForHugePage(1023, vespalib::alloc::MemoryAllocator::HUGEPAGE_SIZE, + 4 * 1024, 8 * 1024, 0.2).enable_free_lists(true); +} + namespace { Entry entry_from_replica_array_ref(const BucketId& id, uint32_t gc_timestamp, ConstArrayRef<BucketCopy> replicas) { @@ -232,11 +236,11 @@ void BTreeBucketDatabase::remove(const BucketId& bucket) { * skip scanning through many disjoint subtrees. */ BTreeBucketDatabase::BTree::ConstIterator -BTreeBucketDatabase::find_parents_internal(const document::BucketId& bucket, +BTreeBucketDatabase::find_parents_internal(const BTree::FrozenView& frozen_view, + const document::BucketId& bucket, std::vector<Entry>& entries) const { const uint64_t bucket_key = bucket.toKey(); - const auto frozen_view = _tree.getFrozenView(); auto iter = frozen_view.begin(); // Start at the root level, descending towards the bucket itself. // Try skipping as many levels of the tree as possible as we go. @@ -255,6 +259,16 @@ BTreeBucketDatabase::find_parents_internal(const document::BucketId& bucket, return iter; } +void BTreeBucketDatabase::find_parents_and_self_internal(const BTree::FrozenView& frozen_view, + const document::BucketId& bucket, + std::vector<Entry>& entries) const +{ + auto iter = find_parents_internal(frozen_view, bucket, entries); + if (iter.valid() && iter.getKey() == bucket.toKey()) { + entries.emplace_back(entry_from_iterator(iter)); + } +} + /* * Note: due to legacy API reasons, iff the requested bucket itself exists in the * tree, it will be returned in the result set. I.e. it returns all the nodes on @@ -263,16 +277,15 @@ BTreeBucketDatabase::find_parents_internal(const document::BucketId& bucket, void BTreeBucketDatabase::getParents(const BucketId& bucket, std::vector<Entry>& entries) const { - auto iter = find_parents_internal(bucket, entries); - if (iter.valid() && iter.getKey() == bucket.toKey()) { - entries.emplace_back(entry_from_iterator(iter)); - } + auto view = _tree.getFrozenView(); + find_parents_and_self_internal(view, bucket, entries); } void BTreeBucketDatabase::getAll(const BucketId& bucket, std::vector<Entry>& entries) const { - auto iter = find_parents_internal(bucket, entries); + auto view = _tree.getFrozenView(); + auto iter = find_parents_internal(view, bucket, entries); // `iter` is already pointing at, or beyond, one of the bucket's subtrees. for (; iter.valid(); ++iter) { auto candidate = BucketId(BucketId::keyToBucketId(iter.getKey())); @@ -510,4 +523,18 @@ void BTreeBucketDatabase::print(std::ostream& out, bool verbose, (void)indent; } +BTreeBucketDatabase::ReadGuardImpl::ReadGuardImpl(const BTreeBucketDatabase& db) + : _db(&db), + _guard(_db->_generation_handler.takeGuard()), + _frozen_view(_db->_tree.getFrozenView()) +{} + +BTreeBucketDatabase::ReadGuardImpl::~ReadGuardImpl() = default; + +void BTreeBucketDatabase::ReadGuardImpl::find_parents_and_self(const document::BucketId& bucket, + std::vector<Entry>& entries) const +{ + _db->find_parents_and_self_internal(_frozen_view, bucket, entries); +} + } diff --git a/storage/src/vespa/storage/bucketdb/btree_bucket_database.h b/storage/src/vespa/storage/bucketdb/btree_bucket_database.h index eb527071ad7..5c69b57956b 100644 --- a/storage/src/vespa/storage/bucketdb/btree_bucket_database.h +++ b/storage/src/vespa/storage/bucketdb/btree_bucket_database.h @@ -17,13 +17,9 @@ namespace storage { * this is an uint32 we cheekily mangle it into the value, i.e. each bucket key maps to a * composite key of (gc_timestamp_u32 << 32) | array_ref_u32. * - * This is _not_ yet production ready, for several reasons: - * - Underlying ArrayStore does not have freelists enabled for replica entry reuse - * - Current API design for mutable forEach requires O(n) tree structure mutations instead - * of changing the tree in bulk and reusing ArrayStore refs et al. Needs a redesign. - * - * Also note that the DB is currently _not_ thread safe, as read snapshotting is not yet defined - * or exposed. + * Readers from contexts that are not guaranteed to be the main distributor thread MUST + * only access the database via an acquired read guard. + * Writing MUST only take place from the main distributor thread. */ // TODO create and use a new DB interface with better bulk loading, snapshot and iteration support class BTreeBucketDatabase : public BucketDatabase { @@ -66,11 +62,34 @@ private: ConstEntryRef const_entry_ref_from_iterator(const BTree::ConstIterator& iter) const; document::BucketId bucket_from_valid_iterator(const BTree::ConstIterator& iter) const; void commit_tree_changes(); - BTree::ConstIterator find_parents_internal(const document::BucketId& bucket, + BTree::ConstIterator find_parents_internal(const BTree::FrozenView& frozen_view, + const document::BucketId& bucket, std::vector<Entry>& entries) const; + void find_parents_and_self_internal(const BTree::FrozenView& frozen_view, + const document::BucketId& bucket, + std::vector<Entry>& entries) const; + + static search::datastore::ArrayStoreConfig make_default_array_store_config(); + + class ReadGuardImpl : public ReadGuard { + const BTreeBucketDatabase* _db; + GenerationHandler::Guard _guard; + BTree::FrozenView _frozen_view; + public: + explicit ReadGuardImpl(const BTreeBucketDatabase& db); + ~ReadGuardImpl() override; + + void find_parents_and_self(const document::BucketId& bucket, + std::vector<Entry>& entries) const override; + }; + + friend class ReadGuardImpl; + + std::unique_ptr<ReadGuard> acquire_read_guard() const override { + return std::make_unique<ReadGuardImpl>(*this); + } friend struct BTreeBuilderMerger; - friend struct BTreeMergingBuilder; friend struct BTreeTrailingInserter; }; diff --git a/storage/src/vespa/storage/bucketdb/bucketdatabase.h b/storage/src/vespa/storage/bucketdb/bucketdatabase.h index d12c2d82972..0659d02cc15 100644 --- a/storage/src/vespa/storage/bucketdb/bucketdatabase.h +++ b/storage/src/vespa/storage/bucketdb/bucketdatabase.h @@ -232,6 +232,23 @@ public: const document::BucketId& bid); virtual uint32_t childCount(const document::BucketId&) const = 0; + + struct ReadGuard { + ReadGuard() = default; + virtual ~ReadGuard() = default; + + ReadGuard(ReadGuard&&) = delete; + ReadGuard& operator=(ReadGuard&&) = delete; + ReadGuard(const ReadGuard&) = delete; + ReadGuard& operator=(const ReadGuard&) = delete; + + virtual void find_parents_and_self(const document::BucketId& bucket, + std::vector<Entry>& entries) const = 0; + }; + + virtual std::unique_ptr<ReadGuard> acquire_read_guard() const { + return std::unique_ptr<ReadGuard>(); + } }; template <typename BucketInfoType> diff --git a/storage/src/vespa/storage/bucketdb/mapbucketdatabase.cpp b/storage/src/vespa/storage/bucketdb/mapbucketdatabase.cpp index 9dad421324e..463e4a4b8ce 100644 --- a/storage/src/vespa/storage/bucketdb/mapbucketdatabase.cpp +++ b/storage/src/vespa/storage/bucketdb/mapbucketdatabase.cpp @@ -584,4 +584,14 @@ MapBucketDatabase::print(std::ostream& out, bool verbose, out << ')'; } +std::unique_ptr<BucketDatabase::ReadGuard> MapBucketDatabase::acquire_read_guard() const { + return std::make_unique<ReadGuardImpl>(*this); +} + +void MapBucketDatabase::ReadGuardImpl::find_parents_and_self(const document::BucketId& bucket, + std::vector<Entry>& entries) const +{ + _db->getParents(bucket, entries); +} + } // storage diff --git a/storage/src/vespa/storage/bucketdb/mapbucketdatabase.h b/storage/src/vespa/storage/bucketdb/mapbucketdatabase.h index df3e055bd7a..b295be588a6 100644 --- a/storage/src/vespa/storage/bucketdb/mapbucketdatabase.h +++ b/storage/src/vespa/storage/bucketdb/mapbucketdatabase.h @@ -28,6 +28,8 @@ public: document::BucketId getAppropriateBucket(uint16_t minBits, const document::BucketId& bid) override; void print(std::ostream& out, bool verbose, const std::string& indent) const override; + + std::unique_ptr<ReadGuard> acquire_read_guard() const override; private: struct E { E() : value(-1), e_0(-1), e_1(-1) {}; @@ -62,6 +64,17 @@ private: uint8_t getHighestSplitBit(int index, uint8_t bitCount, const document::BucketId& bid, uint8_t minCount); uint32_t childCountImpl(int index, uint8_t bitCount, const document::BucketId& b) const; + // NOT thread-safe for concurrent reads! + class ReadGuardImpl : public ReadGuard { + const MapBucketDatabase* _db; + public: + explicit ReadGuardImpl(const MapBucketDatabase& db) : _db(&db) {} + ~ReadGuardImpl() override = default; + + void find_parents_and_self(const document::BucketId& bucket, + std::vector<Entry>& entries) const override; + }; + uint32_t allocate(); uint32_t allocateValue(const document::BucketId& bid); diff --git a/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp index 04fcd4bd01c..6cfc688db0e 100644 --- a/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp @@ -53,7 +53,7 @@ GetOperation::GetOperation(DistributorComponent& manager, _bucketSpace(bucketSpace), _msg(std::move(msg)), _returnCode(api::ReturnCode::OK), - _doc((document::Document*)NULL), + _doc(), _lastModified(0), _metric(metric), _operationTimer(manager.getClock()) @@ -117,10 +117,8 @@ GetOperation::onStart(DistributorMessageSender& sender) { // Send one request for each unique group (BucketId/checksum) bool sent = false; - for (std::map<GroupId, GroupVector>::iterator iter = _responses.begin(); - iter != _responses.end(); ++iter) - { - sent |= sendForChecksum(sender, iter->first.getBucketId(), iter->second); + for (auto& response : _responses) { + sent |= sendForChecksum(sender, response.first.getBucketId(), response.second); } // If nothing was sent (no useful copies), just return NOT_FOUND @@ -133,24 +131,22 @@ GetOperation::onStart(DistributorMessageSender& sender) void GetOperation::onReceive(DistributorMessageSender& sender, const std::shared_ptr<api::StorageReply>& msg) { - api::GetReply* getreply = dynamic_cast<api::GetReply*>(msg.get()); + auto* getreply = dynamic_cast<api::GetReply*>(msg.get()); assert(getreply != nullptr); LOG(debug, "Received %s", msg->toString(true).c_str()); _msg->getTrace().getRoot().addChild(getreply->getTrace().getRoot()); bool allDone = true; - for (std::map<GroupId, GroupVector>::iterator iter = _responses.begin(); - iter != _responses.end(); ++iter) - { - for (uint32_t i = 0; i < iter->second.size(); i++) { - if (iter->second[i].sent == getreply->getMsgId()) { + for (auto& response : _responses) { + for (uint32_t i = 0; i < response.second.size(); i++) { + if (response.second[i].sent == getreply->getMsgId()) { LOG(debug, "Get on %s returned %s", _msg->getDocumentId().toString().c_str(), getreply->getResult().toString().c_str()); - iter->second[i].received = true; - iter->second[i].returnCode = getreply->getResult(); + response.second[i].received = true; + response.second[i].returnCode = getreply->getResult(); if (getreply->getResult().success()) { if (getreply->getLastModifiedTimestamp() > _lastModified) { @@ -164,14 +160,14 @@ GetOperation::onReceive(DistributorMessageSender& sender, const std::shared_ptr< } // Try to send to another node in this checksum group. - bool sent = sendForChecksum(sender, iter->first.getBucketId(), iter->second); + bool sent = sendForChecksum(sender, response.first.getBucketId(), response.second); if (sent) { allDone = false; } } } - if (iter->second[i].sent && !iter->second[i].received) { + if (response.second[i].sent && !response.second[i].received) { LOG(spam, "Have not received all replies yet, setting allDone = false"); allDone = false; } @@ -223,7 +219,7 @@ GetOperation::assignTargetNodeGroups() document::BucketId bid = bucketIdFactory.getBucketId(_msg->getDocumentId()); std::vector<BucketDatabase::Entry> entries; - _bucketSpace.getBucketDatabase().getParents(bid, entries); + _bucketSpace.getBucketDatabase().acquire_read_guard()->find_parents_and_self(bid, entries); for (uint32_t j = 0; j < entries.size(); ++j) { const BucketDatabase::Entry& e = entries[j]; |