diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-08-18 14:23:40 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-18 14:23:40 +0200 |
commit | 2946d0246f693df5e0a5639916446d472c9e54d9 (patch) | |
tree | 235cee120c2a6301f0331ab5dfdfc1c1488f6a83 | |
parent | 5c51af58b98c987975e3db62d9ed09d72b8188d8 (diff) | |
parent | 8039e8194a62f46f2ec827d3c0de317d9b1ce9c5 (diff) |
Merge pull request #28083 from vespa-engine/balder/minor-optimizations-for-many-groups
Balder/minor optimizations for many groups
11 files changed, 108 insertions, 90 deletions
diff --git a/storage/src/tests/distributor/bucketdbmetricupdatertest.cpp b/storage/src/tests/distributor/bucketdbmetricupdatertest.cpp index 4d04e3ca51a..57a7fb529be 100644 --- a/storage/src/tests/distributor/bucketdbmetricupdatertest.cpp +++ b/storage/src/tests/distributor/bucketdbmetricupdatertest.cpp @@ -202,7 +202,7 @@ TEST_F(BucketDBMetricUpdaterTest, buckets_with_varying_trustedness) { { BucketInfo info(makeInfo(100, 200)); info.resetTrusted(); - BucketDatabase::Entry e(document::BucketId(16, 3), info); + BucketDatabase::Entry e(document::BucketId(16, 3), std::move(info)); metricUpdater.visit(e, 2); } metricUpdater.completeRound(false); @@ -233,7 +233,7 @@ TEST_F(BucketDBMetricUpdaterTest, pick_largest_copy_if_no_trusted) { // No trusted copies, so must pick second copy. BucketInfo info(makeInfo(100, 200)); info.resetTrusted(); - BucketDatabase::Entry e(document::BucketId(16, 2), info); + BucketDatabase::Entry e(document::BucketId(16, 2), std::move(info)); metricUpdater.visit(e, 2); metricUpdater.completeRound(false); metricUpdater.getLastCompleteStats().propagateMetrics(ims, dms); @@ -270,7 +270,7 @@ BucketDBMetricUpdaterTest::visitBucketWith2Copies1Trusted(BucketDBMetricUpdater& BucketInfo info; addNode(info, 0, 100); addNode(info, 1, 101); // Note different checksums => #trusted = 1 - BucketDatabase::Entry e(document::BucketId(16, 1), info); + BucketDatabase::Entry e(document::BucketId(16, 1), std::move(info)); metricUpdater.visit(e, 2); } @@ -281,18 +281,17 @@ BucketDBMetricUpdaterTest::visitBucketWith2CopiesBothTrusted(BucketDBMetricUpdat BucketInfo info; addNode(info, 0, 200); addNode(info, 2, 200); - BucketDatabase::Entry e(document::BucketId(16, 2), info); + BucketDatabase::Entry e(document::BucketId(16, 2), std::move(info)); metricUpdater.visit(e, 2); } // Single replica on node 2. void -BucketDBMetricUpdaterTest::visitBucketWith1Copy( - BucketDBMetricUpdater& metricUpdater) +BucketDBMetricUpdaterTest::visitBucketWith1Copy(BucketDBMetricUpdater& metricUpdater) { BucketInfo info; addNode(info, 2, 100); - BucketDatabase::Entry e(document::BucketId(16, 1), info); + BucketDatabase::Entry e(document::BucketId(16, 1), std::move(info)); metricUpdater.visit(e, 2); } diff --git a/storage/src/vespa/storage/bucketdb/bucketdatabase.h b/storage/src/vespa/storage/bucketdb/bucketdatabase.h index 4e0b727036a..d3fdce8f0d8 100644 --- a/storage/src/vespa/storage/bucketdb/bucketdatabase.h +++ b/storage/src/vespa/storage/bucketdb/bucketdatabase.h @@ -22,26 +22,29 @@ public: BucketInfoType _info; public: - EntryBase() : _bucketId(0) {} // Invalid entry - EntryBase(const document::BucketId& bId, const BucketInfoType& bucketInfo) - : _bucketId(bId), _info(bucketInfo) {} - EntryBase(const document::BucketId& bId, BucketInfoType&& bucketInfo) - : _bucketId(bId), _info(std::move(bucketInfo)) {} - explicit EntryBase(const document::BucketId& bId) : _bucketId(bId) {} - - bool operator==(const EntryBase& other) const { + EntryBase() noexcept : _bucketId(0), _info() {} // Invalid entry + EntryBase(const document::BucketId& bId, BucketInfoType&& bucketInfo) noexcept + : _bucketId(bId), + _info(std::move(bucketInfo)) + {} + explicit EntryBase(const document::BucketId& bId) noexcept : _bucketId(bId), _info() {} + EntryBase(EntryBase &&) noexcept = default; + EntryBase & operator=(EntryBase &&) noexcept = default; + EntryBase(const EntryBase &) = default; + EntryBase & operator=(const EntryBase &) = default; + bool operator==(const EntryBase& other) const noexcept { return (_bucketId == other._bucketId && _info == other._info); } - bool valid() const { return (_bucketId.getRawId() != 0); } + bool valid() const noexcept { return (_bucketId.getRawId() != 0); } std::string toString() const; - const document::BucketId& getBucketId() const { return _bucketId; } - const BucketInfoType& getBucketInfo() const { return _info; } - BucketInfoType& getBucketInfo() { return _info; } - BucketInfoType* operator->() { return &_info; } - const BucketInfoType* operator->() const { return &_info; } + const document::BucketId& getBucketId() const noexcept { return _bucketId; } + const BucketInfoType& getBucketInfo() const noexcept { return _info; } + BucketInfoType& getBucketInfo() noexcept { return _info; } + BucketInfoType* operator->() noexcept { return &_info; } + const BucketInfoType* operator->() const noexcept { return &_info; } - static EntryBase createInvalid() { + static EntryBase createInvalid() noexcept { return EntryBase(); } }; diff --git a/storage/src/vespa/storage/bucketdb/bucketinfo.h b/storage/src/vespa/storage/bucketdb/bucketinfo.h index 219d0335966..9c024c31fd3 100644 --- a/storage/src/vespa/storage/bucketdb/bucketinfo.h +++ b/storage/src/vespa/storage/bucketdb/bucketinfo.h @@ -118,12 +118,23 @@ public: std::string toString() const; uint32_t getHighestDocumentCount() const noexcept; - uint32_t getHighestTotalDocumentSize() const noexcept; uint32_t getHighestMetaCount() const noexcept; uint32_t getHighestUsedFileSize() const noexcept; - + struct Highest { + Highest() noexcept : _documentCount(0),_totalDocumentSize(0),_metaCount(0),_usedFileSize(0) {} + void update(const BucketCopy & n) noexcept { + _documentCount = std::max(_documentCount, n.getDocumentCount()); + _totalDocumentSize = std::max(_totalDocumentSize, n.getTotalDocumentSize()); + _metaCount = std::max(_metaCount, n.getMetaCount()); + _usedFileSize = std::max(_usedFileSize, n.getUsedFileSize()); + } + uint32_t _documentCount; + uint32_t _totalDocumentSize; + uint32_t _metaCount; + uint32_t _usedFileSize; + }; + Highest getHighest() const noexcept; bool hasRecentlyCreatedEmptyCopy() const noexcept; - bool operator==(const BucketInfoBase& other) const noexcept; }; diff --git a/storage/src/vespa/storage/bucketdb/bucketinfo.hpp b/storage/src/vespa/storage/bucketdb/bucketinfo.hpp index ce7adc8af67..f8dbff38a99 100644 --- a/storage/src/vespa/storage/bucketdb/bucketinfo.hpp +++ b/storage/src/vespa/storage/bucketdb/bucketinfo.hpp @@ -159,21 +159,21 @@ BucketInfoBase<NodeSeq>::getNodes() const noexcept { } template <typename NodeSeq> -uint32_t -BucketInfoBase<NodeSeq>::getHighestDocumentCount() const noexcept { - uint32_t highest = 0; +BucketInfoBase<NodeSeq>::Highest +BucketInfoBase<NodeSeq>::getHighest() const noexcept { + Highest highest; for (const auto & n : _nodes) { - highest = std::max(highest, n.getDocumentCount()); + highest.update(n); } return highest; } template <typename NodeSeq> uint32_t -BucketInfoBase<NodeSeq>::getHighestTotalDocumentSize() const noexcept { +BucketInfoBase<NodeSeq>::getHighestDocumentCount() const noexcept { uint32_t highest = 0; for (const auto & n : _nodes) { - highest = std::max(highest, n.getTotalDocumentSize()); + highest = std::max(highest, n.getDocumentCount()); } return highest; } diff --git a/storage/src/vespa/storage/distributor/activecopy.h b/storage/src/vespa/storage/distributor/activecopy.h index a2de77306be..91dfb3f0bd0 100644 --- a/storage/src/vespa/storage/distributor/activecopy.h +++ b/storage/src/vespa/storage/distributor/activecopy.h @@ -48,10 +48,9 @@ private: class ActiveList : public vespalib::Printable { public: - ActiveList() {} - ActiveList(std::vector<ActiveCopy>&& v) : _v(std::move(v)) { } + ActiveList() noexcept {} + ActiveList(std::vector<ActiveCopy>&& v) noexcept : _v(std::move(v)) { } - ActiveCopy& operator[](size_t i) noexcept { return _v[i]; } const ActiveCopy& operator[](size_t i) const noexcept { return _v[i]; } [[nodiscard]] bool contains(uint16_t) const noexcept; [[nodiscard]] bool empty() const noexcept { return _v.empty(); } diff --git a/storage/src/vespa/storage/distributor/maintenance/maintenancescanner.h b/storage/src/vespa/storage/distributor/maintenance/maintenancescanner.h index e4ccb6d88ad..b894ec9a1cd 100644 --- a/storage/src/vespa/storage/distributor/maintenance/maintenancescanner.h +++ b/storage/src/vespa/storage/distributor/maintenance/maintenancescanner.h @@ -23,12 +23,20 @@ public: static ScanResult createDone() { return ScanResult(true); } static ScanResult createNotDone(document::BucketSpace bucketSpace, BucketDatabase::Entry entry) { - return ScanResult(bucketSpace, entry); + return ScanResult(bucketSpace, std::move(entry)); } private: - explicit ScanResult(bool done) : _done(done), _bucketSpace(document::BucketSpace::invalid()), _entry() {} - ScanResult(document::BucketSpace bucketSpace, const BucketDatabase::Entry& e) : _done(false), _bucketSpace(bucketSpace), _entry(e) {} + explicit ScanResult(bool done) noexcept + : _done(done), + _bucketSpace(document::BucketSpace::invalid()), + _entry() + {} + ScanResult(document::BucketSpace bucketSpace, BucketDatabase::Entry e) noexcept + : _done(false), + _bucketSpace(bucketSpace), + _entry(std::move(e)) + {} }; virtual ScanResult scanNext() = 0; diff --git a/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.cpp b/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.cpp index 86399c1b620..ab27f2d2e43 100644 --- a/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.cpp +++ b/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.cpp @@ -73,7 +73,7 @@ SimpleMaintenanceScanner::scanNext() countBucket(_bucketSpaceItr->first, entry.getBucketInfo()); prioritizeBucket(document::Bucket(_bucketSpaceItr->first, entry.getBucketId())); _bucketCursor = entry.getBucketId(); - return ScanResult::createNotDone(_bucketSpaceItr->first, entry); + return ScanResult::createNotDone(_bucketSpaceItr->first, std::move(entry)); } } diff --git a/storage/src/vespa/storage/distributor/statecheckers.cpp b/storage/src/vespa/storage/distributor/statecheckers.cpp index 2aef2d17f54..596b6ca390f 100644 --- a/storage/src/vespa/storage/distributor/statecheckers.cpp +++ b/storage/src/vespa/storage/distributor/statecheckers.cpp @@ -42,38 +42,30 @@ SplitBucketStateChecker::validForSplit(Context& c) double SplitBucketStateChecker::getBucketSizeRelativeToMax(Context& c) { - const BucketInfo& info(c.entry.getBucketInfo()); - const uint32_t highestDocumentCount(info.getHighestDocumentCount()); - const uint32_t highestTotalDocumentSize(info.getHighestTotalDocumentSize()); - const uint32_t highestMetaCount(info.getHighestMetaCount()); - const uint32_t highestUsedFileSize(info.getHighestUsedFileSize()); + auto highest = c.entry.getBucketInfo().getHighest(); - if (highestDocumentCount < 2) { + if (highest._documentCount < 2) { return 0; } double byteSplitRatio = 0; if (c.distributorConfig.getSplitSize() > 0) { - byteSplitRatio = static_cast<double>(highestTotalDocumentSize) - / c.distributorConfig.getSplitSize(); + byteSplitRatio = static_cast<double>(highest._totalDocumentSize) / c.distributorConfig.getSplitSize(); } double docSplitRatio = 0; if (c.distributorConfig.getSplitCount() > 0) { - docSplitRatio = static_cast<double>(highestDocumentCount) - / c.distributorConfig.getSplitCount(); + docSplitRatio = static_cast<double>(highest._documentCount) / c.distributorConfig.getSplitCount(); } double fileSizeRatio = 0; if (c.distributorConfig.getSplitSize() > 0) { - fileSizeRatio = static_cast<double>(highestUsedFileSize) - / (2 * c.distributorConfig.getSplitSize()); + fileSizeRatio = static_cast<double>(highest._usedFileSize) / (2 * c.distributorConfig.getSplitSize()); } double metaSplitRatio = 0; if (c.distributorConfig.getSplitCount() > 0) { - metaSplitRatio = static_cast<double>(highestMetaCount) - / (2 * c.distributorConfig.getSplitCount()); + metaSplitRatio = static_cast<double>(highest._metaCount) / (2 * c.distributorConfig.getSplitCount()); } return std::max(std::max(byteSplitRatio, docSplitRatio), @@ -99,17 +91,13 @@ SplitBucketStateChecker::generateMaxSizeExceededSplitOperation(Context& c) so->setPriority(c.distributorConfig.getMaintenancePriorities().splitLargeBucket); - const BucketInfo& info(c.entry.getBucketInfo()); + auto highest = c.entry.getBucketInfo().getHighest(); vespalib::asciistream ost; ost << "[Splitting bucket because its maximum size (" - << info.getHighestTotalDocumentSize() - << " b, " - << info.getHighestDocumentCount() - << " docs, " - << info.getHighestMetaCount() - << " meta, " - << info.getHighestUsedFileSize() - << " b total" + << highest._totalDocumentSize << " b, " + << highest._documentCount << " docs, " + << highest._metaCount << " meta, " + << highest._usedFileSize << " b total" << ") is higher than the configured limit of (" << c.distributorConfig.getSplitSize() << ", " << c.distributorConfig.getSplitCount() << ")]"; @@ -562,13 +550,20 @@ consistentApartFromEmptyBucketsInNonIdealLocationAndInvalidEntries(ConstNodesRef class MergeNodes { public: - MergeNodes() - : _reason(), _nodes(), _problemFlags(0), _priority(255) + MergeNodes() noexcept + : _reason(), + _nodes(), + _problemFlags(0), + _priority(255) {} explicit MergeNodes(const BucketDatabase::Entry& entry) - : _reason(), _nodes(), _problemFlags(0), _priority(255) + : _reason(), + _nodes(), + _problemFlags(0), + _priority(255) { + _nodes.reserve(entry->getNodeCount()); for (uint16_t i = 0; i < entry->getNodeCount(); i++) { addNode(entry->getNodeRef(i).getNode()); } @@ -587,7 +582,7 @@ public: updatePriority(other._priority); } - bool shouldMerge() const { + bool shouldMerge() const noexcept { return _problemFlags != 0; } @@ -599,9 +594,7 @@ public: } void markOutOfSync(const StateChecker::Context& c, uint8_t msgPriority) { - _reason << "[Synchronizing buckets with different checksums " - << c.entry->toString() - << "]"; + _reason << "[Synchronizing buckets with different checksums " << c.entry->toString() << "]"; addProblem(OUT_OF_SYNC); updatePriority(msgPriority); } @@ -613,7 +606,7 @@ public: updatePriority(msgPriority); } - bool needsMoveOnly() const { + bool needsMoveOnly() const noexcept { return _problemFlags == NON_IDEAL_LOCATION; } @@ -626,11 +619,11 @@ public: std::string reason() const { return _reason.str(); } private: - void updatePriority(uint8_t pri) { + void updatePriority(uint8_t pri) noexcept { _priority = std::min(pri, _priority); } - void addProblem(uint8_t newProblem) { + void addProblem(uint8_t newProblem) noexcept { _problemFlags |= newProblem; } @@ -641,8 +634,8 @@ private: }; vespalib::asciistream _reason; std::vector<uint16_t> _nodes; - uint8_t _problemFlags; - uint8_t _priority; + uint8_t _problemFlags; + uint8_t _priority; }; MergeNodes::~MergeNodes() = default; diff --git a/storage/src/vespa/storage/distributor/stripe_bucket_db_updater.cpp b/storage/src/vespa/storage/distributor/stripe_bucket_db_updater.cpp index 09e7d370a98..4c8e51908b0 100644 --- a/storage/src/vespa/storage/distributor/stripe_bucket_db_updater.cpp +++ b/storage/src/vespa/storage/distributor/stripe_bucket_db_updater.cpp @@ -767,6 +767,7 @@ StripeBucketDBUpdater::MergingNodeRemover::merge(storage::BucketDatabase::Merger } std::vector<BucketCopy> remainingCopies; + remainingCopies.reserve(e->getNodeCount()); for (uint16_t i = 0; i < e->getNodeCount(); i++) { const uint16_t node_idx = e->getNodeRef(i).getNode(); if (storage_node_is_available(node_idx)) { diff --git a/storage/src/vespa/storage/storageserver/communicationmanager.cpp b/storage/src/vespa/storage/storageserver/communicationmanager.cpp index 362717f70e6..95ed9188422 100644 --- a/storage/src/vespa/storage/storageserver/communicationmanager.cpp +++ b/storage/src/vespa/storage/storageserver/communicationmanager.cpp @@ -272,7 +272,8 @@ CommunicationManager::~CommunicationManager() LOG(debug, "Deleting link %s.", toString().c_str()); } -void CommunicationManager::onClose() +void +CommunicationManager::onClose() { // Avoid getting config during shutdown _configFetcher.reset(); @@ -328,7 +329,8 @@ CommunicationManager::configureMessageBusLimits(const CommunicationManagerConfig : cfg.mbusContentNodeMaxPendingSize); } -void CommunicationManager::configure(std::unique_ptr<CommunicationManagerConfig> config) +void +CommunicationManager::configure(std::unique_ptr<CommunicationManagerConfig> config) { // Only allow dynamic (live) reconfiguration of message bus limits. if (_mbus) { @@ -494,8 +496,7 @@ CommunicationManager::sendMessageBusMessage(const std::shared_ptr<api::StorageCo } bool -CommunicationManager::sendCommand( - const std::shared_ptr<api::StorageCommand> & msg) +CommunicationManager::sendCommand(const std::shared_ptr<api::StorageCommand> & msg) { if (!msg->getAddress()) { LOGBP(warning, "Got command without address of type %s in CommunicationManager::sendCommand", @@ -570,9 +571,8 @@ CommunicationManager::serializeNodeState(const api::GetNodeStateReply& gns, std: } void -CommunicationManager::sendDirectRPCReply( - RPCRequestWrapper& request, - const std::shared_ptr<api::StorageReply>& reply) +CommunicationManager::sendDirectRPCReply(RPCRequestWrapper& request, + const std::shared_ptr<api::StorageReply>& reply) { std::string_view requestName(request.getMethodName()); // TODO non-name based dispatch // TODO rework this entire dispatch mechanism :D @@ -616,9 +616,8 @@ CommunicationManager::sendDirectRPCReply( } void -CommunicationManager::sendMessageBusReply( - StorageTransportContext& context, - const std::shared_ptr<api::StorageReply>& reply) +CommunicationManager::sendMessageBusReply(StorageTransportContext& context, + const std::shared_ptr<api::StorageReply>& reply) { // Using messagebus for communication. mbus::Reply::UP replyUP; @@ -653,8 +652,7 @@ CommunicationManager::sendMessageBusReply( } bool -CommunicationManager::sendReply( - const std::shared_ptr<api::StorageReply>& reply) +CommunicationManager::sendReply(const std::shared_ptr<api::StorageReply>& reply) { // Relaxed load since we're not doing any dependent reads that aren't // already covered by some other form of explicit synchronization. diff --git a/vdslib/src/vespa/vdslib/distribution/distribution.cpp b/vdslib/src/vespa/vdslib/distribution/distribution.cpp index 87a1f6d3758..ee022b1779a 100644 --- a/vdslib/src/vespa/vdslib/distribution/distribution.cpp +++ b/vdslib/src/vespa/vdslib/distribution/distribution.cpp @@ -9,8 +9,8 @@ #include <vespa/config/print/asciiconfigwriter.h> #include <vespa/config/print/asciiconfigreader.hpp> #include <vespa/vespalib/util/exceptions.h> +#include <vespa/vespalib/stllike/hash_map.hpp> #include <vespa/config-stor-distribution.h> -#include <algorithm> #include <cmath> #include <cassert> @@ -469,8 +469,7 @@ Distribution::getIdealDistributorNode(const ClusterState& state, const document: std::vector<Distribution::IndexList> Distribution::splitNodesIntoLeafGroups(vespalib::ConstArrayRef<uint16_t> nodeList) const { - std::vector<IndexList> result; - std::map<uint16_t, IndexList> nodes; + vespalib::hash_map<uint16_t, IndexList> nodes(nodeList.size()); for (auto node : nodeList) { const Group* group((node < _node2Group.size()) ? _node2Group[node] : nullptr); if (group == nullptr) { @@ -480,9 +479,16 @@ Distribution::splitNodesIntoLeafGroups(vespalib::ConstArrayRef<uint16_t> nodeLis nodes[group->getIndex()].push_back(node); } } + std::vector<uint16_t> sorted; + sorted.reserve(nodes.size()); + for (const auto & entry : nodes) { + sorted.push_back(entry.first); + } + std::sort(sorted.begin(), sorted.end()); + std::vector<IndexList> result; result.reserve(nodes.size()); - for (auto & node : nodes) { - result.emplace_back(std::move(node.second)); + for (uint16_t groupId : sorted) { + result.emplace_back(std::move(nodes.find(groupId)->second)); } return result; } |