diff options
Diffstat (limited to 'storage')
6 files changed, 156 insertions, 39 deletions
diff --git a/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp b/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp index 597bb4b07ff..5978f71db2f 100644 --- a/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp +++ b/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp @@ -14,11 +14,46 @@ using namespace ::testing; namespace storage { struct DeactivateBucketsTest : FileStorTestFixture { - bool isActive(const document::BucketId&) const; + std::unique_ptr<TestFileStorComponents> _c; + + [[nodiscard]] bool is_active(const document::BucketId&) const; + + void SetUp() override { + FileStorTestFixture::SetUp(); + _c = std::make_unique<TestFileStorComponents>(*this); + + std::string up_state("storage:2 distributor:2"); + _node->getStateUpdater().setClusterState( + std::make_shared<const lib::ClusterState>(up_state)); + + createBucket(test_bucket()); + + api::BucketInfo serviceLayerInfo(1, 2, 3, 4, 5, true, true); + { + StorBucketDatabase::WrappedEntry entry( + _node->getStorageBucketDatabase().get(test_bucket(), "foo", + StorBucketDatabase::CREATE_IF_NONEXISTING)); + entry->info = serviceLayerInfo; + entry.write(); + } + } + + void TearDown() override { + _c.reset(); + FileStorTestFixture::TearDown(); + } + + static document::BucketId test_bucket() noexcept { + return {8, 123}; + } + + static std::shared_ptr<const lib::ClusterState> state_of(const char* str) { + return std::make_shared<const lib::ClusterState>(str); + } }; bool -DeactivateBucketsTest::isActive(const document::BucketId& bucket) const +DeactivateBucketsTest::is_active(const document::BucketId& bucket) const { StorBucketDatabase::WrappedEntry entry( _node->getStorageBucketDatabase().get(bucket, "foo")); @@ -26,31 +61,53 @@ DeactivateBucketsTest::isActive(const document::BucketId& bucket) const return entry->info.isActive(); } -TEST_F(DeactivateBucketsTest, buckets_in_database_deactivated_when_node_down_in_cluster_state) { - TestFileStorComponents c(*this); - // Must set state to up first, or down-edge case won't trigger. - std::string upState("storage:2 distributor:2"); - _node->getStateUpdater().setClusterState( - lib::ClusterState::CSP(new lib::ClusterState(upState))); - - document::BucketId bucket(8, 123); - - createBucket(bucket); - api::BucketInfo serviceLayerInfo(1, 2, 3, 4, 5, true, true); - { - StorBucketDatabase::WrappedEntry entry( - _node->getStorageBucketDatabase().get(bucket, "foo", - StorBucketDatabase::CREATE_IF_NONEXISTING)); - entry->info = serviceLayerInfo; - entry.write(); - } - EXPECT_TRUE(isActive(bucket)); - std::string downState("storage:2 .1.s:d distributor:2"); - _node->getStateUpdater().setClusterState( - lib::ClusterState::CSP(new lib::ClusterState(downState))); - +TEST_F(DeactivateBucketsTest, buckets_deactivated_when_node_marked_down) +{ + EXPECT_TRUE(is_active(test_bucket())); + _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:d distributor:2")); // Buckets should have been deactivated in content layer - EXPECT_FALSE(isActive(bucket)); + EXPECT_FALSE(is_active(test_bucket())); +} + +TEST_F(DeactivateBucketsTest, buckets_not_deactivated_when_node_marked_maintenance) +{ + EXPECT_TRUE(is_active(test_bucket())); + _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:m distributor:2")); + EXPECT_TRUE(is_active(test_bucket())); +} + +TEST_F(DeactivateBucketsTest, buckets_deactivated_when_node_goes_from_maintenance_to_up) +{ + EXPECT_TRUE(is_active(test_bucket())); + _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:m distributor:2")); + _node->getStateUpdater().setClusterState(state_of("storage:2 distributor:2")); + EXPECT_FALSE(is_active(test_bucket())); } +TEST_F(DeactivateBucketsTest, buckets_deactivated_when_node_goes_from_maintenance_to_down) +{ + EXPECT_TRUE(is_active(test_bucket())); + _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:m distributor:2")); + _node->getStateUpdater().setClusterState(state_of("storage:2 distributor:2")); + EXPECT_FALSE(is_active(test_bucket())); +} + +// If we only have a subset of the bucket spaces in maintenance mode (i.e. global +// bucket merge enforcement), we treat this as the node being down from the perspective +// of default space bucket deactivation. +TEST_F(DeactivateBucketsTest, bucket_space_subset_in_maintenance_deactivates_buckets) +{ + EXPECT_TRUE(is_active(test_bucket())); + auto derived = lib::ClusterStateBundle::BucketSpaceStateMapping({ + {document::FixedBucketSpaces::default_space(), state_of("storage:2 .1.s:m distributor:2")}, + {document::FixedBucketSpaces::global_space(), state_of("storage:2 distributor:2")} + }); + _node->getStateUpdater().setClusterStateBundle( + std::make_shared<const lib::ClusterStateBundle>(*state_of("storage:2 .1.s:m distributor:2"), + std::move(derived))); + EXPECT_FALSE(is_active(test_bucket())); +} + +// TODO should also test SPI interaction + } // namespace storage diff --git a/storage/src/vespa/storage/common/content_bucket_space.cpp b/storage/src/vespa/storage/common/content_bucket_space.cpp index 56bab9c6edc..1d78f77525a 100644 --- a/storage/src/vespa/storage/common/content_bucket_space.cpp +++ b/storage/src/vespa/storage/common/content_bucket_space.cpp @@ -57,4 +57,18 @@ ContentBucketSpace::setNodeUpInLastNodeStateSeenByProvider(bool nodeUpInLastNode _nodeUpInLastNodeStateSeenByProvider = nodeUpInLastNodeStateSeenByProvider; } +bool +ContentBucketSpace::getNodeMaintenanceInLastNodeStateSeenByProvider() const +{ + std::lock_guard guard(_lock); + return _nodeMaintenanceInLastNodeStateSeenByProvider; +} + +void +ContentBucketSpace::setNodeMaintenanceInLastNodeStateSeenByProvider(bool nodeMaintenanceInLastNodeStateSeenByProvider) +{ + std::lock_guard guard(_lock); + _nodeMaintenanceInLastNodeStateSeenByProvider = nodeMaintenanceInLastNodeStateSeenByProvider; +} + } diff --git a/storage/src/vespa/storage/common/content_bucket_space.h b/storage/src/vespa/storage/common/content_bucket_space.h index 63379d6b8ee..836cd6e15f8 100644 --- a/storage/src/vespa/storage/common/content_bucket_space.h +++ b/storage/src/vespa/storage/common/content_bucket_space.h @@ -23,6 +23,7 @@ private: std::shared_ptr<const lib::ClusterState> _clusterState; std::shared_ptr<const lib::Distribution> _distribution; bool _nodeUpInLastNodeStateSeenByProvider; + bool _nodeMaintenanceInLastNodeStateSeenByProvider; public: using UP = std::unique_ptr<ContentBucketSpace>; @@ -36,6 +37,8 @@ public: std::shared_ptr<const lib::Distribution> getDistribution() const; bool getNodeUpInLastNodeStateSeenByProvider() const; void setNodeUpInLastNodeStateSeenByProvider(bool nodeUpInLastNodeStateSeenByProvider); + bool getNodeMaintenanceInLastNodeStateSeenByProvider() const; + void setNodeMaintenanceInLastNodeStateSeenByProvider(bool nodeMaintenanceInLastNodeStateSeenByProvider); }; } diff --git a/storage/src/vespa/storage/common/content_bucket_space_repo.h b/storage/src/vespa/storage/common/content_bucket_space_repo.h index 7038e9cb1aa..048c2c266f0 100644 --- a/storage/src/vespa/storage/common/content_bucket_space_repo.h +++ b/storage/src/vespa/storage/common/content_bucket_space_repo.h @@ -21,8 +21,8 @@ private: public: explicit ContentBucketSpaceRepo(const ContentBucketDbOptions&); ContentBucketSpace &get(document::BucketSpace bucketSpace) const; - BucketSpaceMap::const_iterator begin() const { return _map.begin(); } - BucketSpaceMap::const_iterator end() const { return _map.end(); } + BucketSpaceMap::const_iterator begin() const noexcept { return _map.begin(); } + BucketSpaceMap::const_iterator end() const noexcept { return _map.end(); } BucketSpaces getBucketSpaces() const; size_t getBucketMemoryUsage() const; diff --git a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp index c32efb6aa66..fc87845315f 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp +++ b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp @@ -23,6 +23,7 @@ #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/util/idestructorcallback.h> #include <vespa/vespalib/util/sequencedtaskexecutor.h> +#include <algorithm> #include <thread> #include <vespa/log/bufferedlogger.h> @@ -882,28 +883,64 @@ namespace { }; } +bool +FileStorManager::maintenance_in_all_spaces(const lib::Node& node) const noexcept +{ + return std::ranges::all_of(_component.getBucketSpaceRepo(), [&](const auto& elem) { + ContentBucketSpace& bucket_space = *elem.second; + auto derived_cluster_state = bucket_space.getClusterState(); + return derived_cluster_state->getNodeState(node).getState().oneOf("m"); + }); +} + +bool +FileStorManager::should_deactivate_buckets(const ContentBucketSpace& space, + bool node_up_in_space, + bool maintenance_in_all_spaces) noexcept +{ + // Important: this MUST match the semantics in proton::BucketHandler::notifyClusterStateChanged()! + // Otherwise, the content layer and proton will be out of sync in terms of bucket activation state. + if (maintenance_in_all_spaces) { + return false; + } + return ((space.getNodeUpInLastNodeStateSeenByProvider() && !node_up_in_space) + || space.getNodeMaintenanceInLastNodeStateSeenByProvider()); +} + +void +FileStorManager::maybe_log_received_cluster_state() +{ + if (LOG_WOULD_LOG(debug)) { + auto cluster_state_bundle = _component.getStateUpdater().getClusterStateBundle(); + auto baseline_state = cluster_state_bundle->getBaselineClusterState(); + LOG(debug, "FileStorManager received baseline cluster state '%s'", baseline_state->toString().c_str()); + } +} + void FileStorManager::updateState() { - auto clusterStateBundle = _component.getStateUpdater().getClusterStateBundle(); - lib::ClusterState::CSP state(clusterStateBundle->getBaselineClusterState()); - lib::Node node(_component.getNodeType(), _component.getIndex()); + maybe_log_received_cluster_state(); + const lib::Node node(_component.getNodeType(), _component.getIndex()); + const bool in_maintenance = maintenance_in_all_spaces(node); - LOG(debug, "FileStorManager received cluster state '%s'", state->toString().c_str()); for (const auto &elem : _component.getBucketSpaceRepo()) { BucketSpace bucketSpace(elem.first); - ContentBucketSpace &contentBucketSpace = *elem.second; + ContentBucketSpace& contentBucketSpace = *elem.second; auto derivedClusterState = contentBucketSpace.getClusterState(); - bool nodeUp = derivedClusterState->getNodeState(node).getState().oneOf("uir"); - // If edge where we go down - if (contentBucketSpace.getNodeUpInLastNodeStateSeenByProvider() && !nodeUp) { - LOG(debug, "Received cluster state where this node is down; de-activating all buckets in database for bucket space %s", bucketSpace.toString().c_str()); + const bool node_up_in_space = derivedClusterState->getNodeState(node).getState().oneOf("uir"); + if (should_deactivate_buckets(contentBucketSpace, node_up_in_space, in_maintenance)) { + LOG(debug, "Received cluster state where this node is down; de-activating all buckets " + "in database for bucket space %s", bucketSpace.toString().c_str()); Deactivator deactivator; contentBucketSpace.bucketDatabase().for_each_mutable_unordered( std::ref(deactivator), "FileStorManager::updateState"); } - contentBucketSpace.setNodeUpInLastNodeStateSeenByProvider(nodeUp); - spi::ClusterState spiState(*derivedClusterState, _component.getIndex(), *contentBucketSpace.getDistribution()); + contentBucketSpace.setNodeUpInLastNodeStateSeenByProvider(node_up_in_space); + contentBucketSpace.setNodeMaintenanceInLastNodeStateSeenByProvider(in_maintenance); + spi::ClusterState spiState(*derivedClusterState, _component.getIndex(), + *contentBucketSpace.getDistribution(), + in_maintenance); _provider->setClusterState(bucketSpace, spiState); } } diff --git a/storage/src/vespa/storage/persistence/filestorage/filestormanager.h b/storage/src/vespa/storage/persistence/filestorage/filestormanager.h index 76cb31f32d4..b7450de13d8 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestormanager.h +++ b/storage/src/vespa/storage/persistence/filestorage/filestormanager.h @@ -39,6 +39,7 @@ namespace api { } namespace spi { struct PersistenceProvider; } +class ContentBucketSpace; struct FileStorManagerTest; class ReadBucketList; class BucketOwnershipNotifier; @@ -170,6 +171,11 @@ private: void onFlush(bool downwards) override; void reportHtmlStatus(std::ostream&, const framework::HttpUrlPath&) const override; void storageDistributionChanged() override; + [[nodiscard]] static bool should_deactivate_buckets(const ContentBucketSpace& space, + bool node_up_in_space, + bool maintenance_in_all_spaces) noexcept; + [[nodiscard]] bool maintenance_in_all_spaces(const lib::Node& node) const noexcept; + void maybe_log_received_cluster_state(); void updateState(); void propagateClusterStates(); void update_reported_state_after_db_init(); |