diff options
14 files changed, 216 insertions, 72 deletions
diff --git a/persistence/src/tests/spi/clusterstatetest.cpp b/persistence/src/tests/spi/clusterstatetest.cpp index 9f7507c992b..ac67903244f 100644 --- a/persistence/src/tests/spi/clusterstatetest.cpp +++ b/persistence/src/tests/spi/clusterstatetest.cpp @@ -238,26 +238,26 @@ namespace { bool node_marked_as_maintenance_in_state(const std::string& stateStr, const lib::Distribution& d, - uint16_t node) + uint16_t node, + bool maintenance_in_all_spaces) { lib::ClusterState s(stateStr); - ClusterState state(s, node, d); + ClusterState state(s, node, d, maintenance_in_all_spaces); return state.nodeMaintenance(); } } -TEST(ClusterStateTest, can_infer_own_node_maintenance_state) +// We want to track the maintenance state for the _node_, not just the _bucket space_. +TEST(ClusterStateTest, node_maintenance_state_is_set_independent_of_bucket_space_state_string) { lib::Distribution d(lib::Distribution::getDefaultDistributionConfig(3, 3)); - EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3", d, 0)); - EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:i", d, 0)); - EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:d", d, 0)); - EXPECT_TRUE( node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:m", d, 0)); - EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:r", d, 0)); - EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:m", d, 1)); - EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .1.s:m", d, 0)); + // Note: it doesn't actually matter what the cluster state string itself says here + EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3", d, 0, false)); + EXPECT_TRUE(node_marked_as_maintenance_in_state("distributor:3 storage:3", d, 0, true)); + EXPECT_TRUE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:d", d, 0, true)); + EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:m", d, 0, false)); } } diff --git a/persistence/src/vespa/persistence/spi/clusterstate.cpp b/persistence/src/vespa/persistence/spi/clusterstate.cpp index a244b607391..f82e6165fb8 100644 --- a/persistence/src/vespa/persistence/spi/clusterstate.cpp +++ b/persistence/src/vespa/persistence/spi/clusterstate.cpp @@ -14,10 +14,12 @@ namespace storage::spi { ClusterState::ClusterState(const lib::ClusterState& state, uint16_t nodeIndex, - const lib::Distribution& distribution) + const lib::Distribution& distribution, + bool maintenanceInAllSpaces) : _state(std::make_unique<lib::ClusterState>(state)), _distribution(std::make_unique<lib::Distribution>(distribution.serialize())), - _nodeIndex(nodeIndex) + _nodeIndex(nodeIndex), + _maintenanceInAllSpaces(maintenanceInAllSpaces) { } @@ -33,14 +35,11 @@ void ClusterState::deserialize(vespalib::nbostream& i) { _distribution = std::make_unique<lib::Distribution>(distribution); } -ClusterState::ClusterState(vespalib::nbostream& i) { - deserialize(i); -} - ClusterState::ClusterState(const ClusterState& other) { vespalib::nbostream o; other.serialize(o); deserialize(o); + _maintenanceInAllSpaces = other._maintenanceInAllSpaces; } ClusterState::~ClusterState() = default; @@ -91,7 +90,7 @@ bool ClusterState::nodeRetired() const noexcept { } bool ClusterState::nodeMaintenance() const noexcept { - return nodeHasStateOneOf("m"); + return _maintenanceInAllSpaces; } void ClusterState::serialize(vespalib::nbostream& o) const { diff --git a/persistence/src/vespa/persistence/spi/clusterstate.h b/persistence/src/vespa/persistence/spi/clusterstate.h index df556d09cb7..bde7e5bdbf4 100644 --- a/persistence/src/vespa/persistence/spi/clusterstate.h +++ b/persistence/src/vespa/persistence/spi/clusterstate.h @@ -23,9 +23,9 @@ public: ClusterState(const lib::ClusterState& state, uint16_t nodeIndex, - const lib::Distribution& distribution); + const lib::Distribution& distribution, + bool maintenanceInAllSpaces = false); - ClusterState(vespalib::nbostream& i); ClusterState(const ClusterState& other); ClusterState& operator=(const ClusterState& other) = delete; ~ClusterState(); @@ -45,28 +45,32 @@ public: * compared to the complete list of nodes, and deigns the system to be * unusable. */ - bool clusterUp() const noexcept; + [[nodiscard]] bool clusterUp() const noexcept; /** * Returns false if this node has been set in a state where it should not * receive external load. + * + * TODO rename to indicate bucket space affinity. */ - bool nodeUp() const noexcept; + [[nodiscard]] bool nodeUp() const noexcept; /** * Returns true iff this node is marked as Initializing in the cluster state. + * + * TODO remove, init no longer used internally. */ - bool nodeInitializing() const noexcept; + [[nodiscard]] bool nodeInitializing() const noexcept; /** * Returns true iff this node is marked as Retired in the cluster state. */ - bool nodeRetired() const noexcept; + [[nodiscard]] bool nodeRetired() const noexcept; /** - * Returns true iff this node is marked as Maintenance in the cluster state. + * Returns true iff this node is marked as Maintenance in all bucket space cluster states. */ - bool nodeMaintenance() const noexcept; + [[nodiscard]] bool nodeMaintenance() const noexcept; /** * Returns a serialized form of this object. @@ -77,6 +81,7 @@ private: std::unique_ptr<lib::ClusterState> _state; std::unique_ptr<lib::Distribution> _distribution; uint16_t _nodeIndex; + bool _maintenanceInAllSpaces; void deserialize(vespalib::nbostream&); bool nodeHasStateOneOf(const char* states) const noexcept; diff --git a/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp b/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp index 82410d28610..cadfa8cd72f 100644 --- a/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp +++ b/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp @@ -281,6 +281,19 @@ TEST_F("node going from maintenance to up state deactivates all buckets", Fixtur EXPECT_FALSE(f._bucketInfo.getInfo().isActive()); } +TEST_F("node going from maintenance to down state deactivates all buckets", Fixture) +{ + f._handler.handleSetCurrentState(f._ready.bucket(2), + BucketInfo::ACTIVE, f._genResult); + f.sync(); + f.setNodeMaintenance(true); + f.sync(); + f.setNodeUp(false); + f.sync(); + f.handleGetBucketInfo(f._ready.bucket(2)); + EXPECT_FALSE(f._bucketInfo.getInfo().isActive()); +} + TEST_MAIN() { TEST_RUN_ALL(); diff --git a/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp b/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp index d329d9ce27c..0d9b2f6aaf6 100644 --- a/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp @@ -144,6 +144,12 @@ BucketHandler::handlePopulateActiveBuckets(document::BucketId::List &buckets, })); } +namespace { +constexpr const char* bool_str(bool v) noexcept { + return v ? "true" : "false"; +} +} + void BucketHandler::notifyClusterStateChanged(const std::shared_ptr<IBucketStateCalculator> & newCalc) { @@ -151,17 +157,19 @@ BucketHandler::notifyClusterStateChanged(const std::shared_ptr<IBucketStateCalcu bool oldNodeMaintenance = _nodeMaintenance; _nodeUp = newCalc->nodeUp(); // Up, Retired or Initializing _nodeMaintenance = newCalc->nodeMaintenance(); - LOG(spam, "notifyClusterStateChanged: %s -> %s", oldNodeUp ? "up" : "down", _nodeUp ? "up" : "down"); + LOG(spam, "notifyClusterStateChanged; up: %s -> %s, maintenance: %s -> %s", + bool_str(oldNodeUp), bool_str(_nodeUp), + bool_str(oldNodeMaintenance), bool_str(_nodeMaintenance)); if (_nodeMaintenance) { return; // Don't deactivate buckets in maintenance mode; let query traffic drain away naturally. } // We implicitly deactivate buckets in two edge cases: // - Up -> Down (not maintenance; handled above), since the node can not be expected to offer // any graceful query draining when set Down. - // - Maintenance -> Up, since we'd otherwise introduce a bunch of transient duplicate results - // into queries. The assumption is that the system has already activated buckets on other - // nodes in such a scenario. - if ((oldNodeUp && !_nodeUp) || (oldNodeMaintenance && _nodeUp)) { + // - Maintenance -> !Maintenance, since we'd otherwise introduce a bunch of transient duplicate + // results into queries if we transition to an available state. + // The assumption is that the system has already activated buckets on other nodes in such a scenario. + if ((oldNodeUp && !_nodeUp) || oldNodeMaintenance) { deactivateAllActiveBuckets(); } } diff --git a/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp b/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp index 137b2596eeb..20cd4ced6b2 100644 --- a/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp @@ -56,11 +56,12 @@ ClusterStateHandler::performSetClusterState(const ClusterState *calc, IGenericRe { LOG(debug, "performSetClusterState(): " - "clusterUp(%s), nodeUp(%s), nodeInitializing(%s)" + "clusterUp(%s), nodeUp(%s), nodeInitializing(%s), nodeMaintenance(%s)" "changedHandlers.size() = %zu", (calc->clusterUp() ? "true" : "false"), (calc->nodeUp() ? "true" : "false"), (calc->nodeInitializing() ? "true" : "false"), + (calc->nodeMaintenance() ? "true" : "false"), _changedHandlers.size()); if (!_changedHandlers.empty()) { auto newCalc = std::make_shared<ClusterStateAdapter>(*calc); diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.cpp b/searchcore/src/vespa/searchcore/proton/server/proton.cpp index 621f3acd072..5b21242e397 100644 --- a/searchcore/src/vespa/searchcore/proton/server/proton.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/proton.cpp @@ -865,11 +865,11 @@ Proton::setClusterState(BucketSpace bucketSpace, const storage::spi::ClusterStat // forward info sent by cluster controller to persistence engine // about whether node is supposed to be up or not. Match engine // needs to know this in order to stop serving queries. - bool nodeUpInBucketSpace(calc.nodeUp()); + bool nodeUpInBucketSpace(calc.nodeUp()); // TODO rename calculator function to imply bucket space affinity bool nodeRetired(calc.nodeRetired()); bool nodeUp = updateNodeUp(bucketSpace, nodeUpInBucketSpace); _matchEngine->setNodeUp(nodeUp); - _matchEngine->setNodeMaintenance(calc.nodeMaintenance()); + _matchEngine->setNodeMaintenance(calc.nodeMaintenance()); // Note: _all_ bucket spaces in maintenance if (_memoryFlushConfigUpdater) { _memoryFlushConfigUpdater->setNodeRetired(nodeRetired); } diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.h b/searchcore/src/vespa/searchcore/proton/server/proton.h index 6b0bef50cae..6fd31352051 100644 --- a/searchcore/src/vespa/searchcore/proton/server/proton.h +++ b/searchcore/src/vespa/searchcore/proton/server/proton.h @@ -133,6 +133,7 @@ private: uint32_t getDistributionKey() const override { return _distributionKey; } BootstrapConfig::SP getActiveConfigSnapshot() const; std::shared_ptr<IDocumentDBReferenceRegistry> getDocumentDBReferenceRegistry() const override; + // Returns true if the node is up in _any_ bucket space bool updateNodeUp(BucketSpace bucketSpace, bool nodeUpInBucketSpace); void closeDocumentDBs(vespalib::ThreadStackExecutorBase & executor); public: diff --git a/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp b/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp index 597bb4b07ff..5978f71db2f 100644 --- a/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp +++ b/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp @@ -14,11 +14,46 @@ using namespace ::testing; namespace storage { struct DeactivateBucketsTest : FileStorTestFixture { - bool isActive(const document::BucketId&) const; + std::unique_ptr<TestFileStorComponents> _c; + + [[nodiscard]] bool is_active(const document::BucketId&) const; + + void SetUp() override { + FileStorTestFixture::SetUp(); + _c = std::make_unique<TestFileStorComponents>(*this); + + std::string up_state("storage:2 distributor:2"); + _node->getStateUpdater().setClusterState( + std::make_shared<const lib::ClusterState>(up_state)); + + createBucket(test_bucket()); + + api::BucketInfo serviceLayerInfo(1, 2, 3, 4, 5, true, true); + { + StorBucketDatabase::WrappedEntry entry( + _node->getStorageBucketDatabase().get(test_bucket(), "foo", + StorBucketDatabase::CREATE_IF_NONEXISTING)); + entry->info = serviceLayerInfo; + entry.write(); + } + } + + void TearDown() override { + _c.reset(); + FileStorTestFixture::TearDown(); + } + + static document::BucketId test_bucket() noexcept { + return {8, 123}; + } + + static std::shared_ptr<const lib::ClusterState> state_of(const char* str) { + return std::make_shared<const lib::ClusterState>(str); + } }; bool -DeactivateBucketsTest::isActive(const document::BucketId& bucket) const +DeactivateBucketsTest::is_active(const document::BucketId& bucket) const { StorBucketDatabase::WrappedEntry entry( _node->getStorageBucketDatabase().get(bucket, "foo")); @@ -26,31 +61,53 @@ DeactivateBucketsTest::isActive(const document::BucketId& bucket) const return entry->info.isActive(); } -TEST_F(DeactivateBucketsTest, buckets_in_database_deactivated_when_node_down_in_cluster_state) { - TestFileStorComponents c(*this); - // Must set state to up first, or down-edge case won't trigger. - std::string upState("storage:2 distributor:2"); - _node->getStateUpdater().setClusterState( - lib::ClusterState::CSP(new lib::ClusterState(upState))); - - document::BucketId bucket(8, 123); - - createBucket(bucket); - api::BucketInfo serviceLayerInfo(1, 2, 3, 4, 5, true, true); - { - StorBucketDatabase::WrappedEntry entry( - _node->getStorageBucketDatabase().get(bucket, "foo", - StorBucketDatabase::CREATE_IF_NONEXISTING)); - entry->info = serviceLayerInfo; - entry.write(); - } - EXPECT_TRUE(isActive(bucket)); - std::string downState("storage:2 .1.s:d distributor:2"); - _node->getStateUpdater().setClusterState( - lib::ClusterState::CSP(new lib::ClusterState(downState))); - +TEST_F(DeactivateBucketsTest, buckets_deactivated_when_node_marked_down) +{ + EXPECT_TRUE(is_active(test_bucket())); + _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:d distributor:2")); // Buckets should have been deactivated in content layer - EXPECT_FALSE(isActive(bucket)); + EXPECT_FALSE(is_active(test_bucket())); +} + +TEST_F(DeactivateBucketsTest, buckets_not_deactivated_when_node_marked_maintenance) +{ + EXPECT_TRUE(is_active(test_bucket())); + _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:m distributor:2")); + EXPECT_TRUE(is_active(test_bucket())); +} + +TEST_F(DeactivateBucketsTest, buckets_deactivated_when_node_goes_from_maintenance_to_up) +{ + EXPECT_TRUE(is_active(test_bucket())); + _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:m distributor:2")); + _node->getStateUpdater().setClusterState(state_of("storage:2 distributor:2")); + EXPECT_FALSE(is_active(test_bucket())); } +TEST_F(DeactivateBucketsTest, buckets_deactivated_when_node_goes_from_maintenance_to_down) +{ + EXPECT_TRUE(is_active(test_bucket())); + _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:m distributor:2")); + _node->getStateUpdater().setClusterState(state_of("storage:2 distributor:2")); + EXPECT_FALSE(is_active(test_bucket())); +} + +// If we only have a subset of the bucket spaces in maintenance mode (i.e. global +// bucket merge enforcement), we treat this as the node being down from the perspective +// of default space bucket deactivation. +TEST_F(DeactivateBucketsTest, bucket_space_subset_in_maintenance_deactivates_buckets) +{ + EXPECT_TRUE(is_active(test_bucket())); + auto derived = lib::ClusterStateBundle::BucketSpaceStateMapping({ + {document::FixedBucketSpaces::default_space(), state_of("storage:2 .1.s:m distributor:2")}, + {document::FixedBucketSpaces::global_space(), state_of("storage:2 distributor:2")} + }); + _node->getStateUpdater().setClusterStateBundle( + std::make_shared<const lib::ClusterStateBundle>(*state_of("storage:2 .1.s:m distributor:2"), + std::move(derived))); + EXPECT_FALSE(is_active(test_bucket())); +} + +// TODO should also test SPI interaction + } // namespace storage diff --git a/storage/src/vespa/storage/common/content_bucket_space.cpp b/storage/src/vespa/storage/common/content_bucket_space.cpp index 56bab9c6edc..1d78f77525a 100644 --- a/storage/src/vespa/storage/common/content_bucket_space.cpp +++ b/storage/src/vespa/storage/common/content_bucket_space.cpp @@ -57,4 +57,18 @@ ContentBucketSpace::setNodeUpInLastNodeStateSeenByProvider(bool nodeUpInLastNode _nodeUpInLastNodeStateSeenByProvider = nodeUpInLastNodeStateSeenByProvider; } +bool +ContentBucketSpace::getNodeMaintenanceInLastNodeStateSeenByProvider() const +{ + std::lock_guard guard(_lock); + return _nodeMaintenanceInLastNodeStateSeenByProvider; +} + +void +ContentBucketSpace::setNodeMaintenanceInLastNodeStateSeenByProvider(bool nodeMaintenanceInLastNodeStateSeenByProvider) +{ + std::lock_guard guard(_lock); + _nodeMaintenanceInLastNodeStateSeenByProvider = nodeMaintenanceInLastNodeStateSeenByProvider; +} + } diff --git a/storage/src/vespa/storage/common/content_bucket_space.h b/storage/src/vespa/storage/common/content_bucket_space.h index 63379d6b8ee..836cd6e15f8 100644 --- a/storage/src/vespa/storage/common/content_bucket_space.h +++ b/storage/src/vespa/storage/common/content_bucket_space.h @@ -23,6 +23,7 @@ private: std::shared_ptr<const lib::ClusterState> _clusterState; std::shared_ptr<const lib::Distribution> _distribution; bool _nodeUpInLastNodeStateSeenByProvider; + bool _nodeMaintenanceInLastNodeStateSeenByProvider; public: using UP = std::unique_ptr<ContentBucketSpace>; @@ -36,6 +37,8 @@ public: std::shared_ptr<const lib::Distribution> getDistribution() const; bool getNodeUpInLastNodeStateSeenByProvider() const; void setNodeUpInLastNodeStateSeenByProvider(bool nodeUpInLastNodeStateSeenByProvider); + bool getNodeMaintenanceInLastNodeStateSeenByProvider() const; + void setNodeMaintenanceInLastNodeStateSeenByProvider(bool nodeMaintenanceInLastNodeStateSeenByProvider); }; } diff --git a/storage/src/vespa/storage/common/content_bucket_space_repo.h b/storage/src/vespa/storage/common/content_bucket_space_repo.h index 7038e9cb1aa..048c2c266f0 100644 --- a/storage/src/vespa/storage/common/content_bucket_space_repo.h +++ b/storage/src/vespa/storage/common/content_bucket_space_repo.h @@ -21,8 +21,8 @@ private: public: explicit ContentBucketSpaceRepo(const ContentBucketDbOptions&); ContentBucketSpace &get(document::BucketSpace bucketSpace) const; - BucketSpaceMap::const_iterator begin() const { return _map.begin(); } - BucketSpaceMap::const_iterator end() const { return _map.end(); } + BucketSpaceMap::const_iterator begin() const noexcept { return _map.begin(); } + BucketSpaceMap::const_iterator end() const noexcept { return _map.end(); } BucketSpaces getBucketSpaces() const; size_t getBucketMemoryUsage() const; diff --git a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp index c32efb6aa66..fc87845315f 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp +++ b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp @@ -23,6 +23,7 @@ #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/util/idestructorcallback.h> #include <vespa/vespalib/util/sequencedtaskexecutor.h> +#include <algorithm> #include <thread> #include <vespa/log/bufferedlogger.h> @@ -882,28 +883,64 @@ namespace { }; } +bool +FileStorManager::maintenance_in_all_spaces(const lib::Node& node) const noexcept +{ + return std::ranges::all_of(_component.getBucketSpaceRepo(), [&](const auto& elem) { + ContentBucketSpace& bucket_space = *elem.second; + auto derived_cluster_state = bucket_space.getClusterState(); + return derived_cluster_state->getNodeState(node).getState().oneOf("m"); + }); +} + +bool +FileStorManager::should_deactivate_buckets(const ContentBucketSpace& space, + bool node_up_in_space, + bool maintenance_in_all_spaces) noexcept +{ + // Important: this MUST match the semantics in proton::BucketHandler::notifyClusterStateChanged()! + // Otherwise, the content layer and proton will be out of sync in terms of bucket activation state. + if (maintenance_in_all_spaces) { + return false; + } + return ((space.getNodeUpInLastNodeStateSeenByProvider() && !node_up_in_space) + || space.getNodeMaintenanceInLastNodeStateSeenByProvider()); +} + +void +FileStorManager::maybe_log_received_cluster_state() +{ + if (LOG_WOULD_LOG(debug)) { + auto cluster_state_bundle = _component.getStateUpdater().getClusterStateBundle(); + auto baseline_state = cluster_state_bundle->getBaselineClusterState(); + LOG(debug, "FileStorManager received baseline cluster state '%s'", baseline_state->toString().c_str()); + } +} + void FileStorManager::updateState() { - auto clusterStateBundle = _component.getStateUpdater().getClusterStateBundle(); - lib::ClusterState::CSP state(clusterStateBundle->getBaselineClusterState()); - lib::Node node(_component.getNodeType(), _component.getIndex()); + maybe_log_received_cluster_state(); + const lib::Node node(_component.getNodeType(), _component.getIndex()); + const bool in_maintenance = maintenance_in_all_spaces(node); - LOG(debug, "FileStorManager received cluster state '%s'", state->toString().c_str()); for (const auto &elem : _component.getBucketSpaceRepo()) { BucketSpace bucketSpace(elem.first); - ContentBucketSpace &contentBucketSpace = *elem.second; + ContentBucketSpace& contentBucketSpace = *elem.second; auto derivedClusterState = contentBucketSpace.getClusterState(); - bool nodeUp = derivedClusterState->getNodeState(node).getState().oneOf("uir"); - // If edge where we go down - if (contentBucketSpace.getNodeUpInLastNodeStateSeenByProvider() && !nodeUp) { - LOG(debug, "Received cluster state where this node is down; de-activating all buckets in database for bucket space %s", bucketSpace.toString().c_str()); + const bool node_up_in_space = derivedClusterState->getNodeState(node).getState().oneOf("uir"); + if (should_deactivate_buckets(contentBucketSpace, node_up_in_space, in_maintenance)) { + LOG(debug, "Received cluster state where this node is down; de-activating all buckets " + "in database for bucket space %s", bucketSpace.toString().c_str()); Deactivator deactivator; contentBucketSpace.bucketDatabase().for_each_mutable_unordered( std::ref(deactivator), "FileStorManager::updateState"); } - contentBucketSpace.setNodeUpInLastNodeStateSeenByProvider(nodeUp); - spi::ClusterState spiState(*derivedClusterState, _component.getIndex(), *contentBucketSpace.getDistribution()); + contentBucketSpace.setNodeUpInLastNodeStateSeenByProvider(node_up_in_space); + contentBucketSpace.setNodeMaintenanceInLastNodeStateSeenByProvider(in_maintenance); + spi::ClusterState spiState(*derivedClusterState, _component.getIndex(), + *contentBucketSpace.getDistribution(), + in_maintenance); _provider->setClusterState(bucketSpace, spiState); } } diff --git a/storage/src/vespa/storage/persistence/filestorage/filestormanager.h b/storage/src/vespa/storage/persistence/filestorage/filestormanager.h index 76cb31f32d4..b7450de13d8 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestormanager.h +++ b/storage/src/vespa/storage/persistence/filestorage/filestormanager.h @@ -39,6 +39,7 @@ namespace api { } namespace spi { struct PersistenceProvider; } +class ContentBucketSpace; struct FileStorManagerTest; class ReadBucketList; class BucketOwnershipNotifier; @@ -170,6 +171,11 @@ private: void onFlush(bool downwards) override; void reportHtmlStatus(std::ostream&, const framework::HttpUrlPath&) const override; void storageDistributionChanged() override; + [[nodiscard]] static bool should_deactivate_buckets(const ContentBucketSpace& space, + bool node_up_in_space, + bool maintenance_in_all_spaces) noexcept; + [[nodiscard]] bool maintenance_in_all_spaces(const lib::Node& node) const noexcept; + void maybe_log_received_cluster_state(); void updateState(); void propagateClusterStates(); void update_reported_state_after_db_init(); |