From 85af738b99995cc04e87e45983a3c333f1274728 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Mon, 22 Nov 2021 14:06:42 +0000 Subject: Continue serving search queries when in Maintenance node state Previously, entering maintenance state would implicitly deactivate all buckets on the searchnode and cause empty responses to be returned for searches. However, container query dispatch uses async health pings to decide which nodes to route queries to, so it would be possible for a node to still be used for queries for a few seconds until the ping discovered that the node should not be used. In the case of multiple groups without multiple ready replicas within the group, this would cause transient coverage loss since the dispatcher would not realize it should route queries to other groups instead. With this commit, maintenance edge behavior is changed as follows: - Buckets are _not_ deactivated when going from an available state to the maintenance state. However, they _are_ deactivated when going from maintenance state to an available state in order to avoid transient query duplicates immediately after the change. - Searches are executed as normal instead of returning empty replies when the node is in maintenance state. The following behavior is intentionally _not_ changed: - The search interface is still marked as offline when in maintenance state, as this signals that the node should be taken out of rotation. In particular, it's critical that the RPC health ping response is explicitly tagged as having zero active docs when the search interface is offline, even though many buckets may now actually be active. Otherwise, queries would not be gracefully drained from the node. 
--- persistence/src/tests/spi/clusterstatetest.cpp | 27 ++++++++++++++++++++++ .../src/vespa/persistence/spi/clusterstate.cpp | 14 +++++++---- .../src/vespa/persistence/spi/clusterstate.h | 15 ++++++++---- 3 files changed, 46 insertions(+), 10 deletions(-) (limited to 'persistence') diff --git a/persistence/src/tests/spi/clusterstatetest.cpp b/persistence/src/tests/spi/clusterstatetest.cpp index 8a303c1a1ac..9f7507c992b 100644 --- a/persistence/src/tests/spi/clusterstatetest.cpp +++ b/persistence/src/tests/spi/clusterstatetest.cpp @@ -233,4 +233,31 @@ TEST(ClusterStateTest, can_infer_own_node_retired_state) EXPECT_TRUE(!node_marked_as_retired_in_state("distributor:3 storage:3 .1.s:r", d, 0)); } +namespace { + +bool +node_marked_as_maintenance_in_state(const std::string& stateStr, + const lib::Distribution& d, + uint16_t node) +{ + lib::ClusterState s(stateStr); + ClusterState state(s, node, d); + return state.nodeMaintenance(); +} + +} + +TEST(ClusterStateTest, can_infer_own_node_maintenance_state) +{ + lib::Distribution d(lib::Distribution::getDefaultDistributionConfig(3, 3)); + + EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3", d, 0)); + EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:i", d, 0)); + EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:d", d, 0)); + EXPECT_TRUE( node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:m", d, 0)); + EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:r", d, 0)); + EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:m", d, 1)); + EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .1.s:m", d, 0)); +} + } diff --git a/persistence/src/vespa/persistence/spi/clusterstate.cpp b/persistence/src/vespa/persistence/spi/clusterstate.cpp index 4bc538996ca..a244b607391 100644 --- a/persistence/src/vespa/persistence/spi/clusterstate.cpp +++ 
b/persistence/src/vespa/persistence/spi/clusterstate.cpp @@ -68,28 +68,32 @@ ClusterState::shouldBeReady(const Bucket& b) const { return Trinary::False; } -bool ClusterState::clusterUp() const { +bool ClusterState::clusterUp() const noexcept { return _state && _state->getClusterState() == lib::State::UP; } -bool ClusterState::nodeHasStateOneOf(const char* states) const { +bool ClusterState::nodeHasStateOneOf(const char* states) const noexcept { return _state && _state->getNodeState(lib::Node(lib::NodeType::STORAGE, _nodeIndex)). getState().oneOf(states); } -bool ClusterState::nodeUp() const { +bool ClusterState::nodeUp() const noexcept { return nodeHasStateOneOf("uir"); } -bool ClusterState::nodeInitializing() const { +bool ClusterState::nodeInitializing() const noexcept { return nodeHasStateOneOf("i"); } -bool ClusterState::nodeRetired() const { +bool ClusterState::nodeRetired() const noexcept { return nodeHasStateOneOf("r"); } +bool ClusterState::nodeMaintenance() const noexcept { + return nodeHasStateOneOf("m"); +} + void ClusterState::serialize(vespalib::nbostream& o) const { assert(_distribution); assert(_state); diff --git a/persistence/src/vespa/persistence/spi/clusterstate.h b/persistence/src/vespa/persistence/spi/clusterstate.h index 8e48758e243..df556d09cb7 100644 --- a/persistence/src/vespa/persistence/spi/clusterstate.h +++ b/persistence/src/vespa/persistence/spi/clusterstate.h @@ -45,23 +45,28 @@ public: * compared to the complete list of nodes, and deigns the system to be * unusable. */ - bool clusterUp() const; + bool clusterUp() const noexcept; /** * Returns false if this node has been set in a state where it should not * receive external load. */ - bool nodeUp() const; + bool nodeUp() const noexcept; /** * Returns true iff this node is marked as Initializing in the cluster state. */ - bool nodeInitializing() const; + bool nodeInitializing() const noexcept; /** * Returns true iff this node is marked as Retired in the cluster state. 
*/ - bool nodeRetired() const; + bool nodeRetired() const noexcept; + + /** + * Returns true iff this node is marked as Maintenance in the cluster state. + */ + bool nodeMaintenance() const noexcept; /** * Returns a serialized form of this object. @@ -74,7 +79,7 @@ private: uint16_t _nodeIndex; void deserialize(vespalib::nbostream&); - bool nodeHasStateOneOf(const char* states) const; + bool nodeHasStateOneOf(const char* states) const noexcept; }; } -- cgit v1.2.3 From 3d31f8967fc8970835b14a615f393ec4acda3394 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Tue, 23 Nov 2021 15:46:19 +0000 Subject: Handle case where bucket spaces have differing maintenance state for a node Only skip deactivating buckets if the entire _node_ is marked as maintenance state, i.e. the node has maintenance state across all bucket spaces provided in the bundle. Otherwise treat the state transition as if the node goes down, deactivating all buckets. Also ensure that the bucket deactivation logic above the SPI is identical to that within Proton. This avoids bucket DBs getting out of sync between the two. 
--- persistence/src/tests/spi/clusterstatetest.cpp | 20 ++-- .../src/vespa/persistence/spi/clusterstate.cpp | 13 ++- .../src/vespa/persistence/spi/clusterstate.h | 21 ++-- .../buckethandler/buckethandler_test.cpp | 13 +++ .../searchcore/proton/server/buckethandler.cpp | 18 +++- .../proton/server/clusterstatehandler.cpp | 3 +- .../src/vespa/searchcore/proton/server/proton.cpp | 4 +- .../src/vespa/searchcore/proton/server/proton.h | 1 + .../filestorage/deactivatebucketstest.cpp | 109 ++++++++++++++++----- .../vespa/storage/common/content_bucket_space.cpp | 14 +++ .../vespa/storage/common/content_bucket_space.h | 3 + .../storage/common/content_bucket_space_repo.h | 4 +- .../persistence/filestorage/filestormanager.cpp | 59 ++++++++--- .../persistence/filestorage/filestormanager.h | 6 ++ 14 files changed, 216 insertions(+), 72 deletions(-) (limited to 'persistence') diff --git a/persistence/src/tests/spi/clusterstatetest.cpp b/persistence/src/tests/spi/clusterstatetest.cpp index 9f7507c992b..ac67903244f 100644 --- a/persistence/src/tests/spi/clusterstatetest.cpp +++ b/persistence/src/tests/spi/clusterstatetest.cpp @@ -238,26 +238,26 @@ namespace { bool node_marked_as_maintenance_in_state(const std::string& stateStr, const lib::Distribution& d, - uint16_t node) + uint16_t node, + bool maintenance_in_all_spaces) { lib::ClusterState s(stateStr); - ClusterState state(s, node, d); + ClusterState state(s, node, d, maintenance_in_all_spaces); return state.nodeMaintenance(); } } -TEST(ClusterStateTest, can_infer_own_node_maintenance_state) +// We want to track the maintenance state for the _node_, not just the _bucket space_. 
+TEST(ClusterStateTest, node_maintenance_state_is_set_independent_of_bucket_space_state_string) { lib::Distribution d(lib::Distribution::getDefaultDistributionConfig(3, 3)); - EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3", d, 0)); - EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:i", d, 0)); - EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:d", d, 0)); - EXPECT_TRUE( node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:m", d, 0)); - EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:r", d, 0)); - EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:m", d, 1)); - EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .1.s:m", d, 0)); + // Note: it doesn't actually matter what the cluster state string itself says here + EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3", d, 0, false)); + EXPECT_TRUE(node_marked_as_maintenance_in_state("distributor:3 storage:3", d, 0, true)); + EXPECT_TRUE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:d", d, 0, true)); + EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:m", d, 0, false)); } } diff --git a/persistence/src/vespa/persistence/spi/clusterstate.cpp b/persistence/src/vespa/persistence/spi/clusterstate.cpp index a244b607391..f82e6165fb8 100644 --- a/persistence/src/vespa/persistence/spi/clusterstate.cpp +++ b/persistence/src/vespa/persistence/spi/clusterstate.cpp @@ -14,10 +14,12 @@ namespace storage::spi { ClusterState::ClusterState(const lib::ClusterState& state, uint16_t nodeIndex, - const lib::Distribution& distribution) + const lib::Distribution& distribution, + bool maintenanceInAllSpaces) : _state(std::make_unique(state)), _distribution(std::make_unique(distribution.serialize())), - _nodeIndex(nodeIndex) + _nodeIndex(nodeIndex), + 
_maintenanceInAllSpaces(maintenanceInAllSpaces) { } @@ -33,14 +35,11 @@ void ClusterState::deserialize(vespalib::nbostream& i) { _distribution = std::make_unique(distribution); } -ClusterState::ClusterState(vespalib::nbostream& i) { - deserialize(i); -} - ClusterState::ClusterState(const ClusterState& other) { vespalib::nbostream o; other.serialize(o); deserialize(o); + _maintenanceInAllSpaces = other._maintenanceInAllSpaces; } ClusterState::~ClusterState() = default; @@ -91,7 +90,7 @@ bool ClusterState::nodeRetired() const noexcept { } bool ClusterState::nodeMaintenance() const noexcept { - return nodeHasStateOneOf("m"); + return _maintenanceInAllSpaces; } void ClusterState::serialize(vespalib::nbostream& o) const { diff --git a/persistence/src/vespa/persistence/spi/clusterstate.h b/persistence/src/vespa/persistence/spi/clusterstate.h index df556d09cb7..bde7e5bdbf4 100644 --- a/persistence/src/vespa/persistence/spi/clusterstate.h +++ b/persistence/src/vespa/persistence/spi/clusterstate.h @@ -23,9 +23,9 @@ public: ClusterState(const lib::ClusterState& state, uint16_t nodeIndex, - const lib::Distribution& distribution); + const lib::Distribution& distribution, + bool maintenanceInAllSpaces = false); - ClusterState(vespalib::nbostream& i); ClusterState(const ClusterState& other); ClusterState& operator=(const ClusterState& other) = delete; ~ClusterState(); @@ -45,28 +45,32 @@ public: * compared to the complete list of nodes, and deigns the system to be * unusable. */ - bool clusterUp() const noexcept; + [[nodiscard]] bool clusterUp() const noexcept; /** * Returns false if this node has been set in a state where it should not * receive external load. + * + * TODO rename to indicate bucket space affinity. */ - bool nodeUp() const noexcept; + [[nodiscard]] bool nodeUp() const noexcept; /** * Returns true iff this node is marked as Initializing in the cluster state. + * + * TODO remove, init no longer used internally. 
*/ - bool nodeInitializing() const noexcept; + [[nodiscard]] bool nodeInitializing() const noexcept; /** * Returns true iff this node is marked as Retired in the cluster state. */ - bool nodeRetired() const noexcept; + [[nodiscard]] bool nodeRetired() const noexcept; /** - * Returns true iff this node is marked as Maintenance in the cluster state. + * Returns true iff this node is marked as Maintenance in all bucket space cluster states. */ - bool nodeMaintenance() const noexcept; + [[nodiscard]] bool nodeMaintenance() const noexcept; /** * Returns a serialized form of this object. @@ -77,6 +81,7 @@ private: std::unique_ptr _state; std::unique_ptr _distribution; uint16_t _nodeIndex; + bool _maintenanceInAllSpaces; void deserialize(vespalib::nbostream&); bool nodeHasStateOneOf(const char* states) const noexcept; diff --git a/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp b/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp index 82410d28610..cadfa8cd72f 100644 --- a/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp +++ b/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp @@ -281,6 +281,19 @@ TEST_F("node going from maintenance to up state deactivates all buckets", Fixtur EXPECT_FALSE(f._bucketInfo.getInfo().isActive()); } +TEST_F("node going from maintenance to down state deactivates all buckets", Fixture) +{ + f._handler.handleSetCurrentState(f._ready.bucket(2), + BucketInfo::ACTIVE, f._genResult); + f.sync(); + f.setNodeMaintenance(true); + f.sync(); + f.setNodeUp(false); + f.sync(); + f.handleGetBucketInfo(f._ready.bucket(2)); + EXPECT_FALSE(f._bucketInfo.getInfo().isActive()); +} + TEST_MAIN() { TEST_RUN_ALL(); diff --git a/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp b/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp index d329d9ce27c..0d9b2f6aaf6 100644 --- a/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp 
+++ b/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp @@ -144,6 +144,12 @@ BucketHandler::handlePopulateActiveBuckets(document::BucketId::List &buckets, })); } +namespace { +constexpr const char* bool_str(bool v) noexcept { + return v ? "true" : "false"; +} +} + void BucketHandler::notifyClusterStateChanged(const std::shared_ptr & newCalc) { @@ -151,17 +157,19 @@ BucketHandler::notifyClusterStateChanged(const std::shared_ptrnodeUp(); // Up, Retired or Initializing _nodeMaintenance = newCalc->nodeMaintenance(); - LOG(spam, "notifyClusterStateChanged: %s -> %s", oldNodeUp ? "up" : "down", _nodeUp ? "up" : "down"); + LOG(spam, "notifyClusterStateChanged; up: %s -> %s, maintenance: %s -> %s", + bool_str(oldNodeUp), bool_str(_nodeUp), + bool_str(oldNodeMaintenance), bool_str(_nodeMaintenance)); if (_nodeMaintenance) { return; // Don't deactivate buckets in maintenance mode; let query traffic drain away naturally. } // We implicitly deactivate buckets in two edge cases: // - Up -> Down (not maintenance; handled above), since the node can not be expected to offer // any graceful query draining when set Down. - // - Maintenance -> Up, since we'd otherwise introduce a bunch of transient duplicate results - // into queries. The assumption is that the system has already activated buckets on other - // nodes in such a scenario. - if ((oldNodeUp && !_nodeUp) || (oldNodeMaintenance && _nodeUp)) { + // - Maintenance -> !Maintenance, since we'd otherwise introduce a bunch of transient duplicate + // results into queries if we transition to an available state. + // The assumption is that the system has already activated buckets on other nodes in such a scenario. 
+ if ((oldNodeUp && !_nodeUp) || oldNodeMaintenance) { deactivateAllActiveBuckets(); } } diff --git a/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp b/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp index 137b2596eeb..20cd4ced6b2 100644 --- a/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp @@ -56,11 +56,12 @@ ClusterStateHandler::performSetClusterState(const ClusterState *calc, IGenericRe { LOG(debug, "performSetClusterState(): " - "clusterUp(%s), nodeUp(%s), nodeInitializing(%s)" + "clusterUp(%s), nodeUp(%s), nodeInitializing(%s), nodeMaintenance(%s)" "changedHandlers.size() = %zu", (calc->clusterUp() ? "true" : "false"), (calc->nodeUp() ? "true" : "false"), (calc->nodeInitializing() ? "true" : "false"), + (calc->nodeMaintenance() ? "true" : "false"), _changedHandlers.size()); if (!_changedHandlers.empty()) { auto newCalc = std::make_shared(*calc); diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.cpp b/searchcore/src/vespa/searchcore/proton/server/proton.cpp index 621f3acd072..5b21242e397 100644 --- a/searchcore/src/vespa/searchcore/proton/server/proton.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/proton.cpp @@ -865,11 +865,11 @@ Proton::setClusterState(BucketSpace bucketSpace, const storage::spi::ClusterStat // forward info sent by cluster controller to persistence engine // about whether node is supposed to be up or not. Match engine // needs to know this in order to stop serving queries. 
- bool nodeUpInBucketSpace(calc.nodeUp()); + bool nodeUpInBucketSpace(calc.nodeUp()); // TODO rename calculator function to imply bucket space affinity bool nodeRetired(calc.nodeRetired()); bool nodeUp = updateNodeUp(bucketSpace, nodeUpInBucketSpace); _matchEngine->setNodeUp(nodeUp); - _matchEngine->setNodeMaintenance(calc.nodeMaintenance()); + _matchEngine->setNodeMaintenance(calc.nodeMaintenance()); // Note: _all_ bucket spaces in maintenance if (_memoryFlushConfigUpdater) { _memoryFlushConfigUpdater->setNodeRetired(nodeRetired); } diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.h b/searchcore/src/vespa/searchcore/proton/server/proton.h index 6b0bef50cae..6fd31352051 100644 --- a/searchcore/src/vespa/searchcore/proton/server/proton.h +++ b/searchcore/src/vespa/searchcore/proton/server/proton.h @@ -133,6 +133,7 @@ private: uint32_t getDistributionKey() const override { return _distributionKey; } BootstrapConfig::SP getActiveConfigSnapshot() const; std::shared_ptr getDocumentDBReferenceRegistry() const override; + // Returns true if the node is up in _any_ bucket space bool updateNodeUp(BucketSpace bucketSpace, bool nodeUpInBucketSpace); void closeDocumentDBs(vespalib::ThreadStackExecutorBase & executor); public: diff --git a/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp b/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp index 597bb4b07ff..5978f71db2f 100644 --- a/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp +++ b/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp @@ -14,11 +14,46 @@ using namespace ::testing; namespace storage { struct DeactivateBucketsTest : FileStorTestFixture { - bool isActive(const document::BucketId&) const; + std::unique_ptr _c; + + [[nodiscard]] bool is_active(const document::BucketId&) const; + + void SetUp() override { + FileStorTestFixture::SetUp(); + _c = std::make_unique(*this); + + std::string up_state("storage:2 distributor:2"); + 
_node->getStateUpdater().setClusterState( + std::make_shared(up_state)); + + createBucket(test_bucket()); + + api::BucketInfo serviceLayerInfo(1, 2, 3, 4, 5, true, true); + { + StorBucketDatabase::WrappedEntry entry( + _node->getStorageBucketDatabase().get(test_bucket(), "foo", + StorBucketDatabase::CREATE_IF_NONEXISTING)); + entry->info = serviceLayerInfo; + entry.write(); + } + } + + void TearDown() override { + _c.reset(); + FileStorTestFixture::TearDown(); + } + + static document::BucketId test_bucket() noexcept { + return {8, 123}; + } + + static std::shared_ptr state_of(const char* str) { + return std::make_shared(str); + } }; bool -DeactivateBucketsTest::isActive(const document::BucketId& bucket) const +DeactivateBucketsTest::is_active(const document::BucketId& bucket) const { StorBucketDatabase::WrappedEntry entry( _node->getStorageBucketDatabase().get(bucket, "foo")); @@ -26,31 +61,53 @@ DeactivateBucketsTest::isActive(const document::BucketId& bucket) const return entry->info.isActive(); } -TEST_F(DeactivateBucketsTest, buckets_in_database_deactivated_when_node_down_in_cluster_state) { - TestFileStorComponents c(*this); - // Must set state to up first, or down-edge case won't trigger. 
- std::string upState("storage:2 distributor:2"); - _node->getStateUpdater().setClusterState( - lib::ClusterState::CSP(new lib::ClusterState(upState))); - - document::BucketId bucket(8, 123); - - createBucket(bucket); - api::BucketInfo serviceLayerInfo(1, 2, 3, 4, 5, true, true); - { - StorBucketDatabase::WrappedEntry entry( - _node->getStorageBucketDatabase().get(bucket, "foo", - StorBucketDatabase::CREATE_IF_NONEXISTING)); - entry->info = serviceLayerInfo; - entry.write(); - } - EXPECT_TRUE(isActive(bucket)); - std::string downState("storage:2 .1.s:d distributor:2"); - _node->getStateUpdater().setClusterState( - lib::ClusterState::CSP(new lib::ClusterState(downState))); - +TEST_F(DeactivateBucketsTest, buckets_deactivated_when_node_marked_down) +{ + EXPECT_TRUE(is_active(test_bucket())); + _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:d distributor:2")); // Buckets should have been deactivated in content layer - EXPECT_FALSE(isActive(bucket)); + EXPECT_FALSE(is_active(test_bucket())); +} + +TEST_F(DeactivateBucketsTest, buckets_not_deactivated_when_node_marked_maintenance) +{ + EXPECT_TRUE(is_active(test_bucket())); + _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:m distributor:2")); + EXPECT_TRUE(is_active(test_bucket())); +} + +TEST_F(DeactivateBucketsTest, buckets_deactivated_when_node_goes_from_maintenance_to_up) +{ + EXPECT_TRUE(is_active(test_bucket())); + _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:m distributor:2")); + _node->getStateUpdater().setClusterState(state_of("storage:2 distributor:2")); + EXPECT_FALSE(is_active(test_bucket())); } +TEST_F(DeactivateBucketsTest, buckets_deactivated_when_node_goes_from_maintenance_to_down) +{ + EXPECT_TRUE(is_active(test_bucket())); + _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:m distributor:2")); + _node->getStateUpdater().setClusterState(state_of("storage:2 distributor:2")); + EXPECT_FALSE(is_active(test_bucket())); +} + +// If we 
only have a subset of the bucket spaces in maintenance mode (i.e. global +// bucket merge enforcement), we treat this as the node being down from the perspective +// of default space bucket deactivation. +TEST_F(DeactivateBucketsTest, bucket_space_subset_in_maintenance_deactivates_buckets) +{ + EXPECT_TRUE(is_active(test_bucket())); + auto derived = lib::ClusterStateBundle::BucketSpaceStateMapping({ + {document::FixedBucketSpaces::default_space(), state_of("storage:2 .1.s:m distributor:2")}, + {document::FixedBucketSpaces::global_space(), state_of("storage:2 distributor:2")} + }); + _node->getStateUpdater().setClusterStateBundle( + std::make_shared(*state_of("storage:2 .1.s:m distributor:2"), + std::move(derived))); + EXPECT_FALSE(is_active(test_bucket())); +} + +// TODO should also test SPI interaction + } // namespace storage diff --git a/storage/src/vespa/storage/common/content_bucket_space.cpp b/storage/src/vespa/storage/common/content_bucket_space.cpp index 56bab9c6edc..1d78f77525a 100644 --- a/storage/src/vespa/storage/common/content_bucket_space.cpp +++ b/storage/src/vespa/storage/common/content_bucket_space.cpp @@ -57,4 +57,18 @@ ContentBucketSpace::setNodeUpInLastNodeStateSeenByProvider(bool nodeUpInLastNode _nodeUpInLastNodeStateSeenByProvider = nodeUpInLastNodeStateSeenByProvider; } +bool +ContentBucketSpace::getNodeMaintenanceInLastNodeStateSeenByProvider() const +{ + std::lock_guard guard(_lock); + return _nodeMaintenanceInLastNodeStateSeenByProvider; +} + +void +ContentBucketSpace::setNodeMaintenanceInLastNodeStateSeenByProvider(bool nodeMaintenanceInLastNodeStateSeenByProvider) +{ + std::lock_guard guard(_lock); + _nodeMaintenanceInLastNodeStateSeenByProvider = nodeMaintenanceInLastNodeStateSeenByProvider; +} + } diff --git a/storage/src/vespa/storage/common/content_bucket_space.h b/storage/src/vespa/storage/common/content_bucket_space.h index 63379d6b8ee..836cd6e15f8 100644 --- a/storage/src/vespa/storage/common/content_bucket_space.h +++ 
b/storage/src/vespa/storage/common/content_bucket_space.h @@ -23,6 +23,7 @@ private: std::shared_ptr _clusterState; std::shared_ptr _distribution; bool _nodeUpInLastNodeStateSeenByProvider; + bool _nodeMaintenanceInLastNodeStateSeenByProvider; public: using UP = std::unique_ptr; @@ -36,6 +37,8 @@ public: std::shared_ptr getDistribution() const; bool getNodeUpInLastNodeStateSeenByProvider() const; void setNodeUpInLastNodeStateSeenByProvider(bool nodeUpInLastNodeStateSeenByProvider); + bool getNodeMaintenanceInLastNodeStateSeenByProvider() const; + void setNodeMaintenanceInLastNodeStateSeenByProvider(bool nodeMaintenanceInLastNodeStateSeenByProvider); }; } diff --git a/storage/src/vespa/storage/common/content_bucket_space_repo.h b/storage/src/vespa/storage/common/content_bucket_space_repo.h index 7038e9cb1aa..048c2c266f0 100644 --- a/storage/src/vespa/storage/common/content_bucket_space_repo.h +++ b/storage/src/vespa/storage/common/content_bucket_space_repo.h @@ -21,8 +21,8 @@ private: public: explicit ContentBucketSpaceRepo(const ContentBucketDbOptions&); ContentBucketSpace &get(document::BucketSpace bucketSpace) const; - BucketSpaceMap::const_iterator begin() const { return _map.begin(); } - BucketSpaceMap::const_iterator end() const { return _map.end(); } + BucketSpaceMap::const_iterator begin() const noexcept { return _map.begin(); } + BucketSpaceMap::const_iterator end() const noexcept { return _map.end(); } BucketSpaces getBucketSpaces() const; size_t getBucketMemoryUsage() const; diff --git a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp index c32efb6aa66..fc87845315f 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp +++ b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -882,28 +883,64 @@ namespace { }; } +bool 
+FileStorManager::maintenance_in_all_spaces(const lib::Node& node) const noexcept +{ + return std::ranges::all_of(_component.getBucketSpaceRepo(), [&](const auto& elem) { + ContentBucketSpace& bucket_space = *elem.second; + auto derived_cluster_state = bucket_space.getClusterState(); + return derived_cluster_state->getNodeState(node).getState().oneOf("m"); + }); +} + +bool +FileStorManager::should_deactivate_buckets(const ContentBucketSpace& space, + bool node_up_in_space, + bool maintenance_in_all_spaces) noexcept +{ + // Important: this MUST match the semantics in proton::BucketHandler::notifyClusterStateChanged()! + // Otherwise, the content layer and proton will be out of sync in terms of bucket activation state. + if (maintenance_in_all_spaces) { + return false; + } + return ((space.getNodeUpInLastNodeStateSeenByProvider() && !node_up_in_space) + || space.getNodeMaintenanceInLastNodeStateSeenByProvider()); +} + +void +FileStorManager::maybe_log_received_cluster_state() +{ + if (LOG_WOULD_LOG(debug)) { + auto cluster_state_bundle = _component.getStateUpdater().getClusterStateBundle(); + auto baseline_state = cluster_state_bundle->getBaselineClusterState(); + LOG(debug, "FileStorManager received baseline cluster state '%s'", baseline_state->toString().c_str()); + } +} + void FileStorManager::updateState() { - auto clusterStateBundle = _component.getStateUpdater().getClusterStateBundle(); - lib::ClusterState::CSP state(clusterStateBundle->getBaselineClusterState()); - lib::Node node(_component.getNodeType(), _component.getIndex()); + maybe_log_received_cluster_state(); + const lib::Node node(_component.getNodeType(), _component.getIndex()); + const bool in_maintenance = maintenance_in_all_spaces(node); - LOG(debug, "FileStorManager received cluster state '%s'", state->toString().c_str()); for (const auto &elem : _component.getBucketSpaceRepo()) { BucketSpace bucketSpace(elem.first); - ContentBucketSpace &contentBucketSpace = *elem.second; + ContentBucketSpace& 
contentBucketSpace = *elem.second; auto derivedClusterState = contentBucketSpace.getClusterState(); - bool nodeUp = derivedClusterState->getNodeState(node).getState().oneOf("uir"); - // If edge where we go down - if (contentBucketSpace.getNodeUpInLastNodeStateSeenByProvider() && !nodeUp) { - LOG(debug, "Received cluster state where this node is down; de-activating all buckets in database for bucket space %s", bucketSpace.toString().c_str()); + const bool node_up_in_space = derivedClusterState->getNodeState(node).getState().oneOf("uir"); + if (should_deactivate_buckets(contentBucketSpace, node_up_in_space, in_maintenance)) { + LOG(debug, "Received cluster state where this node is down; de-activating all buckets " + "in database for bucket space %s", bucketSpace.toString().c_str()); Deactivator deactivator; contentBucketSpace.bucketDatabase().for_each_mutable_unordered( std::ref(deactivator), "FileStorManager::updateState"); } - contentBucketSpace.setNodeUpInLastNodeStateSeenByProvider(nodeUp); - spi::ClusterState spiState(*derivedClusterState, _component.getIndex(), *contentBucketSpace.getDistribution()); + contentBucketSpace.setNodeUpInLastNodeStateSeenByProvider(node_up_in_space); + contentBucketSpace.setNodeMaintenanceInLastNodeStateSeenByProvider(in_maintenance); + spi::ClusterState spiState(*derivedClusterState, _component.getIndex(), + *contentBucketSpace.getDistribution(), + in_maintenance); _provider->setClusterState(bucketSpace, spiState); } } diff --git a/storage/src/vespa/storage/persistence/filestorage/filestormanager.h b/storage/src/vespa/storage/persistence/filestorage/filestormanager.h index 76cb31f32d4..b7450de13d8 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestormanager.h +++ b/storage/src/vespa/storage/persistence/filestorage/filestormanager.h @@ -39,6 +39,7 @@ namespace api { } namespace spi { struct PersistenceProvider; } +class ContentBucketSpace; struct FileStorManagerTest; class ReadBucketList; class 
BucketOwnershipNotifier; @@ -170,6 +171,11 @@ private: void onFlush(bool downwards) override; void reportHtmlStatus(std::ostream&, const framework::HttpUrlPath&) const override; void storageDistributionChanged() override; + [[nodiscard]] static bool should_deactivate_buckets(const ContentBucketSpace& space, + bool node_up_in_space, + bool maintenance_in_all_spaces) noexcept; + [[nodiscard]] bool maintenance_in_all_spaces(const lib::Node& node) const noexcept; + void maybe_log_received_cluster_state(); void updateState(); void propagateClusterStates(); void update_reported_state_after_db_init(); -- cgit v1.2.3