summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahooinc.com>2021-11-23 15:46:19 +0000
committerTor Brede Vekterli <vekterli@yahooinc.com>2021-11-23 15:46:19 +0000
commit3d31f8967fc8970835b14a615f393ec4acda3394 (patch)
treefd9091e27653b6775a26680f25a13bb867748709
parent85af738b99995cc04e87e45983a3c333f1274728 (diff)
Handle case where bucket spaces have differing maintenance state for a node
Only skip deactivating buckets if the entire _node_ is marked as maintenance state, i.e. the node has maintenance state across all bucket spaces provided in the bundle. Otherwise treat the state transition as if the node goes down, deactivating all buckets. Also ensure that the bucket deactivation logic above the SPI is identical to that within Proton. This avoids bucket DBs getting out of sync between the two.
-rw-r--r--persistence/src/tests/spi/clusterstatetest.cpp20
-rw-r--r--persistence/src/vespa/persistence/spi/clusterstate.cpp13
-rw-r--r--persistence/src/vespa/persistence/spi/clusterstate.h21
-rw-r--r--searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp13
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp18
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp3
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/proton.cpp4
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/proton.h1
-rw-r--r--storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp109
-rw-r--r--storage/src/vespa/storage/common/content_bucket_space.cpp14
-rw-r--r--storage/src/vespa/storage/common/content_bucket_space.h3
-rw-r--r--storage/src/vespa/storage/common/content_bucket_space_repo.h4
-rw-r--r--storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp59
-rw-r--r--storage/src/vespa/storage/persistence/filestorage/filestormanager.h6
14 files changed, 216 insertions, 72 deletions
diff --git a/persistence/src/tests/spi/clusterstatetest.cpp b/persistence/src/tests/spi/clusterstatetest.cpp
index 9f7507c992b..ac67903244f 100644
--- a/persistence/src/tests/spi/clusterstatetest.cpp
+++ b/persistence/src/tests/spi/clusterstatetest.cpp
@@ -238,26 +238,26 @@ namespace {
bool
node_marked_as_maintenance_in_state(const std::string& stateStr,
const lib::Distribution& d,
- uint16_t node)
+ uint16_t node,
+ bool maintenance_in_all_spaces)
{
lib::ClusterState s(stateStr);
- ClusterState state(s, node, d);
+ ClusterState state(s, node, d, maintenance_in_all_spaces);
return state.nodeMaintenance();
}
}
-TEST(ClusterStateTest, can_infer_own_node_maintenance_state)
+// We want to track the maintenance state for the _node_, not just the _bucket space_.
+TEST(ClusterStateTest, node_maintenance_state_is_set_independent_of_bucket_space_state_string)
{
lib::Distribution d(lib::Distribution::getDefaultDistributionConfig(3, 3));
- EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3", d, 0));
- EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:i", d, 0));
- EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:d", d, 0));
- EXPECT_TRUE( node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:m", d, 0));
- EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:r", d, 0));
- EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:m", d, 1));
- EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .1.s:m", d, 0));
+ // Note: it doesn't actually matter what the cluster state string itself says here
+ EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3", d, 0, false));
+ EXPECT_TRUE(node_marked_as_maintenance_in_state("distributor:3 storage:3", d, 0, true));
+ EXPECT_TRUE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:d", d, 0, true));
+ EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:m", d, 0, false));
}
}
diff --git a/persistence/src/vespa/persistence/spi/clusterstate.cpp b/persistence/src/vespa/persistence/spi/clusterstate.cpp
index a244b607391..f82e6165fb8 100644
--- a/persistence/src/vespa/persistence/spi/clusterstate.cpp
+++ b/persistence/src/vespa/persistence/spi/clusterstate.cpp
@@ -14,10 +14,12 @@ namespace storage::spi {
ClusterState::ClusterState(const lib::ClusterState& state,
uint16_t nodeIndex,
- const lib::Distribution& distribution)
+ const lib::Distribution& distribution,
+ bool maintenanceInAllSpaces)
: _state(std::make_unique<lib::ClusterState>(state)),
_distribution(std::make_unique<lib::Distribution>(distribution.serialize())),
- _nodeIndex(nodeIndex)
+ _nodeIndex(nodeIndex),
+ _maintenanceInAllSpaces(maintenanceInAllSpaces)
{
}
@@ -33,14 +35,11 @@ void ClusterState::deserialize(vespalib::nbostream& i) {
_distribution = std::make_unique<lib::Distribution>(distribution);
}
-ClusterState::ClusterState(vespalib::nbostream& i) {
- deserialize(i);
-}
-
ClusterState::ClusterState(const ClusterState& other) {
vespalib::nbostream o;
other.serialize(o);
deserialize(o);
+ _maintenanceInAllSpaces = other._maintenanceInAllSpaces;
}
ClusterState::~ClusterState() = default;
@@ -91,7 +90,7 @@ bool ClusterState::nodeRetired() const noexcept {
}
bool ClusterState::nodeMaintenance() const noexcept {
- return nodeHasStateOneOf("m");
+ return _maintenanceInAllSpaces;
}
void ClusterState::serialize(vespalib::nbostream& o) const {
diff --git a/persistence/src/vespa/persistence/spi/clusterstate.h b/persistence/src/vespa/persistence/spi/clusterstate.h
index df556d09cb7..bde7e5bdbf4 100644
--- a/persistence/src/vespa/persistence/spi/clusterstate.h
+++ b/persistence/src/vespa/persistence/spi/clusterstate.h
@@ -23,9 +23,9 @@ public:
ClusterState(const lib::ClusterState& state,
uint16_t nodeIndex,
- const lib::Distribution& distribution);
+ const lib::Distribution& distribution,
+ bool maintenanceInAllSpaces = false);
- ClusterState(vespalib::nbostream& i);
ClusterState(const ClusterState& other);
ClusterState& operator=(const ClusterState& other) = delete;
~ClusterState();
@@ -45,28 +45,32 @@ public:
* compared to the complete list of nodes, and deigns the system to be
* unusable.
*/
- bool clusterUp() const noexcept;
+ [[nodiscard]] bool clusterUp() const noexcept;
/**
* Returns false if this node has been set in a state where it should not
* receive external load.
+ *
+ * TODO rename to indicate bucket space affinity.
*/
- bool nodeUp() const noexcept;
+ [[nodiscard]] bool nodeUp() const noexcept;
/**
* Returns true iff this node is marked as Initializing in the cluster state.
+ *
+ * TODO remove, init no longer used internally.
*/
- bool nodeInitializing() const noexcept;
+ [[nodiscard]] bool nodeInitializing() const noexcept;
/**
* Returns true iff this node is marked as Retired in the cluster state.
*/
- bool nodeRetired() const noexcept;
+ [[nodiscard]] bool nodeRetired() const noexcept;
/**
- * Returns true iff this node is marked as Maintenance in the cluster state.
+ * Returns true iff this node is marked as Maintenance in all bucket space cluster states.
*/
- bool nodeMaintenance() const noexcept;
+ [[nodiscard]] bool nodeMaintenance() const noexcept;
/**
* Returns a serialized form of this object.
@@ -77,6 +81,7 @@ private:
std::unique_ptr<lib::ClusterState> _state;
std::unique_ptr<lib::Distribution> _distribution;
uint16_t _nodeIndex;
+ bool _maintenanceInAllSpaces;
void deserialize(vespalib::nbostream&);
bool nodeHasStateOneOf(const char* states) const noexcept;
diff --git a/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp b/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp
index 82410d28610..cadfa8cd72f 100644
--- a/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp
+++ b/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp
@@ -281,6 +281,19 @@ TEST_F("node going from maintenance to up state deactivates all buckets", Fixtur
EXPECT_FALSE(f._bucketInfo.getInfo().isActive());
}
+TEST_F("node going from maintenance to down state deactivates all buckets", Fixture)
+{
+ f._handler.handleSetCurrentState(f._ready.bucket(2),
+ BucketInfo::ACTIVE, f._genResult);
+ f.sync();
+ f.setNodeMaintenance(true);
+ f.sync();
+ f.setNodeUp(false);
+ f.sync();
+ f.handleGetBucketInfo(f._ready.bucket(2));
+ EXPECT_FALSE(f._bucketInfo.getInfo().isActive());
+}
+
TEST_MAIN()
{
TEST_RUN_ALL();
diff --git a/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp b/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp
index d329d9ce27c..0d9b2f6aaf6 100644
--- a/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp
@@ -144,6 +144,12 @@ BucketHandler::handlePopulateActiveBuckets(document::BucketId::List &buckets,
}));
}
+namespace {
+constexpr const char* bool_str(bool v) noexcept {
+ return v ? "true" : "false";
+}
+}
+
void
BucketHandler::notifyClusterStateChanged(const std::shared_ptr<IBucketStateCalculator> & newCalc)
{
@@ -151,17 +157,19 @@ BucketHandler::notifyClusterStateChanged(const std::shared_ptr<IBucketStateCalcu
bool oldNodeMaintenance = _nodeMaintenance;
_nodeUp = newCalc->nodeUp(); // Up, Retired or Initializing
_nodeMaintenance = newCalc->nodeMaintenance();
- LOG(spam, "notifyClusterStateChanged: %s -> %s", oldNodeUp ? "up" : "down", _nodeUp ? "up" : "down");
+ LOG(spam, "notifyClusterStateChanged; up: %s -> %s, maintenance: %s -> %s",
+ bool_str(oldNodeUp), bool_str(_nodeUp),
+ bool_str(oldNodeMaintenance), bool_str(_nodeMaintenance));
if (_nodeMaintenance) {
return; // Don't deactivate buckets in maintenance mode; let query traffic drain away naturally.
}
// We implicitly deactivate buckets in two edge cases:
// - Up -> Down (not maintenance; handled above), since the node can not be expected to offer
// any graceful query draining when set Down.
- // - Maintenance -> Up, since we'd otherwise introduce a bunch of transient duplicate results
- // into queries. The assumption is that the system has already activated buckets on other
- // nodes in such a scenario.
- if ((oldNodeUp && !_nodeUp) || (oldNodeMaintenance && _nodeUp)) {
+ // - Maintenance -> !Maintenance, since we'd otherwise introduce a bunch of transient duplicate
+ // results into queries if we transition to an available state.
+ // The assumption is that the system has already activated buckets on other nodes in such a scenario.
+ if ((oldNodeUp && !_nodeUp) || oldNodeMaintenance) {
deactivateAllActiveBuckets();
}
}
diff --git a/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp b/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp
index 137b2596eeb..20cd4ced6b2 100644
--- a/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp
@@ -56,11 +56,12 @@ ClusterStateHandler::performSetClusterState(const ClusterState *calc, IGenericRe
{
LOG(debug,
"performSetClusterState(): "
- "clusterUp(%s), nodeUp(%s), nodeInitializing(%s)"
+ "clusterUp(%s), nodeUp(%s), nodeInitializing(%s), nodeMaintenance(%s)"
"changedHandlers.size() = %zu",
(calc->clusterUp() ? "true" : "false"),
(calc->nodeUp() ? "true" : "false"),
(calc->nodeInitializing() ? "true" : "false"),
+ (calc->nodeMaintenance() ? "true" : "false"),
_changedHandlers.size());
if (!_changedHandlers.empty()) {
auto newCalc = std::make_shared<ClusterStateAdapter>(*calc);
diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.cpp b/searchcore/src/vespa/searchcore/proton/server/proton.cpp
index 621f3acd072..5b21242e397 100644
--- a/searchcore/src/vespa/searchcore/proton/server/proton.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/proton.cpp
@@ -865,11 +865,11 @@ Proton::setClusterState(BucketSpace bucketSpace, const storage::spi::ClusterStat
// forward info sent by cluster controller to persistence engine
// about whether node is supposed to be up or not. Match engine
// needs to know this in order to stop serving queries.
- bool nodeUpInBucketSpace(calc.nodeUp());
+ bool nodeUpInBucketSpace(calc.nodeUp()); // TODO rename calculator function to imply bucket space affinity
bool nodeRetired(calc.nodeRetired());
bool nodeUp = updateNodeUp(bucketSpace, nodeUpInBucketSpace);
_matchEngine->setNodeUp(nodeUp);
- _matchEngine->setNodeMaintenance(calc.nodeMaintenance());
+ _matchEngine->setNodeMaintenance(calc.nodeMaintenance()); // Note: _all_ bucket spaces in maintenance
if (_memoryFlushConfigUpdater) {
_memoryFlushConfigUpdater->setNodeRetired(nodeRetired);
}
diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.h b/searchcore/src/vespa/searchcore/proton/server/proton.h
index 6b0bef50cae..6fd31352051 100644
--- a/searchcore/src/vespa/searchcore/proton/server/proton.h
+++ b/searchcore/src/vespa/searchcore/proton/server/proton.h
@@ -133,6 +133,7 @@ private:
uint32_t getDistributionKey() const override { return _distributionKey; }
BootstrapConfig::SP getActiveConfigSnapshot() const;
std::shared_ptr<IDocumentDBReferenceRegistry> getDocumentDBReferenceRegistry() const override;
+ // Returns true if the node is up in _any_ bucket space
bool updateNodeUp(BucketSpace bucketSpace, bool nodeUpInBucketSpace);
void closeDocumentDBs(vespalib::ThreadStackExecutorBase & executor);
public:
diff --git a/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp b/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp
index 597bb4b07ff..5978f71db2f 100644
--- a/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp
+++ b/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp
@@ -14,11 +14,46 @@ using namespace ::testing;
namespace storage {
struct DeactivateBucketsTest : FileStorTestFixture {
- bool isActive(const document::BucketId&) const;
+ std::unique_ptr<TestFileStorComponents> _c;
+
+ [[nodiscard]] bool is_active(const document::BucketId&) const;
+
+ void SetUp() override {
+ FileStorTestFixture::SetUp();
+ _c = std::make_unique<TestFileStorComponents>(*this);
+
+ std::string up_state("storage:2 distributor:2");
+ _node->getStateUpdater().setClusterState(
+ std::make_shared<const lib::ClusterState>(up_state));
+
+ createBucket(test_bucket());
+
+ api::BucketInfo serviceLayerInfo(1, 2, 3, 4, 5, true, true);
+ {
+ StorBucketDatabase::WrappedEntry entry(
+ _node->getStorageBucketDatabase().get(test_bucket(), "foo",
+ StorBucketDatabase::CREATE_IF_NONEXISTING));
+ entry->info = serviceLayerInfo;
+ entry.write();
+ }
+ }
+
+ void TearDown() override {
+ _c.reset();
+ FileStorTestFixture::TearDown();
+ }
+
+ static document::BucketId test_bucket() noexcept {
+ return {8, 123};
+ }
+
+ static std::shared_ptr<const lib::ClusterState> state_of(const char* str) {
+ return std::make_shared<const lib::ClusterState>(str);
+ }
};
bool
-DeactivateBucketsTest::isActive(const document::BucketId& bucket) const
+DeactivateBucketsTest::is_active(const document::BucketId& bucket) const
{
StorBucketDatabase::WrappedEntry entry(
_node->getStorageBucketDatabase().get(bucket, "foo"));
@@ -26,31 +61,53 @@ DeactivateBucketsTest::isActive(const document::BucketId& bucket) const
return entry->info.isActive();
}
-TEST_F(DeactivateBucketsTest, buckets_in_database_deactivated_when_node_down_in_cluster_state) {
- TestFileStorComponents c(*this);
- // Must set state to up first, or down-edge case won't trigger.
- std::string upState("storage:2 distributor:2");
- _node->getStateUpdater().setClusterState(
- lib::ClusterState::CSP(new lib::ClusterState(upState)));
-
- document::BucketId bucket(8, 123);
-
- createBucket(bucket);
- api::BucketInfo serviceLayerInfo(1, 2, 3, 4, 5, true, true);
- {
- StorBucketDatabase::WrappedEntry entry(
- _node->getStorageBucketDatabase().get(bucket, "foo",
- StorBucketDatabase::CREATE_IF_NONEXISTING));
- entry->info = serviceLayerInfo;
- entry.write();
- }
- EXPECT_TRUE(isActive(bucket));
- std::string downState("storage:2 .1.s:d distributor:2");
- _node->getStateUpdater().setClusterState(
- lib::ClusterState::CSP(new lib::ClusterState(downState)));
-
+TEST_F(DeactivateBucketsTest, buckets_deactivated_when_node_marked_down)
+{
+ EXPECT_TRUE(is_active(test_bucket()));
+ _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:d distributor:2"));
// Buckets should have been deactivated in content layer
- EXPECT_FALSE(isActive(bucket));
+ EXPECT_FALSE(is_active(test_bucket()));
+}
+
+TEST_F(DeactivateBucketsTest, buckets_not_deactivated_when_node_marked_maintenance)
+{
+ EXPECT_TRUE(is_active(test_bucket()));
+ _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:m distributor:2"));
+ EXPECT_TRUE(is_active(test_bucket()));
+}
+
+TEST_F(DeactivateBucketsTest, buckets_deactivated_when_node_goes_from_maintenance_to_up)
+{
+ EXPECT_TRUE(is_active(test_bucket()));
+ _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:m distributor:2"));
+ _node->getStateUpdater().setClusterState(state_of("storage:2 distributor:2"));
+ EXPECT_FALSE(is_active(test_bucket()));
}
+TEST_F(DeactivateBucketsTest, buckets_deactivated_when_node_goes_from_maintenance_to_down)
+{
+ EXPECT_TRUE(is_active(test_bucket()));
+ _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:m distributor:2"));
+ _node->getStateUpdater().setClusterState(state_of("storage:2 distributor:2"));
+ EXPECT_FALSE(is_active(test_bucket()));
+}
+
+// If we only have a subset of the bucket spaces in maintenance mode (i.e. global
+// bucket merge enforcement), we treat this as the node being down from the perspective
+// of default space bucket deactivation.
+TEST_F(DeactivateBucketsTest, bucket_space_subset_in_maintenance_deactivates_buckets)
+{
+ EXPECT_TRUE(is_active(test_bucket()));
+ auto derived = lib::ClusterStateBundle::BucketSpaceStateMapping({
+ {document::FixedBucketSpaces::default_space(), state_of("storage:2 .1.s:m distributor:2")},
+ {document::FixedBucketSpaces::global_space(), state_of("storage:2 distributor:2")}
+ });
+ _node->getStateUpdater().setClusterStateBundle(
+ std::make_shared<const lib::ClusterStateBundle>(*state_of("storage:2 .1.s:m distributor:2"),
+ std::move(derived)));
+ EXPECT_FALSE(is_active(test_bucket()));
+}
+
+// TODO should also test SPI interaction
+
} // namespace storage
diff --git a/storage/src/vespa/storage/common/content_bucket_space.cpp b/storage/src/vespa/storage/common/content_bucket_space.cpp
index 56bab9c6edc..1d78f77525a 100644
--- a/storage/src/vespa/storage/common/content_bucket_space.cpp
+++ b/storage/src/vespa/storage/common/content_bucket_space.cpp
@@ -57,4 +57,18 @@ ContentBucketSpace::setNodeUpInLastNodeStateSeenByProvider(bool nodeUpInLastNode
_nodeUpInLastNodeStateSeenByProvider = nodeUpInLastNodeStateSeenByProvider;
}
+bool
+ContentBucketSpace::getNodeMaintenanceInLastNodeStateSeenByProvider() const
+{
+ std::lock_guard guard(_lock);
+ return _nodeMaintenanceInLastNodeStateSeenByProvider;
+}
+
+void
+ContentBucketSpace::setNodeMaintenanceInLastNodeStateSeenByProvider(bool nodeMaintenanceInLastNodeStateSeenByProvider)
+{
+ std::lock_guard guard(_lock);
+ _nodeMaintenanceInLastNodeStateSeenByProvider = nodeMaintenanceInLastNodeStateSeenByProvider;
+}
+
}
diff --git a/storage/src/vespa/storage/common/content_bucket_space.h b/storage/src/vespa/storage/common/content_bucket_space.h
index 63379d6b8ee..836cd6e15f8 100644
--- a/storage/src/vespa/storage/common/content_bucket_space.h
+++ b/storage/src/vespa/storage/common/content_bucket_space.h
@@ -23,6 +23,7 @@ private:
std::shared_ptr<const lib::ClusterState> _clusterState;
std::shared_ptr<const lib::Distribution> _distribution;
bool _nodeUpInLastNodeStateSeenByProvider;
+ bool _nodeMaintenanceInLastNodeStateSeenByProvider;
public:
using UP = std::unique_ptr<ContentBucketSpace>;
@@ -36,6 +37,8 @@ public:
std::shared_ptr<const lib::Distribution> getDistribution() const;
bool getNodeUpInLastNodeStateSeenByProvider() const;
void setNodeUpInLastNodeStateSeenByProvider(bool nodeUpInLastNodeStateSeenByProvider);
+ bool getNodeMaintenanceInLastNodeStateSeenByProvider() const;
+ void setNodeMaintenanceInLastNodeStateSeenByProvider(bool nodeMaintenanceInLastNodeStateSeenByProvider);
};
}
diff --git a/storage/src/vespa/storage/common/content_bucket_space_repo.h b/storage/src/vespa/storage/common/content_bucket_space_repo.h
index 7038e9cb1aa..048c2c266f0 100644
--- a/storage/src/vespa/storage/common/content_bucket_space_repo.h
+++ b/storage/src/vespa/storage/common/content_bucket_space_repo.h
@@ -21,8 +21,8 @@ private:
public:
explicit ContentBucketSpaceRepo(const ContentBucketDbOptions&);
ContentBucketSpace &get(document::BucketSpace bucketSpace) const;
- BucketSpaceMap::const_iterator begin() const { return _map.begin(); }
- BucketSpaceMap::const_iterator end() const { return _map.end(); }
+ BucketSpaceMap::const_iterator begin() const noexcept { return _map.begin(); }
+ BucketSpaceMap::const_iterator end() const noexcept { return _map.end(); }
BucketSpaces getBucketSpaces() const;
size_t getBucketMemoryUsage() const;
diff --git a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp
index c32efb6aa66..fc87845315f 100644
--- a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp
+++ b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp
@@ -23,6 +23,7 @@
#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/vespalib/util/idestructorcallback.h>
#include <vespa/vespalib/util/sequencedtaskexecutor.h>
+#include <algorithm>
#include <thread>
#include <vespa/log/bufferedlogger.h>
@@ -882,28 +883,64 @@ namespace {
};
}
+bool
+FileStorManager::maintenance_in_all_spaces(const lib::Node& node) const noexcept
+{
+ return std::ranges::all_of(_component.getBucketSpaceRepo(), [&](const auto& elem) {
+ ContentBucketSpace& bucket_space = *elem.second;
+ auto derived_cluster_state = bucket_space.getClusterState();
+ return derived_cluster_state->getNodeState(node).getState().oneOf("m");
+ });
+}
+
+bool
+FileStorManager::should_deactivate_buckets(const ContentBucketSpace& space,
+ bool node_up_in_space,
+ bool maintenance_in_all_spaces) noexcept
+{
+ // Important: this MUST match the semantics in proton::BucketHandler::notifyClusterStateChanged()!
+ // Otherwise, the content layer and proton will be out of sync in terms of bucket activation state.
+ if (maintenance_in_all_spaces) {
+ return false;
+ }
+ return ((space.getNodeUpInLastNodeStateSeenByProvider() && !node_up_in_space)
+ || space.getNodeMaintenanceInLastNodeStateSeenByProvider());
+}
+
+void
+FileStorManager::maybe_log_received_cluster_state()
+{
+ if (LOG_WOULD_LOG(debug)) {
+ auto cluster_state_bundle = _component.getStateUpdater().getClusterStateBundle();
+ auto baseline_state = cluster_state_bundle->getBaselineClusterState();
+ LOG(debug, "FileStorManager received baseline cluster state '%s'", baseline_state->toString().c_str());
+ }
+}
+
void
FileStorManager::updateState()
{
- auto clusterStateBundle = _component.getStateUpdater().getClusterStateBundle();
- lib::ClusterState::CSP state(clusterStateBundle->getBaselineClusterState());
- lib::Node node(_component.getNodeType(), _component.getIndex());
+ maybe_log_received_cluster_state();
+ const lib::Node node(_component.getNodeType(), _component.getIndex());
+ const bool in_maintenance = maintenance_in_all_spaces(node);
- LOG(debug, "FileStorManager received cluster state '%s'", state->toString().c_str());
for (const auto &elem : _component.getBucketSpaceRepo()) {
BucketSpace bucketSpace(elem.first);
- ContentBucketSpace &contentBucketSpace = *elem.second;
+ ContentBucketSpace& contentBucketSpace = *elem.second;
auto derivedClusterState = contentBucketSpace.getClusterState();
- bool nodeUp = derivedClusterState->getNodeState(node).getState().oneOf("uir");
- // If edge where we go down
- if (contentBucketSpace.getNodeUpInLastNodeStateSeenByProvider() && !nodeUp) {
- LOG(debug, "Received cluster state where this node is down; de-activating all buckets in database for bucket space %s", bucketSpace.toString().c_str());
+ const bool node_up_in_space = derivedClusterState->getNodeState(node).getState().oneOf("uir");
+ if (should_deactivate_buckets(contentBucketSpace, node_up_in_space, in_maintenance)) {
+ LOG(debug, "Received cluster state where this node is down; de-activating all buckets "
+ "in database for bucket space %s", bucketSpace.toString().c_str());
Deactivator deactivator;
contentBucketSpace.bucketDatabase().for_each_mutable_unordered(
std::ref(deactivator), "FileStorManager::updateState");
}
- contentBucketSpace.setNodeUpInLastNodeStateSeenByProvider(nodeUp);
- spi::ClusterState spiState(*derivedClusterState, _component.getIndex(), *contentBucketSpace.getDistribution());
+ contentBucketSpace.setNodeUpInLastNodeStateSeenByProvider(node_up_in_space);
+ contentBucketSpace.setNodeMaintenanceInLastNodeStateSeenByProvider(in_maintenance);
+ spi::ClusterState spiState(*derivedClusterState, _component.getIndex(),
+ *contentBucketSpace.getDistribution(),
+ in_maintenance);
_provider->setClusterState(bucketSpace, spiState);
}
}
diff --git a/storage/src/vespa/storage/persistence/filestorage/filestormanager.h b/storage/src/vespa/storage/persistence/filestorage/filestormanager.h
index 76cb31f32d4..b7450de13d8 100644
--- a/storage/src/vespa/storage/persistence/filestorage/filestormanager.h
+++ b/storage/src/vespa/storage/persistence/filestorage/filestormanager.h
@@ -39,6 +39,7 @@ namespace api {
}
namespace spi { struct PersistenceProvider; }
+class ContentBucketSpace;
struct FileStorManagerTest;
class ReadBucketList;
class BucketOwnershipNotifier;
@@ -170,6 +171,11 @@ private:
void onFlush(bool downwards) override;
void reportHtmlStatus(std::ostream&, const framework::HttpUrlPath&) const override;
void storageDistributionChanged() override;
+ [[nodiscard]] static bool should_deactivate_buckets(const ContentBucketSpace& space,
+ bool node_up_in_space,
+ bool maintenance_in_all_spaces) noexcept;
+ [[nodiscard]] bool maintenance_in_all_spaces(const lib::Node& node) const noexcept;
+ void maybe_log_received_cluster_state();
void updateState();
void propagateClusterStates();
void update_reported_state_after_db_init();