summaryrefslogtreecommitdiffstats
path: root/persistence
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahooinc.com>2021-11-23 15:46:19 +0000
committerTor Brede Vekterli <vekterli@yahooinc.com>2021-11-24 10:13:50 +0000
commit7fe561bae2574f6ce70fe3255c57ff7308310b9a (patch)
treebdd8d0c9ad1f1f2cbfe302a8c96661401ff28c16 /persistence
parenta428b8f2754eec0b3748451b7aca79bc00d0a04e (diff)
Handle case where bucket spaces have differing maintenance state for a node
Only skip deactivating buckets if the entire _node_ is marked as maintenance state, i.e. the node has maintenance state across all bucket spaces provided in the bundle. Otherwise treat the state transition as if the node goes down, deactivating all buckets. Also ensure that the bucket deactivation logic above the SPI is identical to that within Proton. This avoids bucket DBs getting out of sync between the two.
Diffstat (limited to 'persistence')
-rw-r--r--persistence/src/tests/spi/clusterstatetest.cpp20
-rw-r--r--persistence/src/vespa/persistence/spi/clusterstate.cpp13
-rw-r--r--persistence/src/vespa/persistence/spi/clusterstate.h21
3 files changed, 29 insertions, 25 deletions
diff --git a/persistence/src/tests/spi/clusterstatetest.cpp b/persistence/src/tests/spi/clusterstatetest.cpp
index 9f7507c992b..ac67903244f 100644
--- a/persistence/src/tests/spi/clusterstatetest.cpp
+++ b/persistence/src/tests/spi/clusterstatetest.cpp
@@ -238,26 +238,26 @@ namespace {
bool
node_marked_as_maintenance_in_state(const std::string& stateStr,
const lib::Distribution& d,
- uint16_t node)
+ uint16_t node,
+ bool maintenance_in_all_spaces)
{
lib::ClusterState s(stateStr);
- ClusterState state(s, node, d);
+ ClusterState state(s, node, d, maintenance_in_all_spaces);
return state.nodeMaintenance();
}
}
-TEST(ClusterStateTest, can_infer_own_node_maintenance_state)
+// We want to track the maintenance state for the _node_, not just the _bucket space_.
+TEST(ClusterStateTest, node_maintenance_state_is_set_independent_of_bucket_space_state_string)
{
lib::Distribution d(lib::Distribution::getDefaultDistributionConfig(3, 3));
- EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3", d, 0));
- EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:i", d, 0));
- EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:d", d, 0));
- EXPECT_TRUE( node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:m", d, 0));
- EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:r", d, 0));
- EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:m", d, 1));
- EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .1.s:m", d, 0));
+ // Note: it doesn't actually matter what the cluster state string itself says here
+ EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3", d, 0, false));
+ EXPECT_TRUE(node_marked_as_maintenance_in_state("distributor:3 storage:3", d, 0, true));
+ EXPECT_TRUE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:d", d, 0, true));
+ EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:m", d, 0, false));
}
}
diff --git a/persistence/src/vespa/persistence/spi/clusterstate.cpp b/persistence/src/vespa/persistence/spi/clusterstate.cpp
index a244b607391..f82e6165fb8 100644
--- a/persistence/src/vespa/persistence/spi/clusterstate.cpp
+++ b/persistence/src/vespa/persistence/spi/clusterstate.cpp
@@ -14,10 +14,12 @@ namespace storage::spi {
ClusterState::ClusterState(const lib::ClusterState& state,
uint16_t nodeIndex,
- const lib::Distribution& distribution)
+ const lib::Distribution& distribution,
+ bool maintenanceInAllSpaces)
: _state(std::make_unique<lib::ClusterState>(state)),
_distribution(std::make_unique<lib::Distribution>(distribution.serialize())),
- _nodeIndex(nodeIndex)
+ _nodeIndex(nodeIndex),
+ _maintenanceInAllSpaces(maintenanceInAllSpaces)
{
}
@@ -33,14 +35,11 @@ void ClusterState::deserialize(vespalib::nbostream& i) {
_distribution = std::make_unique<lib::Distribution>(distribution);
}
-ClusterState::ClusterState(vespalib::nbostream& i) {
- deserialize(i);
-}
-
ClusterState::ClusterState(const ClusterState& other) {
vespalib::nbostream o;
other.serialize(o);
deserialize(o);
+ _maintenanceInAllSpaces = other._maintenanceInAllSpaces;
}
ClusterState::~ClusterState() = default;
@@ -91,7 +90,7 @@ bool ClusterState::nodeRetired() const noexcept {
}
bool ClusterState::nodeMaintenance() const noexcept {
- return nodeHasStateOneOf("m");
+ return _maintenanceInAllSpaces;
}
void ClusterState::serialize(vespalib::nbostream& o) const {
diff --git a/persistence/src/vespa/persistence/spi/clusterstate.h b/persistence/src/vespa/persistence/spi/clusterstate.h
index df556d09cb7..bde7e5bdbf4 100644
--- a/persistence/src/vespa/persistence/spi/clusterstate.h
+++ b/persistence/src/vespa/persistence/spi/clusterstate.h
@@ -23,9 +23,9 @@ public:
ClusterState(const lib::ClusterState& state,
uint16_t nodeIndex,
- const lib::Distribution& distribution);
+ const lib::Distribution& distribution,
+ bool maintenanceInAllSpaces = false);
- ClusterState(vespalib::nbostream& i);
ClusterState(const ClusterState& other);
ClusterState& operator=(const ClusterState& other) = delete;
~ClusterState();
@@ -45,28 +45,32 @@ public:
* compared to the complete list of nodes, and deigns the system to be
* unusable.
*/
- bool clusterUp() const noexcept;
+ [[nodiscard]] bool clusterUp() const noexcept;
/**
* Returns false if this node has been set in a state where it should not
* receive external load.
+ *
+ * TODO rename to indicate bucket space affinity.
*/
- bool nodeUp() const noexcept;
+ [[nodiscard]] bool nodeUp() const noexcept;
/**
* Returns true iff this node is marked as Initializing in the cluster state.
+ *
+ * TODO remove, init no longer used internally.
*/
- bool nodeInitializing() const noexcept;
+ [[nodiscard]] bool nodeInitializing() const noexcept;
/**
* Returns true iff this node is marked as Retired in the cluster state.
*/
- bool nodeRetired() const noexcept;
+ [[nodiscard]] bool nodeRetired() const noexcept;
/**
- * Returns true iff this node is marked as Maintenance in the cluster state.
+ * Returns true iff this node is marked as Maintenance in all bucket space cluster states.
*/
- bool nodeMaintenance() const noexcept;
+ [[nodiscard]] bool nodeMaintenance() const noexcept;
/**
* Returns a serialized form of this object.
@@ -77,6 +81,7 @@ private:
std::unique_ptr<lib::ClusterState> _state;
std::unique_ptr<lib::Distribution> _distribution;
uint16_t _nodeIndex;
+ bool _maintenanceInAllSpaces;
void deserialize(vespalib::nbostream&);
bool nodeHasStateOneOf(const char* states) const noexcept;