From 225323f025b0ba5aae956f4c80e071b7d159278d Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Mon, 3 Jan 2022 14:15:44 +0000 Subject: Make host info cluster state version reporting correct for deferred state bundles If an application uses deferred cluster state activations, do not report back a given cluster state version as being in use by the node until the state version has been explicitly activated by the cluster controller. This change is due to the fact that the replication invalidation happens upon recovery mode entry, and for deferred state bundles this takes place when a cluster state is _activated_, not when the distributor is otherwise done gathering bucket info (for a non-deferred bundle the activation happens implicitly at this point). If the state manager reports that the new cluster state is in effect even though it has not been activated, the cluster controller could still end up using stale replication stats, as the invalidation logic has not yet run at this point in time. The cluster controller will ignore any host info responses for older versions, so any stale replication statistics should not be taken into account with this change. --- .../src/tests/storageserver/statemanagertest.cpp | 99 ++++++++++++++++------ 1 file changed, 73 insertions(+), 26 deletions(-) (limited to 'storage/src/tests/storageserver') diff --git a/storage/src/tests/storageserver/statemanagertest.cpp b/storage/src/tests/storageserver/statemanagertest.cpp index fc22759139b..729976d2dce 100644 --- a/storage/src/tests/storageserver/statemanagertest.cpp +++ b/storage/src/tests/storageserver/statemanagertest.cpp @@ -43,6 +43,8 @@ struct StateManagerTest : Test { std::string get_node_info() const { return _manager->getNodeInfo(); } + + void extract_cluster_state_version_from_host_info(uint32_t& version_out); }; StateManagerTest::StateManagerTest() @@ -54,8 +56,8 @@ StateManagerTest::StateManagerTest() } void -StateManagerTest::SetUp() { - vdstestlib::DirConfig config(getStandardConfig(true)); +StateManagerTest::SetUp() +{ _node = std::make_unique(NodeIndex(2)); // Clock will increase 1 sec per call. _node->getClock().setAbsoluteTimeInSeconds(1); @@ -85,12 +87,39 @@ StateManagerTest::TearDown() { _metricManager.reset(); } -void StateManagerTest::force_current_cluster_state_version(uint32_t version) { +void +StateManagerTest::force_current_cluster_state_version(uint32_t version) +{ ClusterState state(*_manager->getClusterStateBundle()->getBaselineClusterState()); state.setVersion(version); _manager->setClusterStateBundle(lib::ClusterStateBundle(state)); } +void +StateManagerTest::extract_cluster_state_version_from_host_info(uint32_t& version_out) +{ + std::string nodeInfoString = get_node_info(); + vespalib::Slime nodeInfo; + vespalib::slime::JsonFormat::decode(nodeInfoString, nodeInfo); + + vespalib::slime::Symbol lookupSymbol = nodeInfo.lookup("cluster-state-version"); + if (lookupSymbol.undefined()) { + FAIL() << "No cluster-state-version was found in the node info"; + } + + auto& cursor = nodeInfo.get(); + auto& clusterStateVersionCursor = cursor["cluster-state-version"]; + if (!clusterStateVersionCursor.valid()) { + FAIL() << "No cluster-state-version was found in the node info"; + } + + if (clusterStateVersionCursor.type().getId() != vespalib::slime::LONG::ID) { + FAIL() << "No cluster-state-version was found in the node info"; + } + + version_out = clusterStateVersionCursor.asLong(); +} + #define GET_ONLY_OK_REPLY(varname) \ { \ ASSERT_EQ(size_t(1), _upper->getNumReplies()); \ @@ -214,29 +243,9 @@ TEST_F(StateManagerTest, reported_node_state) { TEST_F(StateManagerTest, current_cluster_state_version_is_included_in_host_info_json) { force_current_cluster_state_version(123); - - std::string nodeInfoString = get_node_info(); - vespalib::Memory goldenMemory(nodeInfoString); - vespalib::Slime nodeInfo; - vespalib::slime::JsonFormat::decode(nodeInfoString, nodeInfo); - - vespalib::slime::Symbol lookupSymbol = nodeInfo.lookup("cluster-state-version"); - if (lookupSymbol.undefined()) { - FAIL() << "No cluster-state-version was found in the node info"; - } - - auto& cursor = nodeInfo.get(); - auto& clusterStateVersionCursor = cursor["cluster-state-version"]; - if (!clusterStateVersionCursor.valid()) { - FAIL() << "No cluster-state-version was found in the node info"; - } - - if (clusterStateVersionCursor.type().getId() != vespalib::slime::LONG::ID) { - FAIL() << "No cluster-state-version was found in the node info"; - } - - int version = clusterStateVersionCursor.asLong(); - EXPECT_EQ(123, version); + uint32_t version; + ASSERT_NO_FATAL_FAILURE(extract_cluster_state_version_from_host_info(version)); + EXPECT_EQ(version, 123); } void StateManagerTest::mark_reported_node_state_up() { @@ -349,4 +358,42 @@ TEST_F(StateManagerTest, activation_command_is_bounced_with_current_cluster_stat EXPECT_EQ(12345, activate_reply.actualVersion()); } +TEST_F(StateManagerTest, non_deferred_cluster_state_sets_reported_cluster_state_version) { + auto cmd = std::make_shared(lib::ClusterState("version:1234 distributor:1 storage:1")); + cmd->setTimeout(1000s); + cmd->setSourceIndex(0); + _upper->sendDown(cmd); + std::shared_ptr reply; + GET_ONLY_OK_REPLY(reply); + + uint32_t version; + ASSERT_NO_FATAL_FAILURE(extract_cluster_state_version_from_host_info(version)); + EXPECT_EQ(version, 1234); +} + +TEST_F(StateManagerTest, deferred_cluster_state_does_not_update_state_until_activation_edge) { + force_current_cluster_state_version(100); + + lib::ClusterStateBundle deferred_bundle(lib::ClusterState("version:101 distributor:1 storage:1"), {}, true); + auto state_cmd = std::make_shared(deferred_bundle); + state_cmd->setTimeout(1000s); + state_cmd->setSourceIndex(0); + _upper->sendDown(state_cmd); + std::shared_ptr reply; + GET_ONLY_OK_REPLY(reply); + + uint32_t version; + ASSERT_NO_FATAL_FAILURE(extract_cluster_state_version_from_host_info(version)); + EXPECT_EQ(version, 100); // Not yet updated to version 101 + + auto activation_cmd = std::make_shared(101); + activation_cmd->setTimeout(1000s); + activation_cmd->setSourceIndex(0); + _upper->sendDown(activation_cmd); + GET_ONLY_OK_REPLY(reply); + + ASSERT_NO_FATAL_FAILURE(extract_cluster_state_version_from_host_info(version)); + EXPECT_EQ(version, 101); +} + } // storage -- cgit v1.2.3