summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahooinc.com>2022-01-03 14:15:44 +0000
committerTor Brede Vekterli <vekterli@yahooinc.com>2022-01-03 14:56:49 +0000
commit225323f025b0ba5aae956f4c80e071b7d159278d (patch)
tree4935f90ff7e63891f1beb0a16194ac3009c9322b
parent831cd97c7ef749ba486e524122a90e1513a139fd (diff)
Make host info cluster state version reporting correct for deferred state bundles
If an application uses deferred cluster state activations, do not report back a given cluster state version as being in use by the node until the state version has been explicitly activated by the cluster controller. This change is due to the fact that the replication invalidation happens upon recovery mode entry, and for deferred state bundles this takes place when a cluster state is _activated_, not when the distributor is otherwise done gathering bucket info (for a non-deferred bundle the activation happens implicitly at this point). If the state manager reports that the new cluster state is in effect even though it has not been activated, the cluster controller could still end up using stale replication stats, as the invalidation logic has not yet run at this point in time. The cluster controller will ignore any host info responses for older versions, so any stale replication statistics should not be taken into account with this change.
-rw-r--r--storage/src/tests/storageserver/statemanagertest.cpp99
-rw-r--r--storage/src/vespa/storage/storageserver/statemanager.cpp58
-rw-r--r--storage/src/vespa/storage/storageserver/statemanager.h1
3 files changed, 83 insertions, 75 deletions
diff --git a/storage/src/tests/storageserver/statemanagertest.cpp b/storage/src/tests/storageserver/statemanagertest.cpp
index fc22759139b..729976d2dce 100644
--- a/storage/src/tests/storageserver/statemanagertest.cpp
+++ b/storage/src/tests/storageserver/statemanagertest.cpp
@@ -43,6 +43,8 @@ struct StateManagerTest : Test {
std::string get_node_info() const {
return _manager->getNodeInfo();
}
+
+ void extract_cluster_state_version_from_host_info(uint32_t& version_out);
};
StateManagerTest::StateManagerTest()
@@ -54,8 +56,8 @@ StateManagerTest::StateManagerTest()
}
void
-StateManagerTest::SetUp() {
- vdstestlib::DirConfig config(getStandardConfig(true));
+StateManagerTest::SetUp()
+{
_node = std::make_unique<TestServiceLayerApp>(NodeIndex(2));
// Clock will increase 1 sec per call.
_node->getClock().setAbsoluteTimeInSeconds(1);
@@ -85,12 +87,39 @@ StateManagerTest::TearDown() {
_metricManager.reset();
}
-void StateManagerTest::force_current_cluster_state_version(uint32_t version) {
+void
+StateManagerTest::force_current_cluster_state_version(uint32_t version)
+{
ClusterState state(*_manager->getClusterStateBundle()->getBaselineClusterState());
state.setVersion(version);
_manager->setClusterStateBundle(lib::ClusterStateBundle(state));
}
+void
+StateManagerTest::extract_cluster_state_version_from_host_info(uint32_t& version_out)
+{
+ std::string nodeInfoString = get_node_info();
+ vespalib::Slime nodeInfo;
+ vespalib::slime::JsonFormat::decode(nodeInfoString, nodeInfo);
+
+ vespalib::slime::Symbol lookupSymbol = nodeInfo.lookup("cluster-state-version");
+ if (lookupSymbol.undefined()) {
+ FAIL() << "No cluster-state-version was found in the node info";
+ }
+
+ auto& cursor = nodeInfo.get();
+ auto& clusterStateVersionCursor = cursor["cluster-state-version"];
+ if (!clusterStateVersionCursor.valid()) {
+ FAIL() << "No cluster-state-version was found in the node info";
+ }
+
+ if (clusterStateVersionCursor.type().getId() != vespalib::slime::LONG::ID) {
+ FAIL() << "No cluster-state-version was found in the node info";
+ }
+
+ version_out = clusterStateVersionCursor.asLong();
+}
+
#define GET_ONLY_OK_REPLY(varname) \
{ \
ASSERT_EQ(size_t(1), _upper->getNumReplies()); \
@@ -214,29 +243,9 @@ TEST_F(StateManagerTest, reported_node_state) {
TEST_F(StateManagerTest, current_cluster_state_version_is_included_in_host_info_json) {
force_current_cluster_state_version(123);
-
- std::string nodeInfoString = get_node_info();
- vespalib::Memory goldenMemory(nodeInfoString);
- vespalib::Slime nodeInfo;
- vespalib::slime::JsonFormat::decode(nodeInfoString, nodeInfo);
-
- vespalib::slime::Symbol lookupSymbol = nodeInfo.lookup("cluster-state-version");
- if (lookupSymbol.undefined()) {
- FAIL() << "No cluster-state-version was found in the node info";
- }
-
- auto& cursor = nodeInfo.get();
- auto& clusterStateVersionCursor = cursor["cluster-state-version"];
- if (!clusterStateVersionCursor.valid()) {
- FAIL() << "No cluster-state-version was found in the node info";
- }
-
- if (clusterStateVersionCursor.type().getId() != vespalib::slime::LONG::ID) {
- FAIL() << "No cluster-state-version was found in the node info";
- }
-
- int version = clusterStateVersionCursor.asLong();
- EXPECT_EQ(123, version);
+ uint32_t version;
+ ASSERT_NO_FATAL_FAILURE(extract_cluster_state_version_from_host_info(version));
+ EXPECT_EQ(version, 123);
}
void StateManagerTest::mark_reported_node_state_up() {
@@ -349,4 +358,42 @@ TEST_F(StateManagerTest, activation_command_is_bounced_with_current_cluster_stat
EXPECT_EQ(12345, activate_reply.actualVersion());
}
+TEST_F(StateManagerTest, non_deferred_cluster_state_sets_reported_cluster_state_version) {
+ auto cmd = std::make_shared<api::SetSystemStateCommand>(lib::ClusterState("version:1234 distributor:1 storage:1"));
+ cmd->setTimeout(1000s);
+ cmd->setSourceIndex(0);
+ _upper->sendDown(cmd);
+ std::shared_ptr<api::StorageReply> reply;
+ GET_ONLY_OK_REPLY(reply);
+
+ uint32_t version;
+ ASSERT_NO_FATAL_FAILURE(extract_cluster_state_version_from_host_info(version));
+ EXPECT_EQ(version, 1234);
+}
+
+TEST_F(StateManagerTest, deferred_cluster_state_does_not_update_state_until_activation_edge) {
+ force_current_cluster_state_version(100);
+
+ lib::ClusterStateBundle deferred_bundle(lib::ClusterState("version:101 distributor:1 storage:1"), {}, true);
+ auto state_cmd = std::make_shared<api::SetSystemStateCommand>(deferred_bundle);
+ state_cmd->setTimeout(1000s);
+ state_cmd->setSourceIndex(0);
+ _upper->sendDown(state_cmd);
+ std::shared_ptr<api::StorageReply> reply;
+ GET_ONLY_OK_REPLY(reply);
+
+ uint32_t version;
+ ASSERT_NO_FATAL_FAILURE(extract_cluster_state_version_from_host_info(version));
+ EXPECT_EQ(version, 100); // Not yet updated to version 101
+
+ auto activation_cmd = std::make_shared<api::ActivateClusterStateVersionCommand>(101);
+ activation_cmd->setTimeout(1000s);
+ activation_cmd->setSourceIndex(0);
+ _upper->sendDown(activation_cmd);
+ GET_ONLY_OK_REPLY(reply);
+
+ ASSERT_NO_FATAL_FAILURE(extract_cluster_state_version_from_host_info(version));
+ EXPECT_EQ(version, 101);
+}
+
} // storage
diff --git a/storage/src/vespa/storage/storageserver/statemanager.cpp b/storage/src/vespa/storage/storageserver/statemanager.cpp
index 7d08f738abe..7cb66ab447f 100644
--- a/storage/src/vespa/storage/storageserver/statemanager.cpp
+++ b/storage/src/vespa/storage/storageserver/statemanager.cpp
@@ -39,6 +39,7 @@ StateManager::StateManager(StorageComponentRegister& compReg,
_nextNodeState(),
_systemState(std::make_shared<const ClusterStateBundle>(lib::ClusterState())),
_nextSystemState(),
+ _reported_host_info_cluster_state_version(0),
_stateListeners(),
_queuedStateRequests(),
_threadLock(),
@@ -94,58 +95,11 @@ StateManager::print(std::ostream& out, bool verbose,
out << "StateManager()";
}
-#ifdef ENABLE_BUCKET_OPERATION_LOGGING
-namespace {
-
-vespalib::string
-escapeHtml(vespalib::stringref str)
-{
- vespalib::asciistream ss;
- for (size_t i = 0; i < str.size(); ++i) {
- switch (str[i]) {
- case '<':
- ss << "&lt;";
- break;
- case '>':
- ss << "&gt;";
- break;
- case '&':
- ss << "&amp;";
- break;
- default:
- ss << str[i];
- }
- }
- return ss.str();
-}
-
-}
-#endif
-
void
StateManager::reportHtmlStatus(std::ostream& out,
const framework::HttpUrlPath& path) const
{
(void) path;
-#ifdef ENABLE_BUCKET_OPERATION_LOGGING
- if (path.hasAttribute("history")) {
- std::istringstream iss(path.getAttribute("history"), std::istringstream::in);
- uint64_t rawId;
- iss >> std::hex >> rawId;
- document::BucketId bid(rawId);
- out << "<h3>History for " << bid << "</h3>\n";
- vespalib::string history(
- debug::BucketOperationLogger::getInstance().getHistory(bid));
- out << "<pre>" << escapeHtml(history) << "</pre>\n";
- return;
- } else if (path.hasAttribute("search")) {
- vespalib::string substr(path.getAttribute("search"));
- out << debug::BucketOperationLogger::getInstance()
- .searchBucketHistories(substr, "/systemstate?history=");
- return;
- }
-#endif
-
{
std::lock_guard lock(_stateLock);
const auto &baseLineClusterState = _systemState->getBaselineClusterState();
@@ -342,6 +296,9 @@ StateManager::enableNextClusterState()
// overwritten by a non-null pending cluster state afterwards.
logNodeClusterStateTransition(*_systemState, *_nextSystemState);
_systemState = _nextSystemState;
+ if (!_nextSystemState->deferredActivation()) {
+ _reported_host_info_cluster_state_version = _systemState->getVersion();
+ } // else: reported version updated upon explicit activation edge
_nextSystemState.reset();
_systemStateHistory.emplace_back(_component.getClock().getTimeInMillis(), _systemState);
}
@@ -511,7 +468,10 @@ StateManager::onActivateClusterStateVersion(
auto reply = std::make_shared<api::ActivateClusterStateVersionReply>(*cmd);
{
std::lock_guard lock(_stateLock);
- reply->setActualVersion(_systemState ? _systemState->getVersion() : 0);
+ reply->setActualVersion(_systemState->getVersion());
+ if (cmd->version() == _systemState->getVersion()) {
+ _reported_host_info_cluster_state_version = _systemState->getVersion();
+ }
}
sendUp(reply);
return true;
@@ -610,7 +570,7 @@ StateManager::getNodeInfo() const
// _systemLock.
// - getNodeInfo() (this function) always acquires the same lock.
std::lock_guard guard(_stateLock);
- stream << "cluster-state-version" << _systemState->getVersion();
+ stream << "cluster-state-version" << _reported_host_info_cluster_state_version;
_hostInfo->printReport(stream);
stream << End();
diff --git a/storage/src/vespa/storage/storageserver/statemanager.h b/storage/src/vespa/storage/storageserver/statemanager.h
index 7f8320567bf..194991723f4 100644
--- a/storage/src/vespa/storage/storageserver/statemanager.h
+++ b/storage/src/vespa/storage/storageserver/statemanager.h
@@ -51,6 +51,7 @@ class StateManager : public NodeStateUpdater,
using ClusterStateBundle = lib::ClusterStateBundle;
std::shared_ptr<const ClusterStateBundle> _systemState;
std::shared_ptr<const ClusterStateBundle> _nextSystemState;
+ uint32_t _reported_host_info_cluster_state_version;
std::list<StateListener*> _stateListeners;
typedef std::pair<framework::MilliSecTime, api::GetNodeStateCommand::SP> TimeStatePair;
std::list<TimeStatePair> _queuedStateRequests;