summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahooinc.com>2022-01-03 16:39:47 +0100
committerGitHub <noreply@github.com>2022-01-03 16:39:47 +0100
commit96ede726dd237b16ea2b3a0f1b1c1c0a9cce835e (patch)
tree1c053f21a334c332a6dc125e4e40522fb83198f2
parentc8068aed934dbd3451654915aa8bbcdf37aefbd5 (diff)
parent225323f025b0ba5aae956f4c80e071b7d159278d (diff)
Merge pull request #20638 from vespa-engine/vekterli/invalidate-min-replica-stats-on-recovery-mode-entry
Invalidate bucket DB replica statistics upon recovery mode entry [run-systemtest]
-rw-r--r--storage/src/tests/distributor/top_level_distributor_test.cpp30
-rw-r--r--storage/src/tests/storageserver/statemanagertest.cpp99
-rw-r--r--storage/src/vespa/storage/common/hostreporter/hostinfo.cpp3
-rw-r--r--storage/src/vespa/storage/common/hostreporter/hostreporter.h2
-rw-r--r--storage/src/vespa/storage/distributor/distributor_stripe.cpp36
-rw-r--r--storage/src/vespa/storage/distributor/distributor_stripe.h4
-rw-r--r--storage/src/vespa/storage/distributor/min_replica_provider.h2
-rw-r--r--storage/src/vespa/storage/storageserver/statemanager.cpp58
-rw-r--r--storage/src/vespa/storage/storageserver/statemanager.h1
9 files changed, 144 insertions, 91 deletions
diff --git a/storage/src/tests/distributor/top_level_distributor_test.cpp b/storage/src/tests/distributor/top_level_distributor_test.cpp
index 548f0f6be2c..a0477e352d1 100644
--- a/storage/src/tests/distributor/top_level_distributor_test.cpp
+++ b/storage/src/tests/distributor/top_level_distributor_test.cpp
@@ -90,6 +90,10 @@ struct TopLevelDistributorTest : Test, TopLevelDistributorTestUtil {
return _distributor->getBucketSpacesStats();
}
+ std::unordered_map<uint16_t, uint32_t> distributor_min_replica_stats() {
+ return _distributor->getMinReplica();
+ }
+
uint64_t db_sample_interval_sec() const noexcept {
// Sampling interval is equal across stripes, so just grab the first one and go with it.
return std::chrono::duration_cast<std::chrono::seconds>(
@@ -471,7 +475,7 @@ TEST_F(TopLevelDistributorTest, host_info_reporter_config_is_propagated_to_repor
namespace {
-void assert_invalid_stats_for_all_spaces(
+void assert_invalid_bucket_stats_for_all_spaces(
const BucketSpacesStatsProvider::PerNodeBucketSpacesStats& stats,
uint16_t node_index)
{
@@ -486,9 +490,15 @@ void assert_invalid_stats_for_all_spaces(
ASSERT_FALSE(space_iter->second.valid());
}
+void assert_min_replica_stats_zeroed(const std::unordered_map<uint16_t, uint32_t>& stats, uint16_t node_index) {
+ auto iter = stats.find(node_index);
+ ASSERT_TRUE(iter != stats.cend());
+ EXPECT_EQ(iter->second, 0);
}
-TEST_F(TopLevelDistributorTest, entering_recovery_mode_resets_bucket_space_stats_across_all_stripes) {
+}
+
+TEST_F(TopLevelDistributorTest, entering_recovery_mode_resets_bucket_space_and_min_replica_stats_across_all_stripes) {
// Set up a cluster state + DB contents which implies merge maintenance ops
setup_distributor(Redundancy(2), NodeCount(2), "version:1 distributor:1 storage:2");
add_nodes_to_stripe_bucket_db(document::BucketId(16, 1), "0=1/1/1/t/a");
@@ -503,10 +513,18 @@ TEST_F(TopLevelDistributorTest, entering_recovery_mode_resets_bucket_space_stats
// from state version 2. Exposing stats from version 1 risks reporting stale
// information back to the cluster controller.
const auto stats = distributor_bucket_spaces_stats();
- ASSERT_EQ(2, stats.size());
-
- assert_invalid_stats_for_all_spaces(stats, 0);
- assert_invalid_stats_for_all_spaces(stats, 2);
+ ASSERT_EQ(stats.size(), 2);
+
+ assert_invalid_bucket_stats_for_all_spaces(stats, 0);
+ assert_invalid_bucket_stats_for_all_spaces(stats, 2);
+
+ auto min_replica_stats = distributor_min_replica_stats();
+ ASSERT_EQ(min_replica_stats.size(), 2);
+ assert_min_replica_stats_zeroed(min_replica_stats, 0);
+ // Even though we don't have any replicas on node 2 in the DB, we don't know this until
+ // we've completed a full DB scan and updated the stats. Until that point in time we
+ // have to assume we _do_ have replicas with an unknown replication factor.
+ assert_min_replica_stats_zeroed(min_replica_stats, 2);
}
TEST_F(TopLevelDistributorTest, leaving_recovery_mode_immediately_sends_getnodestate_replies) {
diff --git a/storage/src/tests/storageserver/statemanagertest.cpp b/storage/src/tests/storageserver/statemanagertest.cpp
index fc22759139b..729976d2dce 100644
--- a/storage/src/tests/storageserver/statemanagertest.cpp
+++ b/storage/src/tests/storageserver/statemanagertest.cpp
@@ -43,6 +43,8 @@ struct StateManagerTest : Test {
std::string get_node_info() const {
return _manager->getNodeInfo();
}
+
+ void extract_cluster_state_version_from_host_info(uint32_t& version_out);
};
StateManagerTest::StateManagerTest()
@@ -54,8 +56,8 @@ StateManagerTest::StateManagerTest()
}
void
-StateManagerTest::SetUp() {
- vdstestlib::DirConfig config(getStandardConfig(true));
+StateManagerTest::SetUp()
+{
_node = std::make_unique<TestServiceLayerApp>(NodeIndex(2));
// Clock will increase 1 sec per call.
_node->getClock().setAbsoluteTimeInSeconds(1);
@@ -85,12 +87,39 @@ StateManagerTest::TearDown() {
_metricManager.reset();
}
-void StateManagerTest::force_current_cluster_state_version(uint32_t version) {
+void
+StateManagerTest::force_current_cluster_state_version(uint32_t version)
+{
ClusterState state(*_manager->getClusterStateBundle()->getBaselineClusterState());
state.setVersion(version);
_manager->setClusterStateBundle(lib::ClusterStateBundle(state));
}
+void
+StateManagerTest::extract_cluster_state_version_from_host_info(uint32_t& version_out)
+{
+ std::string nodeInfoString = get_node_info();
+ vespalib::Slime nodeInfo;
+ vespalib::slime::JsonFormat::decode(nodeInfoString, nodeInfo);
+
+ vespalib::slime::Symbol lookupSymbol = nodeInfo.lookup("cluster-state-version");
+ if (lookupSymbol.undefined()) {
+ FAIL() << "No cluster-state-version was found in the node info";
+ }
+
+ auto& cursor = nodeInfo.get();
+ auto& clusterStateVersionCursor = cursor["cluster-state-version"];
+ if (!clusterStateVersionCursor.valid()) {
+ FAIL() << "No cluster-state-version was found in the node info";
+ }
+
+ if (clusterStateVersionCursor.type().getId() != vespalib::slime::LONG::ID) {
+ FAIL() << "No cluster-state-version was found in the node info";
+ }
+
+ version_out = clusterStateVersionCursor.asLong();
+}
+
#define GET_ONLY_OK_REPLY(varname) \
{ \
ASSERT_EQ(size_t(1), _upper->getNumReplies()); \
@@ -214,29 +243,9 @@ TEST_F(StateManagerTest, reported_node_state) {
TEST_F(StateManagerTest, current_cluster_state_version_is_included_in_host_info_json) {
force_current_cluster_state_version(123);
-
- std::string nodeInfoString = get_node_info();
- vespalib::Memory goldenMemory(nodeInfoString);
- vespalib::Slime nodeInfo;
- vespalib::slime::JsonFormat::decode(nodeInfoString, nodeInfo);
-
- vespalib::slime::Symbol lookupSymbol = nodeInfo.lookup("cluster-state-version");
- if (lookupSymbol.undefined()) {
- FAIL() << "No cluster-state-version was found in the node info";
- }
-
- auto& cursor = nodeInfo.get();
- auto& clusterStateVersionCursor = cursor["cluster-state-version"];
- if (!clusterStateVersionCursor.valid()) {
- FAIL() << "No cluster-state-version was found in the node info";
- }
-
- if (clusterStateVersionCursor.type().getId() != vespalib::slime::LONG::ID) {
- FAIL() << "No cluster-state-version was found in the node info";
- }
-
- int version = clusterStateVersionCursor.asLong();
- EXPECT_EQ(123, version);
+ uint32_t version;
+ ASSERT_NO_FATAL_FAILURE(extract_cluster_state_version_from_host_info(version));
+ EXPECT_EQ(version, 123);
}
void StateManagerTest::mark_reported_node_state_up() {
@@ -349,4 +358,42 @@ TEST_F(StateManagerTest, activation_command_is_bounced_with_current_cluster_stat
EXPECT_EQ(12345, activate_reply.actualVersion());
}
+TEST_F(StateManagerTest, non_deferred_cluster_state_sets_reported_cluster_state_version) {
+ auto cmd = std::make_shared<api::SetSystemStateCommand>(lib::ClusterState("version:1234 distributor:1 storage:1"));
+ cmd->setTimeout(1000s);
+ cmd->setSourceIndex(0);
+ _upper->sendDown(cmd);
+ std::shared_ptr<api::StorageReply> reply;
+ GET_ONLY_OK_REPLY(reply);
+
+ uint32_t version;
+ ASSERT_NO_FATAL_FAILURE(extract_cluster_state_version_from_host_info(version));
+ EXPECT_EQ(version, 1234);
+}
+
+TEST_F(StateManagerTest, deferred_cluster_state_does_not_update_state_until_activation_edge) {
+ force_current_cluster_state_version(100);
+
+ lib::ClusterStateBundle deferred_bundle(lib::ClusterState("version:101 distributor:1 storage:1"), {}, true);
+ auto state_cmd = std::make_shared<api::SetSystemStateCommand>(deferred_bundle);
+ state_cmd->setTimeout(1000s);
+ state_cmd->setSourceIndex(0);
+ _upper->sendDown(state_cmd);
+ std::shared_ptr<api::StorageReply> reply;
+ GET_ONLY_OK_REPLY(reply);
+
+ uint32_t version;
+ ASSERT_NO_FATAL_FAILURE(extract_cluster_state_version_from_host_info(version));
+ EXPECT_EQ(version, 100); // Not yet updated to version 101
+
+ auto activation_cmd = std::make_shared<api::ActivateClusterStateVersionCommand>(101);
+ activation_cmd->setTimeout(1000s);
+ activation_cmd->setSourceIndex(0);
+ _upper->sendDown(activation_cmd);
+ GET_ONLY_OK_REPLY(reply);
+
+ ASSERT_NO_FATAL_FAILURE(extract_cluster_state_version_from_host_info(version));
+ EXPECT_EQ(version, 101);
+}
+
} // storage
diff --git a/storage/src/vespa/storage/common/hostreporter/hostinfo.cpp b/storage/src/vespa/storage/common/hostreporter/hostinfo.cpp
index f15885769e6..7ff999735c0 100644
--- a/storage/src/vespa/storage/common/hostreporter/hostinfo.cpp
+++ b/storage/src/vespa/storage/common/hostreporter/hostinfo.cpp
@@ -9,8 +9,7 @@ HostInfo::HostInfo() {
registerReporter(&versionReporter);
}
-HostInfo::~HostInfo() {
-}
+HostInfo::~HostInfo() = default;
void HostInfo::printReport(vespalib::JsonStream& report) {
for (HostReporter* reporter : customReporters) {
diff --git a/storage/src/vespa/storage/common/hostreporter/hostreporter.h b/storage/src/vespa/storage/common/hostreporter/hostreporter.h
index 115115328cc..6ce6bd803df 100644
--- a/storage/src/vespa/storage/common/hostreporter/hostreporter.h
+++ b/storage/src/vespa/storage/common/hostreporter/hostreporter.h
@@ -11,7 +11,7 @@ namespace storage {
class HostReporter {
public:
virtual void report(vespalib::JsonStream& jsonreport) = 0;
- virtual ~HostReporter() {}
+ virtual ~HostReporter() = default;
};
}
diff --git a/storage/src/vespa/storage/distributor/distributor_stripe.cpp b/storage/src/vespa/storage/distributor/distributor_stripe.cpp
index 50c70306d92..bcba976f2c3 100644
--- a/storage/src/vespa/storage/distributor/distributor_stripe.cpp
+++ b/storage/src/vespa/storage/distributor/distributor_stripe.cpp
@@ -316,9 +316,12 @@ DistributorStripe::enterRecoveryMode()
LOG(debug, "Entering recovery mode");
_schedulingMode = MaintenanceScheduler::RECOVERY_SCHEDULING_MODE;
_scanner->reset();
- _bucketDBMetricUpdater.reset();
- // TODO reset _bucketDbStats?
- invalidate_bucket_spaces_stats();
+ // We enter recovery mode due to cluster state or distribution config changes.
+ // Until we have completed a new DB scan round, we don't know the state of our
+ // newly owned buckets and must not report stats for these out to the cluster
+ // controller as they will be stale (valid only for the _previous_ state/config).
+ // As a consequence, we must explicitly invalidate all such statistics in this edge.
+ invalidate_internal_db_dependent_stats();
_recoveryTimeStarted = framework::MilliSecTimer(_component.getClock());
}
@@ -337,6 +340,17 @@ DistributorStripe::leaveRecoveryMode()
_schedulingMode = MaintenanceScheduler::NORMAL_SCHEDULING_MODE;
}
+void
+DistributorStripe::invalidate_internal_db_dependent_stats()
+{
+ _bucketDBMetricUpdater.reset();
+ {
+ std::lock_guard guard(_metricLock);
+ invalidate_bucket_spaces_stats(guard);
+ invalidate_min_replica_stats(guard);
+ }
+}
+
template <typename NodeFunctor>
void DistributorStripe::for_each_available_content_node_in(const lib::ClusterState& state, NodeFunctor&& func) {
const auto node_count = state.getNodeCount(lib::NodeType::STORAGE);
@@ -357,8 +371,9 @@ BucketSpacesStatsProvider::BucketSpacesStats DistributorStripe::make_invalid_sta
return invalid_space_stats;
}
-void DistributorStripe::invalidate_bucket_spaces_stats() {
- std::lock_guard guard(_metricLock);
+void
+DistributorStripe::invalidate_bucket_spaces_stats([[maybe_unused]] std::lock_guard<std::mutex>& held_metric_lock)
+{
_bucketSpacesStats = BucketSpacesStatsProvider::PerNodeBucketSpacesStats();
auto invalid_space_stats = make_invalid_stats_per_configured_space();
@@ -369,6 +384,17 @@ void DistributorStripe::invalidate_bucket_spaces_stats() {
}
void
+DistributorStripe::invalidate_min_replica_stats([[maybe_unused]] std::lock_guard<std::mutex>& held_metric_lock)
+{
+ _bucketDbStats._minBucketReplica.clear();
+ // Insert an explicit zero value for all nodes that are up in the pending/current cluster state
+ const auto& baseline = *_clusterStateBundle.getBaselineClusterState();
+ for_each_available_content_node_in(baseline, [this](const lib::Node& node) {
+ _bucketDbStats._minBucketReplica[node.getIndex()] = 0;
+ });
+}
+
+void
DistributorStripe::recheckBucketInfo(uint16_t nodeIdx, const document::Bucket &bucket) {
_bucketDBUpdater.recheckBucketInfo(nodeIdx, bucket);
}
diff --git a/storage/src/vespa/storage/distributor/distributor_stripe.h b/storage/src/vespa/storage/distributor/distributor_stripe.h
index ce6a2071efd..809b4dd0e41 100644
--- a/storage/src/vespa/storage/distributor/distributor_stripe.h
+++ b/storage/src/vespa/storage/distributor/distributor_stripe.h
@@ -259,7 +259,9 @@ private:
BucketSpacesStatsProvider::BucketSpacesStats make_invalid_stats_per_configured_space() const;
template <typename NodeFunctor>
void for_each_available_content_node_in(const lib::ClusterState&, NodeFunctor&&);
- void invalidate_bucket_spaces_stats();
+ void invalidate_internal_db_dependent_stats();
+ void invalidate_bucket_spaces_stats(std::lock_guard<std::mutex>& held_metric_lock);
+ void invalidate_min_replica_stats(std::lock_guard<std::mutex>& held_metric_lock);
void send_updated_host_info_if_required();
void propagate_config_snapshot_to_internal_components();
diff --git a/storage/src/vespa/storage/distributor/min_replica_provider.h b/storage/src/vespa/storage/distributor/min_replica_provider.h
index 56fd1e8fc81..a4374b906fe 100644
--- a/storage/src/vespa/storage/distributor/min_replica_provider.h
+++ b/storage/src/vespa/storage/distributor/min_replica_provider.h
@@ -9,7 +9,7 @@ namespace storage::distributor {
class MinReplicaProvider
{
public:
- virtual ~MinReplicaProvider() {}
+ virtual ~MinReplicaProvider() = default;
/**
* Get a snapshot of the minimum bucket replica for each of the nodes.
diff --git a/storage/src/vespa/storage/storageserver/statemanager.cpp b/storage/src/vespa/storage/storageserver/statemanager.cpp
index 7d08f738abe..7cb66ab447f 100644
--- a/storage/src/vespa/storage/storageserver/statemanager.cpp
+++ b/storage/src/vespa/storage/storageserver/statemanager.cpp
@@ -39,6 +39,7 @@ StateManager::StateManager(StorageComponentRegister& compReg,
_nextNodeState(),
_systemState(std::make_shared<const ClusterStateBundle>(lib::ClusterState())),
_nextSystemState(),
+ _reported_host_info_cluster_state_version(0),
_stateListeners(),
_queuedStateRequests(),
_threadLock(),
@@ -94,58 +95,11 @@ StateManager::print(std::ostream& out, bool verbose,
out << "StateManager()";
}
-#ifdef ENABLE_BUCKET_OPERATION_LOGGING
-namespace {
-
-vespalib::string
-escapeHtml(vespalib::stringref str)
-{
- vespalib::asciistream ss;
- for (size_t i = 0; i < str.size(); ++i) {
- switch (str[i]) {
- case '<':
- ss << "&lt;";
- break;
- case '>':
- ss << "&gt;";
- break;
- case '&':
- ss << "&amp;";
- break;
- default:
- ss << str[i];
- }
- }
- return ss.str();
-}
-
-}
-#endif
-
void
StateManager::reportHtmlStatus(std::ostream& out,
const framework::HttpUrlPath& path) const
{
(void) path;
-#ifdef ENABLE_BUCKET_OPERATION_LOGGING
- if (path.hasAttribute("history")) {
- std::istringstream iss(path.getAttribute("history"), std::istringstream::in);
- uint64_t rawId;
- iss >> std::hex >> rawId;
- document::BucketId bid(rawId);
- out << "<h3>History for " << bid << "</h3>\n";
- vespalib::string history(
- debug::BucketOperationLogger::getInstance().getHistory(bid));
- out << "<pre>" << escapeHtml(history) << "</pre>\n";
- return;
- } else if (path.hasAttribute("search")) {
- vespalib::string substr(path.getAttribute("search"));
- out << debug::BucketOperationLogger::getInstance()
- .searchBucketHistories(substr, "/systemstate?history=");
- return;
- }
-#endif
-
{
std::lock_guard lock(_stateLock);
const auto &baseLineClusterState = _systemState->getBaselineClusterState();
@@ -342,6 +296,9 @@ StateManager::enableNextClusterState()
// overwritten by a non-null pending cluster state afterwards.
logNodeClusterStateTransition(*_systemState, *_nextSystemState);
_systemState = _nextSystemState;
+ if (!_nextSystemState->deferredActivation()) {
+ _reported_host_info_cluster_state_version = _systemState->getVersion();
+ } // else: reported version updated upon explicit activation edge
_nextSystemState.reset();
_systemStateHistory.emplace_back(_component.getClock().getTimeInMillis(), _systemState);
}
@@ -511,7 +468,10 @@ StateManager::onActivateClusterStateVersion(
auto reply = std::make_shared<api::ActivateClusterStateVersionReply>(*cmd);
{
std::lock_guard lock(_stateLock);
- reply->setActualVersion(_systemState ? _systemState->getVersion() : 0);
+ reply->setActualVersion(_systemState->getVersion());
+ if (cmd->version() == _systemState->getVersion()) {
+ _reported_host_info_cluster_state_version = _systemState->getVersion();
+ }
}
sendUp(reply);
return true;
@@ -610,7 +570,7 @@ StateManager::getNodeInfo() const
// _systemLock.
// - getNodeInfo() (this function) always acquires the same lock.
std::lock_guard guard(_stateLock);
- stream << "cluster-state-version" << _systemState->getVersion();
+ stream << "cluster-state-version" << _reported_host_info_cluster_state_version;
_hostInfo->printReport(stream);
stream << End();
diff --git a/storage/src/vespa/storage/storageserver/statemanager.h b/storage/src/vespa/storage/storageserver/statemanager.h
index 7f8320567bf..194991723f4 100644
--- a/storage/src/vespa/storage/storageserver/statemanager.h
+++ b/storage/src/vespa/storage/storageserver/statemanager.h
@@ -51,6 +51,7 @@ class StateManager : public NodeStateUpdater,
using ClusterStateBundle = lib::ClusterStateBundle;
std::shared_ptr<const ClusterStateBundle> _systemState;
std::shared_ptr<const ClusterStateBundle> _nextSystemState;
+ uint32_t _reported_host_info_cluster_state_version;
std::list<StateListener*> _stateListeners;
typedef std::pair<framework::MilliSecTime, api::GetNodeStateCommand::SP> TimeStatePair;
std::list<TimeStatePair> _queuedStateRequests;