diff options
author | Tor Brede Vekterli <vekterli@yahooinc.com> | 2022-01-03 16:39:47 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-03 16:39:47 +0100 |
commit | 96ede726dd237b16ea2b3a0f1b1c1c0a9cce835e (patch) | |
tree | 1c053f21a334c332a6dc125e4e40522fb83198f2 | |
parent | c8068aed934dbd3451654915aa8bbcdf37aefbd5 (diff) | |
parent | 225323f025b0ba5aae956f4c80e071b7d159278d (diff) |
Merge pull request #20638 from vespa-engine/vekterli/invalidate-min-replica-stats-on-recovery-mode-entry
Invalidate bucket DB replica statistics upon recovery mode entry [run-systemtest]
9 files changed, 144 insertions, 91 deletions
diff --git a/storage/src/tests/distributor/top_level_distributor_test.cpp b/storage/src/tests/distributor/top_level_distributor_test.cpp index 548f0f6be2c..a0477e352d1 100644 --- a/storage/src/tests/distributor/top_level_distributor_test.cpp +++ b/storage/src/tests/distributor/top_level_distributor_test.cpp @@ -90,6 +90,10 @@ struct TopLevelDistributorTest : Test, TopLevelDistributorTestUtil { return _distributor->getBucketSpacesStats(); } + std::unordered_map<uint16_t, uint32_t> distributor_min_replica_stats() { + return _distributor->getMinReplica(); + } + uint64_t db_sample_interval_sec() const noexcept { // Sampling interval is equal across stripes, so just grab the first one and go with it. return std::chrono::duration_cast<std::chrono::seconds>( @@ -471,7 +475,7 @@ TEST_F(TopLevelDistributorTest, host_info_reporter_config_is_propagated_to_repor namespace { -void assert_invalid_stats_for_all_spaces( +void assert_invalid_bucket_stats_for_all_spaces( const BucketSpacesStatsProvider::PerNodeBucketSpacesStats& stats, uint16_t node_index) { @@ -486,9 +490,15 @@ void assert_invalid_stats_for_all_spaces( ASSERT_FALSE(space_iter->second.valid()); } +void assert_min_replica_stats_zeroed(const std::unordered_map<uint16_t, uint32_t>& stats, uint16_t node_index) { + auto iter = stats.find(node_index); + ASSERT_TRUE(iter != stats.cend()); + EXPECT_EQ(iter->second, 0); } -TEST_F(TopLevelDistributorTest, entering_recovery_mode_resets_bucket_space_stats_across_all_stripes) { +} + +TEST_F(TopLevelDistributorTest, entering_recovery_mode_resets_bucket_space_and_min_replica_stats_across_all_stripes) { // Set up a cluster state + DB contents which implies merge maintenance ops setup_distributor(Redundancy(2), NodeCount(2), "version:1 distributor:1 storage:2"); add_nodes_to_stripe_bucket_db(document::BucketId(16, 1), "0=1/1/1/t/a"); @@ -503,10 +513,18 @@ TEST_F(TopLevelDistributorTest, entering_recovery_mode_resets_bucket_space_stats // from state version 2. Exposing stats from version 1 risks reporting stale // information back to the cluster controller. const auto stats = distributor_bucket_spaces_stats(); - ASSERT_EQ(2, stats.size()); - - assert_invalid_stats_for_all_spaces(stats, 0); - assert_invalid_stats_for_all_spaces(stats, 2); + ASSERT_EQ(stats.size(), 2); + + assert_invalid_bucket_stats_for_all_spaces(stats, 0); + assert_invalid_bucket_stats_for_all_spaces(stats, 2); + + auto min_replica_stats = distributor_min_replica_stats(); + ASSERT_EQ(min_replica_stats.size(), 2); + assert_min_replica_stats_zeroed(min_replica_stats, 0); + // Even though we don't have any replicas on node 2 in the DB, we don't know this until + // we've completed a full DB scan and updated the stats. Until that point in time we + // have to assume we _do_ have replicas with an unknown replication factor. + assert_min_replica_stats_zeroed(min_replica_stats, 2); } TEST_F(TopLevelDistributorTest, leaving_recovery_mode_immediately_sends_getnodestate_replies) { diff --git a/storage/src/tests/storageserver/statemanagertest.cpp b/storage/src/tests/storageserver/statemanagertest.cpp index fc22759139b..729976d2dce 100644 --- a/storage/src/tests/storageserver/statemanagertest.cpp +++ b/storage/src/tests/storageserver/statemanagertest.cpp @@ -43,6 +43,8 @@ struct StateManagerTest : Test { std::string get_node_info() const { return _manager->getNodeInfo(); } + + void extract_cluster_state_version_from_host_info(uint32_t& version_out); }; StateManagerTest::StateManagerTest() @@ -54,8 +56,8 @@ StateManagerTest::StateManagerTest() } void -StateManagerTest::SetUp() { - vdstestlib::DirConfig config(getStandardConfig(true)); +StateManagerTest::SetUp() +{ _node = std::make_unique<TestServiceLayerApp>(NodeIndex(2)); // Clock will increase 1 sec per call. _node->getClock().setAbsoluteTimeInSeconds(1); @@ -85,12 +87,39 @@ StateManagerTest::TearDown() { _metricManager.reset(); } -void StateManagerTest::force_current_cluster_state_version(uint32_t version) { +void +StateManagerTest::force_current_cluster_state_version(uint32_t version) +{ ClusterState state(*_manager->getClusterStateBundle()->getBaselineClusterState()); state.setVersion(version); _manager->setClusterStateBundle(lib::ClusterStateBundle(state)); } +void +StateManagerTest::extract_cluster_state_version_from_host_info(uint32_t& version_out) +{ + std::string nodeInfoString = get_node_info(); + vespalib::Slime nodeInfo; + vespalib::slime::JsonFormat::decode(nodeInfoString, nodeInfo); + + vespalib::slime::Symbol lookupSymbol = nodeInfo.lookup("cluster-state-version"); + if (lookupSymbol.undefined()) { + FAIL() << "No cluster-state-version was found in the node info"; + } + + auto& cursor = nodeInfo.get(); + auto& clusterStateVersionCursor = cursor["cluster-state-version"]; + if (!clusterStateVersionCursor.valid()) { + FAIL() << "No cluster-state-version was found in the node info"; + } + + if (clusterStateVersionCursor.type().getId() != vespalib::slime::LONG::ID) { + FAIL() << "No cluster-state-version was found in the node info"; + } + + version_out = clusterStateVersionCursor.asLong(); +} + #define GET_ONLY_OK_REPLY(varname) \ { \ ASSERT_EQ(size_t(1), _upper->getNumReplies()); \ @@ -214,29 +243,9 @@ TEST_F(StateManagerTest, reported_node_state) { TEST_F(StateManagerTest, current_cluster_state_version_is_included_in_host_info_json) { force_current_cluster_state_version(123); - - std::string nodeInfoString = get_node_info(); - vespalib::Memory goldenMemory(nodeInfoString); - vespalib::Slime nodeInfo; - vespalib::slime::JsonFormat::decode(nodeInfoString, nodeInfo); - - vespalib::slime::Symbol lookupSymbol = nodeInfo.lookup("cluster-state-version"); - if (lookupSymbol.undefined()) { - FAIL() << "No cluster-state-version was found in the node info"; - } - - auto& cursor = nodeInfo.get(); - auto& clusterStateVersionCursor = cursor["cluster-state-version"]; - if (!clusterStateVersionCursor.valid()) { - FAIL() << "No cluster-state-version was found in the node info"; - } - - if (clusterStateVersionCursor.type().getId() != vespalib::slime::LONG::ID) { - FAIL() << "No cluster-state-version was found in the node info"; - } - - int version = clusterStateVersionCursor.asLong(); - EXPECT_EQ(123, version); + uint32_t version; + ASSERT_NO_FATAL_FAILURE(extract_cluster_state_version_from_host_info(version)); + EXPECT_EQ(version, 123); } void StateManagerTest::mark_reported_node_state_up() { @@ -349,4 +358,42 @@ TEST_F(StateManagerTest, activation_command_is_bounced_with_current_cluster_stat EXPECT_EQ(12345, activate_reply.actualVersion()); } +TEST_F(StateManagerTest, non_deferred_cluster_state_sets_reported_cluster_state_version) { + auto cmd = std::make_shared<api::SetSystemStateCommand>(lib::ClusterState("version:1234 distributor:1 storage:1")); + cmd->setTimeout(1000s); + cmd->setSourceIndex(0); + _upper->sendDown(cmd); + std::shared_ptr<api::StorageReply> reply; + GET_ONLY_OK_REPLY(reply); + + uint32_t version; + ASSERT_NO_FATAL_FAILURE(extract_cluster_state_version_from_host_info(version)); + EXPECT_EQ(version, 1234); +} + +TEST_F(StateManagerTest, deferred_cluster_state_does_not_update_state_until_activation_edge) { + force_current_cluster_state_version(100); + + lib::ClusterStateBundle deferred_bundle(lib::ClusterState("version:101 distributor:1 storage:1"), {}, true); + auto state_cmd = std::make_shared<api::SetSystemStateCommand>(deferred_bundle); + state_cmd->setTimeout(1000s); + state_cmd->setSourceIndex(0); + _upper->sendDown(state_cmd); + std::shared_ptr<api::StorageReply> reply; + GET_ONLY_OK_REPLY(reply); + + uint32_t version; + ASSERT_NO_FATAL_FAILURE(extract_cluster_state_version_from_host_info(version)); + EXPECT_EQ(version, 100); // Not yet updated to version 101 + + auto activation_cmd = std::make_shared<api::ActivateClusterStateVersionCommand>(101); + activation_cmd->setTimeout(1000s); + activation_cmd->setSourceIndex(0); + _upper->sendDown(activation_cmd); + GET_ONLY_OK_REPLY(reply); + + ASSERT_NO_FATAL_FAILURE(extract_cluster_state_version_from_host_info(version)); + EXPECT_EQ(version, 101); +} + } // storage diff --git a/storage/src/vespa/storage/common/hostreporter/hostinfo.cpp b/storage/src/vespa/storage/common/hostreporter/hostinfo.cpp index f15885769e6..7ff999735c0 100644 --- a/storage/src/vespa/storage/common/hostreporter/hostinfo.cpp +++ b/storage/src/vespa/storage/common/hostreporter/hostinfo.cpp @@ -9,8 +9,7 @@ HostInfo::HostInfo() { registerReporter(&versionReporter); } -HostInfo::~HostInfo() { -} +HostInfo::~HostInfo() = default; void HostInfo::printReport(vespalib::JsonStream& report) { for (HostReporter* reporter : customReporters) { diff --git a/storage/src/vespa/storage/common/hostreporter/hostreporter.h b/storage/src/vespa/storage/common/hostreporter/hostreporter.h index 115115328cc..6ce6bd803df 100644 --- a/storage/src/vespa/storage/common/hostreporter/hostreporter.h +++ b/storage/src/vespa/storage/common/hostreporter/hostreporter.h @@ -11,7 +11,7 @@ namespace storage { class HostReporter { public: virtual void report(vespalib::JsonStream& jsonreport) = 0; - virtual ~HostReporter() {} + virtual ~HostReporter() = default; }; } diff --git a/storage/src/vespa/storage/distributor/distributor_stripe.cpp b/storage/src/vespa/storage/distributor/distributor_stripe.cpp index 50c70306d92..bcba976f2c3 100644 --- a/storage/src/vespa/storage/distributor/distributor_stripe.cpp +++ b/storage/src/vespa/storage/distributor/distributor_stripe.cpp @@ -316,9 +316,12 @@ DistributorStripe::enterRecoveryMode() LOG(debug, "Entering recovery mode"); _schedulingMode = MaintenanceScheduler::RECOVERY_SCHEDULING_MODE; _scanner->reset(); - _bucketDBMetricUpdater.reset(); - // TODO reset _bucketDbStats? - invalidate_bucket_spaces_stats(); + // We enter recovery mode due to cluster state or distribution config changes. + // Until we have completed a new DB scan round, we don't know the state of our + // newly owned buckets and must not report stats for these out to the cluster + // controller as they will be stale (valid only for the _previous_ state/config). + // As a consequence, we must explicitly invalidate all such statistics in this edge. + invalidate_internal_db_dependent_stats(); _recoveryTimeStarted = framework::MilliSecTimer(_component.getClock()); } @@ -337,6 +340,17 @@ DistributorStripe::leaveRecoveryMode() _schedulingMode = MaintenanceScheduler::NORMAL_SCHEDULING_MODE; } +void +DistributorStripe::invalidate_internal_db_dependent_stats() +{ + _bucketDBMetricUpdater.reset(); + { + std::lock_guard guard(_metricLock); + invalidate_bucket_spaces_stats(guard); + invalidate_min_replica_stats(guard); + } +} + template <typename NodeFunctor> void DistributorStripe::for_each_available_content_node_in(const lib::ClusterState& state, NodeFunctor&& func) { const auto node_count = state.getNodeCount(lib::NodeType::STORAGE); @@ -357,8 +371,9 @@ BucketSpacesStatsProvider::BucketSpacesStats DistributorStripe::make_invalid_sta return invalid_space_stats; } -void DistributorStripe::invalidate_bucket_spaces_stats() { - std::lock_guard guard(_metricLock); +void +DistributorStripe::invalidate_bucket_spaces_stats([[maybe_unused]] std::lock_guard<std::mutex>& held_metric_lock) +{ _bucketSpacesStats = BucketSpacesStatsProvider::PerNodeBucketSpacesStats(); auto invalid_space_stats = make_invalid_stats_per_configured_space(); @@ -369,6 +384,17 @@ void DistributorStripe::invalidate_bucket_spaces_stats() { } void +DistributorStripe::invalidate_min_replica_stats([[maybe_unused]] std::lock_guard<std::mutex>& held_metric_lock) +{ + _bucketDbStats._minBucketReplica.clear(); + // Insert an explicit zero value for all nodes that are up in the pending/current cluster state + const auto& baseline = *_clusterStateBundle.getBaselineClusterState(); + for_each_available_content_node_in(baseline, [this](const lib::Node& node) { + _bucketDbStats._minBucketReplica[node.getIndex()] = 0; + }); +} + +void DistributorStripe::recheckBucketInfo(uint16_t nodeIdx, const document::Bucket &bucket) { _bucketDBUpdater.recheckBucketInfo(nodeIdx, bucket); } diff --git a/storage/src/vespa/storage/distributor/distributor_stripe.h b/storage/src/vespa/storage/distributor/distributor_stripe.h index ce6a2071efd..809b4dd0e41 100644 --- a/storage/src/vespa/storage/distributor/distributor_stripe.h +++ b/storage/src/vespa/storage/distributor/distributor_stripe.h @@ -259,7 +259,9 @@ private: BucketSpacesStatsProvider::BucketSpacesStats make_invalid_stats_per_configured_space() const; template <typename NodeFunctor> void for_each_available_content_node_in(const lib::ClusterState&, NodeFunctor&&); - void invalidate_bucket_spaces_stats(); + void invalidate_internal_db_dependent_stats(); + void invalidate_bucket_spaces_stats(std::lock_guard<std::mutex>& held_metric_lock); + void invalidate_min_replica_stats(std::lock_guard<std::mutex>& held_metric_lock); void send_updated_host_info_if_required(); void propagate_config_snapshot_to_internal_components(); diff --git a/storage/src/vespa/storage/distributor/min_replica_provider.h b/storage/src/vespa/storage/distributor/min_replica_provider.h index 56fd1e8fc81..a4374b906fe 100644 --- a/storage/src/vespa/storage/distributor/min_replica_provider.h +++ b/storage/src/vespa/storage/distributor/min_replica_provider.h @@ -9,7 +9,7 @@ namespace storage::distributor { class MinReplicaProvider { public: - virtual ~MinReplicaProvider() {} + virtual ~MinReplicaProvider() = default; /** * Get a snapshot of the minimum bucket replica for each of the nodes. diff --git a/storage/src/vespa/storage/storageserver/statemanager.cpp b/storage/src/vespa/storage/storageserver/statemanager.cpp index 7d08f738abe..7cb66ab447f 100644 --- a/storage/src/vespa/storage/storageserver/statemanager.cpp +++ b/storage/src/vespa/storage/storageserver/statemanager.cpp @@ -39,6 +39,7 @@ StateManager::StateManager(StorageComponentRegister& compReg, _nextNodeState(), _systemState(std::make_shared<const ClusterStateBundle>(lib::ClusterState())), _nextSystemState(), + _reported_host_info_cluster_state_version(0), _stateListeners(), _queuedStateRequests(), _threadLock(), @@ -94,58 +95,11 @@ StateManager::print(std::ostream& out, bool verbose, out << "StateManager()"; } -#ifdef ENABLE_BUCKET_OPERATION_LOGGING -namespace { - -vespalib::string -escapeHtml(vespalib::stringref str) -{ - vespalib::asciistream ss; - for (size_t i = 0; i < str.size(); ++i) { - switch (str[i]) { - case '<': - ss << "<"; - break; - case '>': - ss << ">"; - break; - case '&': - ss << "&"; - break; - default: - ss << str[i]; - } - } - return ss.str(); -} - -} -#endif - void StateManager::reportHtmlStatus(std::ostream& out, const framework::HttpUrlPath& path) const { (void) path; -#ifdef ENABLE_BUCKET_OPERATION_LOGGING - if (path.hasAttribute("history")) { - std::istringstream iss(path.getAttribute("history"), std::istringstream::in); - uint64_t rawId; - iss >> std::hex >> rawId; - document::BucketId bid(rawId); - out << "<h3>History for " << bid << "</h3>\n"; - vespalib::string history( - debug::BucketOperationLogger::getInstance().getHistory(bid)); - out << "<pre>" << escapeHtml(history) << "</pre>\n"; - return; - } else if (path.hasAttribute("search")) { - vespalib::string substr(path.getAttribute("search")); - out << debug::BucketOperationLogger::getInstance() - .searchBucketHistories(substr, "/systemstate?history="); - return; - } -#endif - { std::lock_guard lock(_stateLock); const auto &baseLineClusterState = _systemState->getBaselineClusterState(); @@ -342,6 +296,9 @@ StateManager::enableNextClusterState() // overwritten by a non-null pending cluster state afterwards. logNodeClusterStateTransition(*_systemState, *_nextSystemState); _systemState = _nextSystemState; + if (!_nextSystemState->deferredActivation()) { + _reported_host_info_cluster_state_version = _systemState->getVersion(); + } // else: reported version updated upon explicit activation edge _nextSystemState.reset(); _systemStateHistory.emplace_back(_component.getClock().getTimeInMillis(), _systemState); } @@ -511,7 +468,10 @@ StateManager::onActivateClusterStateVersion( auto reply = std::make_shared<api::ActivateClusterStateVersionReply>(*cmd); { std::lock_guard lock(_stateLock); - reply->setActualVersion(_systemState ? _systemState->getVersion() : 0); + reply->setActualVersion(_systemState->getVersion()); + if (cmd->version() == _systemState->getVersion()) { + _reported_host_info_cluster_state_version = _systemState->getVersion(); + } } sendUp(reply); return true; @@ -610,7 +570,7 @@ StateManager::getNodeInfo() const // _systemLock. // - getNodeInfo() (this function) always acquires the same lock. std::lock_guard guard(_stateLock); - stream << "cluster-state-version" << _systemState->getVersion(); + stream << "cluster-state-version" << _reported_host_info_cluster_state_version; _hostInfo->printReport(stream); stream << End(); diff --git a/storage/src/vespa/storage/storageserver/statemanager.h b/storage/src/vespa/storage/storageserver/statemanager.h index 7f8320567bf..194991723f4 100644 --- a/storage/src/vespa/storage/storageserver/statemanager.h +++ b/storage/src/vespa/storage/storageserver/statemanager.h @@ -51,6 +51,7 @@ class StateManager : public NodeStateUpdater, using ClusterStateBundle = lib::ClusterStateBundle; std::shared_ptr<const ClusterStateBundle> _systemState; std::shared_ptr<const ClusterStateBundle> _nextSystemState; + uint32_t _reported_host_info_cluster_state_version; std::list<StateListener*> _stateListeners; typedef std::pair<framework::MilliSecTime, api::GetNodeStateCommand::SP> TimeStatePair; std::list<TimeStatePair> _queuedStateRequests; |