From bd3239e62b3d83c849443c923240421ef9ef04cb Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Wed, 14 Mar 2018 14:59:14 +0000 Subject: Immediately send GetNodeState replies when leaving recovery mode --- storage/src/tests/common/testnodestateupdater.cpp | 3 ++- storage/src/tests/common/testnodestateupdater.h | 9 ++++++- storage/src/tests/distributor/distributortest.cpp | 29 +++++++++++++++++++++- .../src/vespa/storage/distributor/distributor.cpp | 5 +++- .../vespa/storage/storageserver/statemanager.cpp | 1 + 5 files changed, 43 insertions(+), 4 deletions(-) (limited to 'storage') diff --git a/storage/src/tests/common/testnodestateupdater.cpp b/storage/src/tests/common/testnodestateupdater.cpp index ae3f69e2605..0547980b847 100644 --- a/storage/src/tests/common/testnodestateupdater.cpp +++ b/storage/src/tests/common/testnodestateupdater.cpp @@ -9,7 +9,8 @@ TestNodeStateUpdater::TestNodeStateUpdater(const lib::NodeType& type) : _reported(new lib::NodeState(type, lib::State::UP)), _current(new lib::NodeState(type, lib::State::UP)), _clusterStateBundle(std::make_shared(lib::ClusterState())), - _listeners() + _listeners(), + _explicit_node_state_reply_send_invocations(0) { } TestNodeStateUpdater::~TestNodeStateUpdater() = default; diff --git a/storage/src/tests/common/testnodestateupdater.h b/storage/src/tests/common/testnodestateupdater.h index 49f6b4f27b2..5e3001abee4 100644 --- a/storage/src/tests/common/testnodestateupdater.h +++ b/storage/src/tests/common/testnodestateupdater.h @@ -18,6 +18,7 @@ struct TestNodeStateUpdater : public NodeStateUpdater lib::NodeState::CSP _current; std::shared_ptr _clusterStateBundle; std::vector _listeners; + size_t _explicit_node_state_reply_send_invocations; public: explicit TestNodeStateUpdater(const lib::NodeType& type); @@ -32,13 +33,19 @@ public: void setReportedNodeState(const lib::NodeState& state) override { _reported = std::make_shared(state); } - void immediately_send_get_node_state_replies() override {} + void immediately_send_get_node_state_replies() override { + ++_explicit_node_state_reply_send_invocations; + } void setCurrentNodeState(const lib::NodeState& state) { _current = std::make_shared(state); } void setClusterState(lib::ClusterState::CSP c); + + size_t explicit_node_state_reply_send_invocations() const noexcept { + return _explicit_node_state_reply_send_invocations; + } }; } // storage diff --git a/storage/src/tests/distributor/distributortest.cpp b/storage/src/tests/distributor/distributortest.cpp index d9f6012bee5..cc1c9df0509 100644 --- a/storage/src/tests/distributor/distributortest.cpp +++ b/storage/src/tests/distributor/distributortest.cpp @@ -59,6 +59,7 @@ class Distributor_Test : public CppUnit::TestFixture, CPPUNIT_TEST(internal_messages_are_started_in_fifo_order_batch); CPPUNIT_TEST(closing_aborts_priority_queued_client_requests); CPPUNIT_TEST(entering_recovery_mode_resets_bucket_space_stats); + CPPUNIT_TEST(leaving_recovery_mode_immediately_sends_getnodestate_replies); CPPUNIT_TEST_SUITE_END(); public: @@ -95,6 +96,7 @@ protected: void internal_messages_are_started_in_fifo_order_batch(); void closing_aborts_priority_queued_client_requests(); void entering_recovery_mode_resets_bucket_space_stats(); + void leaving_recovery_mode_immediately_sends_getnodestate_replies(); void assertBucketSpaceStats(size_t expBucketPending, uint16_t node, const vespalib::string &bucketSpace, const BucketSpacesStatsProvider::PerNodeBucketSpacesStats &stats); @@ -197,6 +199,10 @@ private: return retVal; } + size_t explicit_node_state_reply_send_invocations() const noexcept { + return _node->getNodeStateUpdater().explicit_node_state_reply_send_invocations(); + } + void configureMaxClusterClockSkew(int seconds); void sendDownClusterStateCommand(); void replyToSingleRequestBucketInfoCommandWith1Bucket(); @@ -1021,7 +1027,7 @@ void Distributor_Test::entering_recovery_mode_resets_bucket_space_stats() { addNodesToBucketDB(document::BucketId(16, 2), "0=1/1/1/t/a"); addNodesToBucketDB(document::BucketId(16, 3), "0=2/2/2/t/a"); - tickDistributorNTimes(5); // 2/3rds into second round through database + tickDistributorNTimes(5); // 1/3rds into second round through database enableDistributorClusterState("version:2 distributor:1 storage:3 .1.s:d"); CPPUNIT_ASSERT(_distributor->isInRecoveryMode()); @@ -1035,6 +1041,27 @@ void Distributor_Test::entering_recovery_mode_resets_bucket_space_stats() { assert_invalid_stats_for_all_spaces(stats, 2); } +void Distributor_Test::leaving_recovery_mode_immediately_sends_getnodestate_replies() { + setupDistributor(Redundancy(2), NodeCount(2), "version:1 distributor:1 storage:2"); + // Should not send explicit replies during init stage + CPPUNIT_ASSERT_EQUAL(size_t(0), explicit_node_state_reply_send_invocations()); + // Add a couple of buckets so we have something to iterate over + addNodesToBucketDB(document::BucketId(16, 1), "0=1/1/1/t/a"); + addNodesToBucketDB(document::BucketId(16, 2), "0=1/1/1/t/a"); + + enableDistributorClusterState("version:2 distributor:1 storage:3 .1.s:d"); + CPPUNIT_ASSERT(_distributor->isInRecoveryMode()); + CPPUNIT_ASSERT_EQUAL(size_t(0), explicit_node_state_reply_send_invocations()); + tickDistributorNTimes(1); // DB round not yet complete + CPPUNIT_ASSERT_EQUAL(size_t(0), explicit_node_state_reply_send_invocations()); + tickDistributorNTimes(2); // DB round complete after 2nd bucket + "scan done" discovery tick + CPPUNIT_ASSERT_EQUAL(size_t(1), explicit_node_state_reply_send_invocations()); + CPPUNIT_ASSERT(!_distributor->isInRecoveryMode()); + // Now out of recovery mode, subsequent round completions should not send replies + tickDistributorNTimes(10); + CPPUNIT_ASSERT_EQUAL(size_t(1), explicit_node_state_reply_send_invocations()); +} + } } diff --git a/storage/src/vespa/storage/distributor/distributor.cpp b/storage/src/vespa/storage/distributor/distributor.cpp index d698d07fa34..28b9fb7f0ef 100644 --- a/storage/src/vespa/storage/distributor/distributor.cpp +++ b/storage/src/vespa/storage/distributor/distributor.cpp @@ -419,6 +419,9 @@ Distributor::leaveRecoveryMode() LOG(debug, "Leaving recovery mode"); _metrics->recoveryModeTime.addValue( _recoveryTimeStarted.getElapsedTimeAsDouble()); + if (_doneInitializing) { + _component.getStateUpdater().immediately_send_get_node_state_replies(); + } } _schedulingMode = MaintenanceScheduler::NORMAL_SCHEDULING_MODE; } @@ -730,8 +733,8 @@ Distributor::scanNextBucket() { MaintenanceScanner::ScanResult scanResult(_scanner->scanNext()); if (scanResult.isDone()) { - leaveRecoveryMode(); updateInternalMetricsForCompletedScan(); + leaveRecoveryMode(); // Must happen after internal metrics updates _scanner->reset(); } else { const auto &distribution(_bucketSpaceRepo->get(scanResult.getBucketSpace()).getDistribution()); diff --git a/storage/src/vespa/storage/storageserver/statemanager.cpp b/storage/src/vespa/storage/storageserver/statemanager.cpp index 00f26798127..0efae3c5f8f 100644 --- a/storage/src/vespa/storage/storageserver/statemanager.cpp +++ b/storage/src/vespa/storage/storageserver/statemanager.cpp @@ -557,6 +557,7 @@ StateManager::getNodeInfo() const } void StateManager::immediately_send_get_node_state_replies() { + LOG(debug, "Immediately replying to all pending GetNodeState requests"); sendGetNodeStateReplies(); } -- cgit v1.2.3