summaryrefslogtreecommitdiffstats
path: root/storage
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@oath.com>2018-03-14 14:59:14 +0000
committerTor Brede Vekterli <vekterli@oath.com>2018-03-14 15:02:41 +0000
commitbd3239e62b3d83c849443c923240421ef9ef04cb (patch)
tree2323f3eb03341de24a46b944f20b7fc432f6046a /storage
parent717d2a537f1e060a00657fedb61343e8e9af8bc3 (diff)
Immediately send GetNodeState replies when leaving recovery mode
Diffstat (limited to 'storage')
-rw-r--r--storage/src/tests/common/testnodestateupdater.cpp3
-rw-r--r--storage/src/tests/common/testnodestateupdater.h9
-rw-r--r--storage/src/tests/distributor/distributortest.cpp29
-rw-r--r--storage/src/vespa/storage/distributor/distributor.cpp5
-rw-r--r--storage/src/vespa/storage/storageserver/statemanager.cpp1
5 files changed, 43 insertions, 4 deletions
diff --git a/storage/src/tests/common/testnodestateupdater.cpp b/storage/src/tests/common/testnodestateupdater.cpp
index ae3f69e2605..0547980b847 100644
--- a/storage/src/tests/common/testnodestateupdater.cpp
+++ b/storage/src/tests/common/testnodestateupdater.cpp
@@ -9,7 +9,8 @@ TestNodeStateUpdater::TestNodeStateUpdater(const lib::NodeType& type)
: _reported(new lib::NodeState(type, lib::State::UP)),
_current(new lib::NodeState(type, lib::State::UP)),
_clusterStateBundle(std::make_shared<const lib::ClusterStateBundle>(lib::ClusterState())),
- _listeners()
+ _listeners(),
+ _explicit_node_state_reply_send_invocations(0)
{ }
TestNodeStateUpdater::~TestNodeStateUpdater() = default;
diff --git a/storage/src/tests/common/testnodestateupdater.h b/storage/src/tests/common/testnodestateupdater.h
index 49f6b4f27b2..5e3001abee4 100644
--- a/storage/src/tests/common/testnodestateupdater.h
+++ b/storage/src/tests/common/testnodestateupdater.h
@@ -18,6 +18,7 @@ struct TestNodeStateUpdater : public NodeStateUpdater
lib::NodeState::CSP _current;
std::shared_ptr<const lib::ClusterStateBundle> _clusterStateBundle;
std::vector<StateListener*> _listeners;
+ size_t _explicit_node_state_reply_send_invocations;
public:
explicit TestNodeStateUpdater(const lib::NodeType& type);
@@ -32,13 +33,19 @@ public:
void setReportedNodeState(const lib::NodeState& state) override {
_reported = std::make_shared<lib::NodeState>(state);
}
- void immediately_send_get_node_state_replies() override {}
+ void immediately_send_get_node_state_replies() override {
+ ++_explicit_node_state_reply_send_invocations;
+ }
void setCurrentNodeState(const lib::NodeState& state) {
_current = std::make_shared<lib::NodeState>(state);
}
void setClusterState(lib::ClusterState::CSP c);
+
+ size_t explicit_node_state_reply_send_invocations() const noexcept {
+ return _explicit_node_state_reply_send_invocations;
+ }
};
} // storage
diff --git a/storage/src/tests/distributor/distributortest.cpp b/storage/src/tests/distributor/distributortest.cpp
index d9f6012bee5..cc1c9df0509 100644
--- a/storage/src/tests/distributor/distributortest.cpp
+++ b/storage/src/tests/distributor/distributortest.cpp
@@ -59,6 +59,7 @@ class Distributor_Test : public CppUnit::TestFixture,
CPPUNIT_TEST(internal_messages_are_started_in_fifo_order_batch);
CPPUNIT_TEST(closing_aborts_priority_queued_client_requests);
CPPUNIT_TEST(entering_recovery_mode_resets_bucket_space_stats);
+ CPPUNIT_TEST(leaving_recovery_mode_immediately_sends_getnodestate_replies);
CPPUNIT_TEST_SUITE_END();
public:
@@ -95,6 +96,7 @@ protected:
void internal_messages_are_started_in_fifo_order_batch();
void closing_aborts_priority_queued_client_requests();
void entering_recovery_mode_resets_bucket_space_stats();
+ void leaving_recovery_mode_immediately_sends_getnodestate_replies();
void assertBucketSpaceStats(size_t expBucketPending, uint16_t node, const vespalib::string &bucketSpace,
const BucketSpacesStatsProvider::PerNodeBucketSpacesStats &stats);
@@ -197,6 +199,10 @@ private:
return retVal;
}
+ size_t explicit_node_state_reply_send_invocations() const noexcept {
+ return _node->getNodeStateUpdater().explicit_node_state_reply_send_invocations();
+ }
+
void configureMaxClusterClockSkew(int seconds);
void sendDownClusterStateCommand();
void replyToSingleRequestBucketInfoCommandWith1Bucket();
@@ -1021,7 +1027,7 @@ void Distributor_Test::entering_recovery_mode_resets_bucket_space_stats() {
addNodesToBucketDB(document::BucketId(16, 2), "0=1/1/1/t/a");
addNodesToBucketDB(document::BucketId(16, 3), "0=2/2/2/t/a");
- tickDistributorNTimes(5); // 2/3rds into second round through database
+ tickDistributorNTimes(5); // 1/3rds into second round through database
enableDistributorClusterState("version:2 distributor:1 storage:3 .1.s:d");
CPPUNIT_ASSERT(_distributor->isInRecoveryMode());
@@ -1035,6 +1041,27 @@ void Distributor_Test::entering_recovery_mode_resets_bucket_space_stats() {
assert_invalid_stats_for_all_spaces(stats, 2);
}
+void Distributor_Test::leaving_recovery_mode_immediately_sends_getnodestate_replies() {
+ setupDistributor(Redundancy(2), NodeCount(2), "version:1 distributor:1 storage:2");
+ // Should not send explicit replies during init stage
+ CPPUNIT_ASSERT_EQUAL(size_t(0), explicit_node_state_reply_send_invocations());
+ // Add a couple of buckets so we have something to iterate over
+ addNodesToBucketDB(document::BucketId(16, 1), "0=1/1/1/t/a");
+ addNodesToBucketDB(document::BucketId(16, 2), "0=1/1/1/t/a");
+
+ enableDistributorClusterState("version:2 distributor:1 storage:3 .1.s:d");
+ CPPUNIT_ASSERT(_distributor->isInRecoveryMode());
+ CPPUNIT_ASSERT_EQUAL(size_t(0), explicit_node_state_reply_send_invocations());
+ tickDistributorNTimes(1); // DB round not yet complete
+ CPPUNIT_ASSERT_EQUAL(size_t(0), explicit_node_state_reply_send_invocations());
+ tickDistributorNTimes(2); // DB round complete after 2nd bucket + "scan done" discovery tick
+ CPPUNIT_ASSERT_EQUAL(size_t(1), explicit_node_state_reply_send_invocations());
+ CPPUNIT_ASSERT(!_distributor->isInRecoveryMode());
+ // Now out of recovery mode, subsequent round completions should not send replies
+ tickDistributorNTimes(10);
+ CPPUNIT_ASSERT_EQUAL(size_t(1), explicit_node_state_reply_send_invocations());
+}
+
}
}
diff --git a/storage/src/vespa/storage/distributor/distributor.cpp b/storage/src/vespa/storage/distributor/distributor.cpp
index d698d07fa34..28b9fb7f0ef 100644
--- a/storage/src/vespa/storage/distributor/distributor.cpp
+++ b/storage/src/vespa/storage/distributor/distributor.cpp
@@ -419,6 +419,9 @@ Distributor::leaveRecoveryMode()
LOG(debug, "Leaving recovery mode");
_metrics->recoveryModeTime.addValue(
_recoveryTimeStarted.getElapsedTimeAsDouble());
+ if (_doneInitializing) {
+ _component.getStateUpdater().immediately_send_get_node_state_replies();
+ }
}
_schedulingMode = MaintenanceScheduler::NORMAL_SCHEDULING_MODE;
}
@@ -730,8 +733,8 @@ Distributor::scanNextBucket()
{
MaintenanceScanner::ScanResult scanResult(_scanner->scanNext());
if (scanResult.isDone()) {
- leaveRecoveryMode();
updateInternalMetricsForCompletedScan();
+ leaveRecoveryMode(); // Must happen after internal metrics updates
_scanner->reset();
} else {
const auto &distribution(_bucketSpaceRepo->get(scanResult.getBucketSpace()).getDistribution());
diff --git a/storage/src/vespa/storage/storageserver/statemanager.cpp b/storage/src/vespa/storage/storageserver/statemanager.cpp
index 00f26798127..0efae3c5f8f 100644
--- a/storage/src/vespa/storage/storageserver/statemanager.cpp
+++ b/storage/src/vespa/storage/storageserver/statemanager.cpp
@@ -557,6 +557,7 @@ StateManager::getNodeInfo() const
}
void StateManager::immediately_send_get_node_state_replies() {
+ LOG(debug, "Immediately replying to all pending GetNodeState requests");
sendGetNodeStateReplies();
}