diff options
author | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2019-06-04 15:48:16 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2019-06-06 11:50:23 +0000 |
commit | 3c8f88a252127b98ea7a94e123b5d65a56ffd571 (patch) | |
tree | 581c77dd672109dd1f9d0e6d3d3ca2c793cac05a /storage | |
parent | 60f28da57c182f342f2af0f22cf3244a105c7d2c (diff) |
Do not block scanning all buckets on first cluster state edge
Scanning all buckets is expensive and blocks the main distributor thread
during the entire process. Instead, use the standard "recovery mode"
functionality that is triggered for all subsequent state transitions.
Recovery mode scans allow client operations to be scheduled alongside
the scanning, but still tries to scan the DB as quickly as possible.
There shouldn't be anything that special with the first cluster state
that implies a full scan is inherently required.
Diffstat (limited to 'storage')
-rw-r--r-- | storage/src/tests/distributor/bucketdbupdatertest.cpp | 10 | ||||
-rw-r--r-- | storage/src/tests/distributor/distributortest.cpp | 12 | ||||
-rw-r--r-- | storage/src/vespa/storage/distributor/distributor.cpp | 4 |
3 files changed, 16 insertions, 10 deletions
diff --git a/storage/src/tests/distributor/bucketdbupdatertest.cpp b/storage/src/tests/distributor/bucketdbupdatertest.cpp index cdaa6e9aaa3..2ddf41236c9 100644 --- a/storage/src/tests/distributor/bucketdbupdatertest.cpp +++ b/storage/src/tests/distributor/bucketdbupdatertest.cpp @@ -544,6 +544,10 @@ public: } uint32_t populate_bucket_db_via_request_bucket_info_for_benchmarking(); + + void complete_recovery_mode() { + _distributor->scanAllBuckets(); + } }; BucketDBUpdaterTest::BucketDBUpdaterTest() @@ -1900,8 +1904,8 @@ TEST_F(BucketDBUpdaterTest, testClusterStateAlwaysSendsFullFetchWhenDistribution TEST_F(BucketDBUpdaterTest, testChangedDistributionConfigTriggersRecoveryMode) { ASSERT_NO_FATAL_FAILURE(setAndEnableClusterState(lib::ClusterState("distributor:6 storage:6"), messageCount(6), 20)); _sender.clear(); - // First cluster state; implicit scan of all buckets which does not - // use normal recovery mode ticking-path. + EXPECT_TRUE(_distributor->isInRecoveryMode()); + complete_recovery_mode(); EXPECT_FALSE(_distributor->isInRecoveryMode()); std::string distConfig(getDistConfig6Nodes4Groups()); @@ -1920,6 +1924,8 @@ TEST_F(BucketDBUpdaterTest, testChangedDistributionConfigTriggersRecoveryMode) { // Pending cluster state (i.e. distribution) has been enabled, which should // cause recovery mode to be entered. EXPECT_TRUE(_distributor->isInRecoveryMode()); + complete_recovery_mode(); + EXPECT_FALSE(_distributor->isInRecoveryMode()); } namespace { diff --git a/storage/src/tests/distributor/distributortest.cpp b/storage/src/tests/distributor/distributortest.cpp index 46c756001d9..c519ef0713b 100644 --- a/storage/src/tests/distributor/distributortest.cpp +++ b/storage/src/tests/distributor/distributortest.cpp @@ -1072,6 +1072,7 @@ void Distributor_Test::leaving_recovery_mode_immediately_sends_getnodestate_repl void Distributor_Test::do_test_pending_merge_getnodestate_reply_edge(BucketSpace space) { setupDistributor(Redundancy(2), NodeCount(2), "version:1 distributor:1 storage:2"); + CPPUNIT_ASSERT(_distributor->isInRecoveryMode()); // 2 buckets with missing replicas triggering merge pending stats addNodesToBucketDB(Bucket(space, BucketId(16, 1)), "0=1/1/1/t/a"); addNodesToBucketDB(Bucket(space, BucketId(16, 2)), "0=1/1/1/t/a"); @@ -1079,29 +1080,30 @@ void Distributor_Test::do_test_pending_merge_getnodestate_reply_edge(BucketSpace CPPUNIT_ASSERT(!_distributor->isInRecoveryMode()); const auto space_name = FixedBucketSpaces::to_string(space); assertBucketSpaceStats(2, 0, 1, space_name, _distributor->getBucketSpacesStats()); - CPPUNIT_ASSERT_EQUAL(size_t(0), explicit_node_state_reply_send_invocations()); + // First completed scan sends off merge stats et al to cluster controller + CPPUNIT_ASSERT_EQUAL(size_t(1), explicit_node_state_reply_send_invocations()); // Edge not triggered when 1 bucket with missing replica left addNodesToBucketDB(Bucket(space, BucketId(16, 1)), "0=1/1/1/t/a,1=1/1/1/t"); tickDistributorNTimes(3); assertBucketSpaceStats(1, 1, 1, space_name, _distributor->getBucketSpacesStats()); - CPPUNIT_ASSERT_EQUAL(size_t(0), explicit_node_state_reply_send_invocations()); + CPPUNIT_ASSERT_EQUAL(size_t(1), explicit_node_state_reply_send_invocations()); // Edge triggered when no more buckets with requiring merge addNodesToBucketDB(Bucket(space, BucketId(16, 2)), "0=1/1/1/t/a,1=1/1/1/t"); tickDistributorNTimes(3); assertBucketSpaceStats(0, 2, 1, space_name, _distributor->getBucketSpacesStats()); - CPPUNIT_ASSERT_EQUAL(size_t(1), explicit_node_state_reply_send_invocations()); + CPPUNIT_ASSERT_EQUAL(size_t(2), explicit_node_state_reply_send_invocations()); // Should only send when edge happens, not in subsequent DB iterations tickDistributorNTimes(10); - CPPUNIT_ASSERT_EQUAL(size_t(1), explicit_node_state_reply_send_invocations()); + CPPUNIT_ASSERT_EQUAL(size_t(2), explicit_node_state_reply_send_invocations()); // Going back to merges pending should _not_ send a getnodestate reply (at least for now) addNodesToBucketDB(Bucket(space, BucketId(16, 1)), "0=1/1/1/t/a"); tickDistributorNTimes(3); assertBucketSpaceStats(1, 1, 1, space_name, _distributor->getBucketSpacesStats()); - CPPUNIT_ASSERT_EQUAL(size_t(1), explicit_node_state_reply_send_invocations()); + CPPUNIT_ASSERT_EQUAL(size_t(2), explicit_node_state_reply_send_invocations()); } void Distributor_Test::pending_to_no_pending_default_merges_edge_immediately_sends_getnodestate_replies() { diff --git a/storage/src/vespa/storage/distributor/distributor.cpp b/storage/src/vespa/storage/distributor/distributor.cpp index 771baa04fca..7cb7d687446 100644 --- a/storage/src/vespa/storage/distributor/distributor.cpp +++ b/storage/src/vespa/storage/distributor/distributor.cpp @@ -359,12 +359,10 @@ Distributor::enableClusterStateBundle(const lib::ClusterStateBundle& state) if (!_doneInitializing && baselineState.getNodeState(myNode).getState() == lib::State::UP) { - scanAllBuckets(); _doneInitializing = true; _doneInitializeHandler.notifyDoneInitializing(); - } else { - enterRecoveryMode(); } + enterRecoveryMode(); // Clear all active messages on nodes that are down. for (uint16_t i = 0; i < baselineState.getNodeCount(lib::NodeType::STORAGE); ++i) { |