diff options
6 files changed, 166 insertions, 13 deletions
diff --git a/storage/src/tests/bucketdb/bucketinfotest.cpp b/storage/src/tests/bucketdb/bucketinfotest.cpp index 9ee5895a41f..a625f6303ea 100644 --- a/storage/src/tests/bucketdb/bucketinfotest.cpp +++ b/storage/src/tests/bucketdb/bucketinfotest.cpp @@ -22,6 +22,10 @@ struct BucketInfoTest : public CppUnit::TestFixture { void testTrustedResetWhenCopiesBecomeInconsistent(); void testTrustedResetWhenTrustedCopiesGoOutOfSync(); void testTrustedNotResetWhenNonTrustedCopiesStillOutOfSync(); + void add_nodes_can_immediately_update_trusted_flag(); + void add_nodes_can_defer_update_of_trusted_flag(); + void remove_node_can_immediately_update_trusted_flag(); + void remove_node_can_defer_update_of_trusted_flag(); CPPUNIT_TEST_SUITE(BucketInfoTest); CPPUNIT_TEST(testBucketInfoEntriesWithNewestTimestampsAreKept); @@ -31,6 +35,10 @@ struct BucketInfoTest : public CppUnit::TestFixture { CPPUNIT_TEST_IGNORED(testTrustedResetWhenCopiesBecomeInconsistent); CPPUNIT_TEST(testTrustedResetWhenTrustedCopiesGoOutOfSync); CPPUNIT_TEST(testTrustedNotResetWhenNonTrustedCopiesStillOutOfSync); + CPPUNIT_TEST(add_nodes_can_immediately_update_trusted_flag); + CPPUNIT_TEST(add_nodes_can_defer_update_of_trusted_flag); + CPPUNIT_TEST(remove_node_can_immediately_update_trusted_flag); + CPPUNIT_TEST(remove_node_can_defer_update_of_trusted_flag); CPPUNIT_TEST_SUITE_END(); }; @@ -195,6 +203,43 @@ BucketInfoTest::testTrustedNotResetWhenNonTrustedCopiesStillOutOfSync() CPPUNIT_ASSERT(!info.getNode(2)->trusted()); } +void BucketInfoTest::add_nodes_can_immediately_update_trusted_flag() { + BucketInfo info; + std::vector<uint16_t> order; + info.addNodes({BucketCopy(0, 0, api::BucketInfo(10, 100, 1000))}, order, TrustedUpdate::UPDATE); + // Only one replica, so implicitly trusted iff trusted flag update is invoked. + CPPUNIT_ASSERT(info.getNode(0)->trusted()); +} + +void BucketInfoTest::add_nodes_can_defer_update_of_trusted_flag() { + BucketInfo info; + std::vector<uint16_t> order; + info.addNodes({BucketCopy(0, 0, api::BucketInfo(10, 100, 1000))}, order, TrustedUpdate::DEFER); + CPPUNIT_ASSERT(!info.getNode(0)->trusted()); +} + +void BucketInfoTest::remove_node_can_immediately_update_trusted_flag() { + BucketInfo info; + std::vector<uint16_t> order; + info.addNodes({BucketCopy(0, 0, api::BucketInfo(10, 100, 1000)), + BucketCopy(0, 1, api::BucketInfo(20, 200, 2000))}, + order, TrustedUpdate::UPDATE); + CPPUNIT_ASSERT(!info.getNode(0)->trusted()); + info.removeNode(1, TrustedUpdate::UPDATE); + // Only one replica remaining after remove, so implicitly trusted iff trusted flag update is invoked. + CPPUNIT_ASSERT(info.getNode(0)->trusted()); +} + +void BucketInfoTest::remove_node_can_defer_update_of_trusted_flag() { + BucketInfo info; + std::vector<uint16_t> order; + info.addNodes({BucketCopy(0, 0, api::BucketInfo(10, 100, 1000)), + BucketCopy(0, 1, api::BucketInfo(20, 200, 2000))}, + order, TrustedUpdate::UPDATE); + info.removeNode(1, TrustedUpdate::DEFER); + CPPUNIT_ASSERT(!info.getNode(0)->trusted()); +} + } } // storage diff --git a/storage/src/tests/distributor/bucketdbupdatertest.cpp b/storage/src/tests/distributor/bucketdbupdatertest.cpp index d89ce2a03d0..7ad083dc5a1 100644 --- a/storage/src/tests/distributor/bucketdbupdatertest.cpp +++ b/storage/src/tests/distributor/bucketdbupdatertest.cpp @@ -73,6 +73,13 @@ class BucketDBUpdaterTest : public CppUnit::TestFixture, CPPUNIT_TEST(transition_time_reset_across_non_preempting_state_changes); CPPUNIT_TEST(transition_time_tracked_for_distribution_config_change); CPPUNIT_TEST(transition_time_tracked_across_preempted_transitions); + CPPUNIT_TEST(batch_update_of_existing_diverging_replicas_does_not_mark_any_as_trusted); + CPPUNIT_TEST(batch_add_of_new_diverging_replicas_does_not_mark_any_as_trusted); + CPPUNIT_TEST(batch_add_with_single_resulting_replica_implicitly_marks_as_trusted); + CPPUNIT_TEST(identity_update_of_single_replica_does_not_clear_trusted); + CPPUNIT_TEST(identity_update_of_diverging_untrusted_replicas_does_not_mark_any_as_trusted); + CPPUNIT_TEST(adding_diverging_replica_to_existing_trusted_does_not_remove_trusted); + CPPUNIT_TEST(batch_update_from_distributor_change_does_not_mark_diverging_replicas_as_trusted); CPPUNIT_TEST_SUITE_END(); protected: @@ -127,6 +134,13 @@ protected: void transition_time_reset_across_non_preempting_state_changes(); void transition_time_tracked_for_distribution_config_change(); void transition_time_tracked_across_preempted_transitions(); + void batch_update_of_existing_diverging_replicas_does_not_mark_any_as_trusted(); + void batch_add_of_new_diverging_replicas_does_not_mark_any_as_trusted(); + void batch_add_with_single_resulting_replica_implicitly_marks_as_trusted(); + void identity_update_of_single_replica_does_not_clear_trusted(); + void identity_update_of_diverging_untrusted_replicas_does_not_mark_any_as_trusted(); + void adding_diverging_replica_to_existing_trusted_does_not_remove_trusted(); + void batch_update_from_distributor_change_does_not_mark_diverging_replicas_as_trusted(); bool bucketExistsThatHasNode(int bucketCount, uint16_t node) const; @@ -2414,5 +2428,73 @@ BucketDBUpdaterTest::transition_time_tracked_across_preempted_transitions() CPPUNIT_ASSERT_EQUAL(uint64_t(8000), lastTransitionTimeInMillis()); } +/* + * Brief reminder on test DSL for checking bucket merge operations: + * + * mergeBucketLists() takes as input strings of the format + * <node>:<raw bucket id>/<checksum>/<num docs>/<doc size>|<node>: + * and returns a string describing the bucket DB post-merge with the format + * <raw bucket id>:<node>/<checksum>/<num docs>/<doc size>,<node>:....|<raw bucket id>:.... + * + * Yes, the order of node<->bucket id is reversed between the two, perhaps to make sure you're awake. + */ + +void BucketDBUpdaterTest::batch_update_of_existing_diverging_replicas_does_not_mark_any_as_trusted() { + // Replacing bucket information for content node 0 should not mark existing + // untrusted replica as trusted as a side effect. + CPPUNIT_ASSERT_EQUAL( + std::string("5:1/7/8/9/u,0/1/2/3/u|"), + mergeBucketLists( + lib::ClusterState("distributor:1 storage:3 .0.s:i"), + "0:5/0/0/0|1:5/7/8/9", + lib::ClusterState("distributor:1 storage:3 .0.s:u"), + "0:5/1/2/3|1:5/7/8/9", true)); +} + +void BucketDBUpdaterTest::batch_add_of_new_diverging_replicas_does_not_mark_any_as_trusted() { + CPPUNIT_ASSERT_EQUAL( + std::string("5:1/7/8/9/u,0/1/2/3/u|"), + mergeBucketLists("", "0:5/1/2/3|1:5/7/8/9", true)); +} + +void BucketDBUpdaterTest::batch_add_with_single_resulting_replica_implicitly_marks_as_trusted() { + CPPUNIT_ASSERT_EQUAL( + std::string("5:0/1/2/3/t|"), + mergeBucketLists("", "0:5/1/2/3", true)); +} + +void BucketDBUpdaterTest::identity_update_of_single_replica_does_not_clear_trusted() { + CPPUNIT_ASSERT_EQUAL( + std::string("5:0/1/2/3/t|"), + mergeBucketLists("0:5/1/2/3", "0:5/1/2/3", true)); +} + +void BucketDBUpdaterTest::identity_update_of_diverging_untrusted_replicas_does_not_mark_any_as_trusted() { + CPPUNIT_ASSERT_EQUAL( + std::string("5:1/7/8/9/u,0/1/2/3/u|"), + mergeBucketLists("0:5/1/2/3|1:5/7/8/9", "0:5/1/2/3|1:5/7/8/9", true)); +} + +void BucketDBUpdaterTest::adding_diverging_replica_to_existing_trusted_does_not_remove_trusted() { + CPPUNIT_ASSERT_EQUAL( + std::string("5:1/2/3/4/u,0/1/2/3/t|"), + mergeBucketLists("0:5/1/2/3", "0:5/1/2/3|1:5/2/3/4", true)); +} + +void BucketDBUpdaterTest::batch_update_from_distributor_change_does_not_mark_diverging_replicas_as_trusted() { + // This differs from batch_update_of_existing_diverging_replicas_does_not_mark_any_as_trusted + // in that _all_ content nodes are considered outdated when distributor changes take place, + // and therefore a slightly different code path is taken. In particular, bucket info for + // outdated nodes gets removed before possibly being re-added (if present in the bucket info + // response). + CPPUNIT_ASSERT_EQUAL( + std::string("5:1/7/8/9/u,0/1/2/3/u|"), + mergeBucketLists( + lib::ClusterState("distributor:2 storage:3"), + "0:5/1/2/3|1:5/7/8/9", + lib::ClusterState("distributor:1 storage:3"), + "0:5/1/2/3|1:5/7/8/9", true)); +} + } // distributor } // storage diff --git a/storage/src/vespa/storage/bucketdb/bucketinfo.cpp b/storage/src/vespa/storage/bucketdb/bucketinfo.cpp index cbf39bef65f..4a58c9dac6c 100644 --- a/storage/src/vespa/storage/bucketdb/bucketinfo.cpp +++ b/storage/src/vespa/storage/bucketdb/bucketinfo.cpp @@ -165,7 +165,8 @@ BucketInfo::updateNode(const BucketCopy& newCopy) void BucketInfo::addNodes(const std::vector<BucketCopy>& newCopies, - const std::vector<uint16_t>& recommendedOrder) + const std::vector<uint16_t>& recommendedOrder, + TrustedUpdate update) { for (uint32_t i = 0; i < newCopies.size(); ++i) { BucketCopy* found = getNodeInternal(newCopies[i].getNode()); @@ -182,7 +183,9 @@ BucketInfo::addNodes(const std::vector<BucketCopy>& newCopies, std::sort(_nodes.begin(), _nodes.end(), Sorter(recommendedOrder)); - updateTrusted(); + if (update == TrustedUpdate::UPDATE) { + updateTrusted(); + } } void @@ -194,14 +197,16 @@ BucketInfo::addNode(const BucketCopy& newCopy, } bool -BucketInfo::removeNode(unsigned short node) +BucketInfo::removeNode(unsigned short node, TrustedUpdate update) { for (std::vector<BucketCopy>::iterator iter = _nodes.begin(); iter != _nodes.end(); iter++) { if (iter->getNode() == node) { _nodes.erase(iter); - updateTrusted(); + if (update == TrustedUpdate::UPDATE) { + updateTrusted(); + } return true; } } diff --git a/storage/src/vespa/storage/bucketdb/bucketinfo.h b/storage/src/vespa/storage/bucketdb/bucketinfo.h index 2bdd4e65cff..e4105be9427 100644 --- a/storage/src/vespa/storage/bucketdb/bucketinfo.h +++ b/storage/src/vespa/storage/bucketdb/bucketinfo.h @@ -9,6 +9,11 @@ namespace distributor { class DistributorTestUtil; } +enum class TrustedUpdate { + UPDATE, + DEFER +}; + class BucketInfo { private: @@ -84,9 +89,11 @@ public: listed. Any nodes not in this list will be order numerically afterward. @param replace If replace is true, replaces old ones that may exist. + @param update If true, will invoke updateTrusted() after replicas are added */ void addNodes(const std::vector<BucketCopy>& newCopies, - const std::vector<uint16_t>& recommendedOrder); + const std::vector<uint16_t>& recommendedOrder, + TrustedUpdate update = TrustedUpdate::UPDATE); /** Simplified API for the common case of inserting one node. See addNodes(). @@ -103,7 +110,7 @@ public: /** Returns true if the node existed and was removed. */ - bool removeNode(uint16_t node); + bool removeNode(uint16_t node, TrustedUpdate update = TrustedUpdate::UPDATE); /** * Returns the bucket copy struct for the given node, null if nonexisting @@ -113,7 +120,7 @@ public: /** * Returns the number of nodes this entry has. */ - uint32_t getNodeCount() const { return _nodes.size(); } + uint32_t getNodeCount() const noexcept { return static_cast<uint32_t>(_nodes.size()); } /** * Returns a list of the nodes this entry has. diff --git a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp index 5fc9c27c639..27b3198606c 100644 --- a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp +++ b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp @@ -412,7 +412,7 @@ PendingClusterState::insertInfo( _clusterInfo->getIdealStorageNodesForState( _newClusterState, _entries[range.first].bucketId)); - info->addNodes(copiesToAddOrUpdate, order); + info->addNodes(copiesToAddOrUpdate, order, TrustedUpdate::DEFER); LOG_BUCKET_OPERATION_NO_LOCK( _entries[range.first].bucketId, @@ -458,20 +458,26 @@ PendingClusterState::removeCopiesFromNodesThatWereRequested( const document::BucketId& bucketId) { bool updated = false; - for (uint32_t i = 0; i < e->getNodeCount(); ++i) { + for (uint32_t i = 0; i < e->getNodeCount();) { auto& info(e->getNodeRef(i)); const uint16_t entryNode(info.getNode()); // Don't remove an entry if it's been updated in the time after the // bucket info requests were sent, as this would erase newer state. + // Don't immediately update trusted state, as that could erroneously + // mark a single remaining replica as trusted even though there might + // be one or more additional replicas pending merge into the database. if (nodeIsOutdated(entryNode) && (info.getTimestamp() < _creationTimestamp) - && e->removeNode(entryNode)) + && e->removeNode(entryNode, TrustedUpdate::DEFER)) { LOG(spam, "Removed bucket %s from node %d", bucketId.toString().c_str(), entryNode); updated = true; + // After removing current node, getNodeRef(i) will point to the _next_ node, so don't increment `i`. + } else { + ++i; } } return updated; @@ -520,9 +526,13 @@ PendingClusterState::process(BucketDatabase::Entry& e) updated = true; } - // Remove bucket if we've previously removed all nodes from it - if (e->getNodeCount() == 0 && updated) { - _removedBuckets.push_back(bucketId); + if (updated) { + // Remove bucket if we've previously removed all nodes from it + if (e->getNodeCount() == 0) { + _removedBuckets.push_back(bucketId); + } else { + e.getBucketInfo().updateTrusted(); + } } LOG(spam, @@ -549,6 +559,7 @@ PendingClusterState::addToBucketDB(BucketDatabase& db, framework::MicroSecTime(_creationTimestamp) .getSeconds().getTime()); } + e.getBucketInfo().updateTrusted(); db.update(e); } diff --git a/storage/src/vespa/storage/distributor/pendingclusterstate.h b/storage/src/vespa/storage/distributor/pendingclusterstate.h index 939eb5fcd13..a741d5cec51 100644 --- a/storage/src/vespa/storage/distributor/pendingclusterstate.h +++ b/storage/src/vespa/storage/distributor/pendingclusterstate.h @@ -238,6 +238,9 @@ private: std::string requestNodesToString(); + // Returns whether at least one replica was removed from the entry. + // Does NOT implicitly update trusted status on remaining replicas; caller must do + // this explicitly. bool removeCopiesFromNodesThatWereRequested( BucketDatabase::Entry& e, const document::BucketId& bucketId); |