summaryrefslogtreecommitdiffstats
path: root/storage
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahoo-inc.com>2017-04-20 16:38:11 +0200
committerGitHub <noreply@github.com>2017-04-20 16:38:11 +0200
commit093ebf49c8c2516dcd34ce97eda96794d6414509 (patch)
tree943a091505a4517cc28a175b29b2686cc6d33832 /storage
parent656c30b4d6c2ba47ec9818a7bf2f7c402f9daf6b (diff)
parent313e155efceb3a81efcc6ac7caaecc9b9af0a20d (diff)
Merge pull request #2212 from yahoo/vekterli/do-not-erroneously-mark-replica-as-trusted-during-bucket-info-merge
Do not erroneously mark replicas as trusted when merging bucket info
Diffstat (limited to 'storage')
-rw-r--r--storage/src/tests/bucketdb/bucketinfotest.cpp45
-rw-r--r--storage/src/tests/distributor/bucketdbupdatertest.cpp82
-rw-r--r--storage/src/vespa/storage/bucketdb/bucketinfo.cpp13
-rw-r--r--storage/src/vespa/storage/bucketdb/bucketinfo.h13
-rw-r--r--storage/src/vespa/storage/distributor/pendingclusterstate.cpp23
-rw-r--r--storage/src/vespa/storage/distributor/pendingclusterstate.h3
6 files changed, 166 insertions, 13 deletions
diff --git a/storage/src/tests/bucketdb/bucketinfotest.cpp b/storage/src/tests/bucketdb/bucketinfotest.cpp
index 9ee5895a41f..a625f6303ea 100644
--- a/storage/src/tests/bucketdb/bucketinfotest.cpp
+++ b/storage/src/tests/bucketdb/bucketinfotest.cpp
@@ -22,6 +22,10 @@ struct BucketInfoTest : public CppUnit::TestFixture {
void testTrustedResetWhenCopiesBecomeInconsistent();
void testTrustedResetWhenTrustedCopiesGoOutOfSync();
void testTrustedNotResetWhenNonTrustedCopiesStillOutOfSync();
+ void add_nodes_can_immediately_update_trusted_flag();
+ void add_nodes_can_defer_update_of_trusted_flag();
+ void remove_node_can_immediately_update_trusted_flag();
+ void remove_node_can_defer_update_of_trusted_flag();
CPPUNIT_TEST_SUITE(BucketInfoTest);
CPPUNIT_TEST(testBucketInfoEntriesWithNewestTimestampsAreKept);
@@ -31,6 +35,10 @@ struct BucketInfoTest : public CppUnit::TestFixture {
CPPUNIT_TEST_IGNORED(testTrustedResetWhenCopiesBecomeInconsistent);
CPPUNIT_TEST(testTrustedResetWhenTrustedCopiesGoOutOfSync);
CPPUNIT_TEST(testTrustedNotResetWhenNonTrustedCopiesStillOutOfSync);
+ CPPUNIT_TEST(add_nodes_can_immediately_update_trusted_flag);
+ CPPUNIT_TEST(add_nodes_can_defer_update_of_trusted_flag);
+ CPPUNIT_TEST(remove_node_can_immediately_update_trusted_flag);
+ CPPUNIT_TEST(remove_node_can_defer_update_of_trusted_flag);
CPPUNIT_TEST_SUITE_END();
};
@@ -195,6 +203,43 @@ BucketInfoTest::testTrustedNotResetWhenNonTrustedCopiesStillOutOfSync()
CPPUNIT_ASSERT(!info.getNode(2)->trusted());
}
+void BucketInfoTest::add_nodes_can_immediately_update_trusted_flag() {
+ BucketInfo info;
+ std::vector<uint16_t> order;
+ info.addNodes({BucketCopy(0, 0, api::BucketInfo(10, 100, 1000))}, order, TrustedUpdate::UPDATE);
+ // Only one replica, so implicitly trusted iff trusted flag update is invoked.
+ CPPUNIT_ASSERT(info.getNode(0)->trusted());
+}
+
+void BucketInfoTest::add_nodes_can_defer_update_of_trusted_flag() {
+ BucketInfo info;
+ std::vector<uint16_t> order;
+ info.addNodes({BucketCopy(0, 0, api::BucketInfo(10, 100, 1000))}, order, TrustedUpdate::DEFER);
+ CPPUNIT_ASSERT(!info.getNode(0)->trusted());
+}
+
+void BucketInfoTest::remove_node_can_immediately_update_trusted_flag() {
+ BucketInfo info;
+ std::vector<uint16_t> order;
+ info.addNodes({BucketCopy(0, 0, api::BucketInfo(10, 100, 1000)),
+ BucketCopy(0, 1, api::BucketInfo(20, 200, 2000))},
+ order, TrustedUpdate::UPDATE);
+ CPPUNIT_ASSERT(!info.getNode(0)->trusted());
+ info.removeNode(1, TrustedUpdate::UPDATE);
+ // Only one replica remaining after remove, so implicitly trusted iff trusted flag update is invoked.
+ CPPUNIT_ASSERT(info.getNode(0)->trusted());
+}
+
+void BucketInfoTest::remove_node_can_defer_update_of_trusted_flag() {
+ BucketInfo info;
+ std::vector<uint16_t> order;
+ info.addNodes({BucketCopy(0, 0, api::BucketInfo(10, 100, 1000)),
+ BucketCopy(0, 1, api::BucketInfo(20, 200, 2000))},
+ order, TrustedUpdate::UPDATE);
+ info.removeNode(1, TrustedUpdate::DEFER);
+ CPPUNIT_ASSERT(!info.getNode(0)->trusted());
+}
+
}
} // storage
diff --git a/storage/src/tests/distributor/bucketdbupdatertest.cpp b/storage/src/tests/distributor/bucketdbupdatertest.cpp
index d89ce2a03d0..7ad083dc5a1 100644
--- a/storage/src/tests/distributor/bucketdbupdatertest.cpp
+++ b/storage/src/tests/distributor/bucketdbupdatertest.cpp
@@ -73,6 +73,13 @@ class BucketDBUpdaterTest : public CppUnit::TestFixture,
CPPUNIT_TEST(transition_time_reset_across_non_preempting_state_changes);
CPPUNIT_TEST(transition_time_tracked_for_distribution_config_change);
CPPUNIT_TEST(transition_time_tracked_across_preempted_transitions);
+ CPPUNIT_TEST(batch_update_of_existing_diverging_replicas_does_not_mark_any_as_trusted);
+ CPPUNIT_TEST(batch_add_of_new_diverging_replicas_does_not_mark_any_as_trusted);
+ CPPUNIT_TEST(batch_add_with_single_resulting_replica_implicitly_marks_as_trusted);
+ CPPUNIT_TEST(identity_update_of_single_replica_does_not_clear_trusted);
+ CPPUNIT_TEST(identity_update_of_diverging_untrusted_replicas_does_not_mark_any_as_trusted);
+ CPPUNIT_TEST(adding_diverging_replica_to_existing_trusted_does_not_remove_trusted);
+ CPPUNIT_TEST(batch_update_from_distributor_change_does_not_mark_diverging_replicas_as_trusted);
CPPUNIT_TEST_SUITE_END();
protected:
@@ -127,6 +134,13 @@ protected:
void transition_time_reset_across_non_preempting_state_changes();
void transition_time_tracked_for_distribution_config_change();
void transition_time_tracked_across_preempted_transitions();
+ void batch_update_of_existing_diverging_replicas_does_not_mark_any_as_trusted();
+ void batch_add_of_new_diverging_replicas_does_not_mark_any_as_trusted();
+ void batch_add_with_single_resulting_replica_implicitly_marks_as_trusted();
+ void identity_update_of_single_replica_does_not_clear_trusted();
+ void identity_update_of_diverging_untrusted_replicas_does_not_mark_any_as_trusted();
+ void adding_diverging_replica_to_existing_trusted_does_not_remove_trusted();
+ void batch_update_from_distributor_change_does_not_mark_diverging_replicas_as_trusted();
bool bucketExistsThatHasNode(int bucketCount, uint16_t node) const;
@@ -2414,5 +2428,73 @@ BucketDBUpdaterTest::transition_time_tracked_across_preempted_transitions()
CPPUNIT_ASSERT_EQUAL(uint64_t(8000), lastTransitionTimeInMillis());
}
+/*
+ * Brief reminder on test DSL for checking bucket merge operations:
+ *
+ * mergeBucketLists() takes as input strings of the format
+ * <node>:<raw bucket id>/<checksum>/<num docs>/<doc size>|<node>:
+ * and returns a string describing the bucket DB post-merge with the format
+ * <raw bucket id>:<node>/<checksum>/<num docs>/<doc size>,<node>:....|<raw bucket id>:....
+ *
+ * Yes, the order of node<->bucket id is reversed between the two, perhaps to make sure you're awake.
+ */
+
+void BucketDBUpdaterTest::batch_update_of_existing_diverging_replicas_does_not_mark_any_as_trusted() {
+ // Replacing bucket information for content node 0 should not mark existing
+ // untrusted replica as trusted as a side effect.
+ CPPUNIT_ASSERT_EQUAL(
+ std::string("5:1/7/8/9/u,0/1/2/3/u|"),
+ mergeBucketLists(
+ lib::ClusterState("distributor:1 storage:3 .0.s:i"),
+ "0:5/0/0/0|1:5/7/8/9",
+ lib::ClusterState("distributor:1 storage:3 .0.s:u"),
+ "0:5/1/2/3|1:5/7/8/9", true));
+}
+
+void BucketDBUpdaterTest::batch_add_of_new_diverging_replicas_does_not_mark_any_as_trusted() {
+ CPPUNIT_ASSERT_EQUAL(
+ std::string("5:1/7/8/9/u,0/1/2/3/u|"),
+ mergeBucketLists("", "0:5/1/2/3|1:5/7/8/9", true));
+}
+
+void BucketDBUpdaterTest::batch_add_with_single_resulting_replica_implicitly_marks_as_trusted() {
+ CPPUNIT_ASSERT_EQUAL(
+ std::string("5:0/1/2/3/t|"),
+ mergeBucketLists("", "0:5/1/2/3", true));
+}
+
+void BucketDBUpdaterTest::identity_update_of_single_replica_does_not_clear_trusted() {
+ CPPUNIT_ASSERT_EQUAL(
+ std::string("5:0/1/2/3/t|"),
+ mergeBucketLists("0:5/1/2/3", "0:5/1/2/3", true));
+}
+
+void BucketDBUpdaterTest::identity_update_of_diverging_untrusted_replicas_does_not_mark_any_as_trusted() {
+ CPPUNIT_ASSERT_EQUAL(
+ std::string("5:1/7/8/9/u,0/1/2/3/u|"),
+ mergeBucketLists("0:5/1/2/3|1:5/7/8/9", "0:5/1/2/3|1:5/7/8/9", true));
+}
+
+void BucketDBUpdaterTest::adding_diverging_replica_to_existing_trusted_does_not_remove_trusted() {
+ CPPUNIT_ASSERT_EQUAL(
+ std::string("5:1/2/3/4/u,0/1/2/3/t|"),
+ mergeBucketLists("0:5/1/2/3", "0:5/1/2/3|1:5/2/3/4", true));
+}
+
+void BucketDBUpdaterTest::batch_update_from_distributor_change_does_not_mark_diverging_replicas_as_trusted() {
+ // This differs from batch_update_of_existing_diverging_replicas_does_not_mark_any_as_trusted
+ // in that _all_ content nodes are considered outdated when distributor changes take place,
+ // and therefore a slightly different code path is taken. In particular, bucket info for
+ // outdated nodes gets removed before possibly being re-added (if present in the bucket info
+ // response).
+ CPPUNIT_ASSERT_EQUAL(
+ std::string("5:1/7/8/9/u,0/1/2/3/u|"),
+ mergeBucketLists(
+ lib::ClusterState("distributor:2 storage:3"),
+ "0:5/1/2/3|1:5/7/8/9",
+ lib::ClusterState("distributor:1 storage:3"),
+ "0:5/1/2/3|1:5/7/8/9", true));
+}
+
} // distributor
} // storage
diff --git a/storage/src/vespa/storage/bucketdb/bucketinfo.cpp b/storage/src/vespa/storage/bucketdb/bucketinfo.cpp
index cbf39bef65f..4a58c9dac6c 100644
--- a/storage/src/vespa/storage/bucketdb/bucketinfo.cpp
+++ b/storage/src/vespa/storage/bucketdb/bucketinfo.cpp
@@ -165,7 +165,8 @@ BucketInfo::updateNode(const BucketCopy& newCopy)
void
BucketInfo::addNodes(const std::vector<BucketCopy>& newCopies,
- const std::vector<uint16_t>& recommendedOrder)
+ const std::vector<uint16_t>& recommendedOrder,
+ TrustedUpdate update)
{
for (uint32_t i = 0; i < newCopies.size(); ++i) {
BucketCopy* found = getNodeInternal(newCopies[i].getNode());
@@ -182,7 +183,9 @@ BucketInfo::addNodes(const std::vector<BucketCopy>& newCopies,
std::sort(_nodes.begin(), _nodes.end(), Sorter(recommendedOrder));
- updateTrusted();
+ if (update == TrustedUpdate::UPDATE) {
+ updateTrusted();
+ }
}
void
@@ -194,14 +197,16 @@ BucketInfo::addNode(const BucketCopy& newCopy,
}
bool
-BucketInfo::removeNode(unsigned short node)
+BucketInfo::removeNode(unsigned short node, TrustedUpdate update)
{
for (std::vector<BucketCopy>::iterator iter = _nodes.begin();
iter != _nodes.end();
iter++) {
if (iter->getNode() == node) {
_nodes.erase(iter);
- updateTrusted();
+ if (update == TrustedUpdate::UPDATE) {
+ updateTrusted();
+ }
return true;
}
}
diff --git a/storage/src/vespa/storage/bucketdb/bucketinfo.h b/storage/src/vespa/storage/bucketdb/bucketinfo.h
index 2bdd4e65cff..e4105be9427 100644
--- a/storage/src/vespa/storage/bucketdb/bucketinfo.h
+++ b/storage/src/vespa/storage/bucketdb/bucketinfo.h
@@ -9,6 +9,11 @@ namespace distributor {
class DistributorTestUtil;
}
+enum class TrustedUpdate {
+ UPDATE,
+ DEFER
+};
+
class BucketInfo
{
private:
@@ -84,9 +89,11 @@ public:
listed. Any nodes not in this list will be order numerically afterward.
@param replace If replace is true, replaces old ones that may exist.
+ @param update If true, will invoke updateTrusted() after replicas are added
*/
void addNodes(const std::vector<BucketCopy>& newCopies,
- const std::vector<uint16_t>& recommendedOrder);
+ const std::vector<uint16_t>& recommendedOrder,
+ TrustedUpdate update = TrustedUpdate::UPDATE);
/**
Simplified API for the common case of inserting one node. See addNodes().
@@ -103,7 +110,7 @@ public:
/**
Returns true if the node existed and was removed.
*/
- bool removeNode(uint16_t node);
+ bool removeNode(uint16_t node, TrustedUpdate update = TrustedUpdate::UPDATE);
/**
* Returns the bucket copy struct for the given node, null if nonexisting
@@ -113,7 +120,7 @@ public:
/**
* Returns the number of nodes this entry has.
*/
- uint32_t getNodeCount() const { return _nodes.size(); }
+ uint32_t getNodeCount() const noexcept { return static_cast<uint32_t>(_nodes.size()); }
/**
* Returns a list of the nodes this entry has.
diff --git a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp
index 5fc9c27c639..27b3198606c 100644
--- a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp
+++ b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp
@@ -412,7 +412,7 @@ PendingClusterState::insertInfo(
_clusterInfo->getIdealStorageNodesForState(
_newClusterState,
_entries[range.first].bucketId));
- info->addNodes(copiesToAddOrUpdate, order);
+ info->addNodes(copiesToAddOrUpdate, order, TrustedUpdate::DEFER);
LOG_BUCKET_OPERATION_NO_LOCK(
_entries[range.first].bucketId,
@@ -458,20 +458,26 @@ PendingClusterState::removeCopiesFromNodesThatWereRequested(
const document::BucketId& bucketId)
{
bool updated = false;
- for (uint32_t i = 0; i < e->getNodeCount(); ++i) {
+ for (uint32_t i = 0; i < e->getNodeCount();) {
auto& info(e->getNodeRef(i));
const uint16_t entryNode(info.getNode());
// Don't remove an entry if it's been updated in the time after the
// bucket info requests were sent, as this would erase newer state.
+ // Don't immediately update trusted state, as that could erroneously
+ // mark a single remaining replica as trusted even though there might
+ // be one or more additional replicas pending merge into the database.
if (nodeIsOutdated(entryNode)
&& (info.getTimestamp() < _creationTimestamp)
- && e->removeNode(entryNode))
+ && e->removeNode(entryNode, TrustedUpdate::DEFER))
{
LOG(spam,
"Removed bucket %s from node %d",
bucketId.toString().c_str(),
entryNode);
updated = true;
+ // After removing current node, getNodeRef(i) will point to the _next_ node, so don't increment `i`.
+ } else {
+ ++i;
}
}
return updated;
@@ -520,9 +526,13 @@ PendingClusterState::process(BucketDatabase::Entry& e)
updated = true;
}
- // Remove bucket if we've previously removed all nodes from it
- if (e->getNodeCount() == 0 && updated) {
- _removedBuckets.push_back(bucketId);
+ if (updated) {
+ // Remove bucket if we've previously removed all nodes from it
+ if (e->getNodeCount() == 0) {
+ _removedBuckets.push_back(bucketId);
+ } else {
+ e.getBucketInfo().updateTrusted();
+ }
}
LOG(spam,
@@ -549,6 +559,7 @@ PendingClusterState::addToBucketDB(BucketDatabase& db,
framework::MicroSecTime(_creationTimestamp)
.getSeconds().getTime());
}
+ e.getBucketInfo().updateTrusted();
db.update(e);
}
diff --git a/storage/src/vespa/storage/distributor/pendingclusterstate.h b/storage/src/vespa/storage/distributor/pendingclusterstate.h
index 939eb5fcd13..a741d5cec51 100644
--- a/storage/src/vespa/storage/distributor/pendingclusterstate.h
+++ b/storage/src/vespa/storage/distributor/pendingclusterstate.h
@@ -238,6 +238,9 @@ private:
std::string requestNodesToString();
+ // Returns whether at least one replica was removed from the entry.
+ // Does NOT implicitly update trusted status on remaining replicas; caller must do
+ // this explicitly.
bool removeCopiesFromNodesThatWereRequested(
BucketDatabase::Entry& e,
const document::BucketId& bucketId);