diff options
author | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2019-12-20 12:39:47 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2019-12-20 12:49:27 +0000 |
commit | 4bbdb8486fb564a516fc8e5da1871fd2a37ffc91 (patch) | |
tree | 7b417a2872822b16c0ad0a7bb4a57a74fd813276 | |
parent | ff2ec3d5751a959af277c896b4cdb3528c6f3cab (diff) |
Ensure missing documents on replicas are not erroneously considered consistent
Introducing a new member in the category "stupid bugs that I should have
added explicit tests for when adding the feature initially".
There was ambiguity in the GetOperation code where a timestamp sentinel
value of zero was used to denote not having received any replies yet,
but where a timestamp of zero also means "document not found on replica".
This means that if the first reply was from a replica _without_ a document
and the second reply was from a replica _with_ a document, the code
would act as if the first reply effectively did not exist. Consequently
the Get operation would be tagged as consistent. This had very bad
consequences for the two-phase update operation logic that relied on
this information to be correct.
This change ensures there is no ambiguity between not having received
a reply and having a received a reply with a missing document.
4 files changed, 38 insertions, 6 deletions
diff --git a/storage/src/tests/distributor/getoperationtest.cpp b/storage/src/tests/distributor/getoperationtest.cpp index 72a124d45b4..db790639869 100644 --- a/storage/src/tests/distributor/getoperationtest.cpp +++ b/storage/src/tests/distributor/getoperationtest.cpp @@ -363,6 +363,21 @@ TEST_F(GetOperationTest, not_found) { EXPECT_TRUE(last_reply_had_consistent_replicas()); } +TEST_F(GetOperationTest, not_found_on_subset_of_replicas_marks_get_as_inconsistent) { + setClusterState("distributor:1 storage:2"); + addNodesToBucketDB(bucketId, "0=100,1=200"); + sendGet(); + ASSERT_EQ("Get => 0,Get => 1", _sender.getCommands(true)); + + ASSERT_NO_FATAL_FAILURE(sendReply(0, api::ReturnCode::OK, "newauthor", 101)); + ASSERT_NO_FATAL_FAILURE(sendReply(1, api::ReturnCode::OK, "", 0)); // Not found. + + ASSERT_EQ("GetReply(BucketId(0x0000000000000000), id:ns:text/html::uri, " + "timestamp 101) ReturnCode(NONE)", + _sender.getLastReply()); + EXPECT_FALSE(last_reply_had_consistent_replicas()); +} + TEST_F(GetOperationTest, resend_on_storage_failure) { setClusterState("distributor:1 storage:3"); diff --git a/storage/src/tests/distributor/twophaseupdateoperationtest.cpp b/storage/src/tests/distributor/twophaseupdateoperationtest.cpp index 8f728c39bb8..c8bfcd754f7 100644 --- a/storage/src/tests/distributor/twophaseupdateoperationtest.cpp +++ b/storage/src/tests/distributor/twophaseupdateoperationtest.cpp @@ -1072,6 +1072,22 @@ TEST_F(TwoPhaseUpdateOperationTest, fast_path_not_restarted_if_replica_set_alter ASSERT_EQ("Put => 1,Put => 2,Put => 0", sender.getCommands(true, false, 2)); } +TEST_F(TwoPhaseUpdateOperationTest, fast_path_not_restarted_if_document_not_found_on_a_replica_node) { + setupDistributor(2, 2, "storage:2 distributor:1"); + getConfig().set_update_fast_path_restart_enabled(true); + + std::shared_ptr<TwoPhaseUpdateOperation> cb(sendUpdate("0=1/2/3,1=2/3/4")); // Inconsistent replicas. + DistributorMessageSenderStub sender; + cb->start(sender, framework::MilliSecTime(0)); + + ASSERT_EQ("Get => 0,Get => 1", sender.getCommands(true)); + replyToGet(*cb, sender, 0, Timestamp(0), false); + replyToGet(*cb, sender, 1, Timestamp(500)); + + // Should _not_ send Update operations! + ASSERT_EQ("Put => 1,Put => 0", sender.getCommands(true, false, 2)); +} + // XXX currently differs in behavior from content nodes in that updates for // document IDs without explicit doctypes will _not_ be auto-failed on the // distributor. diff --git a/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp index a0177db3d3c..52b6da09a76 100644 --- a/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp @@ -55,7 +55,7 @@ GetOperation::GetOperation(DistributorComponent& manager, _msg(std::move(msg)), _returnCode(api::ReturnCode::OK), _doc(), - _lastModified(0), + _lastModified(), _metric(metric), _operationTimer(manager.getClock()), _has_replica_inconsistency(false) @@ -151,17 +151,17 @@ GetOperation::onReceive(DistributorMessageSender& sender, const std::shared_ptr< response.second[i].returnCode = getreply->getResult(); if (getreply->getResult().success()) { - if ((_lastModified != 0) && (getreply->getLastModifiedTimestamp() != _lastModified)) { + if (_lastModified.has_value() && (getreply->getLastModifiedTimestamp() != *_lastModified)) { // At least two document versions returned had different timestamps. _has_replica_inconsistency = true; // This is a one-way toggle. } - if (getreply->getLastModifiedTimestamp() > _lastModified) { + if (!_lastModified.has_value() || getreply->getLastModifiedTimestamp() > *_lastModified) { _returnCode = getreply->getResult(); _lastModified = getreply->getLastModifiedTimestamp(); _doc = getreply->getDocument(); } } else { - if (_lastModified == 0) { + if (!_lastModified.has_value()) { _returnCode = getreply->getResult(); } if (!all_bucket_metadata_initially_consistent()) { @@ -216,7 +216,7 @@ void GetOperation::sendReply(DistributorMessageSender& sender) { if (_msg.get()) { - auto repl = std::make_shared<api::GetReply>(*_msg, _doc, _lastModified, !_has_replica_inconsistency); + auto repl = std::make_shared<api::GetReply>(*_msg, _doc, _lastModified.value_or(0), !_has_replica_inconsistency); repl->setResult(_returnCode); update_internal_metrics(); sender.sendReply(repl); diff --git a/storage/src/vespa/storage/distributor/operations/external/getoperation.h b/storage/src/vespa/storage/distributor/operations/external/getoperation.h index 0b3027b6a9c..69f7b5580f4 100644 --- a/storage/src/vespa/storage/distributor/operations/external/getoperation.h +++ b/storage/src/vespa/storage/distributor/operations/external/getoperation.h @@ -6,6 +6,7 @@ #include <vespa/storage/bucketdb/bucketdatabase.h> #include <vespa/storageapi/messageapi/storagemessage.h> #include <vespa/storageframework/generic/clock/timer.h> +#include <optional> namespace document { class Document; } @@ -88,7 +89,7 @@ private: api::ReturnCode _returnCode; std::shared_ptr<document::Document> _doc; - api::Timestamp _lastModified; + std::optional<api::Timestamp> _lastModified; PersistenceOperationMetricSet& _metric; framework::MilliSecTimer _operationTimer; |