aboutsummaryrefslogtreecommitdiffstats
path: root/storage
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@verizonmedia.com>2019-12-20 12:39:47 +0000
committerTor Brede Vekterli <vekterli@verizonmedia.com>2019-12-20 12:49:27 +0000
commit4bbdb8486fb564a516fc8e5da1871fd2a37ffc91 (patch)
tree7b417a2872822b16c0ad0a7bb4a57a74fd813276 /storage
parentff2ec3d5751a959af277c896b4cdb3528c6f3cab (diff)
Ensure missing documents on replicas are not erroneously considered consistent
Introducing a new member in the category "stupid bugs that I should have added explicit tests for when adding the feature initially". There was ambiguity in the GetOperation code where a timestamp sentinel value of zero was used to denote not having received any replies yet, but where a timestamp of zero also means "document not found on replica". This means that if the first reply was from a replica _without_ a document and the second reply was from a replica _with_ a document, the code would act as if the first reply effectively did not exist. Consequently the Get operation would be tagged as consistent. This had very bad consequences for the two-phase update operation logic that relied on this information to be correct. This change ensures there is no ambiguity between not having received a reply and having a received a reply with a missing document.
Diffstat (limited to 'storage')
-rw-r--r--storage/src/tests/distributor/getoperationtest.cpp15
-rw-r--r--storage/src/tests/distributor/twophaseupdateoperationtest.cpp16
-rw-r--r--storage/src/vespa/storage/distributor/operations/external/getoperation.cpp10
-rw-r--r--storage/src/vespa/storage/distributor/operations/external/getoperation.h3
4 files changed, 38 insertions, 6 deletions
diff --git a/storage/src/tests/distributor/getoperationtest.cpp b/storage/src/tests/distributor/getoperationtest.cpp
index 72a124d45b4..db790639869 100644
--- a/storage/src/tests/distributor/getoperationtest.cpp
+++ b/storage/src/tests/distributor/getoperationtest.cpp
@@ -363,6 +363,21 @@ TEST_F(GetOperationTest, not_found) {
EXPECT_TRUE(last_reply_had_consistent_replicas());
}
+TEST_F(GetOperationTest, not_found_on_subset_of_replicas_marks_get_as_inconsistent) {
+ setClusterState("distributor:1 storage:2");
+ addNodesToBucketDB(bucketId, "0=100,1=200");
+ sendGet();
+ ASSERT_EQ("Get => 0,Get => 1", _sender.getCommands(true));
+
+ ASSERT_NO_FATAL_FAILURE(sendReply(0, api::ReturnCode::OK, "newauthor", 101));
+ ASSERT_NO_FATAL_FAILURE(sendReply(1, api::ReturnCode::OK, "", 0)); // Not found.
+
+ ASSERT_EQ("GetReply(BucketId(0x0000000000000000), id:ns:text/html::uri, "
+ "timestamp 101) ReturnCode(NONE)",
+ _sender.getLastReply());
+ EXPECT_FALSE(last_reply_had_consistent_replicas());
+}
+
TEST_F(GetOperationTest, resend_on_storage_failure) {
setClusterState("distributor:1 storage:3");
diff --git a/storage/src/tests/distributor/twophaseupdateoperationtest.cpp b/storage/src/tests/distributor/twophaseupdateoperationtest.cpp
index 8f728c39bb8..c8bfcd754f7 100644
--- a/storage/src/tests/distributor/twophaseupdateoperationtest.cpp
+++ b/storage/src/tests/distributor/twophaseupdateoperationtest.cpp
@@ -1072,6 +1072,22 @@ TEST_F(TwoPhaseUpdateOperationTest, fast_path_not_restarted_if_replica_set_alter
ASSERT_EQ("Put => 1,Put => 2,Put => 0", sender.getCommands(true, false, 2));
}
+TEST_F(TwoPhaseUpdateOperationTest, fast_path_not_restarted_if_document_not_found_on_a_replica_node) {
+ setupDistributor(2, 2, "storage:2 distributor:1");
+ getConfig().set_update_fast_path_restart_enabled(true);
+
+ std::shared_ptr<TwoPhaseUpdateOperation> cb(sendUpdate("0=1/2/3,1=2/3/4")); // Inconsistent replicas.
+ DistributorMessageSenderStub sender;
+ cb->start(sender, framework::MilliSecTime(0));
+
+ ASSERT_EQ("Get => 0,Get => 1", sender.getCommands(true));
+ replyToGet(*cb, sender, 0, Timestamp(0), false);
+ replyToGet(*cb, sender, 1, Timestamp(500));
+
+ // Should _not_ send Update operations!
+ ASSERT_EQ("Put => 1,Put => 0", sender.getCommands(true, false, 2));
+}
+
// XXX currently differs in behavior from content nodes in that updates for
// document IDs without explicit doctypes will _not_ be auto-failed on the
// distributor.
diff --git a/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp
index a0177db3d3c..52b6da09a76 100644
--- a/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp
+++ b/storage/src/vespa/storage/distributor/operations/external/getoperation.cpp
@@ -55,7 +55,7 @@ GetOperation::GetOperation(DistributorComponent& manager,
_msg(std::move(msg)),
_returnCode(api::ReturnCode::OK),
_doc(),
- _lastModified(0),
+ _lastModified(),
_metric(metric),
_operationTimer(manager.getClock()),
_has_replica_inconsistency(false)
@@ -151,17 +151,17 @@ GetOperation::onReceive(DistributorMessageSender& sender, const std::shared_ptr<
response.second[i].returnCode = getreply->getResult();
if (getreply->getResult().success()) {
- if ((_lastModified != 0) && (getreply->getLastModifiedTimestamp() != _lastModified)) {
+ if (_lastModified.has_value() && (getreply->getLastModifiedTimestamp() != *_lastModified)) {
// At least two document versions returned had different timestamps.
_has_replica_inconsistency = true; // This is a one-way toggle.
}
- if (getreply->getLastModifiedTimestamp() > _lastModified) {
+ if (!_lastModified.has_value() || getreply->getLastModifiedTimestamp() > *_lastModified) {
_returnCode = getreply->getResult();
_lastModified = getreply->getLastModifiedTimestamp();
_doc = getreply->getDocument();
}
} else {
- if (_lastModified == 0) {
+ if (!_lastModified.has_value()) {
_returnCode = getreply->getResult();
}
if (!all_bucket_metadata_initially_consistent()) {
@@ -216,7 +216,7 @@ void
GetOperation::sendReply(DistributorMessageSender& sender)
{
if (_msg.get()) {
- auto repl = std::make_shared<api::GetReply>(*_msg, _doc, _lastModified, !_has_replica_inconsistency);
+ auto repl = std::make_shared<api::GetReply>(*_msg, _doc, _lastModified.value_or(0), !_has_replica_inconsistency);
repl->setResult(_returnCode);
update_internal_metrics();
sender.sendReply(repl);
diff --git a/storage/src/vespa/storage/distributor/operations/external/getoperation.h b/storage/src/vespa/storage/distributor/operations/external/getoperation.h
index 0b3027b6a9c..69f7b5580f4 100644
--- a/storage/src/vespa/storage/distributor/operations/external/getoperation.h
+++ b/storage/src/vespa/storage/distributor/operations/external/getoperation.h
@@ -6,6 +6,7 @@
#include <vespa/storage/bucketdb/bucketdatabase.h>
#include <vespa/storageapi/messageapi/storagemessage.h>
#include <vespa/storageframework/generic/clock/timer.h>
+#include <optional>
namespace document { class Document; }
@@ -88,7 +89,7 @@ private:
api::ReturnCode _returnCode;
std::shared_ptr<document::Document> _doc;
- api::Timestamp _lastModified;
+ std::optional<api::Timestamp> _lastModified;
PersistenceOperationMetricSet& _metric;
framework::MilliSecTimer _operationTimer;