summaryrefslogtreecommitdiffstats
path: root/storage
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@verizonmedia.com>2020-04-16 13:07:03 +0000
committerTor Brede Vekterli <vekterli@verizonmedia.com>2020-04-16 13:24:19 +0000
commit7013079db0062d1d23fbc503cc6d3ce69e570f54 (patch)
tree2b4eb67cb1a3b6871b02a98590d5927b9756e6a8 /storage
parent43f1f598ad89f7a787a311ddb8a4e86dc0958055 (diff)
Remove redundant bucket DB lookup in persistence reply handling
Bucket DB updating happened unconditionally anyway; this was only used for failing operations in an overly pessimistic way. Removing this lookup has two benefits: - Less CPU spent in DB - Less impact expected during feeding during node state transitions since fewer operations will have to be needlessly retried by the client. Rationale: an operation towards a given bucket completes (i.e. is ACKed by all its replica nodes) at time t and the bucket is removed from the DB at time T. There is no fundamental change in correctness or behavior from the client's perspective if the order of events is tT or Tt. Both are equally valid, as the state transition edge happens independently of any reply processing.
Diffstat (limited to 'storage')
-rw-r--r--storage/src/tests/distributor/putoperationtest.cpp15
-rw-r--r--storage/src/vespa/storage/distributor/persistencemessagetracker.cpp48
-rw-r--r--storage/src/vespa/storage/distributor/persistencemessagetracker.h1
3 files changed, 10 insertions, 54 deletions
diff --git a/storage/src/tests/distributor/putoperationtest.cpp b/storage/src/tests/distributor/putoperationtest.cpp
index bd76b559490..f5bacfae252 100644
--- a/storage/src/tests/distributor/putoperationtest.cpp
+++ b/storage/src/tests/distributor/putoperationtest.cpp
@@ -182,7 +182,7 @@ TEST_F(PutOperationTest, do_not_send_inline_split_if_not_configured) {
_sender.getCommands(true, true));
}
-TEST_F(PutOperationTest, node_removed_on_reply) {
+TEST_F(PutOperationTest, return_success_if_op_acked_on_all_replicas_even_if_bucket_concurrently_removed_from_db) {
setupDistributor(2, 2, "storage:2 distributor:1");
createAndSendSampleDocument(TIMEOUT);
@@ -194,14 +194,19 @@ TEST_F(PutOperationTest, node_removed_on_reply) {
getExternalOperationHandler().removeNodeFromDB(makeDocumentBucket(document::BucketId(16, 0x1dd4)), 0);
+ // If we get an ACK from the backend nodes, the operation has been persisted OK.
+ // Even if the bucket has been removed from the DB in the meantime (usually would
+ // happen due to ownership changes) there is no reason for us to trigger a client
+ // resend in this scenario.
+ // If a node goes down (as opposed to distributor ownership transfer) and therefore
+ // has its replicas removed from the DB, this by definition has happened-after
+ // the ACK was sent from the node, so returning OK here still maintains the
+ // backend persistence property.
sendReply(0);
sendReply(1);
ASSERT_EQ("PutReply(id:test:testdoctype1::, BucketId(0x0000000000000000), "
- "timestamp 100) ReturnCode(BUCKET_DELETED, "
- "Bucket(BucketSpace(0x0000000000000001), BucketId(0x4000000000001dd4)) was deleted from nodes [0] "
- "after message was sent but before it was done. "
- "Sent to [0,1])",
+ "timestamp 100) ReturnCode(NONE)",
_sender.getLastReply());
}
diff --git a/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp b/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp
index bc0baf2faf7..d98a2ce6a74 100644
--- a/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp
+++ b/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp
@@ -150,52 +150,6 @@ PersistenceMessageTrackerImpl::canSendReplyEarly() const
}
void
-PersistenceMessageTrackerImpl::checkCopiesDeleted()
-{
- if ( ! _reply) {
- return;
- }
-
- // Don't check the buckets that have been remapped here, as we will
- // create them.
- const auto &bucketSpaceRepo(_manager.getBucketSpaceRepo());
- for (const auto & entry : _bucketInfo) {
- const auto &bucketSpace(bucketSpaceRepo.get(entry.first.getBucketSpace()));
- const auto &bucketDb(bucketSpace.getBucketDatabase());
- BucketDatabase::Entry dbentry = bucketDb.get(entry.first.getBucketId());
-
- if (!dbentry.valid()) {
- continue;
- }
-
- std::vector<uint16_t> missing;
- std::vector<uint16_t> total;
-
- for (const BucketCopy & bucketCopy : entry.second) {
- if (dbentry->getNode(bucketCopy.getNode()) == nullptr) {
- missing.push_back(bucketCopy.getNode());
- }
-
- total.push_back(bucketCopy.getNode());
- }
-
- if (!missing.empty()) {
- std::ostringstream msg;
- msg << entry.first.toString() << " was deleted from nodes ["
- << commaSeparated(missing)
- << "] after message was sent but before it was done. Sent to ["
- << commaSeparated(total)
- << "]";
-
- LOG(debug, "%s", msg.str().c_str());
- _reply->setResult(api::ReturnCode(api::ReturnCode::BUCKET_DELETED,
- msg.str()));
- break;
- }
- }
-}
-
-void
PersistenceMessageTrackerImpl::addBucketInfoFromReply(
uint16_t node,
const api::BucketInfoReply& reply)
@@ -334,7 +288,6 @@ PersistenceMessageTrackerImpl::updateFromReply(
if (finished()) {
bool doRevert(shouldRevert());
- checkCopiesDeleted();
updateDB();
if (!hasSentReply()) {
@@ -345,7 +298,6 @@ PersistenceMessageTrackerImpl::updateFromReply(
}
} else if (canSendReplyEarly()) {
LOG(debug, "Sending reply early because initial redundancy has been reached");
- checkCopiesDeleted();
sendReply(sender);
}
}
diff --git a/storage/src/vespa/storage/distributor/persistencemessagetracker.h b/storage/src/vespa/storage/distributor/persistencemessagetracker.h
index 5a4d8e17452..21977ddd881 100644
--- a/storage/src/vespa/storage/distributor/persistencemessagetracker.h
+++ b/storage/src/vespa/storage/distributor/persistencemessagetracker.h
@@ -84,7 +84,6 @@ private:
bool hasSentReply() const { return _reply.get() == 0; }
bool shouldRevert() const;
void sendReply(MessageSender& sender);
- void checkCopiesDeleted();
void updateFailureResult(const api::BucketInfoReply& reply);
void handleCreateBucketReply(api::BucketInfoReply& reply, uint16_t node);
void handlePersistenceReply(api::BucketInfoReply& reply, uint16_t node);