diff options
author | Tor Brede Vekterli <vekterli@yahooinc.com> | 2022-07-13 14:41:31 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@yahooinc.com> | 2022-08-17 13:42:50 +0000 |
commit | 54693fc154c0fabae6ac82607765a22057977bbb (patch) | |
tree | 4522fe00d0f72311a1c5b99909ffcce7956fbd88 /storage/src/tests/persistence/processalltest.cpp | |
parent | dddd2b3708358da2a855cbbef456c94c985cf08e (diff) |
Add support for two-phase document garbage collection
If enabled, garbage collection is performed in two phases (metadata
gathering and deletion) instead of just a single phase. Two-phase GC
allows for ensuring the same set of documents is deleted across all
nodes and explicitly takes write locks on the distributor to prevent
concurrent feed ops to GC'd documents from potentially creating
inconsistencies.
Two-phase GC is only used _iff_ all replica content nodes support
the feature _and_ it's enabled in config. An additional field has
been added to the feature negotiation functionality to communicate
support from content nodes to distributors.
Diffstat (limited to 'storage/src/tests/persistence/processalltest.cpp')
-rw-r--r-- | storage/src/tests/persistence/processalltest.cpp | 94 |
1 files changed, 92 insertions, 2 deletions
diff --git a/storage/src/tests/persistence/processalltest.cpp b/storage/src/tests/persistence/processalltest.cpp index a02167bb08a..04ab5ad0cf4 100644 --- a/storage/src/tests/persistence/processalltest.cpp +++ b/storage/src/tests/persistence/processalltest.cpp @@ -10,6 +10,7 @@ #include <vespa/document/fieldvalue/intfieldvalue.h> using document::test::makeDocumentBucket; +using document::DocumentId; using namespace ::testing; namespace storage { @@ -32,7 +33,7 @@ TEST_F(ProcessAllHandlerTest, change_of_repos_is_reflected) { EXPECT_EQ(newDocRepo.get(), &getEnv().getDocumentTypeRepo()); } -TEST_F(ProcessAllHandlerTest, remove_location) { +TEST_F(ProcessAllHandlerTest, legacy_remove_location) { document::BucketId bucketId(16, 4); doPut(4, spi::Timestamp(1234)); doPut(4, spi::Timestamp(2345)); @@ -54,7 +55,7 @@ TEST_F(ProcessAllHandlerTest, remove_location) { EXPECT_EQ(2u, reply->documents_removed()); } -TEST_F(ProcessAllHandlerTest, remove_location_document_subset) { +TEST_F(ProcessAllHandlerTest, legacy_remove_location_document_subset) { document::BucketId bucketId(16, 4); AsyncHandler handler(getEnv(), getPersistenceProvider(), _bucketOwnershipNotifier, *_sequenceTaskExecutor, _bucketIdFactory); @@ -89,6 +90,95 @@ TEST_F(ProcessAllHandlerTest, remove_location_document_subset) { EXPECT_EQ(5u, reply->documents_removed()); } +TEST_F(ProcessAllHandlerTest, remove_location_with_enumerate_only_returns_match_set_only) { + document::BucketId bucketId(16, 4); + AsyncHandler handler(getEnv(), getPersistenceProvider(), _bucketOwnershipNotifier, + *_sequenceTaskExecutor, _bucketIdFactory); + + document::TestDocMan docMan; + for (int i = 0; i < 10; ++i) { + document::Document::SP doc(docMan.createRandomDocumentAtLocation(4, 1234 + i)); + doc->setValue(doc->getField("headerval"), document::IntFieldValue(i)); + doPut(doc, bucketId, spi::Timestamp(100 + i)); + } + + document::Bucket bucket = makeDocumentBucket(bucketId); + auto cmd = std::make_shared<api::RemoveLocationCommand>("testdoctype1.headerval % 2 == 0", bucket); + cmd->set_only_enumerate_docs(true); + auto tracker = handler.handleRemoveLocation(*cmd, createTracker(cmd, bucket)); + // Enumeration is synchronous, so we get the reply in the _tracker_, not on the reply queue. + ASSERT_TRUE(tracker->hasReply()); + auto* reply = dynamic_cast<api::RemoveLocationReply*>(&tracker->getReply()); + ASSERT_TRUE(reply); + EXPECT_EQ(0u, reply->documents_removed()); + + // No docs should be removed (remove flag is all zero) + EXPECT_EQ("DocEntry(100, 0, Doc(id:mail:testdoctype1:n=4:3619.html))\n" + "DocEntry(101, 0, Doc(id:mail:testdoctype1:n=4:33113.html))\n" + "DocEntry(102, 0, Doc(id:mail:testdoctype1:n=4:62608.html))\n" + "DocEntry(103, 0, Doc(id:mail:testdoctype1:n=4:26566.html))\n" + "DocEntry(104, 0, Doc(id:mail:testdoctype1:n=4:56061.html))\n" + "DocEntry(105, 0, Doc(id:mail:testdoctype1:n=4:20019.html))\n" + "DocEntry(106, 0, Doc(id:mail:testdoctype1:n=4:49514.html))\n" + "DocEntry(107, 0, Doc(id:mail:testdoctype1:n=4:13472.html))\n" + "DocEntry(108, 0, Doc(id:mail:testdoctype1:n=4:42967.html))\n" + "DocEntry(109, 0, Doc(id:mail:testdoctype1:n=4:6925.html))\n", + dumpBucket(bucketId)); + + std::vector<spi::IdAndTimestamp> expected = { + {DocumentId("id:mail:testdoctype1:n=4:3619.html"), spi::Timestamp(100)}, + {DocumentId("id:mail:testdoctype1:n=4:62608.html"), spi::Timestamp(102)}, + {DocumentId("id:mail:testdoctype1:n=4:56061.html"), spi::Timestamp(104)}, + {DocumentId("id:mail:testdoctype1:n=4:49514.html"), spi::Timestamp(106)}, + {DocumentId("id:mail:testdoctype1:n=4:42967.html"), spi::Timestamp(108)}, + }; + EXPECT_EQ(reply->selection_matches(), expected); +} + +TEST_F(ProcessAllHandlerTest, remove_location_with_remove_set_only_removes_listed_docs) { + document::BucketId bucketId(16, 4); + AsyncHandler handler(getEnv(), getPersistenceProvider(), _bucketOwnershipNotifier, + *_sequenceTaskExecutor, _bucketIdFactory); + + document::TestDocMan docMan; + for (int i = 0; i < 10; ++i) { + document::Document::SP doc(docMan.createRandomDocumentAtLocation(4, 1234 + i)); + doc->setValue(doc->getField("headerval"), document::IntFieldValue(i)); + doPut(doc, bucketId, spi::Timestamp(100 + i)); + } + + document::Bucket bucket = makeDocumentBucket(bucketId); + // Use a selection that, if naively used, removes everything. + auto cmd = std::make_shared<api::RemoveLocationCommand>("true", bucket); + std::vector<spi::IdAndTimestamp> to_remove = { + {DocumentId("id:mail:testdoctype1:n=4:62608.html"), spi::Timestamp(102)}, + {DocumentId("id:mail:testdoctype1:n=4:49514.html"), spi::Timestamp(106)}, + {DocumentId("id:mail:testdoctype1:n=4:42967.html"), spi::Timestamp(108)}, + }; + cmd->set_explicit_remove_set(std::move(to_remove)); + auto tracker = handler.handleRemoveLocation(*cmd, createTracker(cmd, bucket)); + // Actually removing the documents is asynchronous, so the response will be on the queue. + std::shared_ptr<api::StorageMessage> msg; + ASSERT_TRUE(_replySender.queue.getNext(msg, 60s)); + + // Remove flag toggled for the entries provided in the command + EXPECT_EQ("DocEntry(100, 0, Doc(id:mail:testdoctype1:n=4:3619.html))\n" + "DocEntry(101, 0, Doc(id:mail:testdoctype1:n=4:33113.html))\n" + "DocEntry(102, 1, id:mail:testdoctype1:n=4:62608.html)\n" + "DocEntry(103, 0, Doc(id:mail:testdoctype1:n=4:26566.html))\n" + "DocEntry(104, 0, Doc(id:mail:testdoctype1:n=4:56061.html))\n" + "DocEntry(105, 0, Doc(id:mail:testdoctype1:n=4:20019.html))\n" + "DocEntry(106, 1, id:mail:testdoctype1:n=4:49514.html)\n" + "DocEntry(107, 0, Doc(id:mail:testdoctype1:n=4:13472.html))\n" + "DocEntry(108, 1, id:mail:testdoctype1:n=4:42967.html)\n" + "DocEntry(109, 0, Doc(id:mail:testdoctype1:n=4:6925.html))\n", + dumpBucket(bucketId)); + + auto reply = std::dynamic_pointer_cast<api::RemoveLocationReply>(msg); + ASSERT_TRUE(reply); + EXPECT_EQ(3u, reply->documents_removed()); +} + TEST_F(ProcessAllHandlerTest, remove_location_throws_exception_on_unknown_doc_type) { document::BucketId bucketId(16, 4); doPut(4, spi::Timestamp(1234)); |