summaryrefslogtreecommitdiffstats
path: root/storage/src/tests/persistence/processalltest.cpp
diff options
context:
space:
mode:
author Tor Brede Vekterli <vekterli@yahooinc.com> 2022-07-13 14:41:31 +0000
committer Tor Brede Vekterli <vekterli@yahooinc.com> 2022-08-17 13:42:50 +0000
commit 54693fc154c0fabae6ac82607765a22057977bbb (patch)
tree 4522fe00d0f72311a1c5b99909ffcce7956fbd88 /storage/src/tests/persistence/processalltest.cpp
parent dddd2b3708358da2a855cbbef456c94c985cf08e (diff)
Add support for two-phase document garbage collection
If enabled, garbage collection is performed in two phases (metadata gathering and deletion) instead of a single phase. Two-phase GC ensures that the same set of documents is deleted across all nodes and explicitly takes write locks on the distributor to prevent concurrent feed ops to GC'd documents from potentially creating inconsistencies. Two-phase GC is used _iff_ all replica content nodes support the feature _and_ it's enabled in config. An additional field has been added to the feature negotiation functionality to communicate support from content nodes to distributors.
Diffstat (limited to 'storage/src/tests/persistence/processalltest.cpp')
-rw-r--r--storage/src/tests/persistence/processalltest.cpp94
1 files changed, 92 insertions, 2 deletions
diff --git a/storage/src/tests/persistence/processalltest.cpp b/storage/src/tests/persistence/processalltest.cpp
index a02167bb08a..04ab5ad0cf4 100644
--- a/storage/src/tests/persistence/processalltest.cpp
+++ b/storage/src/tests/persistence/processalltest.cpp
@@ -10,6 +10,7 @@
#include <vespa/document/fieldvalue/intfieldvalue.h>
using document::test::makeDocumentBucket;
+using document::DocumentId;
using namespace ::testing;
namespace storage {
@@ -32,7 +33,7 @@ TEST_F(ProcessAllHandlerTest, change_of_repos_is_reflected) {
EXPECT_EQ(newDocRepo.get(), &getEnv().getDocumentTypeRepo());
}
-TEST_F(ProcessAllHandlerTest, remove_location) {
+TEST_F(ProcessAllHandlerTest, legacy_remove_location) {
document::BucketId bucketId(16, 4);
doPut(4, spi::Timestamp(1234));
doPut(4, spi::Timestamp(2345));
@@ -54,7 +55,7 @@ TEST_F(ProcessAllHandlerTest, remove_location) {
EXPECT_EQ(2u, reply->documents_removed());
}
-TEST_F(ProcessAllHandlerTest, remove_location_document_subset) {
+TEST_F(ProcessAllHandlerTest, legacy_remove_location_document_subset) {
document::BucketId bucketId(16, 4);
AsyncHandler handler(getEnv(), getPersistenceProvider(), _bucketOwnershipNotifier, *_sequenceTaskExecutor, _bucketIdFactory);
@@ -89,6 +90,95 @@ TEST_F(ProcessAllHandlerTest, remove_location_document_subset) {
EXPECT_EQ(5u, reply->documents_removed());
}
+TEST_F(ProcessAllHandlerTest, remove_location_with_enumerate_only_returns_match_set_only) { // Enumerate-only RemoveLocation must report the matching docs without removing any of them.
+ document::BucketId bucketId(16, 4);
+ AsyncHandler handler(getEnv(), getPersistenceProvider(), _bucketOwnershipNotifier,
+ *_sequenceTaskExecutor, _bucketIdFactory);
+
+ document::TestDocMan docMan;
+ for (int i = 0; i < 10; ++i) { // Put ten documents with headerval 0..9 at timestamps 100..109.
+ document::Document::SP doc(docMan.createRandomDocumentAtLocation(4, 1234 + i));
+ doc->setValue(doc->getField("headerval"), document::IntFieldValue(i));
+ doPut(doc, bucketId, spi::Timestamp(100 + i));
+ }
+
+ document::Bucket bucket = makeDocumentBucket(bucketId);
+ auto cmd = std::make_shared<api::RemoveLocationCommand>("testdoctype1.headerval % 2 == 0", bucket); // Selection picks the documents whose headerval is even.
+ cmd->set_only_enumerate_docs(true); // Ask for enumeration only; the assertions below expect no removals.
+ auto tracker = handler.handleRemoveLocation(*cmd, createTracker(cmd, bucket));
+ // Enumeration is synchronous, so we get the reply in the _tracker_, not on the reply queue.
+ ASSERT_TRUE(tracker->hasReply());
+ auto* reply = dynamic_cast<api::RemoveLocationReply*>(&tracker->getReply());
+ ASSERT_TRUE(reply); // A null result would mean the tracker's reply is not a RemoveLocationReply.
+ EXPECT_EQ(0u, reply->documents_removed());
+
+ // No docs should be removed (remove flag is all zero)
+ EXPECT_EQ("DocEntry(100, 0, Doc(id:mail:testdoctype1:n=4:3619.html))\n"
+ "DocEntry(101, 0, Doc(id:mail:testdoctype1:n=4:33113.html))\n"
+ "DocEntry(102, 0, Doc(id:mail:testdoctype1:n=4:62608.html))\n"
+ "DocEntry(103, 0, Doc(id:mail:testdoctype1:n=4:26566.html))\n"
+ "DocEntry(104, 0, Doc(id:mail:testdoctype1:n=4:56061.html))\n"
+ "DocEntry(105, 0, Doc(id:mail:testdoctype1:n=4:20019.html))\n"
+ "DocEntry(106, 0, Doc(id:mail:testdoctype1:n=4:49514.html))\n"
+ "DocEntry(107, 0, Doc(id:mail:testdoctype1:n=4:13472.html))\n"
+ "DocEntry(108, 0, Doc(id:mail:testdoctype1:n=4:42967.html))\n"
+ "DocEntry(109, 0, Doc(id:mail:testdoctype1:n=4:6925.html))\n",
+ dumpBucket(bucketId));
+
+ std::vector<spi::IdAndTimestamp> expected = { // The even-headerval documents (i = 0, 2, 4, 6, 8) paired with their put timestamps.
+ {DocumentId("id:mail:testdoctype1:n=4:3619.html"), spi::Timestamp(100)},
+ {DocumentId("id:mail:testdoctype1:n=4:62608.html"), spi::Timestamp(102)},
+ {DocumentId("id:mail:testdoctype1:n=4:56061.html"), spi::Timestamp(104)},
+ {DocumentId("id:mail:testdoctype1:n=4:49514.html"), spi::Timestamp(106)},
+ {DocumentId("id:mail:testdoctype1:n=4:42967.html"), spi::Timestamp(108)},
+ };
+ EXPECT_EQ(reply->selection_matches(), expected); // The reply must carry exactly this match set, in this order.
+}
+
+TEST_F(ProcessAllHandlerTest, remove_location_with_remove_set_only_removes_listed_docs) { // With an explicit remove set, only the listed (id, timestamp) entries may be removed.
+ document::BucketId bucketId(16, 4);
+ AsyncHandler handler(getEnv(), getPersistenceProvider(), _bucketOwnershipNotifier,
+ *_sequenceTaskExecutor, _bucketIdFactory);
+
+ document::TestDocMan docMan;
+ for (int i = 0; i < 10; ++i) { // Put ten documents with headerval 0..9 at timestamps 100..109.
+ document::Document::SP doc(docMan.createRandomDocumentAtLocation(4, 1234 + i));
+ doc->setValue(doc->getField("headerval"), document::IntFieldValue(i));
+ doPut(doc, bucketId, spi::Timestamp(100 + i));
+ }
+
+ document::Bucket bucket = makeDocumentBucket(bucketId);
+ // Use a selection that, if naively used, removes everything.
+ auto cmd = std::make_shared<api::RemoveLocationCommand>("true", bucket);
+ std::vector<spi::IdAndTimestamp> to_remove = { // Three of the ten documents, identified by id and put timestamp.
+ {DocumentId("id:mail:testdoctype1:n=4:62608.html"), spi::Timestamp(102)},
+ {DocumentId("id:mail:testdoctype1:n=4:49514.html"), spi::Timestamp(106)},
+ {DocumentId("id:mail:testdoctype1:n=4:42967.html"), spi::Timestamp(108)},
+ };
+ cmd->set_explicit_remove_set(std::move(to_remove));
+ auto tracker = handler.handleRemoveLocation(*cmd, createTracker(cmd, bucket)); // NOTE(review): the returned tracker is not inspected in this test.
+ // Actually removing the documents is asynchronous, so the response will be on the queue.
+ std::shared_ptr<api::StorageMessage> msg;
+ ASSERT_TRUE(_replySender.queue.getNext(msg, 60s));
+
+ // Remove flag toggled for the entries provided in the command
+ EXPECT_EQ("DocEntry(100, 0, Doc(id:mail:testdoctype1:n=4:3619.html))\n"
+ "DocEntry(101, 0, Doc(id:mail:testdoctype1:n=4:33113.html))\n"
+ "DocEntry(102, 1, id:mail:testdoctype1:n=4:62608.html)\n"
+ "DocEntry(103, 0, Doc(id:mail:testdoctype1:n=4:26566.html))\n"
+ "DocEntry(104, 0, Doc(id:mail:testdoctype1:n=4:56061.html))\n"
+ "DocEntry(105, 0, Doc(id:mail:testdoctype1:n=4:20019.html))\n"
+ "DocEntry(106, 1, id:mail:testdoctype1:n=4:49514.html)\n"
+ "DocEntry(107, 0, Doc(id:mail:testdoctype1:n=4:13472.html))\n"
+ "DocEntry(108, 1, id:mail:testdoctype1:n=4:42967.html)\n"
+ "DocEntry(109, 0, Doc(id:mail:testdoctype1:n=4:6925.html))\n",
+ dumpBucket(bucketId));
+
+ auto reply = std::dynamic_pointer_cast<api::RemoveLocationReply>(msg);
+ ASSERT_TRUE(reply); // A null result would mean the queued message is not a RemoveLocationReply.
+ EXPECT_EQ(3u, reply->documents_removed()); // One removal per entry in the explicit remove set.
+}
+
TEST_F(ProcessAllHandlerTest, remove_location_throws_exception_on_unknown_doc_type) {
document::BucketId bucketId(16, 4);
doPut(4, spi::Timestamp(1234));