summaryrefslogtreecommitdiffstats
path: root/persistence
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2023-11-06 16:50:33 +0100
committerTor Egge <Tor.Egge@online.no>2023-11-06 16:50:33 +0100
commit5c4c6f9fe13163b055d1ad5ea05c5c9468fbe179 (patch)
tree2ce11e4321a15ca0603c8ea44aed762252600cee /persistence
parentebd259f9bc66031921b04ca0502c2f9c0a153023 (diff)
Add removeByGidAsync() to spi.
Diffstat (limited to 'persistence')
-rw-r--r--persistence/src/vespa/persistence/conformancetest/conformancetest.cpp49
-rw-r--r--persistence/src/vespa/persistence/conformancetest/conformancetest.h7
-rw-r--r--persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp37
-rw-r--r--persistence/src/vespa/persistence/dummyimpl/dummypersistence.h2
-rw-r--r--persistence/src/vespa/persistence/spi/CMakeLists.txt1
-rw-r--r--persistence/src/vespa/persistence/spi/doctype_gid_and_timestamp.cpp43
-rw-r--r--persistence/src/vespa/persistence/spi/doctype_gid_and_timestamp.h50
-rw-r--r--persistence/src/vespa/persistence/spi/persistenceprovider.h8
8 files changed, 197 insertions, 0 deletions
diff --git a/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp b/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp
index 75916d6b66c..0c46e1269d9 100644
--- a/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp
+++ b/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp
@@ -4,6 +4,7 @@
#include <vespa/persistence/conformancetest/conformancetest.h>
#include <vespa/persistence/spi/test.h>
#include <vespa/persistence/spi/catchresult.h>
+#include <vespa/persistence/spi/doctype_gid_and_timestamp.h>
#include <vespa/persistence/spi/resource_usage_listener.h>
#include <vespa/persistence/spi/docentry.h>
#include <vespa/document/fieldset/fieldsets.h>
@@ -907,6 +908,35 @@ TEST_F(ConformanceTest, testRemoveMerge)
}
}
+TEST_F(ConformanceTest, testRemoveByGid)
+{
+    // Start from a cleared factory and a bucket holding two documents.
+    document::TestDocMan testDocMan;
+    _factory->clear();
+    PersistenceProviderUP spi(getSpi(*_factory, testDocMan));
+    Context context(Priority(0), Trace::TraceLevel(0));
+
+    Bucket bucket(makeSpiBucket(BucketId(8, 0x01)));
+    std::shared_ptr<Document> first_doc = testDocMan.createRandomDocumentAtLocation(0x01, 1);
+    std::shared_ptr<Document> second_doc = testDocMan.createRandomDocumentAtLocation(0x01, 2);
+    spi->createBucket(bucket);
+    EXPECT_EQ(Result(), Result(spi->put(bucket, Timestamp(11), first_doc)));
+    EXPECT_EQ(Result(), Result(spi->put(bucket, Timestamp(12), second_doc)));
+    EXPECT_EQ(2, spi->getBucketInfo(bucket).getBucketInfo().getDocumentCount());
+
+    // A remove carrying timestamp 10 targets a document put at timestamp 11:
+    // nothing is expected to be removed and both documents remain.
+    const auto& first_id = first_doc->getId();
+    std::vector<DocTypeGidAndTimestamp> gids_to_remove;
+    gids_to_remove.emplace_back(first_id.getDocType(), first_id.getGlobalId(), Timestamp(10));
+    assert_remove_by_gid(*spi, bucket, gids_to_remove, 0, 2, "ignored removebygid");
+
+    // Same gid, now with the timestamp of the put: one document is expected
+    // to be removed, leaving one behind.
+    gids_to_remove.back().timestamp = Timestamp(11);
+    assert_remove_by_gid(*spi, bucket, gids_to_remove, 1, 1, "removebygid");
+
+    if (_factory->hasPersistence()) {
+        // Recreate the provider and check that the document count is unchanged.
+        spi.reset();
+        document::TestDocMan testDocMan2;
+        spi = getSpi(*_factory, testDocMan2);
+        EXPECT_EQ(1, spi->getBucketInfo(bucket).getBucketInfo().getDocumentCount());
+    }
+}
+
TEST_F(ConformanceTest, testUpdate)
{
document::TestDocMan testDocMan;
@@ -2227,6 +2257,25 @@ ConformanceTest::test_empty_bucket_info(bool bucket_exists, bool active)
EXPECT_EQ(active, info_result.getBucketInfo().isActive());
}
+/*
+ * Issues removeByGidAsync() for the given ids on the given bucket, waits for
+ * the result and checks both the reported number of removed documents and the
+ * document count left in the bucket. The label is attached to any failure via
+ * SCOPED_TRACE.
+ */
+void
+ConformanceTest::assert_remove_by_gid(PersistenceProvider& spi,
+                                      const Bucket& bucket, std::vector<DocTypeGidAndTimestamp> ids,
+                                      size_t exp_removed, size_t exp_remaining,
+                                      const vespalib::string& label)
+{
+    SCOPED_TRACE(label);
+    // Hand a result catcher to the provider and wait on its future.
+    auto catcher = std::make_unique<CatchResult>();
+    auto pending = catcher->future_result();
+    spi.removeByGidAsync(bucket, std::move(ids), std::move(catcher));
+    auto outcome = pending.get();
+    ASSERT_TRUE(outcome);
+    // The provider is expected to answer with a RemoveResult.
+    auto remove_outcome = dynamic_cast<spi::RemoveResult *>(outcome.get());
+    ASSERT_TRUE(remove_outcome != nullptr);
+    EXPECT_EQ(exp_removed, remove_outcome->num_removed());
+    EXPECT_EQ(exp_remaining, spi.getBucketInfo(bucket).getBucketInfo().getDocumentCount());
+}
+
TEST_F(ConformanceTest, test_empty_bucket_gives_empty_bucket_info)
{
test_empty_bucket_info(true, false);
diff --git a/persistence/src/vespa/persistence/conformancetest/conformancetest.h b/persistence/src/vespa/persistence/conformancetest/conformancetest.h
index 7cea989bdf5..555c4254ad9 100644
--- a/persistence/src/vespa/persistence/conformancetest/conformancetest.h
+++ b/persistence/src/vespa/persistence/conformancetest/conformancetest.h
@@ -152,6 +152,13 @@ protected:
void test_empty_bucket_info(bool bucket_exists, bool active);
+ /*
+  * Test helper: calls removeByGidAsync() on spi for the given ids in bucket,
+  * then checks that exp_removed documents were reported as removed and that
+  * exp_remaining documents are left in the bucket. label is used as the
+  * scoped trace message for failures.
+  */
+ void assert_remove_by_gid(PersistenceProvider& spi,
+ const Bucket& bucket,
+ std::vector<DocTypeGidAndTimestamp> ids,
+ size_t exp_removed,
+ size_t exp_remaining,
+ const vespalib::string& label);
+
ConformanceTest();
ConformanceTest(const std::string &docType);
};
diff --git a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp
index 96cb37be3c2..0ea872a54a5 100644
--- a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp
+++ b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp
@@ -6,6 +6,7 @@
#include <vespa/document/fieldvalue/document.h>
#include <vespa/document/update/documentupdate.h>
#include <vespa/document/bucket/fixed_bucket_spaces.h>
+#include <vespa/persistence/spi/doctype_gid_and_timestamp.h>
#include <vespa/persistence/spi/i_resource_usage_listener.h>
#include <vespa/persistence/spi/resource_usage.h>
#include <vespa/persistence/spi/bucketexecutor.h>
@@ -252,6 +253,15 @@ BucketContent::getEntry(const DocumentId &did) const {
return DocEntry::SP();
}
+// Looks up the entry registered for the given gid in _gidMap.
+// Returns an empty pointer when the gid is not present.
+std::shared_ptr<DocEntry>
+BucketContent::getEntry(const document::GlobalId& gid) const {
+    const auto found = _gidMap.find(gid);
+    if (found == _gidMap.end()) {
+        return {};
+    }
+    return found->second;
+}
+
DocEntry::SP
BucketContent::getEntry(Timestamp t) const {
auto iter = lower_bound(_entries.begin(), _entries.end(), t, TimestampLess());
@@ -529,6 +539,33 @@ DummyPersistence::removeAsync(const Bucket& b, std::vector<spi::IdAndTimestamp>
onComplete->onComplete(std::make_unique<RemoveResult>(numRemoves));
}
+/*
+ * Erases, for each (doc type, gid, timestamp) in ids, the entry stored for
+ * that gid when the entry's timestamp is not newer than the given timestamp.
+ * Completes onComplete with a RemoveResult carrying the number of erased
+ * entries that were not already remove entries.
+ */
+void
+DummyPersistence::removeByGidAsync(const Bucket& b, std::vector<spi::DocTypeGidAndTimestamp> ids, std::unique_ptr<OperationComplete> onComplete)
+{
+    verifyInitialized();
+    assert(b.getBucketSpace() == FixedBucketSpaces::default_space());
+    BucketContentGuard::UP bc(acquireBucketWithLock(b));
+
+    uint32_t removed_count = 0;
+    for (const auto& candidate : ids) {
+        auto& gid = candidate.gid;
+        auto remove_ts = candidate.timestamp;
+        LOG(debug, "removeByGidAsync(%s, %" PRIu64 ", %s, %s)", b.toString().c_str(), uint64_t(remove_ts), candidate.doc_type.c_str(), gid.toString().c_str());
+
+        // The bucket is created on demand if it could not be acquired.
+        while (!bc) {
+            internal_create_bucket(b);
+            bc = acquireBucketWithLock(b);
+        }
+        DocEntry::SP entry((*bc)->getEntry(gid));
+        if (!entry || !(entry->getTimestamp() <= remove_ts)) {
+            continue; // unknown gid, or the stored entry is newer than the remove
+        }
+        if (!entry->isRemove()) {
+            ++removed_count; // only entries that are not remove entries are counted
+        }
+        (*bc)->eraseEntry(entry->getTimestamp());
+    }
+    // Release the bucket guard before signalling completion.
+    bc.reset();
+    onComplete->onComplete(std::make_unique<RemoveResult>(removed_count));
+}
+
GetResult
DummyPersistence::get(const Bucket& b, const FieldSet& fieldSet, const DocumentId& did, Context&) const
{
diff --git a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.h b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.h
index 26b19a43aee..f7f58612b8d 100644
--- a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.h
+++ b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.h
@@ -74,6 +74,7 @@ struct BucketContent {
bool hasTimestamp(Timestamp) const;
void insert(DocEntry::SP);
DocEntry::SP getEntry(const DocumentId&) const;
+ // Entry lookup by global id; the definition returns an empty pointer when the gid is unknown.
+ std::shared_ptr<DocEntry> getEntry(const GlobalId& gid) const;
DocEntry::SP getEntry(Timestamp) const;
void eraseEntry(Timestamp t);
void setActive(bool active = true) {
@@ -161,6 +162,7 @@ public:
GetResult get(const Bucket&, const document::FieldSet&, const DocumentId&, Context&) const override;
void putAsync(const Bucket&, Timestamp, DocumentSP, OperationComplete::UP) override;
void removeAsync(const Bucket& b, std::vector<spi::IdAndTimestamp> ids, OperationComplete::UP) override;
+ // Removes entries identified by (doc type, gid, timestamp) and reports the outcome through the completion handler.
+ void removeByGidAsync(const Bucket& b, std::vector<spi::DocTypeGidAndTimestamp> ids, std::unique_ptr<OperationComplete>) override;
void updateAsync(const Bucket&, Timestamp, DocumentUpdateSP, OperationComplete::UP) override;
CreateIteratorResult
diff --git a/persistence/src/vespa/persistence/spi/CMakeLists.txt b/persistence/src/vespa/persistence/spi/CMakeLists.txt
index 617334317e7..6c3eddaaabf 100644
--- a/persistence/src/vespa/persistence/spi/CMakeLists.txt
+++ b/persistence/src/vespa/persistence/spi/CMakeLists.txt
@@ -9,6 +9,7 @@ vespa_add_library(persistence_spi OBJECT
clusterstate.cpp
context.cpp
docentry.cpp
+ doctype_gid_and_timestamp.cpp
exceptions.cpp
id_and_timestamp.cpp
persistenceprovider.cpp
diff --git a/persistence/src/vespa/persistence/spi/doctype_gid_and_timestamp.cpp b/persistence/src/vespa/persistence/spi/doctype_gid_and_timestamp.cpp
new file mode 100644
index 00000000000..1184c0b409f
--- /dev/null
+++ b/persistence/src/vespa/persistence/spi/doctype_gid_and_timestamp.cpp
@@ -0,0 +1,43 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "doctype_gid_and_timestamp.h"
+#include <vespa/vespalib/stllike/asciistream.h>
+
+namespace storage::spi {
+
+// Default constructor: empty document type, default-constructed gid and
+// timestamp 0. The timestamp is initialized explicitly so that the default
+// state does not depend on how Timestamp default-initializes itself.
+DocTypeGidAndTimestamp::DocTypeGidAndTimestamp()
+    : doc_type(),
+      gid(),
+      timestamp(0)
+{
+}
+
+// Builds a value from a document type name, a gid and a timestamp.
+// doc_type_ is copied; gid_ is taken by value and moved into the member.
+// NOTE(review): declared noexcept although copying doc_type_ presumably can
+// allocate; a failing allocation would then terminate — confirm this is intended.
+DocTypeGidAndTimestamp::DocTypeGidAndTimestamp(const vespalib::string& doc_type_, document::GlobalId gid_, Timestamp timestamp_) noexcept
+ : doc_type(doc_type_),
+ gid(std::move(gid_)),
+ timestamp(timestamp_)
+{}
+
+// Copy and move construction/assignment are declared in the header and
+// defaulted here, out of line.
+DocTypeGidAndTimestamp::DocTypeGidAndTimestamp(const DocTypeGidAndTimestamp&) = default;
+DocTypeGidAndTimestamp& DocTypeGidAndTimestamp::operator=(const DocTypeGidAndTimestamp&) = default;
+DocTypeGidAndTimestamp::DocTypeGidAndTimestamp(DocTypeGidAndTimestamp&&) noexcept = default;
+DocTypeGidAndTimestamp& DocTypeGidAndTimestamp::operator=(DocTypeGidAndTimestamp&&) noexcept = default;
+
+// Writes "<doc_type>:<gid> at time <timestamp value>" to the given stream.
+void DocTypeGidAndTimestamp::print(vespalib::asciistream& os) const {
+    os << doc_type;
+    os << ":";
+    os << gid.toString();
+    os << " at time ";
+    os << timestamp.getValue();
+}
+
+// Returns the text that print() produces, collected in a temporary stream.
+vespalib::string DocTypeGidAndTimestamp::to_string() const {
+    vespalib::asciistream buffer;
+    this->print(buffer);
+    return buffer.str();
+}
+
+// Streams the textual form produced by print() to a vespalib::asciistream.
+vespalib::asciistream& operator<<(vespalib::asciistream& os, const DocTypeGidAndTimestamp& dt_gid_ts) {
+ dt_gid_ts.print(os);
+ return os;
+}
+// Streams the to_string() representation to a std::ostream.
+std::ostream& operator<<(std::ostream& os, const DocTypeGidAndTimestamp& dt_gid_ts) {
+    return os << dt_gid_ts.to_string();
+}
+
+}
diff --git a/persistence/src/vespa/persistence/spi/doctype_gid_and_timestamp.h b/persistence/src/vespa/persistence/spi/doctype_gid_and_timestamp.h
new file mode 100644
index 00000000000..449935ef830
--- /dev/null
+++ b/persistence/src/vespa/persistence/spi/doctype_gid_and_timestamp.h
@@ -0,0 +1,50 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "types.h"
+#include <vespa/document/base/globalid.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <iosfwd>
+
+namespace vespalib { class asciistream; }
+
+namespace storage::spi {
+
+/**
+ * Convenience wrapper for referencing a document type and global id with
+ * a timestamp.
+ *
+ * Prefer this instead of a std::tuple due to named fields and a pre-provided hash function.
+ */
+struct DocTypeGidAndTimestamp {
+    vespalib::string   doc_type;  // document type name
+    document::GlobalId gid;       // global id of the document
+    Timestamp          timestamp; // timestamp associated with the reference
+
+    DocTypeGidAndTimestamp();
+    DocTypeGidAndTimestamp(const vespalib::string& doc_type_, document::GlobalId gid_, Timestamp timestamp_) noexcept;
+
+    DocTypeGidAndTimestamp(const DocTypeGidAndTimestamp&);
+    DocTypeGidAndTimestamp& operator=(const DocTypeGidAndTimestamp&);
+    DocTypeGidAndTimestamp(DocTypeGidAndTimestamp&&) noexcept;
+    DocTypeGidAndTimestamp& operator=(DocTypeGidAndTimestamp&&) noexcept;
+
+    /**
+     * Two values are equal only when document type, gid and timestamp all match.
+     *
+     * The timestamp must take part in the comparison because it also takes
+     * part in hash below: values that compare equal have to hash equally,
+     * otherwise hash based containers keyed on this type misbehave.
+     */
+    bool operator==(const DocTypeGidAndTimestamp& rhs) const noexcept {
+        return ((doc_type == rhs.doc_type) && (gid == rhs.gid) &&
+                (timestamp.getValue() == rhs.timestamp.getValue()));
+    }
+
+    /** Writes a textual representation of this value to the given stream. */
+    void print(vespalib::asciistream&) const;
+    /** Returns the textual representation as a string. */
+    vespalib::string to_string() const;
+
+    /**
+     * Hash functor combining the gid hash with the timestamp.
+     * The document type is not mixed in, so values that differ only in
+     * document type share a hash value; equality still tells them apart.
+     */
+    struct hash {
+        size_t operator()(const DocTypeGidAndTimestamp& dt_gid_ts) const noexcept {
+            const size_t h = document::GlobalId::hash()(dt_gid_ts.gid);
+            return h ^ (dt_gid_ts.timestamp + 0x9e3779b9U + (h << 6U) + (h >> 2U)); // Basically boost::hash_combine
+        }
+    };
+};
+
+vespalib::asciistream& operator<<(vespalib::asciistream&, const DocTypeGidAndTimestamp&);
+std::ostream& operator<<(std::ostream&, const DocTypeGidAndTimestamp&);
+
+}
diff --git a/persistence/src/vespa/persistence/spi/persistenceprovider.h b/persistence/src/vespa/persistence/spi/persistenceprovider.h
index cb32dc05eec..5fb90046c65 100644
--- a/persistence/src/vespa/persistence/spi/persistenceprovider.h
+++ b/persistence/src/vespa/persistence/spi/persistenceprovider.h
@@ -18,6 +18,7 @@ namespace storage::spi {
class IResourceUsageListener;
struct BucketExecutor;
+struct DocTypeGidAndTimestamp;
/**
* This interface is the basis for a persistence provider in Vespa. A
@@ -173,6 +174,13 @@ struct PersistenceProvider
*/
virtual void removeAsync(const Bucket&, std::vector<IdAndTimestamp> ids, OperationComplete::UP) = 0;
+ /**
+ * Remove documents based on document type and gid and forget about them
+ * (don't keep track of the removed document). This operation is typically
+ * used as part of removing documents in a bucket that will be deleted.
+ *
+ * @param ids document type, gid and timestamp for each document to remove.
+ * The last argument is the completion handler that receives the result of
+ * the operation.
+ */
+ virtual void removeByGidAsync(const Bucket&, std::vector<DocTypeGidAndTimestamp> ids, std::unique_ptr<OperationComplete>) = 0;
+
/**
* @see remove()
* <p/>