diff options
author | Tor Egge <Tor.Egge@online.no> | 2023-11-06 16:50:33 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2023-11-06 16:50:33 +0100 |
commit | 5c4c6f9fe13163b055d1ad5ea05c5c9468fbe179 (patch) | |
tree | 2ce11e4321a15ca0603c8ea44aed762252600cee /persistence | |
parent | ebd259f9bc66031921b04ca0502c2f9c0a153023 (diff) |
Add removeByGidAsync() to spi.
Diffstat (limited to 'persistence')
8 files changed, 197 insertions, 0 deletions
diff --git a/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp b/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp index 75916d6b66c..0c46e1269d9 100644 --- a/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp +++ b/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp @@ -4,6 +4,7 @@ #include <vespa/persistence/conformancetest/conformancetest.h> #include <vespa/persistence/spi/test.h> #include <vespa/persistence/spi/catchresult.h> +#include <vespa/persistence/spi/doctype_gid_and_timestamp.h> #include <vespa/persistence/spi/resource_usage_listener.h> #include <vespa/persistence/spi/docentry.h> #include <vespa/document/fieldset/fieldsets.h> @@ -907,6 +908,35 @@ TEST_F(ConformanceTest, testRemoveMerge) } } +TEST_F(ConformanceTest, testRemoveByGid) +{ + document::TestDocMan testDocMan; + _factory->clear(); + PersistenceProviderUP spi(getSpi(*_factory, testDocMan)); + Context context(Priority(0), Trace::TraceLevel(0)); + + Bucket bucket(makeSpiBucket(BucketId(8, 0x01))); + std::shared_ptr<Document> doc1 = testDocMan.createRandomDocumentAtLocation(0x01, 1); + std::shared_ptr<Document> doc2 = testDocMan.createRandomDocumentAtLocation(0x01, 2); + spi->createBucket(bucket); + EXPECT_EQ(Result(), Result(spi->put(bucket, Timestamp(11), doc1))); + EXPECT_EQ(Result(), Result(spi->put(bucket, Timestamp(12), doc2))); + auto info = spi->getBucketInfo(bucket).getBucketInfo(); + EXPECT_EQ(2, info.getDocumentCount()); + std::vector<DocTypeGidAndTimestamp> ids; + ids.emplace_back(doc1->getId().getDocType(), doc1->getId().getGlobalId(), Timestamp(10)); + assert_remove_by_gid(*spi, bucket, ids, 0, 2, "ignored removebygid"); + ids.back().timestamp = Timestamp(11); + assert_remove_by_gid(*spi, bucket, ids, 1, 1, "removebygid"); + if (_factory->hasPersistence()) { + spi.reset(); + document::TestDocMan testDocMan2; + spi = getSpi(*_factory, testDocMan2); + info = spi->getBucketInfo(bucket).getBucketInfo(); + EXPECT_EQ(1, info.getDocumentCount()); + } +} + TEST_F(ConformanceTest, testUpdate) { document::TestDocMan testDocMan; @@ -2227,6 +2257,25 @@ ConformanceTest::test_empty_bucket_info(bool bucket_exists, bool active) EXPECT_EQ(active, info_result.getBucketInfo().isActive()); } +void +ConformanceTest::assert_remove_by_gid(PersistenceProvider& spi, + const Bucket& bucket, std::vector<DocTypeGidAndTimestamp> ids, + size_t exp_removed, size_t exp_remaining, + const vespalib::string& label) +{ + SCOPED_TRACE(label); + auto onDone = std::make_unique<CatchResult>(); + auto future = onDone->future_result(); + spi.removeByGidAsync(bucket, std::move(ids), std::move(onDone)); + auto result = future.get(); + ASSERT_TRUE(result); + auto removeResult = dynamic_cast<spi::RemoveResult *>(result.get()); + ASSERT_TRUE(removeResult != nullptr); + EXPECT_EQ(exp_removed, removeResult->num_removed()); + auto info = spi.getBucketInfo(bucket).getBucketInfo(); + EXPECT_EQ(exp_remaining, info.getDocumentCount()); +} + TEST_F(ConformanceTest, test_empty_bucket_gives_empty_bucket_info) { test_empty_bucket_info(true, false); diff --git a/persistence/src/vespa/persistence/conformancetest/conformancetest.h b/persistence/src/vespa/persistence/conformancetest/conformancetest.h index 7cea989bdf5..555c4254ad9 100644 --- a/persistence/src/vespa/persistence/conformancetest/conformancetest.h +++ b/persistence/src/vespa/persistence/conformancetest/conformancetest.h @@ -152,6 +152,13 @@ protected: void test_empty_bucket_info(bool bucket_exists, bool active); + void assert_remove_by_gid(PersistenceProvider& spi, + const Bucket& bucket, + std::vector<DocTypeGidAndTimestamp> ids, + size_t exp_removed, + size_t exp_remaining, + const vespalib::string& label); + ConformanceTest(); ConformanceTest(const std::string &docType); }; diff --git a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp index 96cb37be3c2..0ea872a54a5 100644 --- a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp +++ b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp @@ -6,6 +6,7 @@ #include <vespa/document/fieldvalue/document.h> #include <vespa/document/update/documentupdate.h> #include <vespa/document/bucket/fixed_bucket_spaces.h> +#include <vespa/persistence/spi/doctype_gid_and_timestamp.h> #include <vespa/persistence/spi/i_resource_usage_listener.h> #include <vespa/persistence/spi/resource_usage.h> #include <vespa/persistence/spi/bucketexecutor.h> @@ -252,6 +253,15 @@ BucketContent::getEntry(const DocumentId &did) const { return DocEntry::SP(); } +std::shared_ptr<DocEntry> +BucketContent::getEntry(const document::GlobalId& gid) const { + auto it(_gidMap.find(gid)); + if (it != _gidMap.end()) { + return it->second; + } + return {}; +} + DocEntry::SP BucketContent::getEntry(Timestamp t) const { auto iter = lower_bound(_entries.begin(), _entries.end(), t, TimestampLess()); @@ -529,6 +539,33 @@ DummyPersistence::removeAsync(const Bucket& b, std::vector<spi::IdAndTimestamp> onComplete->onComplete(std::make_unique<RemoveResult>(numRemoves)); } +void +DummyPersistence::removeByGidAsync(const Bucket& b, std::vector<spi::DocTypeGidAndTimestamp> ids, std::unique_ptr<OperationComplete> onComplete) +{ + verifyInitialized(); + assert(b.getBucketSpace() == FixedBucketSpaces::default_space()); + BucketContentGuard::UP bc(acquireBucketWithLock(b)); + + uint32_t numRemoves(0); + for (const auto& dt_gid_ts : ids) { + auto& gid = dt_gid_ts.gid; + auto t = dt_gid_ts.timestamp; + LOG(debug, "removeByGidAsync(%s, %" PRIu64 ", %s, %s)", b.toString().c_str(), uint64_t(t), dt_gid_ts.doc_type.c_str(), gid.toString().c_str()); + + while (!bc) { + internal_create_bucket(b); + bc = acquireBucketWithLock(b); + } + DocEntry::SP entry((*bc)->getEntry(gid)); + if (entry && entry->getTimestamp() <= t) { + numRemoves += entry->isRemove() ? 0 : 1; + (*bc)->eraseEntry(entry->getTimestamp()); + } + } + bc.reset(); + onComplete->onComplete(std::make_unique<RemoveResult>(numRemoves)); +} + GetResult DummyPersistence::get(const Bucket& b, const FieldSet& fieldSet, const DocumentId& did, Context&) const { diff --git a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.h b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.h index 26b19a43aee..f7f58612b8d 100644 --- a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.h +++ b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.h @@ -74,6 +74,7 @@ struct BucketContent { bool hasTimestamp(Timestamp) const; void insert(DocEntry::SP); DocEntry::SP getEntry(const DocumentId&) const; + std::shared_ptr<DocEntry> getEntry(const GlobalId& gid) const; DocEntry::SP getEntry(Timestamp) const; void eraseEntry(Timestamp t); void setActive(bool active = true) { @@ -161,6 +162,7 @@ public: GetResult get(const Bucket&, const document::FieldSet&, const DocumentId&, Context&) const override; void putAsync(const Bucket&, Timestamp, DocumentSP, OperationComplete::UP) override; void removeAsync(const Bucket& b, std::vector<spi::IdAndTimestamp> ids, OperationComplete::UP) override; + void removeByGidAsync(const Bucket& b, std::vector<spi::DocTypeGidAndTimestamp> ids, std::unique_ptr<OperationComplete>) override; void updateAsync(const Bucket&, Timestamp, DocumentUpdateSP, OperationComplete::UP) override; CreateIteratorResult diff --git a/persistence/src/vespa/persistence/spi/CMakeLists.txt b/persistence/src/vespa/persistence/spi/CMakeLists.txt index 617334317e7..6c3eddaaabf 100644 --- a/persistence/src/vespa/persistence/spi/CMakeLists.txt +++ b/persistence/src/vespa/persistence/spi/CMakeLists.txt @@ -9,6 +9,7 @@ vespa_add_library(persistence_spi OBJECT clusterstate.cpp context.cpp docentry.cpp + doctype_gid_and_timestamp.cpp exceptions.cpp id_and_timestamp.cpp persistenceprovider.cpp diff --git a/persistence/src/vespa/persistence/spi/doctype_gid_and_timestamp.cpp b/persistence/src/vespa/persistence/spi/doctype_gid_and_timestamp.cpp new file mode 100644 index 00000000000..1184c0b409f --- /dev/null +++ b/persistence/src/vespa/persistence/spi/doctype_gid_and_timestamp.cpp @@ -0,0 +1,43 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "doctype_gid_and_timestamp.h" +#include <vespa/vespalib/stllike/asciistream.h> + +namespace storage::spi { + +DocTypeGidAndTimestamp::DocTypeGidAndTimestamp() + : doc_type(), + gid() +{ +} + +DocTypeGidAndTimestamp::DocTypeGidAndTimestamp(const vespalib::string& doc_type_, document::GlobalId gid_, Timestamp timestamp_) noexcept + : doc_type(doc_type_), + gid(std::move(gid_)), + timestamp(timestamp_) +{} + +DocTypeGidAndTimestamp::DocTypeGidAndTimestamp(const DocTypeGidAndTimestamp&) = default; +DocTypeGidAndTimestamp& DocTypeGidAndTimestamp::operator=(const DocTypeGidAndTimestamp&) = default; +DocTypeGidAndTimestamp::DocTypeGidAndTimestamp(DocTypeGidAndTimestamp&&) noexcept = default; +DocTypeGidAndTimestamp& DocTypeGidAndTimestamp::operator=(DocTypeGidAndTimestamp&&) noexcept = default; + +void DocTypeGidAndTimestamp::print(vespalib::asciistream& os) const { + os << doc_type << ":" << gid.toString() << " at time " << timestamp.getValue(); +} + +vespalib::string DocTypeGidAndTimestamp::to_string() const { + vespalib::asciistream os; + print(os); + return os.str(); +} + +vespalib::asciistream& operator<<(vespalib::asciistream& os, const DocTypeGidAndTimestamp& dt_gid_ts) { + dt_gid_ts.print(os); + return os; +} +std::ostream& operator<<(std::ostream& os, const DocTypeGidAndTimestamp& dt_gid_ts) { + os << dt_gid_ts.to_string(); + return os; +} + +} diff --git a/persistence/src/vespa/persistence/spi/doctype_gid_and_timestamp.h b/persistence/src/vespa/persistence/spi/doctype_gid_and_timestamp.h new file mode 100644 index 00000000000..449935ef830 --- /dev/null +++ b/persistence/src/vespa/persistence/spi/doctype_gid_and_timestamp.h @@ -0,0 +1,50 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "types.h" +#include <vespa/document/base/globalid.h> +#include <vespa/vespalib/stllike/string.h> +#include <iosfwd> + +namespace vespalib { class asciistream; } + +namespace storage::spi { + +/** + * Convenience wrapper for referencing a document type and global id with + * a timestamp. + * + * Prefer this instead of a std::tuple due to named fields and a pre-provided hash function. + */ +struct DocTypeGidAndTimestamp { + vespalib::string doc_type; + document::GlobalId gid; + Timestamp timestamp; + + DocTypeGidAndTimestamp(); + DocTypeGidAndTimestamp(const vespalib::string& doc_type_, document::GlobalId gid_, Timestamp timestamp_) noexcept; + + DocTypeGidAndTimestamp(const DocTypeGidAndTimestamp&); + DocTypeGidAndTimestamp& operator=(const DocTypeGidAndTimestamp&); + DocTypeGidAndTimestamp(DocTypeGidAndTimestamp&&) noexcept; + DocTypeGidAndTimestamp& operator=(DocTypeGidAndTimestamp&&) noexcept; + + bool operator==(const DocTypeGidAndTimestamp& rhs) const noexcept { + return ((doc_type == rhs.doc_type) && (gid == rhs.gid)); + } + + void print(vespalib::asciistream&) const; + vespalib::string to_string() const; + + struct hash { + size_t operator()(const DocTypeGidAndTimestamp& dt_gid_ts) const noexcept { + const size_t h = document::GlobalId::hash()(dt_gid_ts.gid); + return h ^ (dt_gid_ts.timestamp + 0x9e3779b9U + (h << 6U) + (h >> 2U)); // Basically boost::hash_combine + } + }; +}; + +vespalib::asciistream& operator<<(vespalib::asciistream&, const DocTypeGidAndTimestamp&); +std::ostream& operator<<(std::ostream&, const DocTypeGidAndTimestamp&); + +} diff --git a/persistence/src/vespa/persistence/spi/persistenceprovider.h b/persistence/src/vespa/persistence/spi/persistenceprovider.h index cb32dc05eec..5fb90046c65 100644 --- a/persistence/src/vespa/persistence/spi/persistenceprovider.h +++ b/persistence/src/vespa/persistence/spi/persistenceprovider.h @@ -18,6 +18,7 @@ namespace storage::spi { class IResourceUsageListener; struct BucketExecutor; +struct DocTypeGidAndTimestamp; /** * This interface is the basis for a persistence provider in Vespa. A @@ -173,6 +174,13 @@ struct PersistenceProvider */ virtual void removeAsync(const Bucket&, std::vector<IdAndTimestamp> ids, OperationComplete::UP) = 0; + /* + * Remove documents based on document type and gid and forget about them + * (don't keep track of the removed document). This operation is typically + * used as part of removing documents in a bucket that will be deleted. + */ + virtual void removeByGidAsync(const Bucket&, std::vector<DocTypeGidAndTimestamp> ids, std::unique_ptr<OperationComplete>) = 0; + /** * @see remove() * <p/> |