diff options
33 files changed, 352 insertions, 122 deletions
diff --git a/document/src/vespa/document/base/documentid.h b/document/src/vespa/document/base/documentid.h index 5dcda838623..9da28f54b82 100644 --- a/document/src/vespa/document/base/documentid.h +++ b/document/src/vespa/document/base/documentid.h @@ -65,7 +65,7 @@ public: const IdString& getScheme() const { return _id; } bool hasDocType() const { return _id.hasDocType(); } - vespalib::string getDocType() const { return _id.getDocType(); } + vespalib::stringref getDocType() const { return _id.getDocType(); } const GlobalId& getGlobalId() const { if (!_globalId.first) { calculateGlobalId(); } diff --git a/document/src/vespa/document/fieldvalue/referencefieldvalue.cpp b/document/src/vespa/document/fieldvalue/referencefieldvalue.cpp index 6c046c1787b..273038b1cf3 100644 --- a/document/src/vespa/document/fieldvalue/referencefieldvalue.cpp +++ b/document/src/vespa/document/fieldvalue/referencefieldvalue.cpp @@ -47,7 +47,7 @@ void ReferenceFieldValue::requireIdOfMatchingType( make_string("Can't assign document ID '%s' (of type '%s') to " "reference of document type '%s'", id.toString().c_str(), - id.getDocType().c_str(), + vespalib::string(id.getDocType()).c_str(), type.getName().c_str()), VESPA_STRLOC); } diff --git a/searchcore/src/tests/proton/documentdb/combiningfeedview/combiningfeedview_test.cpp b/searchcore/src/tests/proton/documentdb/combiningfeedview/combiningfeedview_test.cpp index 958be6a4686..fd8ce978d43 100644 --- a/searchcore/src/tests/proton/documentdb/combiningfeedview/combiningfeedview_test.cpp +++ b/searchcore/src/tests/proton/documentdb/combiningfeedview/combiningfeedview_test.cpp @@ -155,9 +155,9 @@ struct Fixture const test::Document &doc = userDocs().getDocs(userId)[0]; return PutOperation(doc.getBucket(), doc.getTimestamp(), doc.getDoc()); } - RemoveOperation remove(uint32_t userId) { + RemoveOperationWithDocId remove(uint32_t userId) { const test::Document &doc = userDocs().getDocs(userId)[0]; - return RemoveOperation(doc.getBucket(), 
doc.getTimestamp(), doc.getDoc()->getId()); + return RemoveOperationWithDocId(doc.getBucket(), doc.getTimestamp(), doc.getDoc()->getId()); } UpdateOperation update(uint32_t userId) { const test::Document &doc = userDocs().getDocs(userId)[0]; @@ -234,7 +234,7 @@ TEST_F("require that handlePut() sends to 2 feed views", Fixture) TEST_F("require that prepareRemove() sends to removed view", Fixture) { - RemoveOperation op = f.remove(1); + RemoveOperationWithDocId op = f.remove(1); f._view.prepareRemove(op); EXPECT_EQUAL(0u, f._ready._view->_prepareRemove); EXPECT_EQUAL(1u, f._removed._view->_prepareRemove); @@ -246,7 +246,7 @@ TEST_F("require that prepareRemove() sends to removed view", Fixture) TEST_F("require that prepareRemove() can fill previous dbdId", Fixture) { f._ready.insertDocs(f.userDocs(1)); - RemoveOperation op = f.remove(1); + RemoveOperationWithDocId op = f.remove(1); f._view.prepareRemove(op); EXPECT_EQUAL(1u, op.getPrevLid()); EXPECT_EQUAL(READY, op.getPrevSubDbId()); @@ -257,7 +257,7 @@ TEST_F("require that prepareRemove() can fill previous dbdId", Fixture) TEST_F("require that handleRemove() sends op with valid dbdId to 1 feed view", Fixture) { - RemoveOperation op = f.remove(1); + RemoveOperationWithDocId op = f.remove(1); op.setDbDocumentId(DbDocumentId(REMOVED, 1)); f._view.handleRemove(FeedToken(), op); EXPECT_EQUAL(0u, f._ready._view->_handleRemove); @@ -268,7 +268,7 @@ TEST_F("require that handleRemove() sends op with valid dbdId to 1 feed view", F TEST_F("require that handleRemove() sends op with valid dbdId to 2 feed views", Fixture) { - RemoveOperation op = f.remove(1); + RemoveOperationWithDocId op = f.remove(1); op.setDbDocumentId(DbDocumentId(REMOVED, 1)); op.setPrevDbDocumentId(DbDocumentId(READY, 1)); f._view.handleRemove(FeedToken(), op); @@ -280,7 +280,7 @@ TEST_F("require that handleRemove() sends op with valid dbdId to 2 feed views", TEST_F("require that handleRemove() sends op with invalid dbdId to prev view", Fixture) { - 
RemoveOperation op = f.remove(1); + RemoveOperationWithDocId op = f.remove(1); // can be used in the case where removed feed view does not remember removes. op.setPrevDbDocumentId(DbDocumentId(READY, 1)); f._view.handleRemove(FeedToken(), op); diff --git a/searchcore/src/tests/proton/documentdb/document_subdbs/document_subdbs_test.cpp b/searchcore/src/tests/proton/documentdb/document_subdbs/document_subdbs_test.cpp index 898f014cea3..2785b744265 100644 --- a/searchcore/src/tests/proton/documentdb/document_subdbs/document_subdbs_test.cpp +++ b/searchcore/src/tests/proton/documentdb/document_subdbs/document_subdbs_test.cpp @@ -743,13 +743,13 @@ struct DocumentHandler op.setSerialNum(serialNum); return op; } - RemoveOperation createRemove(const DocumentId &docId, Timestamp timestamp, SerialNum serialNum) + RemoveOperationWithDocId createRemove(const DocumentId &docId, Timestamp timestamp, SerialNum serialNum) { const document::GlobalId &gid = docId.getGlobalId(); BucketId bucket = gid.convertToBucketId(); bucket.setUsedBits(BUCKET_USED_BITS); bucket = bucket.stripUnused(); - RemoveOperation op(bucket, timestamp, docId); + RemoveOperationWithDocId op(bucket, timestamp, docId); op.setSerialNum(serialNum); return op; } @@ -883,7 +883,7 @@ TEST_F("require that lid allocation uses lowest free lid", StoreOnlyFixture) DocumentHandler<StoreOnlyFixture> handler(f); Document::UP doc; PutOperation putOp; - RemoveOperation rmOp; + RemoveOperationWithDocId rmOp; MoveOperation moveOp; doc = handler.createEmptyDoc(1); diff --git a/searchcore/src/tests/proton/documentdb/feedhandler/feedhandler_test.cpp b/searchcore/src/tests/proton/documentdb/feedhandler/feedhandler_test.cpp index 6c9ffc210a1..796ec4436fe 100644 --- a/searchcore/src/tests/proton/documentdb/feedhandler/feedhandler_test.cpp +++ b/searchcore/src/tests/proton/documentdb/feedhandler/feedhandler_test.cpp @@ -193,7 +193,7 @@ struct MyFeedView : public test::DummyFeedView { MyFeedView(const std::shared_ptr<const 
DocumentTypeRepo> &dtr, const DocTypeName &docTypeName); ~MyFeedView() override; - void resetPutLatch(uint32_t count) { putLatch.reset(new vespalib::CountDownLatch(count)); } + void resetPutLatch(uint32_t count) { putLatch = std::make_unique<vespalib::CountDownLatch>(count); } void preparePut(PutOperation &op) override { prepareDocumentOperation(op, op.getDocument()->getId().getGlobalId()); } @@ -532,7 +532,7 @@ TEST_F("require that heartBeat calls FeedView's heartBeat", TEST_F("require that outdated remove is ignored", FeedHandlerFixture) { DocumentContext doc_context("id:ns:searchdocument::foo", *f.schema.builder); - FeedOperation::UP op(new RemoveOperation(doc_context.bucketId, Timestamp(10), doc_context.doc->getId())); + auto op = std::make_unique<RemoveOperationWithDocId>(doc_context.bucketId, Timestamp(10), doc_context.doc->getId()); static_cast<DocumentOperation &>(*op).setPrevDbDocumentId(DbDocumentId(4)); static_cast<DocumentOperation &>(*op).setPrevTimestamp(Timestamp(10000)); FeedTokenContext token_context; @@ -544,8 +544,7 @@ TEST_F("require that outdated remove is ignored", FeedHandlerFixture) TEST_F("require that outdated put is ignored", FeedHandlerFixture) { DocumentContext doc_context("id:ns:searchdocument::foo", *f.schema.builder); - FeedOperation::UP op(new PutOperation(doc_context.bucketId, - Timestamp(10), doc_context.doc)); + auto op =std::make_unique<PutOperation>(doc_context.bucketId, Timestamp(10), doc_context.doc); static_cast<DocumentOperation &>(*op).setPrevTimestamp(Timestamp(10000)); FeedTokenContext token_context; f.handler.performOperation(std::move(token_context.token), std::move(op)); @@ -556,7 +555,7 @@ TEST_F("require that outdated put is ignored", FeedHandlerFixture) void addLidToRemove(RemoveDocumentsOperation &op) { - LidVectorContext::SP lids(new LidVectorContext(42)); + auto lids = std::make_shared<LidVectorContext>(42); lids->addLid(4); op.setLidsToRemove(0, lids); } @@ -625,7 +624,7 @@ TEST_F("require that flush cannot 
unprune", FeedHandlerFixture) TEST_F("require that remove of unknown document with known data type stores remove", FeedHandlerFixture) { DocumentContext doc_context("id:test:searchdocument::foo", *f.schema.builder); - FeedOperation::UP op(new RemoveOperation(doc_context.bucketId, Timestamp(10), doc_context.doc->getId())); + auto op = std::make_unique<RemoveOperationWithDocId>(doc_context.bucketId, Timestamp(10), doc_context.doc->getId()); FeedTokenContext token_context; f.handler.performOperation(std::move(token_context.token), std::move(op)); EXPECT_EQUAL(1, f.feedView.remove_count); @@ -635,7 +634,7 @@ TEST_F("require that remove of unknown document with known data type stores remo TEST_F("require that partial update for non-existing document is tagged as such", FeedHandlerFixture) { UpdateContext upCtx("id:test:searchdocument::foo", *f.schema.builder); - FeedOperation::UP op(new UpdateOperation(upCtx.bucketId, Timestamp(10), upCtx.update)); + auto op = std::make_unique<UpdateOperation>(upCtx.bucketId, Timestamp(10), upCtx.update); FeedTokenContext token_context; f.handler.performOperation(std::move(token_context.token), std::move(op)); const UpdateResult *result = static_cast<const UpdateResult *>(token_context.getResult()); @@ -653,7 +652,7 @@ TEST_F("require that partial update for non-existing document is created if spec UpdateContext upCtx("id:test:searchdocument::foo", *f.schema.builder); upCtx.update->setCreateIfNonExistent(true); f.feedView.metaStore.insert(upCtx.update->getId().getGlobalId(), MyDocumentMetaStore::Entry(5, 5, Timestamp(10))); - FeedOperation::UP op(new UpdateOperation(upCtx.bucketId, Timestamp(10), upCtx.update)); + auto op = std::make_unique<UpdateOperation>(upCtx.bucketId, Timestamp(10), upCtx.update); FeedTokenContext token_context; f.handler.performOperation(std::move(token_context.token), std::move(op)); const UpdateResult *result = static_cast<const UpdateResult *>(token_context.getResult()); @@ -674,7 +673,7 @@ TEST_F("require that 
put is rejected if resource limit is reached", FeedHandlerF f.writeFilter._message = "Attribute resource limit reached"; DocumentContext docCtx("id:test:searchdocument::foo", *f.schema.builder); - FeedOperation::UP op = std::make_unique<PutOperation>(docCtx.bucketId, Timestamp(10), docCtx.doc); + auto op = std::make_unique<PutOperation>(docCtx.bucketId, Timestamp(10), docCtx.doc); FeedTokenContext token; f.handler.performOperation(std::move(token.token), std::move(op)); EXPECT_EQUAL(0, f.feedView.put_count); @@ -689,7 +688,7 @@ TEST_F("require that update is rejected if resource limit is reached", FeedHandl f.writeFilter._message = "Attribute resource limit reached"; UpdateContext updCtx("id:test:searchdocument::foo", *f.schema.builder); - FeedOperation::UP op = std::make_unique<UpdateOperation>(updCtx.bucketId, Timestamp(10), updCtx.update); + auto op = std::make_unique<UpdateOperation>(updCtx.bucketId, Timestamp(10), updCtx.update); FeedTokenContext token; f.handler.performOperation(std::move(token.token), std::move(op)); EXPECT_EQUAL(0, f.feedView.update_count); @@ -705,7 +704,7 @@ TEST_F("require that remove is NOT rejected if resource limit is reached", FeedH f.writeFilter._message = "Attribute resource limit reached"; DocumentContext docCtx("id:test:searchdocument::foo", *f.schema.builder); - FeedOperation::UP op = std::make_unique<RemoveOperation>(docCtx.bucketId, Timestamp(10), docCtx.doc->getId()); + auto op = std::make_unique<RemoveOperationWithDocId>(docCtx.bucketId, Timestamp(10), docCtx.doc->getId()); FeedTokenContext token; f.handler.performOperation(std::move(token.token), std::move(op)); EXPECT_EQUAL(1, f.feedView.remove_count); @@ -726,7 +725,7 @@ checkUpdate(FeedHandlerFixture &f, SchemaContext &schemaContext, } else { updCtx.update->setCreateIfNonExistent(true); } - FeedOperation::UP op = std::make_unique<UpdateOperation>(updCtx.bucketId, Timestamp(10), updCtx.update); + auto op = std::make_unique<UpdateOperation>(updCtx.bucketId, Timestamp(10), 
updCtx.update); FeedTokenContext token; f.handler.performOperation(std::move(token.token), std::move(op)); EXPECT_TRUE(dynamic_cast<const UpdateResult *>(token.getResult())); diff --git a/searchcore/src/tests/proton/documentdb/feedview/feedview_test.cpp b/searchcore/src/tests/proton/documentdb/feedview/feedview_test.cpp index 6a6b05be7f0..ddf45d6a509 100644 --- a/searchcore/src/tests/proton/documentdb/feedview/feedview_test.cpp +++ b/searchcore/src/tests/proton/documentdb/feedview/feedview_test.cpp @@ -620,7 +620,7 @@ struct FixtureBase void removeAndWait(const DocumentContext &docCtx) { FeedTokenContext token(_tracer); - RemoveOperation op(docCtx.bid, docCtx.ts, docCtx.doc->getId()); + RemoveOperationWithDocId op(docCtx.bid, docCtx.ts, docCtx.doc->getId()); runInMaster([&] () { performRemove(token.ft, op); }); } diff --git a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp index 50d4106282c..64299c70588 100644 --- a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp +++ b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp @@ -1,6 +1,4 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#include <vespa/log/log.h> -LOG_SETUP("lid_space_compaction_test"); #include <vespa/searchcore/proton/server/i_disk_mem_usage_notifier.h> #include <vespa/searchcore/proton/server/i_lid_space_compaction_handler.h> @@ -15,6 +13,9 @@ LOG_SETUP("lid_space_compaction_test"); #include <vespa/searchlib/index/docbuilder.h> #include <vespa/vespalib/gtest/gtest.h> +#include <vespa/log/log.h> +LOG_SETUP("lid_space_compaction_test"); + using namespace document; using namespace proton; using namespace search::index; diff --git a/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp b/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp index d4aebd0b8a7..7e2e258476f 100644 --- a/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp +++ b/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp @@ -110,7 +110,7 @@ public: MaintenanceDocumentSubDB getSubDB(); void handlePruneRemovedDocuments(const PruneRemovedDocumentsOperation &op); void handlePut(PutOperation &op); - void handleRemove(RemoveOperation &op); + void handleRemove(RemoveOperationWithDocId &op); void prepareMove(MoveOperation &op); void handleMove(const MoveOperation &op); uint32_t getNumUsedLids() const; @@ -565,11 +565,11 @@ MyDocumentSubDB::handlePut(PutOperation &op) void -MyDocumentSubDB::handleRemove(RemoveOperation &op) +MyDocumentSubDB::handleRemove(RemoveOperationWithDocId &op) { const SerialNum serialNum = op.getSerialNum(); const DocumentId &docId = op.getDocumentId(); - const document::GlobalId &gid = docId.getGlobalId(); + const document::GlobalId &gid = op.getGlobalId(); bool needCommit = false; if (op.getValidDbdId(_subDBId)) { @@ -584,7 +584,7 @@ MyDocumentSubDB::handleRemove(RemoveOperation &op) assert(op.getLid() == putRes._lid); const document::DocumentType *docType = _repo->getDocumentType(_docTypeName.getName()); - Document::UP doc(new 
Document(*docType, docId)); + auto doc = std::make_unique<Document>(*docType, docId); doc->setRepo(*_repo); _docs[op.getLid()] = std::move(doc); needCommit = true; @@ -948,7 +948,7 @@ MaintenanceControllerFixture::removeDocs(const test::UserDocuments &docs, const test::BucketDocuments &bucketDocs = itr->second; for (size_t i = 0; i < bucketDocs.getDocs().size(); ++i) { const test::Document &testDoc = bucketDocs.getDocs()[i]; - RemoveOperation op(testDoc.getBucket(), timestamp, testDoc.getDoc()->getId()); + RemoveOperationWithDocId op(testDoc.getBucket(), timestamp, testDoc.getDoc()->getId()); op.setDbDocumentId(DbDocumentId(_removed.getSubDBId(), testDoc.getLid())); _fh.storeOperation(op, std::make_shared<search::IgnoreCallback>()); _removed.handleRemove(op); diff --git a/searchcore/src/tests/proton/feedoperation/feedoperation_test.cpp b/searchcore/src/tests/proton/feedoperation/feedoperation_test.cpp index de2cd3c624f..5fffd70f11d 100644 --- a/searchcore/src/tests/proton/feedoperation/feedoperation_test.cpp +++ b/searchcore/src/tests/proton/feedoperation/feedoperation_test.cpp @@ -194,10 +194,10 @@ TEST("require that toString() on derived classes are meaningful") EXPECT_EQUAL("Remove(id::::, BucketId(0x0000000000000000), timestamp=0, dbdId=(subDbId=0, lid=0), " "prevDbdId=(subDbId=0, lid=0), prevMarkedAsRemoved=false, prevTimestamp=0, serialNum=0)", - RemoveOperation().toString()); + RemoveOperationWithDocId().toString()); EXPECT_EQUAL("Remove(id:ns:foo:::bar, BucketId(0x000000000000002a), timestamp=10, dbdId=(subDbId=0, lid=0), " "prevDbdId=(subDbId=0, lid=0), prevMarkedAsRemoved=false, prevTimestamp=0, serialNum=0)", - RemoveOperation(bucket_id1, timestamp, doc_id).toString()); + RemoveOperationWithDocId(bucket_id1, timestamp, doc_id).toString()); EXPECT_EQUAL("SplitBucket(" "source=BucketId(0x0000000000000000), " @@ -311,7 +311,7 @@ TEST_F("require that we can serialize and deserialize remove operations", Fixtur uint32_t expSerializedDocSize = 
getDocIdSize(docId); EXPECT_NOT_EQUAL(0u, expSerializedDocSize); { - RemoveOperation op(bucket, Timestamp(10), docId); + RemoveOperationWithDocId op(bucket, Timestamp(10), docId); op.setDbDocumentId({1, 2}); op.setPrevDbDocumentId({3, 4}); EXPECT_EQUAL(0u, op.getSerializedDocSize()); @@ -319,13 +319,43 @@ TEST_F("require that we can serialize and deserialize remove operations", Fixtur EXPECT_EQUAL(expSerializedDocSize, op.getSerializedDocSize()); } { - RemoveOperation op; + RemoveOperationWithDocId op; op.deserialize(stream, *f._repo); EXPECT_EQUAL(docId, op.getDocumentId()); TEST_DO(assertDocumentOperation(op, bucket, expSerializedDocSize)); } } +TEST_F("require that we can serialize and deserialize remove by gid operations", Fixture) +{ + vespalib::nbostream stream; + GlobalId gid = docId.getGlobalId(); + BucketId bucket(toBucket(gid)); + uint32_t expSerializedDocSize = 25; + vespalib::string expDocType = "testdoc_type"; + EXPECT_NOT_EQUAL(0u, expSerializedDocSize); + { + RemoveOperationWithGid op(bucket, Timestamp(10), gid, expDocType); + op.setPrevDbDocumentId({3, 4}); + EXPECT_EQUAL(0u, op.getSerializedDocSize()); + op.serialize(stream); + EXPECT_EQUAL(expSerializedDocSize, op.getSerializedDocSize()); + } + { + RemoveOperationWithGid op; + op.deserialize(stream, *f._repo); + EXPECT_EQUAL(gid, op.getGlobalId()); + EXPECT_EQUAL(expDocType, op.getDocType()); + EXPECT_EQUAL(bucket, op.getBucketId()); + EXPECT_EQUAL(10u, op.getTimestamp().getValue()); + EXPECT_EQUAL(expSerializedDocSize, op.getSerializedDocSize()); + EXPECT_FALSE( op.getValidDbdId()); + EXPECT_EQUAL(3u, op.getPrevSubDbId()); + EXPECT_EQUAL(4u, op.getPrevLid()); + EXPECT_TRUE(stream.empty()); + } +} + } // namespace TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchcore/src/tests/proton/server/feedstates_test.cpp b/searchcore/src/tests/proton/server/feedstates_test.cpp index ca48fb773d8..6643eb925b9 100644 --- a/searchcore/src/tests/proton/server/feedstates_test.cpp +++ 
b/searchcore/src/tests/proton/server/feedstates_test.cpp @@ -91,7 +91,7 @@ Fixture::~Fixture() = default; struct RemoveOperationContext { DocumentId doc_id; - RemoveOperation op; + RemoveOperationWithDocId op; nbostream str; std::unique_ptr<Packet> packet; diff --git a/searchcore/src/vespa/searchcore/config/proton.def b/searchcore/src/vespa/searchcore/config/proton.def index f895bd0c88d..0501ab6ed7c 100644 --- a/searchcore/src/vespa/searchcore/config/proton.def +++ b/searchcore/src/vespa/searchcore/config/proton.def @@ -31,6 +31,11 @@ numsummarythreads int default=16 restart ## Stop on io errors ? stoponioerrors bool default=false restart +## Perform extra validation of stored data on startup +## It requires a restart to be turned on, but no restart to be turned off. +## Hence it must always be followed by a manual restart. +validate_and_sanitize_docstore enum {NO, YES} default = NO + ## Maximum number of concurrent flushes outstanding. flush.maxconcurrent int default=2 restart diff --git a/searchcore/src/vespa/searchcore/proton/feedoperation/documentoperation.cpp b/searchcore/src/vespa/searchcore/proton/feedoperation/documentoperation.cpp index 37b23449315..f1161c8ebdd 100644 --- a/searchcore/src/vespa/searchcore/proton/feedoperation/documentoperation.cpp +++ b/searchcore/src/vespa/searchcore/proton/feedoperation/documentoperation.cpp @@ -41,9 +41,14 @@ DocumentOperation::DocumentOperation(Type type, const BucketId &bucketId, const void DocumentOperation::assertValidBucketId(const document::DocumentId &docId) const { + assertValidBucketId(docId.getGlobalId()); +} + +void +DocumentOperation::assertValidBucketId(const document::GlobalId &gid) const +{ assert(_bucketId.valid()); uint8_t bucketUsedBits = _bucketId.getUsedBits(); - const GlobalId &gid = docId.getGlobalId(); BucketId verId(gid.convertToBucketId()); verId.setUsedBits(bucketUsedBits); assert(_bucketId.getRawId() == verId.getRawId() || diff --git 
a/searchcore/src/vespa/searchcore/proton/feedoperation/documentoperation.h b/searchcore/src/vespa/searchcore/proton/feedoperation/documentoperation.h index 9a823c553bd..6847dbfd943 100644 --- a/searchcore/src/vespa/searchcore/proton/feedoperation/documentoperation.h +++ b/searchcore/src/vespa/searchcore/proton/feedoperation/documentoperation.h @@ -26,6 +26,7 @@ protected: const storage::spi::Timestamp &timestamp); void assertValidBucketId(const document::DocumentId &docId) const; + void assertValidBucketId(const document::GlobalId &docId) const; vespalib::string docArgsToString() const; public: diff --git a/searchcore/src/vespa/searchcore/proton/feedoperation/feedoperation.h b/searchcore/src/vespa/searchcore/proton/feedoperation/feedoperation.h index 3509af0de5c..10518c74340 100644 --- a/searchcore/src/vespa/searchcore/proton/feedoperation/feedoperation.h +++ b/searchcore/src/vespa/searchcore/proton/feedoperation/feedoperation.h @@ -36,7 +36,8 @@ public: MOVE = 15, CREATE_BUCKET = 16, COMPACT_LID_SPACE = 17, - UPDATE = 18 + UPDATE = 18, + REMOVE_GID = 19 }; private: diff --git a/searchcore/src/vespa/searchcore/proton/feedoperation/removedocumentsoperation.h b/searchcore/src/vespa/searchcore/proton/feedoperation/removedocumentsoperation.h index df74de1bd16..ad058bb153d 100644 --- a/searchcore/src/vespa/searchcore/proton/feedoperation/removedocumentsoperation.h +++ b/searchcore/src/vespa/searchcore/proton/feedoperation/removedocumentsoperation.h @@ -18,22 +18,22 @@ protected: void serializeLidsToRemove(vespalib::nbostream &os) const; void deserializeLidsToRemove(vespalib::nbostream &is); public: - virtual ~RemoveDocumentsOperation() { } + ~RemoveDocumentsOperation() override { } void setLidsToRemove(uint32_t subDbId, const LidVectorContext::SP &lidsToRemove) { _lidsToRemoveMap[subDbId] = lidsToRemove; } + bool hasLidsToRemove() const { + return !_lidsToRemoveMap.empty(); + } + const LidVectorContext::SP getLidsToRemove(uint32_t subDbId) const { 
LidsToRemoveMap::const_iterator found(_lidsToRemoveMap.find(subDbId)); - if (found != _lidsToRemoveMap.end()) - return found->second; - else - return LidVectorContext::SP(); + return (found != _lidsToRemoveMap.end()) ? found->second : LidVectorContext::SP(); } }; } // namespace proton - diff --git a/searchcore/src/vespa/searchcore/proton/feedoperation/removeoperation.cpp b/searchcore/src/vespa/searchcore/proton/feedoperation/removeoperation.cpp index 006a7e2b035..c6bdeb08ad5 100644 --- a/searchcore/src/vespa/searchcore/proton/feedoperation/removeoperation.cpp +++ b/searchcore/src/vespa/searchcore/proton/feedoperation/removeoperation.cpp @@ -1,37 +1,37 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "removeoperation.h" +#include <cassert> using document::BucketId; using document::DocumentId; +using document::GlobalId; using document::DocumentTypeRepo; using storage::spi::Timestamp; using vespalib::make_string; namespace proton { -RemoveOperation::RemoveOperation() - : DocumentOperation(FeedOperation::REMOVE), +RemoveOperationWithDocId::RemoveOperationWithDocId() + : RemoveOperation(FeedOperation::REMOVE), _docId() { } -RemoveOperation::RemoveOperation(const BucketId &bucketId, - const Timestamp &timestamp, - const DocumentId &docId) - : DocumentOperation(FeedOperation::REMOVE, - bucketId, - timestamp), +RemoveOperationWithDocId::RemoveOperationWithDocId(BucketId bucketId, Timestamp timestamp, const DocumentId &docId) + : RemoveOperation(FeedOperation::REMOVE, bucketId, timestamp), _docId(docId) { } +RemoveOperationWithDocId::~RemoveOperationWithDocId() = default; + void -RemoveOperation::serialize(vespalib::nbostream &os) const +RemoveOperationWithDocId::serialize(vespalib::nbostream &os) const { assertValidBucketId(_docId); - DocumentOperation::serialize(os); + RemoveOperation::serialize(os); size_t oldSize = os.size(); vespalib::string rawId = _docId.toString(); os.write(rawId.c_str(), 
rawId.size() + 1); @@ -40,18 +40,64 @@ RemoveOperation::serialize(vespalib::nbostream &os) const void -RemoveOperation::deserialize(vespalib::nbostream &is, - const DocumentTypeRepo &repo) +RemoveOperationWithDocId::deserialize(vespalib::nbostream &is, const DocumentTypeRepo &repo) { - DocumentOperation::deserialize(is, repo); + RemoveOperation::deserialize(is, repo); size_t oldSize = is.size(); _docId = DocumentId(is); _serializedDocSize = oldSize - is.size(); } -vespalib::string RemoveOperation::toString() const { +vespalib::string +RemoveOperationWithDocId::toString() const { return make_string("Remove(%s, %s)", - _docId.getScheme().toString().c_str(), - docArgsToString().c_str()); + _docId.getScheme().toString().c_str(), docArgsToString().c_str()); +} + +RemoveOperationWithGid::RemoveOperationWithGid() + : RemoveOperation(FeedOperation::REMOVE_GID), + _gid(), + _docType() +{} + + +RemoveOperationWithGid::RemoveOperationWithGid(BucketId bucketId, Timestamp timestamp, const GlobalId &gid, vespalib::stringref docType) + : RemoveOperation(FeedOperation::REMOVE_GID, bucketId, timestamp), + _gid(gid), + _docType(docType) +{} + +RemoveOperationWithGid::~RemoveOperationWithGid() = default; + +void +RemoveOperationWithGid::serialize(vespalib::nbostream &os) const +{ + assertValidBucketId(_gid); + assert( ! 
getValidDbdId()); + RemoveOperation::serialize(os); + size_t oldSize = os.size(); + os.write(_gid.get(), GlobalId::LENGTH); + os.writeSmallString(_docType); + _serializedDocSize = os.size() - oldSize; } + + +void +RemoveOperationWithGid::deserialize(vespalib::nbostream &is, const DocumentTypeRepo &repo) +{ + RemoveOperation::deserialize(is, repo); + size_t oldSize = is.size(); + char buf[GlobalId::LENGTH]; + is.read(buf, sizeof(buf)); + _gid.set(buf); + is.readSmallString(_docType); + _serializedDocSize = oldSize - is.size(); +} + +vespalib::string +RemoveOperationWithGid::toString() const { + return make_string("RemoveGid(%s, %s, %s)", + _gid.toString().c_str(), _docType.c_str(), docArgsToString().c_str()); +} + } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/feedoperation/removeoperation.h b/searchcore/src/vespa/searchcore/proton/feedoperation/removeoperation.h index 8c10107d944..ea1e62b8651 100644 --- a/searchcore/src/vespa/searchcore/proton/feedoperation/removeoperation.h +++ b/searchcore/src/vespa/searchcore/proton/feedoperation/removeoperation.h @@ -7,22 +7,54 @@ namespace proton { class RemoveOperation : public DocumentOperation { +protected: + explicit RemoveOperation(Type type) : DocumentOperation(type) {} + RemoveOperation(Type type, document::BucketId bucketId, storage::spi::Timestamp timestamp) + : DocumentOperation(type, bucketId, timestamp) + {} +public: + virtual bool hasDocType() const = 0; + virtual vespalib::stringref getDocType() const = 0; + virtual const document::GlobalId & getGlobalId() const = 0; +}; + +class RemoveOperationWithDocId : public RemoveOperation { document::DocumentId _docId; public: - RemoveOperation(); - RemoveOperation(const document::BucketId &bucketId, - const storage::spi::Timestamp &timestamp, - const document::DocumentId &docId); - virtual ~RemoveOperation() {} + RemoveOperationWithDocId(); + RemoveOperationWithDocId(document::BucketId bucketId, + storage::spi::Timestamp timestamp, + const 
document::DocumentId &docId); + ~RemoveOperationWithDocId() override; const document::DocumentId &getDocumentId() const { return _docId; } - virtual void serialize(vespalib::nbostream &os) const override; - virtual void deserialize(vespalib::nbostream &is, - const document::DocumentTypeRepo &repo) override; - virtual vespalib::string toString() const override; + const document::GlobalId & getGlobalId() const override { return _docId.getGlobalId(); } + void serialize(vespalib::nbostream &os) const override; + void deserialize(vespalib::nbostream &is, const document::DocumentTypeRepo &repo) override; + vespalib::string toString() const override; + + bool hasDocType() const override { return _docId.hasDocType(); } + vespalib::stringref getDocType() const override { return _docId.getDocType(); } +}; + +class RemoveOperationWithGid : public RemoveOperation { + document::GlobalId _gid; + vespalib::string _docType; + +public: + RemoveOperationWithGid(); + RemoveOperationWithGid(document::BucketId bucketId, + storage::spi::Timestamp timestamp, + const document::GlobalId & gid, + vespalib::stringref docType); + ~RemoveOperationWithGid() override; + const document::GlobalId & getGlobalId() const override { return _gid; } + void serialize(vespalib::nbostream &os) const override; + void deserialize(vespalib::nbostream &is, const document::DocumentTypeRepo &repo) override; + vespalib::string toString() const override; - bool hasDocType() const { return _docId.hasDocType(); } - vespalib::string getDocType() const { return _docId.getDocType(); } + bool hasDocType() const override { return true; } + vespalib::stringref getDocType() const override { return _docType; } }; } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/server/combiningfeedview.cpp b/searchcore/src/vespa/searchcore/proton/server/combiningfeedview.cpp index 20306e92ea8..d25570794fe 100644 --- a/searchcore/src/vespa/searchcore/proton/server/combiningfeedview.cpp +++ 
b/searchcore/src/vespa/searchcore/proton/server/combiningfeedview.cpp @@ -151,9 +151,7 @@ CombiningFeedView::prepareRemove(RemoveOperation &rmOp) { getRemFeedView()->prepareRemove(rmOp); if (!rmOp.getPrevDbDocumentId().valid()) { - const DocumentId &docId = rmOp.getDocumentId(); - const document::GlobalId &gid = docId.getGlobalId(); - findPrevDbdId(gid, rmOp); + findPrevDbdId(rmOp.getGlobalId(), rmOp); } } diff --git a/searchcore/src/vespa/searchcore/proton/server/docstorevalidator.cpp b/searchcore/src/vespa/searchcore/proton/server/docstorevalidator.cpp index a195471d493..cc2beed8de7 100644 --- a/searchcore/src/vespa/searchcore/proton/server/docstorevalidator.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/docstorevalidator.cpp @@ -1,8 +1,16 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "docstorevalidator.h" +#include "feedhandler.h" +#include <vespa/searchcore/proton/feedoperation/removeoperation.h> #include <vespa/searchlib/common/bitvector.h> #include <vespa/document/fieldvalue/document.h> +#include <vespa/document/datatype/documenttype.h> +#include <vespa/searchcore/proton/common/feedtoken.h> +#include <vespa/searchcore/proton/feedoperation/lidvectorcontext.h> + +#include <vespa/log/log.h> +LOG_SETUP(".server.docstorevalidator"); namespace proton { @@ -92,19 +100,39 @@ DocStoreValidator::killOrphans(search::IDocumentStore &store, } -LidVectorContext::SP +std::shared_ptr<LidVectorContext> DocStoreValidator::getInvalidLids() const { - LidVectorContext::SP res(new LidVectorContext(_docIdLimit)); + auto res = std::make_unique<LidVectorContext>(_docIdLimit); assert(_invalid->size() == _docIdLimit); for (search::DocumentIdT lid(_invalid->getFirstTrueBit(1)); lid < _docIdLimit; - lid = _invalid->getNextTrueBit(lid + 1)) { - + lid = _invalid->getNextTrueBit(lid + 1)) + { res->addLid(lid); } return res; } +void +DocStoreValidator::performRemoves(FeedHandler & feedHandler, const 
search::IDocumentStore &store, const document::DocumentTypeRepo & repo) const { + for (search::DocumentIdT lid(_invalid->getFirstTrueBit(1)); + lid < _docIdLimit; + lid = _invalid->getNextTrueBit(lid + 1)) + { + document::GlobalId gid; + bool found = _dms.getGid(lid, gid); + assert(found); + if (found) { + search::DocumentMetaData metaData = _dms.getMetaData(gid); + assert(metaData.valid()); + document::Document::UP document = store.read(lid, repo); + assert(document); + LOG(info, "Removing document with id %s and lid %u with gid %s in bucket %s", document->getId().toString().c_str(), lid, metaData.gid.toString().c_str(), metaData.bucketId.toString().c_str()); + auto remove = std::make_unique<RemoveOperationWithGid>(metaData.bucketId, metaData.timestamp, gid, document->getType().getName()); + feedHandler.performOperation(FeedToken(), std::move(remove)); + } + } +} } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/server/docstorevalidator.h b/searchcore/src/vespa/searchcore/proton/server/docstorevalidator.h index fba57d4f718..d51d924655a 100644 --- a/searchcore/src/vespa/searchcore/proton/server/docstorevalidator.h +++ b/searchcore/src/vespa/searchcore/proton/server/docstorevalidator.h @@ -4,11 +4,13 @@ #include <vespa/searchlib/common/serialnum.h> #include <vespa/searchlib/docstore/idocumentstore.h> #include <vespa/searchcore/proton/documentmetastore/i_document_meta_store.h> -#include <vespa/searchcore/proton/feedoperation/lidvectorcontext.h> namespace search { class BitVector; } namespace proton { +class FeedHandler; +class LidVectorContext; + class DocStoreValidator : public search::IDocumentStoreReadVisitor { IDocumentMetaStore &_dms; @@ -17,7 +19,7 @@ class DocStoreValidator : public search::IDocumentStoreReadVisitor std::unique_ptr<search::BitVector> _orphans; uint32_t _visitCount; uint32_t _visitEmptyCount; - + public: DocStoreValidator(IDocumentMetaStore &dms); @@ -30,9 +32,8 @@ public: uint32_t getOrphanCount() const; uint32_t 
getVisitCount() const { return _visitCount; } uint32_t getVisitEmptyCount() const { return _visitEmptyCount; } - LidVectorContext::SP getInvalidLids() const; + std::shared_ptr<LidVectorContext> getInvalidLids() const; + void performRemoves(FeedHandler & feedHandler, const search::IDocumentStore &store, const document::DocumentTypeRepo & repo) const; }; - } // namespace proton - diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdb.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdb.cpp index a88fe109326..55f95ce0518 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentdb.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/documentdb.cpp @@ -147,6 +147,7 @@ DocumentDB::DocumentDB(const vespalib::string &baseDir, _activeConfigSnapshot(), _activeConfigSnapshotGeneration(0), _activeConfigSnapshotSerialNum(0u), + _validateAndSanitizeDocStore(protonCfg.validateAndSanitizeDocstore == vespa::config::search::core::ProtonConfig::ValidateAndSanitizeDocstore::YES), _initGate(), _clusterStateHandler(_writeService.master()), _bucketHandler(_writeService.master()), @@ -661,6 +662,12 @@ DocumentDB::onTransactionLogReplayDone() // must signal that all existing buckets must be checked. 
notifyAllBucketsChanged(); } + if (_validateAndSanitizeDocStore) { + LOG(info, "Validating documentdb %s", getName().c_str()); + SerialNum serialNum = _feedHandler.getSerialNum(); + sync(serialNum); + _subDBs.validateDocStore(_feedHandler, serialNum); + } } diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdb.h b/searchcore/src/vespa/searchcore/proton/server/documentdb.h index 1a1d97a657b..917d753683a 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentdb.h +++ b/searchcore/src/vespa/searchcore/proton/server/documentdb.h @@ -112,6 +112,7 @@ private: DocumentDBConfig::SP _activeConfigSnapshot; int64_t _activeConfigSnapshotGeneration; SerialNum _activeConfigSnapshotSerialNum; + const bool _validateAndSanitizeDocStore; vespalib::Gate _initGate; diff --git a/searchcore/src/vespa/searchcore/proton/server/documentsubdbcollection.cpp b/searchcore/src/vespa/searchcore/proton/server/documentsubdbcollection.cpp index 74010391fcb..1e01203b431 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentsubdbcollection.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/documentsubdbcollection.cpp @@ -315,4 +315,10 @@ DocumentSubDBCollection::tearDownReferences(IDocumentDBReferenceResolver &resolv } } +void DocumentSubDBCollection::validateDocStore(FeedHandler & feedHandler, SerialNum serialNum) { + for (auto subDb : _subDBs) { + subDb->validateDocStore(feedHandler, serialNum); + } +} + } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/server/documentsubdbcollection.h b/searchcore/src/vespa/searchcore/proton/server/documentsubdbcollection.h index d09afb92cc8..2936051538d 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentsubdbcollection.h +++ b/searchcore/src/vespa/searchcore/proton/server/documentsubdbcollection.h @@ -26,6 +26,7 @@ namespace searchcorespi { } namespace proton { + class DocumentDBConfig; struct DocumentDBTaggedMetrics; class MaintenanceController; @@ -41,6 +42,8 @@ class 
IDocumentSubDBOwner; class IDocumentSubDB; class IDocumentRetriever; class ReconfigParams; +class RemoveDocumentsOperation; +class FeedHandler; namespace matching { class QueryLimiter; @@ -163,6 +166,7 @@ public: double getReprocessingProgress() const; void close(); void tearDownReferences(IDocumentDBReferenceResolver &resolver); + void validateDocStore(FeedHandler & feedHandler, SerialNum serialNum); }; } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/server/feedhandler.cpp b/searchcore/src/vespa/searchcore/proton/server/feedhandler.cpp index 338fc738040..362de7ee780 100644 --- a/searchcore/src/vespa/searchcore/proton/server/feedhandler.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/feedhandler.cpp @@ -169,8 +169,8 @@ FeedHandler::createNonExistingDocument(FeedToken token, const UpdateOperation &o void FeedHandler::performRemove(FeedToken token, RemoveOperation &op) { _activeFeedView->prepareRemove(op); if (ignoreOperation(op)) { - LOG(debug, "performRemove(): ignoreOperation: docId(%s), timestamp(%" PRIu64 "), prevTimestamp(%" PRIu64 ")", - op.getDocumentId().toString().c_str(), (uint64_t)op.getTimestamp(), (uint64_t)op.getPrevTimestamp()); + LOG(debug, "performRemove(): ignoreOperation: remove(%s), timestamp(%" PRIu64 "), prevTimestamp(%" PRIu64 ")", + op.toString().c_str(), (uint64_t)op.getTimestamp(), (uint64_t)op.getPrevTimestamp()); if (token) { token->setResult(make_unique<RemoveResult>(false), false); } @@ -564,6 +564,7 @@ FeedHandler::performOperation(FeedToken token, FeedOperation::UP op) performPut(std::move(token), static_cast<PutOperation &>(*op)); return; case FeedOperation::REMOVE: + case FeedOperation::REMOVE_GID: performRemove(std::move(token), static_cast<RemoveOperation &>(*op)); return; case FeedOperation::UPDATE_42: diff --git a/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h b/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h index eb297fc7987..65724e66913 100644 --- 
a/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h +++ b/searchcore/src/vespa/searchcore/proton/server/idocumentsubdb.h @@ -40,6 +40,7 @@ class ISearchHandler; class ISummaryAdapter; class ISummaryManager; class ReconfigParams; +class RemoveDocumentsOperation; /** * Interface for a document sub database that handles a subset of the documents that belong to a @@ -117,6 +118,7 @@ public: virtual void close() = 0; virtual std::shared_ptr<IDocumentDBReference> getDocumentDBReference() = 0; virtual void tearDownReferences(IDocumentDBReferenceResolver &resolver) = 0; + virtual void validateDocStore(FeedHandler &op, SerialNum serialNum) const = 0; }; } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/server/persistencehandlerproxy.cpp b/searchcore/src/vespa/searchcore/proton/server/persistencehandlerproxy.cpp index beee2716cc7..16af4e87795 100644 --- a/searchcore/src/vespa/searchcore/proton/server/persistencehandlerproxy.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/persistencehandlerproxy.cpp @@ -41,21 +41,21 @@ PersistenceHandlerProxy::initialize() void PersistenceHandlerProxy::handlePut(FeedToken token, const Bucket &bucket, Timestamp timestamp, const DocumentSP &doc) { - FeedOperation::UP op(new PutOperation(bucket.getBucketId().stripUnused(), timestamp, doc)); + auto op = std::make_unique<PutOperation>(bucket.getBucketId().stripUnused(), timestamp, doc); _feedHandler.handleOperation(token, std::move(op)); } void PersistenceHandlerProxy::handleUpdate(FeedToken token, const Bucket &bucket, Timestamp timestamp, const DocumentUpdateSP &upd) { - FeedOperation::UP op(new UpdateOperation(bucket.getBucketId().stripUnused(), timestamp, upd)); + auto op = std::make_unique<UpdateOperation>(bucket.getBucketId().stripUnused(), timestamp, upd); _feedHandler.handleOperation(token, std::move(op)); } void PersistenceHandlerProxy::handleRemove(FeedToken token, const Bucket &bucket, Timestamp timestamp, const document::DocumentId &id) { - 
FeedOperation::UP op(new RemoveOperation(bucket.getBucketId().stripUnused(), timestamp, id)); + auto op = std::make_unique<RemoveOperationWithDocId>(bucket.getBucketId().stripUnused(), timestamp, id); _feedHandler.handleOperation(token, std::move(op)); } @@ -88,14 +88,14 @@ PersistenceHandlerProxy::handleGetBucketInfo(const Bucket &bucket, IBucketInfoRe void PersistenceHandlerProxy::handleCreateBucket(FeedToken token, const Bucket &bucket) { - FeedOperation::UP op(new CreateBucketOperation(bucket.getBucketId().stripUnused())); + auto op = std::make_unique<CreateBucketOperation>(bucket.getBucketId().stripUnused()); _feedHandler.handleOperation(token, std::move(op)); } void PersistenceHandlerProxy::handleDeleteBucket(FeedToken token, const Bucket &bucket) { - FeedOperation::UP op(new DeleteBucketOperation(bucket.getBucketId().stripUnused())); + auto op = std::make_unique<DeleteBucketOperation>(bucket.getBucketId().stripUnused()); _feedHandler.handleOperation(token, std::move(op)); } @@ -108,9 +108,9 @@ PersistenceHandlerProxy::handleGetModifiedBuckets(IBucketIdListResultHandler &re void PersistenceHandlerProxy::handleSplit(FeedToken token, const Bucket &source, const Bucket &target1, const Bucket &target2) { - FeedOperation::UP op(new SplitBucketOperation(source.getBucketId().stripUnused(), - target1.getBucketId().stripUnused(), - target2.getBucketId().stripUnused())); + auto op = std::make_unique<SplitBucketOperation>(source.getBucketId().stripUnused(), + target1.getBucketId().stripUnused(), + target2.getBucketId().stripUnused()); _feedHandler.handleOperation(token, std::move(op)); } diff --git a/searchcore/src/vespa/searchcore/proton/server/replaypacketdispatcher.cpp b/searchcore/src/vespa/searchcore/proton/server/replaypacketdispatcher.cpp index fcf1cf2a58c..a1138503085 100644 --- a/searchcore/src/vespa/searchcore/proton/server/replaypacketdispatcher.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/replaypacketdispatcher.cpp @@ -36,7 +36,11 @@ 
ReplayPacketDispatcher::replayEntry(const Packet::Entry &entry) replay(op, is, entry); break; } case FeedOperation::REMOVE: { - RemoveOperation op; + RemoveOperationWithDocId op; + replay(op, is, entry); + break; + } case FeedOperation::REMOVE_GID: { + RemoveOperationWithGid op; replay(op, is, entry); break; } case FeedOperation::UPDATE: { @@ -84,7 +88,7 @@ ReplayPacketDispatcher::replayEntry(const Packet::Entry &entry) throw IllegalStateException (make_string("Got packet entry with unknown type id '%u' from TLS", entry.type())); } - if (is.size() > 0) { + if ( ! is.empty()) { throw document::DeserializeException (make_string("Too much data in packet entry (type id '%u', %ld bytes)", entry.type(), is.size())); diff --git a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp index 646346d53ad..59a115ce5d1 100644 --- a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp @@ -367,6 +367,30 @@ StoreOnlyDocSubDB::initViews(const DocumentDBConfig &configSnapshot, const Sessi } void +StoreOnlyDocSubDB::validateDocStore(FeedHandler & feedHandler, SerialNum serialNum) const +{ + LOG(info, "Validating document store for sub db %u doctype %s", _subDbId, _docTypeName.toString().c_str()); + + search::IDocumentStore &docStore = _iSummaryMgr->getBackingStore(); + DocStoreValidator validator(_metaStoreCtx->get()); + search::DocumentStoreVisitorProgress validatorProgress; + + docStore.accept(validator, validatorProgress, *_iFeedView.get()->getDocumentTypeRepo()); + + validator.visitDone(); + + LOG(info, "Validated document store for sub db %u, doctype %s, %u orphans, %u invalid, %u visits, %u empty visits", + _subDbId, _docTypeName.toString().c_str(), validator.getOrphanCount(), + validator.getInvalidCount(), validator.getVisitCount(), validator.getVisitEmptyCount()); + + validator.killOrphans(docStore, 
serialNum); + if (validator.getInvalidCount() != 0u) { + validator.performRemoves(feedHandler, docStore, *_iFeedView.get()->getDocumentTypeRepo()); + } +} + + +void StoreOnlyDocSubDB::initFeedView(const DocumentDBConfig &configSnapshot) { assert(_writeService.master().isCurrentThread()); diff --git a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h index 14f62513c34..700a6d29460 100644 --- a/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h +++ b/searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h @@ -207,6 +207,8 @@ public: void setup(const DocumentSubDbInitializerResult &initResult) override; void initViews(const DocumentDBConfig &configSnapshot, const std::shared_ptr<matching::SessionManager> &sessionManager) override; + void validateDocStore(FeedHandler & feedHandler, SerialNum serialNum) const override; + IReprocessingTask::List applyConfig(const DocumentDBConfig &newConfigSnapshot, const DocumentDBConfig &oldConfigSnapshot, SerialNum serialNum, const ReconfigParams &params, IDocumentDBReferenceResolver &resolver) override; diff --git a/searchcore/src/vespa/searchcore/proton/server/storeonlyfeedview.cpp b/searchcore/src/vespa/searchcore/proton/server/storeonlyfeedview.cpp index a6695f6318d..bee8a0e0473 100644 --- a/searchcore/src/vespa/searchcore/proton/server/storeonlyfeedview.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/storeonlyfeedview.cpp @@ -27,6 +27,7 @@ LOG_SETUP(".proton.server.storeonlyfeedview"); using document::BucketId; using document::Document; using document::DocumentId; +using document::GlobalId; using document::DocumentTypeRepo; using document::DocumentUpdate; using proton::attribute::isUpdateableInMemoryOnly; @@ -55,7 +56,7 @@ public: : PutDoneContext(std::move(token), gidToLidChangeHandler, std::move(doc), gid, lid, serialNum, enableNotifyPut), _moveDoneCtx(std::move(moveDoneCtx)) {} - ~PutDoneContextForMove() = default; 
+ ~PutDoneContextForMove() override = default; }; std::shared_ptr<PutDoneContext> @@ -80,7 +81,7 @@ createPutDoneContext(FeedToken token, IGidToLidChangeHandler &gidToLidChangeHand std::shared_ptr<const Document> doc, const document::GlobalId &gid, uint32_t lid, SerialNum serialNum, bool enableNotifyPut) { - return createPutDoneContext(token, gidToLidChangeHandler, std::move(doc), gid, lid, serialNum, enableNotifyPut, IDestructorCallback::SP()); + return createPutDoneContext(std::move(token), gidToLidChangeHandler, std::move(doc), gid, lid, serialNum, enableNotifyPut, IDestructorCallback::SP()); } std::shared_ptr<UpdateDoneContext> @@ -110,7 +111,7 @@ public: : RemoveDoneContext(std::move(token), executor, documentMetaStore, std::move(pendingNotifyRemoveDone) ,lid), _moveDoneCtx(std::move(moveDoneCtx)) {} - ~RemoveDoneContextForMove() = default; + ~RemoveDoneContextForMove() override = default; }; std::shared_ptr<RemoveDoneContext> @@ -142,7 +143,7 @@ std::vector<document::GlobalId> getGidsToRemove(const IDocumentMetaStore &metaSt return gids; } -void putMetaData(documentmetastore::IStore &meta_store, const DocumentId &doc_id, +void putMetaData(documentmetastore::IStore &meta_store, const DocumentId & doc_id, const DocumentOperation &op, bool is_removed_doc) { documentmetastore::IStore::Result putRes( @@ -157,23 +158,23 @@ void putMetaData(documentmetastore::IStore &meta_store, const DocumentId &doc_id assert(op.getLid() == putRes._lid); } -void removeMetaData(documentmetastore::IStore &meta_store, const DocumentId &doc_id, +void removeMetaData(documentmetastore::IStore &meta_store, const GlobalId & gid, const DocumentId &doc_id, const DocumentOperation &op, bool is_removed_doc) { assert(meta_store.validLid(op.getPrevLid())); assert(is_removed_doc == op.getPrevMarkedAsRemoved()); const RawDocumentMetaData &meta(meta_store.getRawMetaData(op.getPrevLid())); - assert(meta.getGid() == doc_id.getGlobalId()); + assert(meta.getGid() == gid); (void) meta; if 
(!meta_store.remove(op.getPrevLid())) { throw IllegalStateException( make_string("Could not remove <lid, gid> pair for %sdocument with id '%s' and gid '%s'", is_removed_doc ? "removed " : "", doc_id.toString().c_str(), - doc_id.getGlobalId().toString().c_str())); + gid.toString().c_str())); } } void -moveMetaData(documentmetastore::IStore &meta_store, const DocumentId &doc_id, const DocumentOperation &op) +moveMetaData(documentmetastore::IStore &meta_store, const DocumentId & doc_id, const DocumentOperation &op) { (void) doc_id; assert(op.getLid() != op.getPrevLid()); @@ -280,7 +281,7 @@ StoreOnlyFeedView::internalPut(FeedToken token, const PutOperation &putOp) putOp.getSubDbId(), putOp.getLid(), putOp.getPrevSubDbId(), putOp.getPrevLid(), _params._subDbId, doc->toString(true).size(), doc->toString(true).c_str()); - PendingNotifyRemoveDone pendingNotifyRemoveDone = adjustMetaStore(putOp, docId); + PendingNotifyRemoveDone pendingNotifyRemoveDone = adjustMetaStore(putOp, docId.getGlobalId(), docId); considerEarlyAck(token); bool docAlreadyExists = putOp.getValidPrevDbdId(_params._subDbId); @@ -530,10 +531,8 @@ StoreOnlyFeedView::removeIndexedFields(SerialNum, Lid, bool, OnRemoveDoneType) { void StoreOnlyFeedView::prepareRemove(RemoveOperation &rmOp) { - const DocumentId &id = rmOp.getDocumentId(); - const document::GlobalId &gid = id.getGlobalId(); - documentmetastore::IStore::Result inspectRes = _metaStore.inspect(gid); - if (_params._subDbType == SubDbType::REMOVED) { + documentmetastore::IStore::Result inspectRes = _metaStore.inspect(rmOp.getGlobalId()); + if ((_params._subDbType == SubDbType::REMOVED) && (rmOp.getType() == FeedOperation::REMOVE)) { rmOp.setDbDocumentId(DbDocumentId(_params._subDbId, inspectRes._lid)); } setPrev(rmOp, inspectRes, _params._subDbId, _params._subDbType == SubDbType::REMOVED); @@ -541,11 +540,18 @@ StoreOnlyFeedView::prepareRemove(RemoveOperation &rmOp) void StoreOnlyFeedView::handleRemove(FeedToken token, const RemoveOperation &rmOp) 
{ - internalRemove(std::move(token), rmOp); + if (rmOp.getType() == FeedOperation::REMOVE) { + internalRemove(std::move(token), dynamic_cast<const RemoveOperationWithDocId &>(rmOp)); + } else if (rmOp.getType() == FeedOperation::REMOVE_GID) { + internalRemove(std::move(token), dynamic_cast<const RemoveOperationWithGid &>(rmOp)); + } else { + assert(rmOp.getType() == FeedOperation::REMOVE); + } + } void -StoreOnlyFeedView::internalRemove(FeedToken token, const RemoveOperation &rmOp) +StoreOnlyFeedView::internalRemove(FeedToken token, const RemoveOperationWithDocId &rmOp) { assert(rmOp.getValidNewOrPrevDbdId()); assert(rmOp.notMovingLidInSameSubDb()); @@ -557,11 +563,11 @@ StoreOnlyFeedView::internalRemove(FeedToken token, const RemoveOperation &rmOp) _params._docTypeName.toString().c_str(), serialNum, docId.toString().c_str(), rmOp.getSubDbId(), rmOp.getLid(), rmOp.getPrevSubDbId(), rmOp.getPrevLid(), _params._subDbId); - PendingNotifyRemoveDone pendingNotifyRemoveDone = adjustMetaStore(rmOp, docId); + PendingNotifyRemoveDone pendingNotifyRemoveDone = adjustMetaStore(rmOp, docId.getGlobalId(), docId); considerEarlyAck(token); if (rmOp.getValidDbdId(_params._subDbId)) { - Document::UP clearDoc(new Document(*_docType, docId)); + auto clearDoc = std::make_unique<Document>(*_docType, docId); clearDoc->setRepo(*_repo); putSummary(serialNum, rmOp.getLid(), std::move(clearDoc), std::shared_ptr<OperationDoneContext>()); @@ -576,6 +582,25 @@ StoreOnlyFeedView::internalRemove(FeedToken token, const RemoveOperation &rmOp) } void +StoreOnlyFeedView::internalRemove(FeedToken token, const RemoveOperationWithGid &rmOp) +{ + assert(rmOp.getValidNewOrPrevDbdId()); + assert(rmOp.notMovingLidInSameSubDb()); + const SerialNum serialNum = rmOp.getSerialNum(); + DocumentId dummy; + PendingNotifyRemoveDone pendingNotifyRemoveDone = adjustMetaStore(rmOp, rmOp.getGlobalId(), dummy); + considerEarlyAck(token); + + if (rmOp.getValidPrevDbdId(_params._subDbId)) { + if (rmOp.changedDbdId()) { + 
assert(!rmOp.getValidDbdId(_params._subDbId)); + internalRemove(std::move(token), serialNum, std::move(pendingNotifyRemoveDone), + rmOp.getPrevLid(), IDestructorCallback::SP()); + } + } +} + +void StoreOnlyFeedView::internalRemove(FeedToken token, SerialNum serialNum, PendingNotifyRemoveDone &&pendingNotifyRemoveDone, Lid lid, IDestructorCallback::SP moveDoneCtx) @@ -592,7 +617,7 @@ StoreOnlyFeedView::internalRemove(FeedToken token, SerialNum serialNum, } PendingNotifyRemoveDone -StoreOnlyFeedView::adjustMetaStore(const DocumentOperation &op, const DocumentId &docId) +StoreOnlyFeedView::adjustMetaStore(const DocumentOperation &op, const GlobalId & gid, const DocumentId &docId) { PendingNotifyRemoveDone pendingNotifyRemoveDone; const SerialNum serialNum = op.getSerialNum(); @@ -607,9 +632,9 @@ StoreOnlyFeedView::adjustMetaStore(const DocumentOperation &op, const DocumentId putMetaData(_metaStore, docId, op, _params._subDbType == SubDbType::REMOVED); } } else if (op.getValidPrevDbdId(_params._subDbId)) { - _gidToLidChangeHandler.notifyRemove(docId.getGlobalId(), serialNum); - pendingNotifyRemoveDone.setup(_gidToLidChangeHandler, docId.getGlobalId(), serialNum); - removeMetaData(_metaStore, docId, op, _params._subDbType == SubDbType::REMOVED); + _gidToLidChangeHandler.notifyRemove(gid, serialNum); + pendingNotifyRemoveDone.setup(_gidToLidChangeHandler, gid, serialNum); + removeMetaData(_metaStore, gid, docId, op, _params._subDbType == SubDbType::REMOVED); } _metaStore.commit(serialNum, serialNum); } @@ -728,7 +753,7 @@ StoreOnlyFeedView::handleMove(const MoveOperation &moveOp, IDestructorCallback:: moveOp.getSubDbId(), moveOp.getLid(), moveOp.getPrevSubDbId(), moveOp.getPrevLid(), _params._subDbId, doc->toString(true).size(), doc->toString(true).c_str()); - PendingNotifyRemoveDone pendingNotifyRemoveDone = adjustMetaStore(moveOp, docId); + PendingNotifyRemoveDone pendingNotifyRemoveDone = adjustMetaStore(moveOp, docId.getGlobalId(), docId); bool docAlreadyExists = 
moveOp.getValidPrevDbdId(_params._subDbId); if (moveOp.getValidDbdId(_params._subDbId)) { bool immediateCommit = _commitTimeTracker.needCommit(); diff --git a/searchcore/src/vespa/searchcore/proton/server/storeonlyfeedview.h b/searchcore/src/vespa/searchcore/proton/server/storeonlyfeedview.h index 4c1d50232dd..be2ed9af126 100644 --- a/searchcore/src/vespa/searchcore/proton/server/storeonlyfeedview.h +++ b/searchcore/src/vespa/searchcore/proton/server/storeonlyfeedview.h @@ -20,6 +20,7 @@ #include <vespa/searchlib/query/base.h> #include <vespa/vespalib/util/threadstackexecutorbase.h> #include <future> +#include <vespa/searchcore/proton/feedoperation/operations.h> namespace search { class IDestructorCallback; } @@ -171,12 +172,13 @@ private: return replaySerialNum > _params._flushedDocumentMetaStoreSerialNum; } - PendingNotifyRemoveDone adjustMetaStore(const DocumentOperation &op, const document::DocumentId &docId); + PendingNotifyRemoveDone adjustMetaStore(const DocumentOperation &op, const document::GlobalId & gid, const document::DocumentId &docId); void internalPut(FeedToken token, const PutOperation &putOp); void internalUpdate(FeedToken token, const UpdateOperation &updOp); bool lookupDocId(const document::DocumentId &docId, Lid & lid) const; - void internalRemove(FeedToken token, const RemoveOperation &rmOp); + void internalRemove(FeedToken token, const RemoveOperationWithDocId &rmOp); + void internalRemove(FeedToken token, const RemoveOperationWithGid &rmOp); // Removes documents from meta store and document store. // returns the number of documents removed. 
diff --git a/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h b/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h index 6c11ebbbf6b..7358a78de61 100644 --- a/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h +++ b/searchcore/src/vespa/searchcore/proton/test/dummy_document_sub_db.h @@ -31,7 +31,7 @@ struct DummyDocumentSubDb : public IDocumentSubDB DummyDocumentSubDb(std::shared_ptr<BucketDBOwner> bucketDB, uint32_t subDbId) : _subDbId(subDbId), - _metaStoreCtx(bucketDB), + _metaStoreCtx(std::move(bucketDB)), _summaryManager(), _indexManager(), _summaryAdapter(), @@ -40,7 +40,7 @@ struct DummyDocumentSubDb : public IDocumentSubDB _writeService(std::make_unique<ExecutorThreadingService>(_sharedExecutor, 1)) { } - ~DummyDocumentSubDb() {} + ~DummyDocumentSubDb() override { } void close() override { } uint32_t getSubDbId() const override { return _subDbId; } vespalib::string getName() const override { return "dummysubdb"; } @@ -64,6 +64,11 @@ struct DummyDocumentSubDb : public IDocumentSubDB proton::IAttributeManager::SP getAttributeManager() const override { return proton::IAttributeManager::SP(); } + + void validateDocStore(FeedHandler &, SerialNum ) const override { + + } + const IIndexManager::SP &getIndexManager() const override { return _indexManager; } const ISummaryAdapter::SP &getSummaryAdapter() const override { return _summaryAdapter; } const IIndexWriter::SP &getIndexWriter() const override { return _indexWriter; } |