diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-01-06 13:51:07 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2022-01-06 13:51:43 +0000 |
commit | ea5c573e8685badafd67c9311577a22a1113bd27 (patch) | |
tree | 260e734aba5698ede8b1adcc2860f738893bf716 | |
parent | 20abbaa6eb1b411bbc43b98c3bb5bc8e79a86153 (diff) |
Only care about size of payload. Also add payload containing only doctype and gid
11 files changed, 83 insertions, 39 deletions
diff --git a/persistence/src/tests/spi/clusterstatetest.cpp b/persistence/src/tests/spi/clusterstatetest.cpp index b044552d672..c6c523c202d 100644 --- a/persistence/src/tests/spi/clusterstatetest.cpp +++ b/persistence/src/tests/spi/clusterstatetest.cpp @@ -10,6 +10,7 @@ using storage::spi::test::makeSpiBucket; using vespalib::Trinary; +using document::GlobalId; namespace storage::spi { @@ -270,9 +271,10 @@ TEST(DocEntryTest, test_meta_only) { EXPECT_EQ(9, e->getTimestamp()); EXPECT_FALSE(e->isRemove()); EXPECT_EQ(24, e->getSize()); - EXPECT_EQ(0, e->getDocumentSize()); EXPECT_EQ(nullptr, e->getDocument()); EXPECT_EQ(nullptr, e->getDocumentId()); + EXPECT_EQ("", e->getDocumentType()); + EXPECT_EQ(GlobalId(), e->getGid()); DocEntry::UP r = DocEntry::create(Timestamp(666), DocumentMetaFlags::REMOVE_ENTRY); EXPECT_EQ(666, r->getTimestamp()); @@ -283,10 +285,22 @@ TEST(DocEntryTest, test_docid_only) { DocEntry::UP e = DocEntry::create(Timestamp(9), DocumentMetaFlags::NONE, DocumentId("id:test:test::1")); EXPECT_EQ(9, e->getTimestamp()); EXPECT_FALSE(e->isRemove()); - EXPECT_EQ(48, e->getSize()); - EXPECT_EQ(16, e->getDocumentSize()); + EXPECT_EQ(16, e->getSize()); EXPECT_EQ(nullptr, e->getDocument()); EXPECT_NE(nullptr, e->getDocumentId()); + EXPECT_EQ("test", e->getDocumentType()); + EXPECT_EQ(GlobalId::parse("gid(0xc4ca4238f9f9649222750be2)"), e->getGid()); +} + +TEST(DocEntryTest, test_doctype_and_gid) { + DocEntry::UP e = DocEntry::create(Timestamp(9), DocumentMetaFlags::NONE, "doc_type", GlobalId::parse("gid(0xc4cef118f9f9649222750be2)")); + EXPECT_EQ(9, e->getTimestamp()); + EXPECT_FALSE(e->isRemove()); + EXPECT_EQ(20, e->getSize()); + EXPECT_EQ(nullptr, e->getDocument()); + EXPECT_EQ(nullptr, e->getDocumentId()); + EXPECT_EQ("doc_type", e->getDocumentType()); + EXPECT_EQ(GlobalId::parse("gid(0xc4cef118f9f9649222750be2)"), e->getGid()); } TEST(DocEntryTest, test_document_only) { @@ -294,10 +308,11 @@ TEST(DocEntryTest, test_document_only) { DocEntry::UP e = 
DocEntry::create(Timestamp(9), testDocMan.createRandomDocument(0, 1000)); EXPECT_EQ(9, e->getTimestamp()); EXPECT_FALSE(e->isRemove()); - EXPECT_EQ(664, e->getSize()); - EXPECT_EQ(632, e->getDocumentSize()); + EXPECT_EQ(632, e->getSize()); EXPECT_NE(nullptr, e->getDocument()); EXPECT_NE(nullptr, e->getDocumentId()); + EXPECT_EQ("testdoctype1", e->getDocumentType()); + EXPECT_EQ(GlobalId::parse("gid(0x4bc7000087365609f22f1f4b)"), e->getGid()); } } diff --git a/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp b/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp index ec68f02f8b0..84494c312b6 100644 --- a/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp +++ b/persistence/src/vespa/persistence/conformancetest/conformancetest.cpp @@ -258,15 +258,13 @@ verifyDocs(const std::vector<DocAndTimestamp>& wanted, } EXPECT_EQ(wanted[wantedIdx].timestamp, entry.getTimestamp()); size_t serSize = wanted[wantedIdx].doc->serialize().size(); - EXPECT_EQ(serSize + entry.getOwnSize(), size_t(entry.getSize())); - EXPECT_EQ(serSize, size_t(entry.getDocumentSize())); + EXPECT_EQ(serSize, size_t(entry.getSize())); ++wantedIdx; } else { // Remove-entry EXPECT_TRUE(entry.getDocumentId() != 0); size_t serSize = entry.getDocumentId()->getSerializedSize(); - EXPECT_EQ(serSize + entry.getOwnSize(), size_t(entry.getSize())); - EXPECT_EQ(serSize, size_t(entry.getDocumentSize())); + EXPECT_EQ(serSize, size_t(entry.getSize())); if (removes.find(entry.getDocumentId()->toString()) == removes.end()) { FAIL() << "Got unexpected remove entry for document id " << *entry.getDocumentId(); diff --git a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp index a85c1c49752..30d5061d37e 100644 --- a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp +++ b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp @@ -658,7 +658,7 @@ 
DummyPersistence::iterate(IteratorId id, uint64_t maxByteSize, Context& ctx) con assert(entry->getDocument()); // Create new document with only wanted fields. Document::UP filtered(FieldSet::createDocumentSubsetCopy(*entry->getDocument(), *it->_fieldSet)); - auto ret = DocEntry::create(entry->getTimestamp(), std::move(filtered), entry->getDocumentSize()); + auto ret = DocEntry::create(entry->getTimestamp(), std::move(filtered), entry->getSize()); entries.push_back(std::move(ret)); } else { // Use entry as-is. diff --git a/persistence/src/vespa/persistence/spi/docentry.cpp b/persistence/src/vespa/persistence/spi/docentry.cpp index 3235de25d46..43a7e519b53 100644 --- a/persistence/src/vespa/persistence/spi/docentry.cpp +++ b/persistence/src/vespa/persistence/spi/docentry.cpp @@ -14,10 +14,23 @@ public: DocEntryWithId(Timestamp t, DocumentMetaFlags metaFlags, const DocumentId &docId); ~DocEntryWithId(); vespalib::string toString() const override; - const DocumentId* getDocumentId() const override { return _documentId.get(); } + const DocumentId* getDocumentId() const override { return & _documentId; } + vespalib::stringref getDocumentType() const override { return _documentId.getDocType(); } + GlobalId getGid() const override { return _documentId.getGlobalId(); } private: - SizeType getOwnSize() const override{ return sizeof(DocEntryWithId); } - DocumentIdUP _documentId; + DocumentId _documentId; +}; + +class DocEntryWithTypeAndGid final : public DocEntry { +public: + DocEntryWithTypeAndGid(Timestamp t, DocumentMetaFlags metaFlags, vespalib::stringref docType, GlobalId gid); + ~DocEntryWithTypeAndGid(); + vespalib::string toString() const override; + vespalib::stringref getDocumentType() const override { return _type; } + GlobalId getGid() const override { return _gid; } +private: + vespalib::string _type; + GlobalId _gid; }; class DocEntryWithDoc final : public DocEntry { @@ -36,8 +49,9 @@ public: const Document* getDocument() const override { return _document.get(); 
} const DocumentId* getDocumentId() const override { return &_document->getId(); } DocumentUP releaseDocument() override { return std::move(_document); } + vespalib::stringref getDocumentType() const override { return _document->getId().getDocType(); } + GlobalId getGid() const override { return _document->getId().getGlobalId(); } private: - SizeType getOwnSize() const override { return sizeof(DocEntryWithDoc); } DocumentUP _document; }; @@ -53,9 +67,16 @@ DocEntryWithDoc::DocEntryWithDoc(Timestamp t, DocumentUP doc, size_t serializedD DocEntryWithId::DocEntryWithId(Timestamp t, DocumentMetaFlags metaFlags, const DocumentId& docId) : DocEntry(t, metaFlags, docId.getSerializedSize()), - _documentId(std::make_unique<DocumentId>(docId)) + _documentId(docId) +{ } + +DocEntryWithTypeAndGid::DocEntryWithTypeAndGid(Timestamp t, DocumentMetaFlags metaFlags, vespalib::stringref docType, GlobalId gid) + : DocEntry(t, metaFlags, docType.size() + sizeof(gid)), + _type(docType), + _gid(gid) { } +DocEntryWithTypeAndGid::~DocEntryWithTypeAndGid() = default; DocEntryWithId::~DocEntryWithId() = default; DocEntryWithDoc::~DocEntryWithDoc() = default; @@ -63,7 +84,15 @@ vespalib::string DocEntryWithId::toString() const { std::ostringstream out; - out << "DocEntry(" << getTimestamp() << ", " << int(getFlags()) << ", " << *_documentId << ")"; + out << "DocEntry(" << getTimestamp() << ", " << int(getFlags()) << ", " << _documentId << ")"; + return out.str(); +} + +vespalib::string +DocEntryWithTypeAndGid::toString() const +{ + std::ostringstream out; + out << "DocEntry(" << getTimestamp() << ", " << int(getFlags()) << ", " << _type << ", " << _gid << ")"; return out.str(); } @@ -92,6 +121,10 @@ DocEntry::create(Timestamp t, DocumentMetaFlags metaFlags, const DocumentId &doc return std::make_unique<DocEntryWithId>(t, metaFlags, docId); } DocEntry::UP +DocEntry::create(Timestamp t, DocumentMetaFlags metaFlags, vespalib::stringref docType, GlobalId gid) { + return 
std::make_unique<DocEntryWithTypeAndGid>(t, metaFlags, docType, gid); +} +DocEntry::UP DocEntry::create(Timestamp t, DocumentUP doc) { return std::make_unique<DocEntryWithDoc>(t, std::move(doc)); } diff --git a/persistence/src/vespa/persistence/spi/docentry.h b/persistence/src/vespa/persistence/spi/docentry.h index d2da3f03881..d263527dff6 100644 --- a/persistence/src/vespa/persistence/spi/docentry.h +++ b/persistence/src/vespa/persistence/spi/docentry.h @@ -14,6 +14,7 @@ #pragma once #include <persistence/spi/types.h> +#include <vespa/document/base/globalid.h> namespace storage::spi { @@ -37,25 +38,22 @@ public: Timestamp getTimestamp() const { return _timestamp; } DocumentMetaFlags getFlags() const { return _metaFlags; } /** - * @return In-memory size of this doc entry, including document instance. - * In essence: serialized size of document + sizeof(DocEntry). - */ - SizeType getSize() const { return _size + getOwnSize() ; } - virtual SizeType getOwnSize() const { return sizeof(DocEntry); } - /** * If entry contains a document, returns its serialized size. * If entry contains a document id, returns the serialized size of * the id alone. - * Otherwise (i.e. metadata only), returns zero. + * Otherwise (i.e. metadata only), returns sizeof(DocEntry). 
*/ - SizeType getDocumentSize() const { return _size; } + SizeType getSize() const { return _size; } virtual vespalib::string toString() const; virtual const Document* getDocument() const { return nullptr; } virtual const DocumentId* getDocumentId() const { return nullptr; } + virtual vespalib::stringref getDocumentType() const { return vespalib::stringref(); } + virtual GlobalId getGid() const { return GlobalId(); } virtual DocumentUP releaseDocument(); static UP create(Timestamp t, DocumentMetaFlags metaFlags); static UP create(Timestamp t, DocumentMetaFlags metaFlags, const DocumentId &docId); + static UP create(Timestamp t, DocumentMetaFlags metaFlags, vespalib::stringref docType, GlobalId gid); static UP create(Timestamp t, DocumentUP doc); static UP create(Timestamp t, DocumentUP doc, SizeType serializedDocumentSize); protected: @@ -65,10 +63,10 @@ protected: _size(size) {} private: - DocEntry(Timestamp t, DocumentMetaFlags metaFlags) : DocEntry(t, metaFlags, 0) { } - Timestamp _timestamp; - DocumentMetaFlags _metaFlags; - SizeType _size; + DocEntry(Timestamp t, DocumentMetaFlags metaFlags) : DocEntry(t, metaFlags, sizeof(DocEntry)) { } + Timestamp _timestamp; + DocumentMetaFlags _metaFlags; + SizeType _size; }; std::ostream & operator << (std::ostream & os, const DocEntry & r); diff --git a/persistence/src/vespa/persistence/spi/test.cpp b/persistence/src/vespa/persistence/spi/test.cpp index 9aaa26adb59..f6ad2dd3b49 100644 --- a/persistence/src/vespa/persistence/spi/test.cpp +++ b/persistence/src/vespa/persistence/spi/test.cpp @@ -21,7 +21,7 @@ std::unique_ptr<DocEntry> cloneDocEntry(const DocEntry & e) { std::unique_ptr<DocEntry> ret; if (e.getDocument()) { - ret = DocEntry::create(e.getTimestamp(), std::make_unique<Document>(*e.getDocument()), e.getDocumentSize()); + ret = DocEntry::create(e.getTimestamp(), std::make_unique<Document>(*e.getDocument()), e.getSize()); } else if (e.getDocumentId()) { ret = DocEntry::create(e.getTimestamp(), e.getFlags(), 
*e.getDocumentId()); } else { @@ -34,7 +34,7 @@ bool equal(const DocEntry & a, const DocEntry & b) { if (a.getTimestamp() != b.getTimestamp()) return false; if (a.getFlags() != b.getFlags()) return false; - if (a.getDocumentSize() != b.getDocumentSize()) return false; + if (a.getSize() != b.getSize()) return false; if (a.getDocument()) { if (!b.getDocument()) return false; diff --git a/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp b/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp index 9df5de567f5..3d4d20bbe9c 100644 --- a/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp +++ b/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp @@ -389,7 +389,7 @@ void checkEntry(const IterateResult &res, size_t idx, const Timestamp &timestamp ASSERT_LESS(idx, res.getEntries().size()); auto expect = DocEntry::create(timestamp, flags); EXPECT_TRUE(equal(*expect, *res.getEntries()[idx])); - EXPECT_EQUAL(0u, res.getEntries()[idx]->getDocumentSize()); + EXPECT_EQUAL(sizeof(DocEntry), res.getEntries()[idx]->getSize()); } void checkEntry(const IterateResult &res, size_t idx, const DocumentId &id, const Timestamp &timestamp) @@ -397,7 +397,7 @@ void checkEntry(const IterateResult &res, size_t idx, const DocumentId &id, cons ASSERT_LESS(idx, res.getEntries().size()); auto expect = DocEntry::create(timestamp, DocumentMetaFlags::REMOVE_ENTRY, id); EXPECT_TRUE(equal(*expect, *res.getEntries()[idx])); - EXPECT_EQUAL(getSize(id), res.getEntries()[idx]->getDocumentSize()); + EXPECT_EQUAL(getSize(id), res.getEntries()[idx]->getSize()); EXPECT_GREATER(getSize(id), 0u); } @@ -406,7 +406,7 @@ void checkEntry(const IterateResult &res, size_t idx, const Document &doc, const ASSERT_LESS(idx, res.getEntries().size()); auto expect = DocEntry::create(timestamp, Document::UP(doc.clone())); EXPECT_TRUE(equal(*expect, *res.getEntries()[idx])); - EXPECT_EQUAL(getSize(doc), res.getEntries()[idx]->getDocumentSize()); + 
EXPECT_EQUAL(getSize(doc), res.getEntries()[idx]->getSize()); EXPECT_GREATER(getSize(doc), 0u); } @@ -627,8 +627,8 @@ TEST("require that maxBytes splits iteration results") { itr.add(doc("id:ns:document::1", Timestamp(2), bucket(5))); itr.add(cat(rem("id:ns:document::2", Timestamp(3), bucket(5)), doc("id:ns:document::3", Timestamp(4), bucket(5)))); - IterateResult res1 = itr.iterate(getSize(Document(*DataType::DOCUMENT, DocumentId("id:ns:document::1"))) + sizeof(DocEntry) + 8 + - getSize(DocumentId("id:ns:document::2")) + sizeof(DocEntry) + 8); + IterateResult res1 = itr.iterate(getSize(Document(*DataType::DOCUMENT, DocumentId("id:ns:document::1"))) + + getSize(DocumentId("id:ns:document::2"))); EXPECT_TRUE(!res1.isCompleted()); EXPECT_EQUAL(2u, res1.getEntries().size()); TEST_DO(checkEntry(res1, 0, Document(*DataType::DOCUMENT, DocumentId("id:ns:document::1")), Timestamp(2))); diff --git a/storage/src/vespa/storage/persistence/processallhandler.cpp b/storage/src/vespa/storage/persistence/processallhandler.cpp index c23d246d463..6d6723a0185 100644 --- a/storage/src/vespa/storage/persistence/processallhandler.cpp +++ b/storage/src/vespa/storage/persistence/processallhandler.cpp @@ -32,7 +32,7 @@ public: if (e.getDocument() != nullptr) { ost << "Doc(" << e.getDocument()->getId() << ")" << ", " << e.getDocument()->getId().getGlobalId().toString() - << ", size: " << e.getDocumentSize(); + << ", size: " << e.getSize(); } else if (e.getDocumentId() != nullptr) { ost << *e.getDocumentId() << ", " << e.getDocumentId()->getGlobalId().toString(); diff --git a/storage/src/vespa/storage/visiting/dumpvisitorsingle.cpp b/storage/src/vespa/storage/visiting/dumpvisitorsingle.cpp index 92c080169ac..3419d329a06 100644 --- a/storage/src/vespa/storage/visiting/dumpvisitorsingle.cpp +++ b/storage/src/vespa/storage/visiting/dumpvisitorsingle.cpp @@ -26,7 +26,7 @@ void DumpVisitorSingle::handleDocuments(const document::BucketId&, for (size_t i = 0; i < entries.size(); ++i) { 
spi::DocEntry& entry(*entries[i]); - const uint32_t docSize = entry.getDocumentSize(); + const uint32_t docSize = entry.getSize(); if (entry.isRemove()) { hitCounter.addHit(*entry.getDocumentId(), docSize); sendMessage(std::make_unique<documentapi::RemoveDocumentMessage>(*entry.getDocumentId())); diff --git a/storage/src/vespa/storage/visiting/reindexing_visitor.cpp b/storage/src/vespa/storage/visiting/reindexing_visitor.cpp index 91ed2810420..c33ea24ed99 100644 --- a/storage/src/vespa/storage/visiting/reindexing_visitor.cpp +++ b/storage/src/vespa/storage/visiting/reindexing_visitor.cpp @@ -27,7 +27,7 @@ void ReindexingVisitor::handleDocuments(const document::BucketId& , // We don't reindex removed documents, as that would be very silly. continue; } - const uint32_t doc_size = entry->getDocumentSize(); + const uint32_t doc_size = entry->getSize(); hitCounter.addHit(*entry->getDocumentId(), doc_size); auto msg = std::make_unique<documentapi::PutDocumentMessage>(entry->releaseDocument()); msg->setApproxSize(doc_size); diff --git a/storage/src/vespa/storage/visiting/visitor.cpp b/storage/src/vespa/storage/visiting/visitor.cpp index ac2c918910c..b66285f5048 100644 --- a/storage/src/vespa/storage/visiting/visitor.cpp +++ b/storage/src/vespa/storage/visiting/visitor.cpp @@ -815,7 +815,7 @@ Visitor::onGetIterReply(const std::shared_ptr<GetIterReply>& reply, uint64_t size = 0; for (const auto& entry : reply->getEntries()) { - size += entry->getDocumentSize(); + size += entry->getSize(); } _visitorStatistics.setDocumentsVisited( |