diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-01-05 13:25:59 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-05 13:25:59 +0100 |
commit | d31c2ac3547c63ac3e1695b61a5168dddd12a402 (patch) | |
tree | 1943114e55fcf61a9ee29e7ab7df161c6c04cbba /storage | |
parent | 6438e2f64460178525a42fcfdaf207e264a020ef (diff) | |
parent | f201ce874e5790352e42ef084c40ce396ca75acf (diff) |
Merge pull request #15909 from vespa-engine/vekterli/only-fetch-doc-id-when-splitting
Only fetch document IDs in bucket when splitting, not whole documents
Diffstat (limited to 'storage')
4 files changed, 16 insertions, 15 deletions
diff --git a/storage/src/vespa/storage/persistence/bucketprocessor.cpp b/storage/src/vespa/storage/persistence/bucketprocessor.cpp index ea09fcfc348..33706e69701 100644 --- a/storage/src/vespa/storage/persistence/bucketprocessor.cpp +++ b/storage/src/vespa/storage/persistence/bucketprocessor.cpp @@ -40,15 +40,15 @@ void BucketProcessor::iterateAll(spi::PersistenceProvider& provider, const spi::Bucket& bucket, const std::string& documentSelection, + std::shared_ptr<document::FieldSet> field_set, EntryProcessor& processor, spi::IncludedVersions versions, spi::Context& context) { - spi::Selection sel - = spi::Selection(spi::DocumentSelection(documentSelection)); + spi::Selection sel = spi::Selection(spi::DocumentSelection(documentSelection)); spi::CreateIteratorResult createIterResult(provider.createIterator( bucket, - std::make_shared<document::AllFields>(), + std::move(field_set), sel, versions, context)); diff --git a/storage/src/vespa/storage/persistence/bucketprocessor.h b/storage/src/vespa/storage/persistence/bucketprocessor.h index 2c1be512b5e..0f15b4a222d 100644 --- a/storage/src/vespa/storage/persistence/bucketprocessor.h +++ b/storage/src/vespa/storage/persistence/bucketprocessor.h @@ -22,6 +22,7 @@ public: static void iterateAll(spi::PersistenceProvider&, const spi::Bucket&, const std::string& documentSelection, + std::shared_ptr<document::FieldSet> field_set, EntryProcessor&, spi::IncludedVersions, spi::Context&); diff --git a/storage/src/vespa/storage/persistence/processallhandler.cpp b/storage/src/vespa/storage/persistence/processallhandler.cpp index 9cf570ef7a4..26cd757196f 100644 --- a/storage/src/vespa/storage/persistence/processallhandler.cpp +++ b/storage/src/vespa/storage/persistence/processallhandler.cpp @@ -3,6 +3,7 @@ #include "processallhandler.h" #include "bucketprocessor.h" #include "persistenceutil.h" +#include <vespa/document/fieldset/fieldsets.h> #include <vespa/persistence/spi/persistenceprovider.h> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/stllike/hash_map.hpp> @@ -85,6 +86,7 @@ ProcessAllHandler::handleRemoveLocation(api::RemoveLocationCommand& cmd, Message spi::Bucket bucket(cmd.getBucket()); UnrevertableRemoveEntryProcessor processor(_spi, bucket, tracker->context()); BucketProcessor::iterateAll(_spi, bucket, cmd.getDocumentSelection(), + std::make_shared<document::AllFields>(), processor, spi::NEWEST_DOCUMENT_ONLY,tracker->context()); tracker->setReply(std::make_shared<api::RemoveLocationReply>(cmd, processor._n_removed)); @@ -102,6 +104,7 @@ ProcessAllHandler::handleStatBucket(api::StatBucketCommand& cmd, MessageTracker: spi::Bucket bucket(cmd.getBucket()); StatEntryProcessor processor(ost); BucketProcessor::iterateAll(_spi, bucket, cmd.getDocumentSelection(), + std::make_shared<document::AllFields>(), processor, spi::ALL_VERSIONS,tracker->context()); tracker->setReply(std::make_shared<api::StatBucketReply>(cmd, ost.str())); diff --git a/storage/src/vespa/storage/persistence/splitbitdetector.cpp b/storage/src/vespa/storage/persistence/splitbitdetector.cpp index 0208b220b79..fdf86f61639 100644 --- a/storage/src/vespa/storage/persistence/splitbitdetector.cpp +++ b/storage/src/vespa/storage/persistence/splitbitdetector.cpp @@ -4,6 +4,7 @@ #include "bucketprocessor.h" #include <vespa/document/bucket/bucketidfactory.h> #include <vespa/document/base/documentid.h> +#include <vespa/document/fieldset/fieldsets.h> #include <sstream> #include <cassert> @@ -38,7 +39,6 @@ struct BucketVisitor : public BucketProcessor::EntryProcessor { mutable document::DocumentId _conflictId; mutable document::BucketId _conflictBucket; uint32_t _docCount; - uint64_t _docSize; struct DocInfo { uint64_t timestamp; document::DocumentId docId; @@ -57,16 +57,14 @@ struct BucketVisitor : public BucketProcessor::EntryProcessor { BucketVisitor(const document::BucketIdFactory& factory); ~BucketVisitor(); - void process(spi::DocEntry& slot) override { - assert(slot.getDocumentId()); + void process(spi::DocEntry& entry) override { + assert(entry.getDocumentId()); ++_docCount; - _docSize += slot.getDocumentSize(); - const document::DocumentId& id(*slot.getDocumentId()); + const document::DocumentId& id(*entry.getDocumentId()); document::BucketId bucket = _factory.getBucketId(id); - // LOG(spam, "Bucket %s", bucket.toString().c_str()); if (_firstDocs.size() < keepFirstCount) { - _firstDocs.push_back(DocInfo(slot.getTimestamp(), id, bucket)); + _firstDocs.push_back(DocInfo(entry.getTimestamp(), id, bucket)); } if (_refBucket.getRawId() == 0) { @@ -101,10 +99,9 @@ BucketVisitor::BucketVisitor(const document::BucketIdFactory& factory) : _factory(factory), _splitBit(58), _splitMask(0), _refId(), _refBucket(), _conflictId(), _conflictBucket(), - _docCount(0), _docSize(0), _firstDocs() + _docCount(0), _firstDocs() { _firstDocs.reserve(keepFirstCount); - // LOG(spam, "Checking out meta entries in bucket"); for (uint32_t i=0; i<_splitBit; ++i) { _splitMask = (_splitMask << 1) | 1; } @@ -158,7 +155,8 @@ SplitBitDetector::detectSplit(spi::PersistenceProvider& provider, BucketVisitor detector(factory); BucketProcessor::iterateAll( - provider, source, "", detector, spi::ALL_VERSIONS, context); + provider, source, "", std::make_shared<document::DocIdOnly>(), + detector, spi::ALL_VERSIONS, context); uint16_t splitBit = detector._splitBit; @@ -211,8 +209,7 @@ SplitBitDetector::detectSplit(spi::PersistenceProvider& provider, error << "Could not find differing bit to split bucket contents " "around due to bucket ID collisions. Forcing resulting " "bucket to be 58 bits. Bucket has " - << detector._docCount << " docs totalling " - << detector._docSize << " bytes. "; + << detector._docCount << " docs."; detector.printEntrySummary(error); LOGBT(warning, source.getBucketId().toString(), |