diff options
author | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2021-01-05 12:04:24 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2021-01-05 12:07:08 +0000 |
commit | f201ce874e5790352e42ef084c40ce396ca75acf (patch) | |
tree | 7204574c017ab738e24320e1da95f21bcac4e1fe /storage | |
parent | 5613779dc559920120f93cf9358d62cad0ddde1e (diff) |
Only fetch document IDs in bucket when splitting, not whole documents
The splitting code only needs to look at document IDs to figure out
the distribution of the target buckets. Remove tracking of document
sizes (which did require fetching whole documents) since it was only
used in a warning log message.
Diffstat (limited to 'storage')
4 files changed, 16 insertions, 15 deletions
diff --git a/storage/src/vespa/storage/persistence/bucketprocessor.cpp b/storage/src/vespa/storage/persistence/bucketprocessor.cpp index ea09fcfc348..33706e69701 100644 --- a/storage/src/vespa/storage/persistence/bucketprocessor.cpp +++ b/storage/src/vespa/storage/persistence/bucketprocessor.cpp @@ -40,15 +40,15 @@ void BucketProcessor::iterateAll(spi::PersistenceProvider& provider, const spi::Bucket& bucket, const std::string& documentSelection, + std::shared_ptr<document::FieldSet> field_set, EntryProcessor& processor, spi::IncludedVersions versions, spi::Context& context) { - spi::Selection sel - = spi::Selection(spi::DocumentSelection(documentSelection)); + spi::Selection sel = spi::Selection(spi::DocumentSelection(documentSelection)); spi::CreateIteratorResult createIterResult(provider.createIterator( bucket, - std::make_shared<document::AllFields>(), + std::move(field_set), sel, versions, context)); diff --git a/storage/src/vespa/storage/persistence/bucketprocessor.h b/storage/src/vespa/storage/persistence/bucketprocessor.h index 2c1be512b5e..0f15b4a222d 100644 --- a/storage/src/vespa/storage/persistence/bucketprocessor.h +++ b/storage/src/vespa/storage/persistence/bucketprocessor.h @@ -22,6 +22,7 @@ public: static void iterateAll(spi::PersistenceProvider&, const spi::Bucket&, const std::string& documentSelection, + std::shared_ptr<document::FieldSet> field_set, EntryProcessor&, spi::IncludedVersions, spi::Context&); diff --git a/storage/src/vespa/storage/persistence/processallhandler.cpp b/storage/src/vespa/storage/persistence/processallhandler.cpp index 9cf570ef7a4..26cd757196f 100644 --- a/storage/src/vespa/storage/persistence/processallhandler.cpp +++ b/storage/src/vespa/storage/persistence/processallhandler.cpp @@ -3,6 +3,7 @@ #include "processallhandler.h" #include "bucketprocessor.h" #include "persistenceutil.h" +#include <vespa/document/fieldset/fieldsets.h> #include <vespa/persistence/spi/persistenceprovider.h> #include 
<vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/stllike/hash_map.hpp> @@ -85,6 +86,7 @@ ProcessAllHandler::handleRemoveLocation(api::RemoveLocationCommand& cmd, Message spi::Bucket bucket(cmd.getBucket()); UnrevertableRemoveEntryProcessor processor(_spi, bucket, tracker->context()); BucketProcessor::iterateAll(_spi, bucket, cmd.getDocumentSelection(), + std::make_shared<document::AllFields>(), processor, spi::NEWEST_DOCUMENT_ONLY,tracker->context()); tracker->setReply(std::make_shared<api::RemoveLocationReply>(cmd, processor._n_removed)); @@ -102,6 +104,7 @@ ProcessAllHandler::handleStatBucket(api::StatBucketCommand& cmd, MessageTracker: spi::Bucket bucket(cmd.getBucket()); StatEntryProcessor processor(ost); BucketProcessor::iterateAll(_spi, bucket, cmd.getDocumentSelection(), + std::make_shared<document::AllFields>(), processor, spi::ALL_VERSIONS,tracker->context()); tracker->setReply(std::make_shared<api::StatBucketReply>(cmd, ost.str())); diff --git a/storage/src/vespa/storage/persistence/splitbitdetector.cpp b/storage/src/vespa/storage/persistence/splitbitdetector.cpp index 0208b220b79..fdf86f61639 100644 --- a/storage/src/vespa/storage/persistence/splitbitdetector.cpp +++ b/storage/src/vespa/storage/persistence/splitbitdetector.cpp @@ -4,6 +4,7 @@ #include "bucketprocessor.h" #include <vespa/document/bucket/bucketidfactory.h> #include <vespa/document/base/documentid.h> +#include <vespa/document/fieldset/fieldsets.h> #include <sstream> #include <cassert> @@ -38,7 +39,6 @@ struct BucketVisitor : public BucketProcessor::EntryProcessor { mutable document::DocumentId _conflictId; mutable document::BucketId _conflictBucket; uint32_t _docCount; - uint64_t _docSize; struct DocInfo { uint64_t timestamp; document::DocumentId docId; @@ -57,16 +57,14 @@ struct BucketVisitor : public BucketProcessor::EntryProcessor { BucketVisitor(const document::BucketIdFactory& factory); ~BucketVisitor(); - void process(spi::DocEntry& slot) override { - 
assert(slot.getDocumentId()); + void process(spi::DocEntry& entry) override { + assert(entry.getDocumentId()); ++_docCount; - _docSize += slot.getDocumentSize(); - const document::DocumentId& id(*slot.getDocumentId()); + const document::DocumentId& id(*entry.getDocumentId()); document::BucketId bucket = _factory.getBucketId(id); - // LOG(spam, "Bucket %s", bucket.toString().c_str()); if (_firstDocs.size() < keepFirstCount) { - _firstDocs.push_back(DocInfo(slot.getTimestamp(), id, bucket)); + _firstDocs.push_back(DocInfo(entry.getTimestamp(), id, bucket)); } if (_refBucket.getRawId() == 0) { @@ -101,10 +99,9 @@ BucketVisitor::BucketVisitor(const document::BucketIdFactory& factory) : _factory(factory), _splitBit(58), _splitMask(0), _refId(), _refBucket(), _conflictId(), _conflictBucket(), - _docCount(0), _docSize(0), _firstDocs() + _docCount(0), _firstDocs() { _firstDocs.reserve(keepFirstCount); - // LOG(spam, "Checking out meta entries in bucket"); for (uint32_t i=0; i<_splitBit; ++i) { _splitMask = (_splitMask << 1) | 1; } @@ -158,7 +155,8 @@ SplitBitDetector::detectSplit(spi::PersistenceProvider& provider, BucketVisitor detector(factory); BucketProcessor::iterateAll( - provider, source, "", detector, spi::ALL_VERSIONS, context); + provider, source, "", std::make_shared<document::DocIdOnly>(), + detector, spi::ALL_VERSIONS, context); uint16_t splitBit = detector._splitBit; @@ -211,8 +209,7 @@ SplitBitDetector::detectSplit(spi::PersistenceProvider& provider, error << "Could not find differing bit to split bucket contents " "around due to bucket ID collisions. Forcing resulting " "bucket to be 58 bits. Bucket has " - << detector._docCount << " docs totalling " - << detector._docSize << " bytes. "; + << detector._docCount << " docs."; detector.printEntrySummary(error); LOGBT(warning, source.getBucketId().toString(), |