summary refs log tree commit diff stats
path: root/storage
diff options
context:
space:
mode:
author Tor Brede Vekterli <vekterli@verizonmedia.com> 2021-01-05 12:04:24 +0000
committer Tor Brede Vekterli <vekterli@verizonmedia.com> 2021-01-05 12:07:08 +0000
commit f201ce874e5790352e42ef084c40ce396ca75acf (patch)
tree 7204574c017ab738e24320e1da95f21bcac4e1fe /storage
parent 5613779dc559920120f93cf9358d62cad0ddde1e (diff)
Only fetch document IDs in bucket when splitting, not whole documents
Splitting code only needs to look at the document IDs to figure out the distribution of the target buckets. Remove tracking of document sizes (which did need the whole documents) since it was only used in a warning log message.
Diffstat (limited to 'storage')
-rw-r--r-- storage/src/vespa/storage/persistence/bucketprocessor.cpp 6
-rw-r--r-- storage/src/vespa/storage/persistence/bucketprocessor.h 1
-rw-r--r-- storage/src/vespa/storage/persistence/processallhandler.cpp 3
-rw-r--r-- storage/src/vespa/storage/persistence/splitbitdetector.cpp 21
4 files changed, 16 insertions, 15 deletions
diff --git a/storage/src/vespa/storage/persistence/bucketprocessor.cpp b/storage/src/vespa/storage/persistence/bucketprocessor.cpp
index ea09fcfc348..33706e69701 100644
--- a/storage/src/vespa/storage/persistence/bucketprocessor.cpp
+++ b/storage/src/vespa/storage/persistence/bucketprocessor.cpp
@@ -40,15 +40,15 @@ void
BucketProcessor::iterateAll(spi::PersistenceProvider& provider,
const spi::Bucket& bucket,
const std::string& documentSelection,
+ std::shared_ptr<document::FieldSet> field_set,
EntryProcessor& processor,
spi::IncludedVersions versions,
spi::Context& context)
{
- spi::Selection sel
- = spi::Selection(spi::DocumentSelection(documentSelection));
+ spi::Selection sel = spi::Selection(spi::DocumentSelection(documentSelection));
spi::CreateIteratorResult createIterResult(provider.createIterator(
bucket,
- std::make_shared<document::AllFields>(),
+ std::move(field_set),
sel,
versions,
context));
diff --git a/storage/src/vespa/storage/persistence/bucketprocessor.h b/storage/src/vespa/storage/persistence/bucketprocessor.h
index 2c1be512b5e..0f15b4a222d 100644
--- a/storage/src/vespa/storage/persistence/bucketprocessor.h
+++ b/storage/src/vespa/storage/persistence/bucketprocessor.h
@@ -22,6 +22,7 @@ public:
static void iterateAll(spi::PersistenceProvider&,
const spi::Bucket&,
const std::string& documentSelection,
+ std::shared_ptr<document::FieldSet> field_set,
EntryProcessor&,
spi::IncludedVersions,
spi::Context&);
diff --git a/storage/src/vespa/storage/persistence/processallhandler.cpp b/storage/src/vespa/storage/persistence/processallhandler.cpp
index 9cf570ef7a4..26cd757196f 100644
--- a/storage/src/vespa/storage/persistence/processallhandler.cpp
+++ b/storage/src/vespa/storage/persistence/processallhandler.cpp
@@ -3,6 +3,7 @@
#include "processallhandler.h"
#include "bucketprocessor.h"
#include "persistenceutil.h"
+#include <vespa/document/fieldset/fieldsets.h>
#include <vespa/persistence/spi/persistenceprovider.h>
#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/vespalib/stllike/hash_map.hpp>
@@ -85,6 +86,7 @@ ProcessAllHandler::handleRemoveLocation(api::RemoveLocationCommand& cmd, Message
spi::Bucket bucket(cmd.getBucket());
UnrevertableRemoveEntryProcessor processor(_spi, bucket, tracker->context());
BucketProcessor::iterateAll(_spi, bucket, cmd.getDocumentSelection(),
+ std::make_shared<document::AllFields>(),
processor, spi::NEWEST_DOCUMENT_ONLY,tracker->context());
tracker->setReply(std::make_shared<api::RemoveLocationReply>(cmd, processor._n_removed));
@@ -102,6 +104,7 @@ ProcessAllHandler::handleStatBucket(api::StatBucketCommand& cmd, MessageTracker:
spi::Bucket bucket(cmd.getBucket());
StatEntryProcessor processor(ost);
BucketProcessor::iterateAll(_spi, bucket, cmd.getDocumentSelection(),
+ std::make_shared<document::AllFields>(),
processor, spi::ALL_VERSIONS,tracker->context());
tracker->setReply(std::make_shared<api::StatBucketReply>(cmd, ost.str()));
diff --git a/storage/src/vespa/storage/persistence/splitbitdetector.cpp b/storage/src/vespa/storage/persistence/splitbitdetector.cpp
index 0208b220b79..fdf86f61639 100644
--- a/storage/src/vespa/storage/persistence/splitbitdetector.cpp
+++ b/storage/src/vespa/storage/persistence/splitbitdetector.cpp
@@ -4,6 +4,7 @@
#include "bucketprocessor.h"
#include <vespa/document/bucket/bucketidfactory.h>
#include <vespa/document/base/documentid.h>
+#include <vespa/document/fieldset/fieldsets.h>
#include <sstream>
#include <cassert>
@@ -38,7 +39,6 @@ struct BucketVisitor : public BucketProcessor::EntryProcessor {
mutable document::DocumentId _conflictId;
mutable document::BucketId _conflictBucket;
uint32_t _docCount;
- uint64_t _docSize;
struct DocInfo {
uint64_t timestamp;
document::DocumentId docId;
@@ -57,16 +57,14 @@ struct BucketVisitor : public BucketProcessor::EntryProcessor {
BucketVisitor(const document::BucketIdFactory& factory);
~BucketVisitor();
- void process(spi::DocEntry& slot) override {
- assert(slot.getDocumentId());
+ void process(spi::DocEntry& entry) override {
+ assert(entry.getDocumentId());
++_docCount;
- _docSize += slot.getDocumentSize();
- const document::DocumentId& id(*slot.getDocumentId());
+ const document::DocumentId& id(*entry.getDocumentId());
document::BucketId bucket = _factory.getBucketId(id);
- // LOG(spam, "Bucket %s", bucket.toString().c_str());
if (_firstDocs.size() < keepFirstCount) {
- _firstDocs.push_back(DocInfo(slot.getTimestamp(), id, bucket));
+ _firstDocs.push_back(DocInfo(entry.getTimestamp(), id, bucket));
}
if (_refBucket.getRawId() == 0) {
@@ -101,10 +99,9 @@ BucketVisitor::BucketVisitor(const document::BucketIdFactory& factory)
: _factory(factory), _splitBit(58),
_splitMask(0), _refId(), _refBucket(),
_conflictId(), _conflictBucket(),
- _docCount(0), _docSize(0), _firstDocs()
+ _docCount(0), _firstDocs()
{
_firstDocs.reserve(keepFirstCount);
- // LOG(spam, "Checking out meta entries in bucket");
for (uint32_t i=0; i<_splitBit; ++i) {
_splitMask = (_splitMask << 1) | 1;
}
@@ -158,7 +155,8 @@ SplitBitDetector::detectSplit(spi::PersistenceProvider& provider,
BucketVisitor detector(factory);
BucketProcessor::iterateAll(
- provider, source, "", detector, spi::ALL_VERSIONS, context);
+ provider, source, "", std::make_shared<document::DocIdOnly>(),
+ detector, spi::ALL_VERSIONS, context);
uint16_t splitBit = detector._splitBit;
@@ -211,8 +209,7 @@ SplitBitDetector::detectSplit(spi::PersistenceProvider& provider,
error << "Could not find differing bit to split bucket contents "
"around due to bucket ID collisions. Forcing resulting "
"bucket to be 58 bits. Bucket has "
- << detector._docCount << " docs totalling "
- << detector._docSize << " bytes. ";
+ << detector._docCount << " docs.";
detector.printEntrySummary(error);
LOGBT(warning,
source.getBucketId().toString(),