summaryrefslogtreecommitdiffstats
path: root/storage
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@verizonmedia.com>2020-03-18 14:27:46 +0000
committerTor Brede Vekterli <vekterli@verizonmedia.com>2020-03-18 14:27:46 +0000
commit37660e1dd9cb70c25e56ee83bacc7f4205144faa (patch)
tree0352901592a3668ec7103ade311216b6fc1b0e5a /storage
parent0af38d72c0c131d7c3955d8542da8201468d7397 (diff)
Account for stripe index when performing a multi-lock
Legacy locking code had a hidden assumption that each disk had only a single queue associated with it and locking requests could therefore be deduped at the disk level. Since we now only have a single logical disk with a number of queues striped over it, this would introduce race conditions when splits/joins would cross stripe boundaries for their target/source buckets. Use a composite disk/stripe multi lock key to ensure we only dedupe locking requests at the stripe-level.
Diffstat (limited to 'storage')
-rw-r--r--storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp36
-rw-r--r--storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.h6
2 files changed, 33 insertions, 9 deletions
diff --git a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp
index 355d0eb8cc6..f06e0141ae7 100644
--- a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp
+++ b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp
@@ -399,13 +399,30 @@ FileStorHandlerImpl::Stripe::lock(const document::Bucket &bucket, api::LockingRe
namespace {
struct MultiLockGuard {
- std::map<uint16_t, vespalib::Monitor*> monitors;
+ struct DiskAndStripe {
+ uint16_t disk;
+ uint16_t stripe;
+
+ DiskAndStripe(uint16_t disk_, uint16_t stripe_) noexcept : disk(disk_), stripe(stripe_) {}
+
+ bool operator==(const DiskAndStripe& rhs) const noexcept {
+ return (disk == rhs.disk) && (stripe == rhs.stripe);
+ }
+ bool operator<(const DiskAndStripe& rhs) const noexcept {
+ if (disk != rhs.disk) {
+ return disk < rhs.disk;
+ }
+ return stripe < rhs.stripe;
+ }
+ };
+
+ std::map<DiskAndStripe, vespalib::Monitor*> monitors;
std::vector<std::shared_ptr<vespalib::MonitorGuard>> guards;
MultiLockGuard() = default;
- void addLock(vespalib::Monitor& monitor, uint16_t index) {
- monitors[index] = &monitor;
+ void addLock(vespalib::Monitor& monitor, uint16_t disk_index, uint16_t stripe_index) {
+ monitors[DiskAndStripe(disk_index, stripe_index)] = &monitor;
}
void lock() {
for (auto & entry : monitors) {
@@ -758,6 +775,9 @@ FileStorHandlerImpl::remapQueueNoLock(Disk& from, const RemapInfo& source,
} else {
entry._bucket = bucket;
// Move to correct disk queue if needed
+ assert(bucket == source.bucket || std::find_if(targets.begin(), targets.end(), [bucket](auto* e){
+ return e->bucket == bucket;
+ }) != targets.end());
_diskInfo[targetDisk].stripe(bucket).exposeQueue().emplace_back(std::move(entry));
}
}
@@ -772,11 +792,11 @@ FileStorHandlerImpl::remapQueue(const RemapInfo& source, RemapInfo& target, Oper
MultiLockGuard guard;
Disk& from(_diskInfo[source.diskIndex]);
- guard.addLock(from.stripe(source.bucket).exposeLock(), source.diskIndex);
+ guard.addLock(from.stripe(source.bucket).exposeLock(), source.diskIndex, from.stripe_index(source.bucket));
Disk& to1(_diskInfo[target.diskIndex]);
if (target.bucket.getBucketId().getRawId() != 0) {
- guard.addLock(to1.stripe(target.bucket).exposeLock(), target.diskIndex);
+ guard.addLock(to1.stripe(target.bucket).exposeLock(), target.diskIndex, to1.stripe_index(target.bucket));
}
std::vector<RemapInfo*> targets;
@@ -795,16 +815,16 @@ FileStorHandlerImpl::remapQueue(const RemapInfo& source, RemapInfo& target1, Rem
MultiLockGuard guard;
Disk& from(_diskInfo[source.diskIndex]);
- guard.addLock(from.stripe(source.bucket).exposeLock(), source.diskIndex);
+ guard.addLock(from.stripe(source.bucket).exposeLock(), source.diskIndex, from.stripe_index(source.bucket));
Disk& to1(_diskInfo[target1.diskIndex]);
if (target1.bucket.getBucketId().getRawId() != 0) {
- guard.addLock(to1.stripe(target1.bucket).exposeLock(), target1.diskIndex);
+ guard.addLock(to1.stripe(target1.bucket).exposeLock(), target1.diskIndex, to1.stripe_index(target1.bucket));
}
Disk& to2(_diskInfo[target2.diskIndex]);
if (target2.bucket.getBucketId().getRawId() != 0) {
- guard.addLock(to2.stripe(target2.bucket).exposeLock(), target2.diskIndex);
+ guard.addLock(to2.stripe(target2.bucket).exposeLock(), target2.diskIndex, to2.stripe_index(target2.bucket));
}
guard.lock();
diff --git a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.h b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.h
index 253cb84caeb..da4d242a4c9 100644
--- a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.h
+++ b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.h
@@ -207,8 +207,12 @@ public:
// and the stripe an operation ends up on.
return bucket.getBucketId().getId() * 1099511628211ULL;
}
+ // We make a fairly reasonable assumption that there will be less than 64k stripes.
+ uint16_t stripe_index(const document::Bucket& bucket) const noexcept {
+ return static_cast<uint16_t>(dispersed_bucket_bits(bucket) % _stripes.size());
+ }
Stripe & stripe(const document::Bucket & bucket) {
- return _stripes[dispersed_bucket_bits(bucket) % _stripes.size()];
+ return _stripes[stripe_index(bucket)];
}
std::vector<Stripe> & getStripes() { return _stripes; }
private: