diff options
author | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2020-06-08 13:25:40 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2020-06-08 13:25:40 +0000 |
commit | e56dc7ca69e8ea5e3e20b35238ce25061b500db6 (patch) | |
tree | 0fe20b94ad9c44e40d882a20a982132941d79dd7 /storage | |
parent | 4e2dbbc64561204633debdbc7b8abde63402b4fc (diff) |
Use xxhash64 for bucket-to-stripe distribution
The existing naive prime-based solution was susceptible to scheduling
all operations for the subtree of a superbucket onto a single stripe,
despite previous attempts to disperse them using prime number
multiplication. This would seriously limit parallelism
for superbucket locality-sensitive reads such as streaming search
visitors.
Diffstat (limited to 'storage')
-rw-r--r-- | storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp | 9 | ||||
-rw-r--r-- | storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.h | 7 |
2 files changed, 10 insertions, 6 deletions
diff --git a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp index 5627ade2a11..fbdbac27b7c 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp +++ b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp @@ -14,6 +14,10 @@ #include <vespa/storageapi/message/stat.h> #include <vespa/vespalib/stllike/hash_map.hpp> #include <vespa/vespalib/util/exceptions.h> +#ifndef XXH_INLINE_ALL +# define XXH_INLINE_ALL // Let XXH64 be inlined for fixed hash size (bucket ID) +#endif +#include <xxhash.h> #include <vespa/log/log.h> LOG_SETUP(".persistence.filestor.handler.impl"); @@ -894,6 +898,11 @@ FileStorHandlerImpl::Disk::broadcast() } } +uint64_t FileStorHandlerImpl::Disk::dispersed_bucket_bits(const document::Bucket& bucket) noexcept { + const uint64_t raw_id = bucket.getBucketId().getId(); + return XXH64(&raw_id, sizeof(uint64_t), 0); +} + bool FileStorHandlerImpl::Disk::schedule(const std::shared_ptr<api::StorageMessage>& msg) { diff --git a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.h b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.h index 7a4f9000e82..00714c291b7 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.h +++ b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.h @@ -197,12 +197,7 @@ public: std::string dumpQueue() const; void dumpActiveHtml(std::ostream & os) const; void dumpQueueHtml(std::ostream & os) const; - static uint64_t dispersed_bucket_bits(const document::Bucket& bucket) noexcept { - // Disperse bucket bits by multiplying with the 64-bit FNV-1 prime. - // This avoids an inherent affinity between the LSB of a bucket's bits - // and the stripe an operation ends up on. 
- return bucket.getBucketId().getId() * 1099511628211ULL; - } + static uint64_t dispersed_bucket_bits(const document::Bucket& bucket) noexcept; // We make a fairly reasonable assumption that there will be less than 64k stripes. uint16_t stripe_index(const document::Bucket& bucket) const noexcept { return static_cast<uint16_t>(dispersed_bucket_bits(bucket) % _stripes.size()); |