summaryrefslogtreecommitdiffstats
path: root/storage
diff options
context:
space:
mode:
author: Tor Brede Vekterli <vekterli@verizonmedia.com> 2020-06-08 13:25:40 +0000
committer: Tor Brede Vekterli <vekterli@verizonmedia.com> 2020-06-08 13:25:40 +0000
commit: e56dc7ca69e8ea5e3e20b35238ce25061b500db6 (patch)
tree: 0fe20b94ad9c44e40d882a20a982132941d79dd7 /storage
parent: 4e2dbbc64561204633debdbc7b8abde63402b4fc (diff)
Use xxhash64 for bucket-to-stripe distribution
The existing naive prime-based solution was susceptible to scheduling operations for the subtree of a superbucket in one strand alone, despite previous attempts to disperse this using prime number multiplication. This would seriously limit parallelism for superbucket locality-sensitive reads such as streaming search visitors.
Diffstat (limited to 'storage')
-rw-r--r-- storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp | 9
-rw-r--r-- storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.h | 7
2 files changed, 10 insertions, 6 deletions
diff --git a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp
index 5627ade2a11..fbdbac27b7c 100644
--- a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp
+++ b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp
@@ -14,6 +14,10 @@
#include <vespa/storageapi/message/stat.h>
#include <vespa/vespalib/stllike/hash_map.hpp>
#include <vespa/vespalib/util/exceptions.h>
+#ifndef XXH_INLINE_ALL
+# define XXH_INLINE_ALL // Let XXH64 be inlined for fixed hash size (bucket ID)
+#endif
+#include <xxhash.h>
#include <vespa/log/log.h>
LOG_SETUP(".persistence.filestor.handler.impl");
@@ -894,6 +898,11 @@ FileStorHandlerImpl::Disk::broadcast()
}
}
+uint64_t FileStorHandlerImpl::Disk::dispersed_bucket_bits(const document::Bucket& bucket) noexcept {
+ const uint64_t raw_id = bucket.getBucketId().getId();
+ return XXH64(&raw_id, sizeof(uint64_t), 0);
+}
+
bool
FileStorHandlerImpl::Disk::schedule(const std::shared_ptr<api::StorageMessage>& msg)
{
diff --git a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.h b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.h
index 7a4f9000e82..00714c291b7 100644
--- a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.h
+++ b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.h
@@ -197,12 +197,7 @@ public:
std::string dumpQueue() const;
void dumpActiveHtml(std::ostream & os) const;
void dumpQueueHtml(std::ostream & os) const;
- static uint64_t dispersed_bucket_bits(const document::Bucket& bucket) noexcept {
- // Disperse bucket bits by multiplying with the 64-bit FNV-1 prime.
- // This avoids an inherent affinity between the LSB of a bucket's bits
- // and the stripe an operation ends up on.
- return bucket.getBucketId().getId() * 1099511628211ULL;
- }
+ static uint64_t dispersed_bucket_bits(const document::Bucket& bucket) noexcept;
// We make a fairly reasonable assumption that there will be less than 64k stripes.
uint16_t stripe_index(const document::Bucket& bucket) const noexcept {
return static_cast<uint16_t>(dispersed_bucket_bits(bucket) % _stripes.size());