diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-10-18 10:52:40 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-18 10:52:40 +0200 |
commit | e4905038523fce91d3e437855e7ef62eb95c0b69 (patch) | |
tree | 9b993653d0f7b7cc21ed509dace2faa437617e02 | |
parent | a4400c1d6864eaee513901229ae6d6ccaefc2584 (diff) | |
parent | 1e1177e32619601f790c1d221e7bf5355c2f9c1e (diff) |
Merge pull request #28990 from vespa-engine/balder/xxhash-methods-in-one-place
Move xxh3_64 methods to vespalib. That also removes the need for work…
6 files changed, 29 insertions, 62 deletions
diff --git a/document/src/vespa/document/bucket/bucketid.cpp b/document/src/vespa/document/bucket/bucketid.cpp index dee818b407e..fcc9a0df4f6 100644 --- a/document/src/vespa/document/bucket/bucketid.cpp +++ b/document/src/vespa/document/bucket/bucketid.cpp @@ -8,7 +8,6 @@ #include <vespa/vespalib/stllike/hash_set.hpp> #include <vespa/vespalib/util/stringfmt.h> #include <limits> -#include <xxhash.h> using vespalib::nbostream; using vespalib::asciistream; @@ -79,29 +78,7 @@ void BucketId::initialize() noexcept { uint64_t BucketId::hash::operator () (const BucketId& bucketId) const noexcept { - const uint64_t raw_id = bucketId.getId(); - /* - * This is a workaround for gcc 12 and on that produces incorrect warning when compiled with -march=haswell or newer - * /home/balder/git/vespa/document/src/vespa/document/bucket/bucketid.cpp: In member function ‘uint64_t document::BucketId::hash::operator()(const document::BucketId&) const’: - * /home/balder/git/vespa/document/src/vespa/document/bucket/bucketid.cpp:83:23: error: ‘raw_id’ may be used uninitialized [-Werror=maybe-uninitialized] - * 83 | return XXH3_64bits(&raw_id, sizeof(uint64_t)); - * | ^ - * In file included from /usr/include/xxh3.h:55, - * from /home/balder/git/vespa/document/src/vespa/document/bucket/bucketid.cpp:11: - * /usr/include/xxhash.h:5719:29: note: by argument 1 of type ‘const void*’ to ‘XXH64_hash_t XXH_INLINE_XXH3_64bits(const void*, size_t)’ declared here - * 5719 | XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length) - * | ^~~~~~~~~~~ - * /home/balder/git/vespa/document/src/vespa/document/bucket/bucketid.cpp:82:14: note: ‘raw_id’ declared here - * 82 | uint64_t raw_id = bucketId.getId(); - * | ^~~~~~ - * cc1plus: all warnings being treated as errors - * - * Same issue in storage/src/vespa/storage/persistence/filestorhandlerimpl.cpp:FileStorHandlerImpl::dispersed_bucket_bits - */ - uint8_t raw_as_bytes[sizeof(raw_id)]; - memcpy(raw_as_bytes, &raw_id, sizeof(raw_id)); - - return XXH3_64bits(raw_as_bytes, sizeof(raw_as_bytes)); + return vespalib::xxhash::xxh3_64(bucketId.getId()); } vespalib::string diff --git a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp index 582b67a4dbc..a96bee2d11a 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp +++ b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp @@ -14,7 +14,6 @@ #include <vespa/vespalib/stllike/hash_map.hpp> #include <vespa/vespalib/util/exceptions.h> #include <vespa/vespalib/util/string_escape.h> -#include <xxhash.h> #include <vespa/log/log.h> LOG_SETUP(".persistence.filestor.handler.impl"); @@ -896,14 +895,7 @@ FileStorHandlerImpl::flush() uint64_t FileStorHandlerImpl::dispersed_bucket_bits(const document::Bucket& bucket) noexcept { - const uint64_t raw_id = bucket.getBucketId().getId(); - /* - * This is a workaround for gcc 12 and on that produces incorrect warning when compiled with -march=haswell or newer - * See document/src/vespa/document/bucket/bucketid.cpp: In member function ‘uint64_t document::BucketId::hash::operator()(const document::BucketId&) const - */ - uint8_t raw_as_bytes[sizeof(raw_id)]; - memcpy(raw_as_bytes, &raw_id, sizeof(raw_id)); - return XXH3_64bits(&raw_as_bytes, sizeof(raw_id)); + return vespalib::xxhash::xxh3_64(bucket.getBucketId().getId()); } FileStorHandlerImpl::Stripe::Stripe(const FileStorHandlerImpl & owner, MessageSender & messageSender) diff --git a/vespalib/src/vespa/vespalib/fuzzy/sparse_state.h b/vespalib/src/vespa/vespalib/fuzzy/sparse_state.h index 7e381468fbe..e023f4d3de2 100644 --- a/vespalib/src/vespa/vespalib/fuzzy/sparse_state.h +++ b/vespalib/src/vespa/vespalib/fuzzy/sparse_state.h @@ -2,13 +2,12 @@ #pragma once #include <vespa/config.h> +#include <vespa/vespalib/stllike/hash_fun.h> #include <algorithm> #include <array> #include <cassert> -#include <cstdint> #include <ostream> #include <span> -#include <xxhash.h> // TODO factor out? namespace vespalib::fuzzy { @@ -80,15 +79,8 @@ public: size_t operator()(const FixedSparseState& s) const noexcept { static_assert(std::is_same_v<uint32_t, std::decay_t<decltype(s.indices[0])>>); static_assert(std::is_same_v<uint8_t, std::decay_t<decltype(s.costs[0])>>); - // FIXME GCC 12.2 worse-than-useless(tm) warning false positives :I -#pragma GCC diagnostic push -#ifdef VESPA_USE_SANITIZER -# pragma GCC diagnostic ignored "-Wstringop-overread" // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98465 etc. -#endif -#pragma GCC diagnostic ignored "-Warray-bounds" - return (XXH3_64bits(s.indices.data(), s.sz * sizeof(uint32_t)) ^ - XXH3_64bits(s.costs.data(), s.sz)); -#pragma GCC diagnostic pop + return (xxhash::xxh3_64(s.indices.data(), s.sz * sizeof(uint32_t)) ^ + xxhash::xxh3_64(s.costs.data(), s.sz)); } }; }; diff --git a/vespalib/src/vespa/vespalib/stllike/hash_fun.cpp b/vespalib/src/vespa/vespalib/stllike/hash_fun.cpp index e4911d2e0c6..6802e5f91a6 100644 --- a/vespalib/src/vespa/vespalib/stllike/hash_fun.cpp +++ b/vespalib/src/vespa/vespalib/stllike/hash_fun.cpp @@ -6,23 +6,21 @@ namespace vespalib { size_t -hashValue(const char *str) noexcept -{ - return hashValue(str, strlen(str)); +hashValue(const char *str) noexcept { + return xxhash::xxh3_64(str, strlen(str)); } -/** - * @brief Calculate hash value. - * - * The hash function XXH3_64bits from xxhash library. - * @param buf input buffer - * @param sz input buffer size - * @return hash value of input - **/ -size_t -hashValue(const void * buf, size_t sz) noexcept -{ +namespace xxhash { + +uint64_t +xxh3_64(uint64_t value) noexcept { + return XXH3_64bits(&value, sizeof(value)); +} + +uint64_t +xxh3_64(const void * buf, size_t sz) noexcept { return XXH3_64bits(buf, sz); } } +} diff --git a/vespalib/src/vespa/vespalib/stllike/hash_fun.h b/vespalib/src/vespa/vespalib/stllike/hash_fun.h index f8a8a06b921..8fecc41b4c1 100644 --- a/vespalib/src/vespa/vespalib/stllike/hash_fun.h +++ b/vespalib/src/vespa/vespalib/stllike/hash_fun.h @@ -2,7 +2,6 @@ #pragma once #include <vespa/vespalib/stllike/string.h> -#include <cstdint> namespace vespalib { @@ -64,9 +63,18 @@ template<typename T> struct hash<const T *> { size_t operator() (const T * arg) const noexcept { return size_t(arg); } }; +namespace xxhash { + +uint64_t xxh3_64(uint64_t value) noexcept; +uint64_t xxh3_64(const void *str, size_t sz) noexcept; + +} + // reuse old string hash function size_t hashValue(const char *str) noexcept; -size_t hashValue(const void *str, size_t sz) noexcept; +inline size_t hashValue(const void *buf, size_t sz) noexcept { + return xxhash::xxh3_64(buf, sz); +} struct hash_strings { size_t operator() (const vespalib::string & arg) const noexcept { return hashValue(arg.data(), arg.size()); } diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp index a5942e1af9b..09f2bbd828d 100644 --- a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp +++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp @@ -1,7 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "shared_string_repo.h" -#include <xxhash.h> +#include <vespa/vespalib/stllike/hash_fun.h> #include <charconv> #include <cassert> @@ -232,7 +232,7 @@ string_id SharedStringRepo::resolve(vespalib::stringref str) { uint32_t direct_id = try_make_direct_id(str); if (direct_id >= ID_BIAS) { - uint64_t full_hash = XXH3_64bits(str.data(), str.size()); + uint64_t full_hash = xxhash::xxh3_64(str.data(), str.size()); uint32_t part = full_hash & PART_MASK; uint32_t local_hash = full_hash >> PART_BITS; uint32_t local_idx = _partitions[part].resolve(AltKey{str, local_hash}); |