From 4705dcb8fd738546882adfb39ebce0bf2421ffe1 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Fri, 26 Mar 2021 10:59:16 +0100 Subject: Rename SimpleHashMap to ShardedHashMap. --- .../searchlib/attribute/enum_store_dictionary.cpp | 4 +- .../src/vespa/searchlib/attribute/enumstore.cpp | 4 +- vespalib/CMakeLists.txt | 2 +- .../datastore/sharded_hash_map/CMakeLists.txt | 9 + .../sharded_hash_map/sharded_hash_map_test.cpp | 219 +++++++++++++++++++++ .../tests/datastore/simple_hash_map/CMakeLists.txt | 9 - .../simple_hash_map/simple_hash_map_test.cpp | 219 --------------------- .../datastore/unique_store/unique_store_test.cpp | 4 +- .../unique_store_dictionary_test.cpp | 4 +- .../src/vespa/vespalib/datastore/CMakeLists.txt | 2 +- .../vespalib/datastore/fixed_size_hash_map.cpp | 16 +- .../vespa/vespalib/datastore/fixed_size_hash_map.h | 6 +- .../vespa/vespalib/datastore/sharded_hash_map.cpp | 168 ++++++++++++++++ .../vespa/vespalib/datastore/sharded_hash_map.h | 61 ++++++ .../vespa/vespalib/datastore/simple_hash_map.cpp | 168 ---------------- .../src/vespa/vespalib/datastore/simple_hash_map.h | 61 ------ 16 files changed, 478 insertions(+), 478 deletions(-) create mode 100644 vespalib/src/tests/datastore/sharded_hash_map/CMakeLists.txt create mode 100644 vespalib/src/tests/datastore/sharded_hash_map/sharded_hash_map_test.cpp delete mode 100644 vespalib/src/tests/datastore/simple_hash_map/CMakeLists.txt delete mode 100644 vespalib/src/tests/datastore/simple_hash_map/simple_hash_map_test.cpp create mode 100644 vespalib/src/vespa/vespalib/datastore/sharded_hash_map.cpp create mode 100644 vespalib/src/vespa/vespalib/datastore/sharded_hash_map.h delete mode 100644 vespalib/src/vespa/vespalib/datastore/simple_hash_map.cpp delete mode 100644 vespalib/src/vespa/vespalib/datastore/simple_hash_map.h diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp index 1d0430cfb63..5a65d4937e4 100644 --- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include @@ -355,7 +355,7 @@ template class EnumStoreDictionary; template class EnumStoreDictionary; -template class EnumStoreDictionary; +template class EnumStoreDictionary; } diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.cpp b/searchlib/src/vespa/searchlib/attribute/enumstore.cpp index 5beafea0046..5065a4a6cdc 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.cpp @@ -1,7 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "enumstore.hpp" -#include +#include #include #include @@ -51,7 +51,7 @@ make_enum_store_dictionary(IEnumStore &store, bool has_postings, search::Diction switch (type) { case search::DictionaryConfig::Type::HASH: case search::DictionaryConfig::Type::BTREE_AND_HASH: - return std::make_unique>(store, std::move(compare)); + return std::make_unique>(store, std::move(compare)); default: return std::make_unique>(store, std::move(compare)); } diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt index 9e06bb152fd..3bdd477fcb6 100644 --- a/vespalib/CMakeLists.txt +++ b/vespalib/CMakeLists.txt @@ -43,7 +43,7 @@ vespa_define_module( src/tests/datastore/buffer_type src/tests/datastore/datastore src/tests/datastore/fixed_size_hash_map - src/tests/datastore/simple_hash_map + src/tests/datastore/sharded_hash_map src/tests/datastore/unique_store src/tests/datastore/unique_store_dictionary src/tests/datastore/unique_store_string_allocator diff --git a/vespalib/src/tests/datastore/sharded_hash_map/CMakeLists.txt b/vespalib/src/tests/datastore/sharded_hash_map/CMakeLists.txt new file mode 100644 index 00000000000..f94b952f4a3 --- /dev/null +++ b/vespalib/src/tests/datastore/sharded_hash_map/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_sharded_hash_map_test_app + SOURCES + sharded_hash_map_test.cpp + DEPENDS + vespalib + GTest::GTest +) +vespa_add_test(NAME vespalib_sharded_hash_map_test_app COMMAND vespalib_sharded_hash_map_test_app) diff --git a/vespalib/src/tests/datastore/sharded_hash_map/sharded_hash_map_test.cpp b/vespalib/src/tests/datastore/sharded_hash_map/sharded_hash_map_test.cpp new file mode 100644 index 00000000000..9f11b96e672 --- /dev/null +++ b/vespalib/src/tests/datastore/sharded_hash_map/sharded_hash_map_test.cpp @@ -0,0 +1,219 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +LOG_SETUP("vespalib_datastore_shared_hash_test"); + +using vespalib::datastore::EntryRef; +using RefT = vespalib::datastore::EntryRefT<22>; +using MyAllocator = vespalib::datastore::UniqueStoreAllocator; +using MyDataStore = vespalib::datastore::DataStoreT; +using MyCompare = vespalib::datastore::UniqueStoreComparator; +using MyHashMap = vespalib::datastore::ShardedHashMap; +using GenerationHandler = vespalib::GenerationHandler; +using vespalib::makeLambdaTask; + +struct DataStoreShardedHashTest : public ::testing::Test +{ + GenerationHandler _generationHandler; + MyAllocator _allocator; + MyDataStore& _store; + MyHashMap _hash_map; + vespalib::ThreadStackExecutor _writer; // 1 write thread + vespalib::ThreadStackExecutor _readers; // multiple reader threads + vespalib::Rand48 _rnd; + uint32_t _keyLimit; + std::atomic _read_seed; + std::atomic _done_write_work; + std::atomic _done_read_work; + std::atomic _found_count; + std::atomic _stop_read; + bool _report_work; + + DataStoreShardedHashTest(); + ~DataStoreShardedHashTest(); + void commit(); + void insert(uint32_t key); + void remove(uint32_t key); + + void read_work(uint32_t cnt); + void read_work(); + void write_work(uint32_t cnt); +}; + + +DataStoreShardedHashTest::DataStoreShardedHashTest() + : _generationHandler(), + _allocator(), + _store(_allocator.get_data_store()), + _hash_map(std::make_unique(_store)), + _writer(1, 128_Ki), + _readers(4, 128_Ki), + _rnd(), + _keyLimit(1000000), + _read_seed(50), + _done_write_work(0), + _done_read_work(0), + _found_count(0), + _stop_read(0), + _report_work(false) +{ + _rnd.srand48(32); +} + + +DataStoreShardedHashTest::~DataStoreShardedHashTest() +{ + _readers.sync(); + _readers.shutdown(); + _writer.sync(); + _writer.shutdown(); + commit(); + if (_report_work) { + LOG(info, + "read_work=%ld, write_work=%ld, found_count=%ld", + _done_read_work.load(), _done_write_work.load(), _found_count.load()); + } +} + + +void +DataStoreShardedHashTest::commit() +{ + _store.transferHoldLists(_generationHandler.getCurrentGeneration()); + _hash_map.transfer_hold_lists(_generationHandler.getCurrentGeneration()); + _generationHandler.incGeneration(); + _store.trimHoldLists(_generationHandler.getFirstUsedGeneration()); + _hash_map.trim_hold_lists(_generationHandler.getFirstUsedGeneration()); +} + +void +DataStoreShardedHashTest::insert(uint32_t key) +{ + MyCompare comp(_store, key); + std::function insert_entry([this, key]() -> EntryRef { return _allocator.allocate(key); }); + auto& result = _hash_map.add(comp, EntryRef(), insert_entry); + auto ref = result.first.load_relaxed(); + auto &wrapped_entry = _allocator.get_wrapped(ref); + EXPECT_EQ(key, wrapped_entry.value()); +} + +void +DataStoreShardedHashTest::remove(uint32_t key) +{ + MyCompare comp(_store, key); + auto result = _hash_map.remove(comp, EntryRef()); + if (result != nullptr) { + auto ref = result->first.load_relaxed(); + auto &wrapped_entry = _allocator.get_wrapped(ref); + EXPECT_EQ(key, wrapped_entry.value()); + _allocator.hold(ref); + } +} + + +void +DataStoreShardedHashTest::read_work(uint32_t cnt) +{ + vespalib::Rand48 rnd; + long found = 0; + rnd.srand48(++_read_seed); + uint32_t i; + for (i = 0; i < cnt && _stop_read.load() == 0; ++i) { + auto guard = _generationHandler.takeGuard(); + uint32_t key = rnd.lrand48() % (_keyLimit + 1); + MyCompare comp(_store, key); + auto result = _hash_map.find(comp, EntryRef()); + if (result != nullptr) { + auto ref = result->first.load_relaxed(); + auto &wrapped_entry = _allocator.get_wrapped(ref); + EXPECT_EQ(key, wrapped_entry.value()); + ++found; + } + } + _done_read_work += i; + _found_count += found; + LOG(info, "done %u read work", i); +} + + +void +DataStoreShardedHashTest::read_work() +{ + read_work(std::numeric_limits::max()); +} + + +void +DataStoreShardedHashTest::write_work(uint32_t cnt) +{ + vespalib::Rand48 &rnd(_rnd); + for (uint32_t i = 0; i < cnt; ++i) { + uint32_t key = rnd.lrand48() % _keyLimit; + if ((rnd.lrand48() & 1) == 0) { + insert(key); + } else { + remove(key); + } + commit(); + } + _done_write_work += cnt; + _stop_read = 1; + LOG(info, "done %u write work", cnt); +} + + +TEST_F(DataStoreShardedHashTest, single_threaded_reader_without_updates) +{ + _report_work = true; + write_work(10); + _stop_read = 0; + read_work(10); +} + +TEST_F(DataStoreShardedHashTest, single_threaded_reader_during_updates) +{ + uint32_t cnt = 1000000; + _report_work = true; + _writer.execute(makeLambdaTask([this, cnt]() { write_work(cnt); })); + _readers.execute(makeLambdaTask([this]() { read_work(); })); +} + +TEST_F(DataStoreShardedHashTest, multi_threaded_reader_during_updates) +{ + uint32_t cnt = 1000000; + _report_work = true; + _writer.execute(makeLambdaTask([this, cnt]() { write_work(cnt); })); + for (size_t i = 0; i < 4; ++i) { + _readers.execute(makeLambdaTask([this]() { read_work(); })); + } +} + +TEST_F(DataStoreShardedHashTest, memory_usage_is_reported) +{ + auto initial_usage = _hash_map.get_memory_usage(); + EXPECT_LT(0, initial_usage.allocatedBytes()); + EXPECT_LT(0, initial_usage.usedBytes()); + EXPECT_EQ(0, initial_usage.deadBytes()); + EXPECT_EQ(0, initial_usage.allocatedBytesOnHold()); + auto guard = _generationHandler.takeGuard(); + for (uint32_t i = 0; i < 50; ++i) { + insert(i); + } + auto usage = _hash_map.get_memory_usage(); + EXPECT_EQ(0, usage.deadBytes()); + EXPECT_LT(0, usage.allocatedBytesOnHold()); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/tests/datastore/simple_hash_map/CMakeLists.txt b/vespalib/src/tests/datastore/simple_hash_map/CMakeLists.txt deleted file mode 100644 index c790481ebbc..00000000000 --- a/vespalib/src/tests/datastore/simple_hash_map/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(vespalib_simple_hash_map_test_app - SOURCES - simple_hash_map_test.cpp - DEPENDS - vespalib - GTest::GTest -) -vespa_add_test(NAME vespalib_simple_hash_map_test_app COMMAND vespalib_simple_hash_map_test_app) diff --git a/vespalib/src/tests/datastore/simple_hash_map/simple_hash_map_test.cpp b/vespalib/src/tests/datastore/simple_hash_map/simple_hash_map_test.cpp deleted file mode 100644 index 866c2a30818..00000000000 --- a/vespalib/src/tests/datastore/simple_hash_map/simple_hash_map_test.cpp +++ /dev/null @@ -1,219 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include - -#include -LOG_SETUP("vespalib_datastore_simple_hash_test"); - -using vespalib::datastore::EntryRef; -using RefT = vespalib::datastore::EntryRefT<22>; -using MyAllocator = vespalib::datastore::UniqueStoreAllocator; -using MyDataStore = vespalib::datastore::DataStoreT; -using MyCompare = vespalib::datastore::UniqueStoreComparator; -using MyHashMap = vespalib::datastore::SimpleHashMap; -using GenerationHandler = vespalib::GenerationHandler; -using vespalib::makeLambdaTask; - -struct DataStoreSimpleHashTest : public ::testing::Test -{ - GenerationHandler _generationHandler; - MyAllocator _allocator; - MyDataStore& _store; - MyHashMap _hash_map; - vespalib::ThreadStackExecutor _writer; // 1 write thread - vespalib::ThreadStackExecutor _readers; // multiple reader threads - vespalib::Rand48 _rnd; - uint32_t _keyLimit; - std::atomic _read_seed; - std::atomic _done_write_work; - std::atomic _done_read_work; - std::atomic _found_count; - std::atomic _stop_read; - bool _report_work; - - DataStoreSimpleHashTest(); - ~DataStoreSimpleHashTest(); - void commit(); - void insert(uint32_t key); - void remove(uint32_t key); - - void read_work(uint32_t cnt); - void read_work(); - void write_work(uint32_t cnt); -}; - - -DataStoreSimpleHashTest::DataStoreSimpleHashTest() - : _generationHandler(), - _allocator(), - _store(_allocator.get_data_store()), - _hash_map(std::make_unique(_store)), - _writer(1, 128_Ki), - _readers(4, 128_Ki), - _rnd(), - _keyLimit(1000000), - _read_seed(50), - _done_write_work(0), - _done_read_work(0), - _found_count(0), - _stop_read(0), - _report_work(false) -{ - _rnd.srand48(32); -} - - -DataStoreSimpleHashTest::~DataStoreSimpleHashTest() -{ - _readers.sync(); - _readers.shutdown(); - _writer.sync(); - _writer.shutdown(); - commit(); - if (_report_work) { - LOG(info, - "read_work=%ld, write_work=%ld, found_count=%ld", - _done_read_work.load(), _done_write_work.load(), _found_count.load()); - } -} - - -void -DataStoreSimpleHashTest::commit() -{ - _store.transferHoldLists(_generationHandler.getCurrentGeneration()); - _hash_map.transfer_hold_lists(_generationHandler.getCurrentGeneration()); - _generationHandler.incGeneration(); - _store.trimHoldLists(_generationHandler.getFirstUsedGeneration()); - _hash_map.trim_hold_lists(_generationHandler.getFirstUsedGeneration()); -} - -void -DataStoreSimpleHashTest::insert(uint32_t key) -{ - MyCompare comp(_store, key); - std::function insert_entry([this, key]() -> EntryRef { return _allocator.allocate(key); }); - auto& result = _hash_map.add(comp, EntryRef(), insert_entry); - auto ref = result.first.load_relaxed(); - auto &wrapped_entry = _allocator.get_wrapped(ref); - EXPECT_EQ(key, wrapped_entry.value()); -} - -void -DataStoreSimpleHashTest::remove(uint32_t key) -{ - MyCompare comp(_store, key); - auto result = _hash_map.remove(comp, EntryRef()); - if (result != nullptr) { - auto ref = result->first.load_relaxed(); - auto &wrapped_entry = _allocator.get_wrapped(ref); - EXPECT_EQ(key, wrapped_entry.value()); - _allocator.hold(ref); - } -} - - -void -DataStoreSimpleHashTest::read_work(uint32_t cnt) -{ - vespalib::Rand48 rnd; - long found = 0; - rnd.srand48(++_read_seed); - uint32_t i; - for (i = 0; i < cnt && _stop_read.load() == 0; ++i) { - auto guard = _generationHandler.takeGuard(); - uint32_t key = rnd.lrand48() % (_keyLimit + 1); - MyCompare comp(_store, key); - auto result = _hash_map.find(comp, EntryRef()); - if (result != nullptr) { - auto ref = result->first.load_relaxed(); - auto &wrapped_entry = _allocator.get_wrapped(ref); - EXPECT_EQ(key, wrapped_entry.value()); - ++found; - } - } - _done_read_work += i; - _found_count += found; - LOG(info, "done %u read work", i); -} - - -void -DataStoreSimpleHashTest::read_work() -{ - read_work(std::numeric_limits::max()); -} - - -void -DataStoreSimpleHashTest::write_work(uint32_t cnt) -{ - vespalib::Rand48 &rnd(_rnd); - for (uint32_t i = 0; i < cnt; ++i) { - uint32_t key = rnd.lrand48() % _keyLimit; - if ((rnd.lrand48() & 1) == 0) { - insert(key); - } else { - remove(key); - } - commit(); - } - _done_write_work += cnt; - _stop_read = 1; - LOG(info, "done %u write work", cnt); -} - - -TEST_F(DataStoreSimpleHashTest, single_threaded_reader_without_updates) -{ - _report_work = true; - write_work(10); - _stop_read = 0; - read_work(10); -} - -TEST_F(DataStoreSimpleHashTest, single_threaded_reader_during_updates) -{ - uint32_t cnt = 1000000; - _report_work = true; - _writer.execute(makeLambdaTask([this, cnt]() { write_work(cnt); })); - _readers.execute(makeLambdaTask([this]() { read_work(); })); -} - -TEST_F(DataStoreSimpleHashTest, multi_threaded_reader_during_updates) -{ - uint32_t cnt = 1000000; - _report_work = true; - _writer.execute(makeLambdaTask([this, cnt]() { write_work(cnt); })); - for (size_t i = 0; i < 4; ++i) { - _readers.execute(makeLambdaTask([this]() { read_work(); })); - } -} - -TEST_F(DataStoreSimpleHashTest, memory_usage_is_reported) -{ - auto initial_usage = _hash_map.get_memory_usage(); - EXPECT_LT(0, initial_usage.allocatedBytes()); - EXPECT_LT(0, initial_usage.usedBytes()); - EXPECT_EQ(0, initial_usage.deadBytes()); - EXPECT_EQ(0, initial_usage.allocatedBytesOnHold()); - auto guard = _generationHandler.takeGuard(); - for (uint32_t i = 0; i < 50; ++i) { - insert(i); - } - auto usage = _hash_map.get_memory_usage(); - EXPECT_EQ(0, usage.deadBytes()); - EXPECT_LT(0, usage.allocatedBytesOnHold()); -} - -GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp b/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp index 25ed566f57a..3e8b144cf32 100644 --- a/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp +++ b/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include @@ -152,7 +152,7 @@ TestBase::TestBase() case DictionaryType::BTREE: break; default: - store.set_dictionary(std::make_unique>(std::make_unique(store.get_data_store()))); + store.set_dictionary(std::make_unique>(std::make_unique(store.get_data_store()))); } } diff --git a/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp b/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp index 2085292cdf4..2c1b4ad3cf1 100644 --- a/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp +++ b/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include @@ -59,7 +59,7 @@ struct DictionaryReadTest : public ::testing::Test { } }; -using DictionaryReadTestTypes = ::testing::Types>; +using DictionaryReadTestTypes = ::testing::Types>; VESPA_GTEST_TYPED_TEST_SUITE(DictionaryReadTest, DictionaryReadTestTypes); // Disable warnings emitted by gtest generated files when using typed tests diff --git a/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt b/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt index 1ee945f1b8f..b028a040f8d 100644 --- a/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt +++ b/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt @@ -9,7 +9,7 @@ vespa_add_library(vespalib_vespalib_datastore OBJECT datastorebase.cpp entryref.cpp fixed_size_hash_map.cpp - simple_hash_map.cpp + sharded_hash_map.cpp unique_store.cpp unique_store_string_allocator.cpp DEPENDS diff --git a/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.cpp b/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.cpp index 4822313e39e..cca61e3c852 100644 --- a/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.cpp +++ b/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.cpp @@ -20,7 +20,7 @@ FixedSizeHashMap::Node::on_free() _kv = std::make_pair(AtomicEntryRef(), AtomicEntryRef()); } -FixedSizeHashMap::FixedSizeHashMap(uint32_t modulo, uint32_t capacity, uint32_t num_stripes) +FixedSizeHashMap::FixedSizeHashMap(uint32_t modulo, uint32_t capacity, uint32_t num_shards) : _chain_heads(modulo), _nodes(), _modulo(modulo), @@ -30,13 +30,13 @@ FixedSizeHashMap::FixedSizeHashMap(uint32_t modulo, uint32_t capacity, uint32_t _hold_count(0u), _hold_1_list(), _hold_2_list(), - _num_stripes(num_stripes) + _num_shards(num_shards) { _nodes.reserve(capacity); } -FixedSizeHashMap::FixedSizeHashMap(uint32_t modulo, uint32_t capacity, uint32_t num_stripes, const FixedSizeHashMap &orig, const EntryComparator& comp) - : FixedSizeHashMap(modulo, capacity, num_stripes) +FixedSizeHashMap::FixedSizeHashMap(uint32_t modulo, uint32_t capacity, uint32_t num_shards, const FixedSizeHashMap &orig, const EntryComparator& comp) + : FixedSizeHashMap(modulo, capacity, num_shards) { for (const auto &chain_head : orig._chain_heads) { for (uint32_t node_idx = chain_head.load_relaxed(); node_idx != no_node_idx;) { @@ -52,7 +52,7 @@ FixedSizeHashMap::~FixedSizeHashMap() = default; void FixedSizeHashMap::force_add(const EntryComparator& comp, const KvType& kv) { - size_t hash_idx = comp.hash(kv.first.load_relaxed()) / _num_stripes; + size_t hash_idx = comp.hash(kv.first.load_relaxed()) / _num_shards; hash_idx %= _modulo; auto& chain_head = _chain_heads[hash_idx]; assert(_nodes.size() < _nodes.capacity()); @@ -65,7 +65,7 @@ FixedSizeHashMap::force_add(const EntryComparator& comp, const KvType& kv) FixedSizeHashMap::KvType& FixedSizeHashMap::add(const EntryComparator& comp, EntryRef key_ref, std::function& insert_entry) { - size_t hash_idx = comp.hash(key_ref) / _num_stripes; + size_t hash_idx = comp.hash(key_ref) / _num_shards; hash_idx %= _modulo; auto& chain_head = _chain_heads[hash_idx]; uint32_t node_idx = chain_head.load_relaxed(); @@ -129,7 +129,7 @@ FixedSizeHashMap::trim_hold_lists_slow(generation_t first_used) FixedSizeHashMap::KvType* FixedSizeHashMap::remove(const EntryComparator& comp, EntryRef key_ref) { - size_t hash_idx = comp.hash(key_ref) / _num_stripes; + size_t hash_idx = comp.hash(key_ref) / _num_shards; hash_idx %= _modulo; auto& chain_head = _chain_heads[hash_idx]; uint32_t node_idx = chain_head.load_relaxed(); @@ -157,7 +157,7 @@ FixedSizeHashMap::remove(const EntryComparator& comp, EntryRef key_ref) FixedSizeHashMap::KvType* FixedSizeHashMap::find(const EntryComparator& comp, EntryRef key_ref) { - size_t hash_idx = comp.hash(key_ref) / _num_stripes; + size_t hash_idx = comp.hash(key_ref) / _num_shards; hash_idx %= _modulo; auto& chain_head = _chain_heads[hash_idx]; uint32_t node_idx = chain_head.load_acquire(); diff --git a/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.h b/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.h index 8153bc82e06..a0ca1bf0568 100644 --- a/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.h +++ b/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.h @@ -89,14 +89,14 @@ private: uint32_t _hold_count; Array _hold_1_list; std::deque> _hold_2_list; - uint32_t _num_stripes; + uint32_t _num_shards; void transfer_hold_lists_slow(generation_t generation); void trim_hold_lists_slow(generation_t first_used); void force_add(const EntryComparator& comp, const KvType& kv); public: - FixedSizeHashMap(uint32_t module, uint32_t capacity, uint32_t num_stripes); - FixedSizeHashMap(uint32_t module, uint32_t capacity, uint32_t num_stripes, const FixedSizeHashMap &orig, const EntryComparator& comp); + FixedSizeHashMap(uint32_t module, uint32_t capacity, uint32_t num_shards); + FixedSizeHashMap(uint32_t module, uint32_t capacity, uint32_t num_shards, const FixedSizeHashMap &orig, const EntryComparator& comp); ~FixedSizeHashMap(); KvType& add(const EntryComparator& comp, EntryRef key_ref, std::function& insert_entry); diff --git a/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.cpp b/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.cpp new file mode 100644 index 00000000000..7c609f89972 --- /dev/null +++ b/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.cpp @@ -0,0 +1,168 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "sharded_hash_map.h" +#include "fixed_size_hash_map.h" +#include "entry_comparator.h" +#include + +namespace vespalib::datastore { + +class ShardedHashMapShardHeld : public GenerationHeldBase +{ + std::unique_ptr _data; +public: + ShardedHashMapShardHeld(size_t size, std::unique_ptr data); + ~ShardedHashMapShardHeld(); +}; + +ShardedHashMapShardHeld::ShardedHashMapShardHeld(size_t size, std::unique_ptr data) + : GenerationHeldBase(size), + _data(std::move(data)) +{ +} + +ShardedHashMapShardHeld::~ShardedHashMapShardHeld() = default; + +ShardedHashMap::ShardedHashMap(std::unique_ptr comp) + : _gen_holder(), + _maps(), + _comp(std::move(comp)) +{ +} + +ShardedHashMap::~ShardedHashMap() +{ + _gen_holder.clearHoldLists(); + for (size_t i = 0; i < num_shards; ++i) { + auto map = _maps[i].load(std::memory_order_relaxed); + delete map; + } +} + +size_t +ShardedHashMap::get_shard_idx(const EntryComparator& comp, EntryRef key_ref) const +{ + return comp.hash(key_ref) % num_shards; +} + +void +ShardedHashMap::alloc_shard(size_t shard_idx) +{ + auto map = _maps[shard_idx].load(std::memory_order_relaxed); + if (map == nullptr) { + auto umap = std::make_unique(2u, 3u, num_shards); + _maps[shard_idx].store(umap.release(), std::memory_order_release); + } else { + auto umap = std::make_unique(map->size() * 2 + 2, map->size() * 3 + 3, num_shards, *map, *_comp); + _maps[shard_idx].store(umap.release(), std::memory_order_release); + hold_shard(std::unique_ptr(map)); + } +} + +void +ShardedHashMap::hold_shard(std::unique_ptr map) +{ + auto usage = map->get_memory_usage(); + auto hold = std::make_unique(usage.allocatedBytes(), std::move(map)); + _gen_holder.hold(std::move(hold)); +} + +ShardedHashMap::KvType& +ShardedHashMap::add(const EntryComparator& comp, EntryRef key_ref, std::function& insert_entry) +{ + size_t shard_idx = get_shard_idx(comp, key_ref); + auto map = _maps[shard_idx].load(std::memory_order_relaxed); + if (map == nullptr || map->full()) { + alloc_shard(shard_idx); + map = _maps[shard_idx].load(std::memory_order_relaxed); + } + return map->add(comp, key_ref, insert_entry); +} + +ShardedHashMap::KvType* +ShardedHashMap::remove(const EntryComparator& comp, EntryRef key_ref) +{ + size_t shard_idx = get_shard_idx(comp, key_ref); + auto map = _maps[shard_idx].load(std::memory_order_relaxed); + if (map == nullptr) { + return nullptr; + } + return map->remove(comp, key_ref); +} + +ShardedHashMap::KvType* +ShardedHashMap::find(const EntryComparator& comp, EntryRef key_ref) +{ + size_t shard_idx = get_shard_idx(comp, key_ref); + auto map = _maps[shard_idx].load(std::memory_order_relaxed); + if (map == nullptr) { + return nullptr; + } + return map->find(comp, key_ref); +} + +const ShardedHashMap::KvType* +ShardedHashMap::find(const EntryComparator& comp, EntryRef key_ref) const +{ + size_t shard_idx = get_shard_idx(comp, key_ref); + auto map = _maps[shard_idx].load(std::memory_order_relaxed); + if (map == nullptr) { + return nullptr; + } + return map->find(comp, key_ref); +} + +void +ShardedHashMap::transfer_hold_lists(generation_t generation) +{ + for (size_t i = 0; i < num_shards; ++i) { + auto map = _maps[i].load(std::memory_order_relaxed); + if (map != nullptr) { + map->transfer_hold_lists(generation); + } + } + _gen_holder.transferHoldLists(generation); +} + +void +ShardedHashMap::trim_hold_lists(generation_t first_used) +{ + for (size_t i = 0; i < num_shards; ++i) { + auto map = _maps[i].load(std::memory_order_relaxed); + if (map != nullptr) { + map->trim_hold_lists(first_used); + } + } + _gen_holder.trimHoldLists(first_used); +} + +size_t +ShardedHashMap::size() const noexcept +{ + size_t result = 0; + for (size_t i = 0; i < num_shards; ++i) { + auto map = _maps[i].load(std::memory_order_relaxed); + if (map != nullptr) { + result += map->size(); + } + } + return result; +} + +MemoryUsage +ShardedHashMap::get_memory_usage() const +{ + MemoryUsage memory_usage(sizeof(ShardedHashMap), sizeof(ShardedHashMap), 0, 0); + for (size_t i = 0; i < num_shards; ++i) { + auto map = _maps[i].load(std::memory_order_relaxed); + if (map != nullptr) { + memory_usage.merge(map->get_memory_usage()); + } + } + size_t gen_holder_held_bytes = _gen_holder.getHeldBytes(); + memory_usage.incAllocatedBytes(gen_holder_held_bytes); + memory_usage.incAllocatedBytesOnHold(gen_holder_held_bytes); + return memory_usage; +} + +} diff --git a/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.h b/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.h new file mode 100644 index 00000000000..7ea60646b06 --- /dev/null +++ b/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.h @@ -0,0 +1,61 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "atomic_entry_ref.h" +#include +#include +#include + +namespace vespalib { class MemoryUsage; } +namespace vespalib::datastore { + +class FixedSizeHashMap; +class EntryComparator; + +/* + * Hash map over keys in data store, meant to support a faster + * dictionary for unique store with relation to lookups. + * + * Currently hardcoded key and data types, where key references an entry + * in a UniqueStore and value references a posting list + * (cf. search::attribute::PostingStore). + * + * This structure supports one writer and many readers. + * + * A reader must own an appropriate GenerationHandler::Guard to ensure + * that memory is held while it can be accessed by reader. + * + * The writer must update generation and call transfer_hold_lists and + * trim_hold_lists as needed to free up memory no longer needed by any + * readers. + */ +class ShardedHashMap { +public: + using KvType = std::pair; + using generation_t = GenerationHandler::generation_t; + using sgeneration_t = GenerationHandler::sgeneration_t; +private: + GenerationHolder _gen_holder; + static constexpr size_t num_shards = 3; + std::atomic _maps[num_shards]; + std::unique_ptr _comp; + + size_t get_shard_idx(const EntryComparator& comp, EntryRef key_ref) const; + void alloc_shard(size_t shard_idx); + void hold_shard(std::unique_ptr map); +public: + ShardedHashMap(std::unique_ptr comp); + ~ShardedHashMap(); + KvType& add(const EntryComparator& comp, EntryRef key_ref, std::function &insert_entry); + KvType* remove(const EntryComparator& comp, EntryRef key_ref); + KvType* find(const EntryComparator& comp, EntryRef key_ref); + const KvType* find(const EntryComparator& comp, EntryRef key_ref) const; + void transfer_hold_lists(generation_t generation); + void trim_hold_lists(generation_t first_used); + size_t size() const noexcept; + const EntryComparator &get_default_comparator() const noexcept { return *_comp; } + MemoryUsage get_memory_usage() const; +}; + +} diff --git a/vespalib/src/vespa/vespalib/datastore/simple_hash_map.cpp b/vespalib/src/vespa/vespalib/datastore/simple_hash_map.cpp deleted file mode 100644 index 2295cecd613..00000000000 --- a/vespalib/src/vespa/vespalib/datastore/simple_hash_map.cpp +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "simple_hash_map.h" -#include "fixed_size_hash_map.h" -#include "entry_comparator.h" -#include - -namespace vespalib::datastore { - -class SimpleHashMapStripeHeld : public GenerationHeldBase -{ - std::unique_ptr _data; -public: - SimpleHashMapStripeHeld(size_t size, std::unique_ptr data); - ~SimpleHashMapStripeHeld(); -}; - -SimpleHashMapStripeHeld::SimpleHashMapStripeHeld(size_t size, std::unique_ptr data) - : GenerationHeldBase(size), - _data(std::move(data)) -{ -} - -SimpleHashMapStripeHeld::~SimpleHashMapStripeHeld() = default; - -SimpleHashMap::SimpleHashMap(std::unique_ptr comp) - : _gen_holder(), - _maps(), - _comp(std::move(comp)) -{ -} - -SimpleHashMap::~SimpleHashMap() -{ - _gen_holder.clearHoldLists(); - for (size_t i = 0; i < num_stripes; ++i) { - auto map = _maps[i].load(std::memory_order_relaxed); - delete map; - } -} - -size_t -SimpleHashMap::get_stripe(const EntryComparator& comp, EntryRef key_ref) const -{ - return comp.hash(key_ref) % num_stripes; -} - -void -SimpleHashMap::alloc_stripe(size_t stripe) -{ - auto map = _maps[stripe].load(std::memory_order_relaxed); - if (map == nullptr) { - auto umap = std::make_unique(2u, 3u, num_stripes); - _maps[stripe].store(umap.release(), std::memory_order_release); - } else { - auto umap = std::make_unique(map->size() * 2 + 2, map->size() * 3 + 3, num_stripes, *map, *_comp); - _maps[stripe].store(umap.release(), std::memory_order_release); - hold_stripe(std::unique_ptr(map)); - } -} - -void -SimpleHashMap::hold_stripe(std::unique_ptr map) -{ - auto usage = map->get_memory_usage(); - auto hold = std::make_unique(usage.allocatedBytes(), std::move(map)); - _gen_holder.hold(std::move(hold)); -} - -SimpleHashMap::KvType& -SimpleHashMap::add(const EntryComparator& comp, EntryRef key_ref, std::function& insert_entry) -{ - size_t stripe = get_stripe(comp, key_ref); - auto map = _maps[stripe].load(std::memory_order_relaxed); - if (map == nullptr || map->full()) { - alloc_stripe(stripe); - map = _maps[stripe].load(std::memory_order_relaxed); - } - return map->add(comp, key_ref, insert_entry); -} - -SimpleHashMap::KvType* -SimpleHashMap::remove(const EntryComparator& comp, EntryRef key_ref) -{ - size_t stripe = get_stripe(comp, key_ref); - auto map = _maps[stripe].load(std::memory_order_relaxed); - if (map == nullptr) { - return nullptr; - } - return map->remove(comp, key_ref); -} - -SimpleHashMap::KvType* -SimpleHashMap::find(const EntryComparator& comp, EntryRef key_ref) -{ - size_t stripe = get_stripe(comp, key_ref); - auto map = _maps[stripe].load(std::memory_order_relaxed); - if (map == nullptr) { - return nullptr; - } - return map->find(comp, key_ref); -} - -const SimpleHashMap::KvType* -SimpleHashMap::find(const EntryComparator& comp, EntryRef key_ref) const -{ - size_t stripe = get_stripe(comp, key_ref); - auto map = _maps[stripe].load(std::memory_order_relaxed); - if (map == nullptr) { - return nullptr; - } - return map->find(comp, key_ref); -} - -void -SimpleHashMap::transfer_hold_lists(generation_t generation) -{ - for (size_t i = 0; i < num_stripes; ++i) { - auto map = _maps[i].load(std::memory_order_relaxed); - if (map != nullptr) { - map->transfer_hold_lists(generation); - } - } - _gen_holder.transferHoldLists(generation); -} - -void -SimpleHashMap::trim_hold_lists(generation_t first_used) -{ - for (size_t i = 0; i < num_stripes; ++i) { - auto map = _maps[i].load(std::memory_order_relaxed); - if (map != nullptr) { - map->trim_hold_lists(first_used); - } - } - _gen_holder.trimHoldLists(first_used); -} - -size_t -SimpleHashMap::size() const noexcept -{ - size_t result = 0; - for (size_t i = 0; i < num_stripes; ++i) { - auto map = _maps[i].load(std::memory_order_relaxed); - if (map != nullptr) { - result += map->size(); - } - } - return result; -} - -MemoryUsage -SimpleHashMap::get_memory_usage() const -{ - MemoryUsage memory_usage(sizeof(SimpleHashMap), sizeof(SimpleHashMap), 0, 0); - for (size_t i = 0; i < num_stripes; ++i) { - auto map = _maps[i].load(std::memory_order_relaxed); - if (map != nullptr) { - memory_usage.merge(map->get_memory_usage()); - } - } - size_t gen_holder_held_bytes = _gen_holder.getHeldBytes(); - memory_usage.incAllocatedBytes(gen_holder_held_bytes); - memory_usage.incAllocatedBytesOnHold(gen_holder_held_bytes); - return memory_usage; -} - -} diff --git a/vespalib/src/vespa/vespalib/datastore/simple_hash_map.h b/vespalib/src/vespa/vespalib/datastore/simple_hash_map.h deleted file mode 100644 index bc052295511..00000000000 --- a/vespalib/src/vespa/vespalib/datastore/simple_hash_map.h +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include "atomic_entry_ref.h" -#include -#include -#include - -namespace vespalib { class MemoryUsage; } -namespace vespalib::datastore { - -class FixedSizeHashMap; -class EntryComparator; - -/* - * Hash map over keys in data store, meant to support a faster - * dictionary for unique store with relation to lookups. - * - * Currently hardcoded key and data types, where key references an entry - * in a UniqueStore and value references a posting list - * (cf. search::attribute::PostingStore). - * - * This structure supports one writer and many readers. - * - * A reader must own an appropriate GenerationHandler::Guard to ensure - * that memory is held while it can be accessed by reader. - * - * The writer must update generation and call transfer_hold_lists and - * trim_hold_lists as needed to free up memory no longer needed by any - * readers. - */ -class SimpleHashMap { -public: - using KvType = std::pair; - using generation_t = GenerationHandler::generation_t; - using sgeneration_t = GenerationHandler::sgeneration_t; -private: - GenerationHolder _gen_holder; - static constexpr size_t num_stripes = 3; - std::atomic _maps[num_stripes]; - std::unique_ptr _comp; - - size_t get_stripe(const EntryComparator& comp, EntryRef key_ref) const; - void alloc_stripe(size_t stripe); - void hold_stripe(std::unique_ptr map); -public: - SimpleHashMap(std::unique_ptr comp); - ~SimpleHashMap(); - KvType& add(const EntryComparator& comp, EntryRef key_ref, std::function &insert_entry); - KvType* remove(const EntryComparator& comp, EntryRef key_ref); - KvType* find(const EntryComparator& comp, EntryRef key_ref); - const KvType* find(const EntryComparator& comp, EntryRef key_ref) const; - void transfer_hold_lists(generation_t generation); - void trim_hold_lists(generation_t first_used); - size_t size() const noexcept; - const EntryComparator &get_default_comparator() const noexcept { return *_comp; } - MemoryUsage get_memory_usage() const; -}; - -} -- cgit v1.2.3