diff options
author | Geir Storli <geirst@yahooinc.com> | 2023-06-14 17:58:42 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-14 17:58:42 +0200 |
commit | 056bf0398c118bde7773d8e7e4d122112be606e5 (patch) | |
tree | 6a5bb486868d04d129ded61fee5f0e9a21a254f1 | |
parent | dc4ef240e27d270fe685e2f2eedeecaf5ff977e9 (diff) | |
parent | e338eda79fecc15554e99a4c234e01eaadf96332 (diff) |
Merge pull request #27427 from vespa-engine/toregge/add-array-store-dynamic-type-mapper
Add ArrayStoreDynamicTypeMapper.
9 files changed, 330 insertions, 3 deletions
diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt index b753dd8e77a..d2892ee4429 100644 --- a/vespalib/CMakeLists.txt +++ b/vespalib/CMakeLists.txt @@ -65,6 +65,7 @@ vespa_define_module( src/tests/data/smart_buffer src/tests/datastore/array_store src/tests/datastore/array_store_config + src/tests/datastore/array_store_dynamic_type_mapper src/tests/datastore/buffer_stats src/tests/datastore/buffer_type src/tests/datastore/compact_buffer_candidates diff --git a/vespalib/src/tests/datastore/array_store_dynamic_type_mapper/CMakeLists.txt b/vespalib/src/tests/datastore/array_store_dynamic_type_mapper/CMakeLists.txt new file mode 100644 index 00000000000..d6b474a526a --- /dev/null +++ b/vespalib/src/tests/datastore/array_store_dynamic_type_mapper/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_array_store_dynamic_type_mapper_test_app TEST + SOURCES + array_store_dynamic_type_mapper_test.cpp + DEPENDS + vespalib + GTest::GTest +) +vespa_add_test(NAME vespalib_array_store_dynamic_type_mapper_test_app COMMAND vespalib_array_store_dynamic_type_mapper_test_app) diff --git a/vespalib/src/tests/datastore/array_store_dynamic_type_mapper/array_store_dynamic_type_mapper_test.cpp b/vespalib/src/tests/datastore/array_store_dynamic_type_mapper/array_store_dynamic_type_mapper_test.cpp new file mode 100644 index 00000000000..86b80aaa695 --- /dev/null +++ b/vespalib/src/tests/datastore/array_store_dynamic_type_mapper/array_store_dynamic_type_mapper_test.cpp @@ -0,0 +1,169 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/datastore/array_store_dynamic_type_mapper.h> +#include <vespa/vespalib/gtest/gtest.h> + +using vespalib::datastore::ArrayStoreDynamicTypeMapper; + +constexpr double default_grow_factor = 1.03; + +template <typename ElemT> +class TestBase : public testing::Test +{ +protected: + ArrayStoreDynamicTypeMapper<ElemT> _mapper; + TestBase(); + ~TestBase() override; + std::vector<size_t> get_array_sizes(uint32_t num_array_sizes); + std::vector<size_t> get_entry_sizes(uint32_t num_entry_sizes); + std::vector<size_t> get_large_array_sizes(uint32_t num_large_arrays); + void select_type_ids(std::vector<size_t> array_sizes); + void setup_mapper(uint32_t max_buffer_type_id, double grow_factor); + static uint32_t calc_max_buffer_type_id(double grow_factor); +}; + +template <typename ElemT> +TestBase<ElemT>::TestBase() + : testing::Test(), + _mapper(5, default_grow_factor) +{ +} + +template <typename ElemT> +TestBase<ElemT>::~TestBase() = default; + +template <typename ElemT> +void +TestBase<ElemT>::setup_mapper(uint32_t max_buffer_type_id, double grow_factor) +{ + _mapper = ArrayStoreDynamicTypeMapper<ElemT>(max_buffer_type_id, grow_factor); +} + +template <typename ElemT> +std::vector<size_t> +TestBase<ElemT>::get_array_sizes(uint32_t num_array_sizes) +{ + std::vector<size_t> array_sizes; + for (uint32_t type_id = 1; type_id <= num_array_sizes; ++type_id) { + array_sizes.emplace_back(_mapper.get_array_size(type_id)); + } + return array_sizes; +} + +template <typename ElemT> +std::vector<size_t> +TestBase<ElemT>::get_entry_sizes(uint32_t num_entry_sizes) +{ + std::vector<size_t> entry_sizes; + for (uint32_t type_id = 1; type_id <= num_entry_sizes; ++type_id) { + entry_sizes.emplace_back(_mapper.get_entry_size(type_id)); + } + return entry_sizes; +} + +template <typename ElemT> +std::vector<size_t> +TestBase<ElemT>::get_large_array_sizes(uint32_t num_large_array_sizes) +{ + setup_mapper(num_large_array_sizes * 100, default_grow_factor); + std::vector<size_t> result; + for (uint32_t i = 0; i < num_large_array_sizes; ++i) { + uint32_t type_id = (i + 1) * 100; + auto array_size = _mapper.get_array_size(type_id); + result.emplace_back(array_size); + EXPECT_EQ(type_id, _mapper.get_type_id(array_size)); + EXPECT_EQ(type_id, _mapper.get_type_id(array_size - 1)); + if (i + 1 == num_large_array_sizes) { + EXPECT_EQ(0u, _mapper.get_type_id(array_size + 1)); + } else { + EXPECT_EQ(type_id + 1, _mapper.get_type_id(array_size + 1)); + } + } + return result; +} + +template <typename ElemT> +void +TestBase<ElemT>::select_type_ids(std::vector<size_t> array_sizes) +{ + uint32_t type_id = 0; + std::optional<size_t> prev_array_size; + for (auto array_size : array_sizes) { + ++type_id; + EXPECT_EQ(type_id, _mapper.get_type_id(array_size)); + if (!prev_array_size.has_value() || prev_array_size.value() < array_size - 1) { + EXPECT_EQ(type_id, _mapper.get_type_id(array_size - 1)); + } else { + EXPECT_EQ(type_id - 1, _mapper.get_type_id(array_size - 1)); + } + prev_array_size = array_size; + if (array_size == array_sizes.back()) { + // Fallback to indirect storage, using type id 0 + EXPECT_EQ(0u, _mapper.get_type_id(array_size + 1)); + } else { + EXPECT_EQ(type_id + 1, _mapper.get_type_id(array_size + 1)); + } + } +} + +template <typename ElemT> +uint32_t +TestBase<ElemT>::calc_max_buffer_type_id(double grow_factor) +{ + ArrayStoreDynamicTypeMapper<ElemT> mapper(1000, grow_factor); + return mapper.get_max_small_array_type_id(1000); +} + +using ArrayStoreDynamicTypeMapperCharTest = TestBase<char>; + +TEST_F(ArrayStoreDynamicTypeMapperCharTest, array_sizes_are_calculated) +{ + EXPECT_EQ((std::vector<size_t>{1, 2, 3, 4, 5}), get_array_sizes(5)); + EXPECT_EQ((std::vector<size_t>{1, 2, 3, 4, 5}), get_entry_sizes(5)); + setup_mapper(10, 1.4); + EXPECT_EQ((std::vector<size_t>{1, 2, 3, 4, 5, 8, 12, 16, 24, 36}), get_array_sizes(10)); + EXPECT_EQ((std::vector<size_t>{1, 2, 3, 4, 5, 12, 16, 20, 28, 40}), get_entry_sizes(10)); +} + +TEST_F(ArrayStoreDynamicTypeMapperCharTest, type_ids_are_selected) +{ + select_type_ids({1, 2, 3, 4, 5}); + setup_mapper(10, 1.4); + select_type_ids({1, 2, 3, 4, 5, 8, 12, 16, 24, 36}); +} + +TEST_F(ArrayStoreDynamicTypeMapperCharTest, large_arrays_grows_exponentially) +{ + EXPECT_EQ((std::vector<size_t>{232, 5024, 97100, 1866776}), get_large_array_sizes(4)); +} + +TEST_F(ArrayStoreDynamicTypeMapperCharTest, avoid_entry_size_overflow) +{ + EXPECT_EQ(32, calc_max_buffer_type_id(2.0)); + EXPECT_EQ(410, calc_max_buffer_type_id(1.05)); + EXPECT_EQ(507, calc_max_buffer_type_id(1.04)); + EXPECT_EQ(661, calc_max_buffer_type_id(1.03)); + EXPECT_EQ(968, calc_max_buffer_type_id(1.02)); +} + +using ArrayStoreDynamicTypeMapperInt32Test = TestBase<int32_t>; + +TEST_F(ArrayStoreDynamicTypeMapperInt32Test, array_sizes_are_calculated) +{ + EXPECT_EQ((std::vector<size_t>{1, 2, 3, 4, 5}), get_array_sizes(5)); + EXPECT_EQ((std::vector<size_t>{4, 8, 12, 16, 20}), get_entry_sizes(5)); + setup_mapper(10, 1.4); + EXPECT_EQ((std::vector<size_t>{1, 2, 3, 4, 5, 7, 9, 12, 16, 22}), get_array_sizes(10)); + EXPECT_EQ((std::vector<size_t>{4, 8, 12, 16, 20, 32, 40, 52, 68, 92}), get_entry_sizes(10)); +} + +TEST_F(ArrayStoreDynamicTypeMapperInt32Test, avoid_entry_size_overflow) +{ + EXPECT_EQ(30, calc_max_buffer_type_id(2.0)); + EXPECT_EQ(395, calc_max_buffer_type_id(1.05)); + EXPECT_EQ(487, calc_max_buffer_type_id(1.04)); + EXPECT_EQ(636, calc_max_buffer_type_id(1.03)); + EXPECT_EQ(930, calc_max_buffer_type_id(1.02)); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt b/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt index 8b76c576cf0..38e5cb40abe 100644 --- a/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt +++ b/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt @@ -3,6 +3,7 @@ vespa_add_library(vespalib_vespalib_datastore OBJECT SOURCES array_store.cpp array_store_config.cpp + array_store_dynamic_type_mapper.cpp array_store_type_mapper.cpp atomic_entry_ref.cpp buffer_free_list.cpp diff --git a/vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.cpp b/vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.cpp new file mode 100644 index 00000000000..106a1f037d7 --- /dev/null +++ b/vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.cpp @@ -0,0 +1,16 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "array_store_dynamic_type_mapper.hpp" + +namespace vespalib::datastore { + +template class ArrayStoreDynamicTypeMapper<char>; +template class ArrayStoreDynamicTypeMapper<int8_t>; +template class ArrayStoreDynamicTypeMapper<int16_t>; +template class ArrayStoreDynamicTypeMapper<int32_t>; +template class ArrayStoreDynamicTypeMapper<int64_t>; +template class ArrayStoreDynamicTypeMapper<float>; +template class ArrayStoreDynamicTypeMapper<double>; +template class ArrayStoreDynamicTypeMapper<AtomicEntryRef>; + +} diff --git a/vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.h b/vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.h new file mode 100644 index 00000000000..e02f57eaea3 --- /dev/null +++ b/vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.h @@ -0,0 +1,53 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "array_store_type_mapper.h" +#include "atomic_entry_ref.h" + +namespace vespalib::datastore { + +template <typename EntryT> class SmallArrayBufferType; +template <typename EntryT> class DynamicArrayBufferType; +template <typename EntryT> class LargeArrayBufferType; + +/* + * This class provides mapping between type ids and array sizes needed for + * storing values. + * + * Type ids [1;max_static_array_buffer_type_id] use SmallBufferType, + * containing small arrays where buffer type specifies array size. + * + * Type ids [max_static_array_buffer_type_id+1;max_buffer_type_id] use + * DynamicBufferType, containing medium sized arrays where the same + * buffer type handles a range of array sizes and actual array size is + * also stored in the entry. + * + * Type id 0 uses LargeBufferType, which handles any array size but uses + * heap allocation. + */ +template <typename ElemT> +class ArrayStoreDynamicTypeMapper : public vespalib::datastore::ArrayStoreTypeMapper +{ + uint32_t _max_static_array_buffer_type_id; +public: + using SmallBufferType = vespalib::datastore::SmallArrayBufferType<ElemT>; + using DynamicBufferType = vespalib::datastore::DynamicArrayBufferType<ElemT>; + using LargeBufferType = vespalib::datastore::LargeArrayBufferType<ElemT>; + + ArrayStoreDynamicTypeMapper(uint32_t max_buffer_type_id, double grow_factor); + ~ArrayStoreDynamicTypeMapper(); + void setup_array_sizes(uint32_t max_buffer_type_id, double grow_factor); + size_t get_entry_size(uint32_t type_id) const; +}; + +extern template class ArrayStoreDynamicTypeMapper<char>; +extern template class ArrayStoreDynamicTypeMapper<int8_t>; +extern template class ArrayStoreDynamicTypeMapper<int16_t>; +extern template class ArrayStoreDynamicTypeMapper<int32_t>; +extern template class ArrayStoreDynamicTypeMapper<int64_t>; +extern template class ArrayStoreDynamicTypeMapper<float>; +extern template class ArrayStoreDynamicTypeMapper<double>; +extern template class ArrayStoreDynamicTypeMapper<AtomicEntryRef>; + +} diff --git a/vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.hpp b/vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.hpp new file mode 100644 index 00000000000..f529ecccb46 --- /dev/null +++ b/vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.hpp @@ -0,0 +1,70 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "array_store_dynamic_type_mapper.h" +#include "dynamic_array_buffer_type.h" +#include "aligner.h" +#include <algorithm> +#include <cmath> +#include <limits> +#include <optional> + +namespace vespalib::datastore { + +template <typename ElemT> +ArrayStoreDynamicTypeMapper<ElemT>::ArrayStoreDynamicTypeMapper(uint32_t max_buffer_type_id, double grow_factor) + : ArrayStoreTypeMapper(), + _max_static_array_buffer_type_id(0) +{ + setup_array_sizes(max_buffer_type_id, grow_factor); +} + +template <typename ElemT> +void +ArrayStoreDynamicTypeMapper<ElemT>::setup_array_sizes(uint32_t max_buffer_type_id, double grow_factor) +{ + _array_sizes.clear(); + _array_sizes.reserve(max_buffer_type_id + 1); + _array_sizes.emplace_back(0); // type id 0 is fallback for large arrays + size_t array_size = 1u; + size_t entry_size = sizeof(ElemT); + bool dynamic_arrays = false; + for (uint32_t type_id = 1; type_id <= max_buffer_type_id; ++type_id) { + if (type_id > 1) { + array_size = std::max(array_size + 1, static_cast<size_t>(std::floor(array_size * grow_factor))); + if (array_size > _array_sizes.back() + 1 || dynamic_arrays) { + if (!dynamic_arrays) { + _max_static_array_buffer_type_id = type_id - 1; + dynamic_arrays = true; + } + entry_size = DynamicBufferType::calc_entry_size(array_size); + array_size = DynamicBufferType::calc_array_size(entry_size); + } else { + entry_size = array_size * sizeof(ElemT); + } + } + if (entry_size > std::numeric_limits<uint32_t>::max()) { + break; + } + _array_sizes.emplace_back(array_size); + } + if (!dynamic_arrays) { + _max_static_array_buffer_type_id = _array_sizes.size() - 1; + } +} + +template <typename ElemT> +ArrayStoreDynamicTypeMapper<ElemT>::~ArrayStoreDynamicTypeMapper() = default; + +template <typename ElemT> +size_t +ArrayStoreDynamicTypeMapper<ElemT>::get_entry_size(uint32_t type_id) const +{ + auto array_size = get_array_size(type_id); + if (type_id <= _max_static_array_buffer_type_id) { + return array_size * sizeof(ElemT); + } else { + return DynamicBufferType::calc_entry_size(array_size); + } +} + +} diff --git a/vespalib/src/vespa/vespalib/datastore/dynamic_array_buffer_type.h b/vespalib/src/vespa/vespalib/datastore/dynamic_array_buffer_type.h index 7e2c70b2374..e314accd664 100644 --- a/vespalib/src/vespa/vespalib/datastore/dynamic_array_buffer_type.h +++ b/vespalib/src/vespa/vespalib/datastore/dynamic_array_buffer_type.h @@ -38,7 +38,8 @@ public: void fallback_copy(void* new_buffer, const void* old_buffer, EntryCount num_entries) override; void initialize_reserved_entries(void* buffer, EntryCount reserved_entries) override; void clean_hold(void* buffer, size_t offset, EntryCount num_entries, CleanContext cleanCxt) override; - static uint32_t calc_entry_size(uint32_t array_size) noexcept; + static size_t calc_entry_size(size_t array_size) noexcept; + static size_t calc_array_size(size_t entry_size) noexcept; static ElemType* get_entry(void* buffer, size_t offset, uint32_t entry_size) noexcept { return reinterpret_cast<ElemType*>(static_cast<char*>(buffer) + offset * entry_size); } static const ElemType* get_entry(const void* buffer, size_t offset, uint32_t entry_size) noexcept { return reinterpret_cast<const ElemType*>(static_cast<const char*>(buffer) + offset * entry_size); } static uint32_t get_dynamic_array_size(const void *buffer, uint32_t entry_size) noexcept { return *reinterpret_cast<const uint32_t*>(static_cast<const char*>(buffer) + entry_size - sizeof(uint32_t)); } diff --git a/vespalib/src/vespa/vespalib/datastore/dynamic_array_buffer_type.hpp b/vespalib/src/vespa/vespalib/datastore/dynamic_array_buffer_type.hpp index 9085e32db6b..514267b0a85 100644 --- a/vespalib/src/vespa/vespalib/datastore/dynamic_array_buffer_type.hpp +++ b/vespalib/src/vespa/vespalib/datastore/dynamic_array_buffer_type.hpp @@ -19,14 +19,21 @@ template <typename ElemT> DynamicArrayBufferType<ElemT>::~DynamicArrayBufferType() = default; template <typename ElemT> -uint32_t -DynamicArrayBufferType<ElemT>::calc_entry_size(uint32_t array_size) noexcept +size_t +DynamicArrayBufferType<ElemT>::calc_entry_size(size_t array_size) noexcept { Aligner aligner(std::max(alignof(uint32_t), alignof(ElemType))); return aligner.align(sizeof(ElemType) * array_size + sizeof(uint32_t)); } template <typename ElemT> +size_t +DynamicArrayBufferType<ElemT>::calc_array_size(size_t entry_size) noexcept +{ + return (entry_size - sizeof(uint32_t)) / sizeof(ElemType); +} + +template <typename ElemT> void DynamicArrayBufferType<ElemT>::destroy_entries(void* buffer, EntryCount num_entries) { |