aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2023-06-14 17:58:42 +0200
committerGitHub <noreply@github.com>2023-06-14 17:58:42 +0200
commit056bf0398c118bde7773d8e7e4d122112be606e5 (patch)
tree6a5bb486868d04d129ded61fee5f0e9a21a254f1
parentdc4ef240e27d270fe685e2f2eedeecaf5ff977e9 (diff)
parente338eda79fecc15554e99a4c234e01eaadf96332 (diff)
Merge pull request #27427 from vespa-engine/toregge/add-array-store-dynamic-type-mapper
Add ArrayStoreDynamicTypeMapper.
-rw-r--r--vespalib/CMakeLists.txt1
-rw-r--r--vespalib/src/tests/datastore/array_store_dynamic_type_mapper/CMakeLists.txt9
-rw-r--r--vespalib/src/tests/datastore/array_store_dynamic_type_mapper/array_store_dynamic_type_mapper_test.cpp169
-rw-r--r--vespalib/src/vespa/vespalib/datastore/CMakeLists.txt1
-rw-r--r--vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.cpp16
-rw-r--r--vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.h53
-rw-r--r--vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.hpp70
-rw-r--r--vespalib/src/vespa/vespalib/datastore/dynamic_array_buffer_type.h3
-rw-r--r--vespalib/src/vespa/vespalib/datastore/dynamic_array_buffer_type.hpp11
9 files changed, 330 insertions, 3 deletions
diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt
index b753dd8e77a..d2892ee4429 100644
--- a/vespalib/CMakeLists.txt
+++ b/vespalib/CMakeLists.txt
@@ -65,6 +65,7 @@ vespa_define_module(
src/tests/data/smart_buffer
src/tests/datastore/array_store
src/tests/datastore/array_store_config
+ src/tests/datastore/array_store_dynamic_type_mapper
src/tests/datastore/buffer_stats
src/tests/datastore/buffer_type
src/tests/datastore/compact_buffer_candidates
diff --git a/vespalib/src/tests/datastore/array_store_dynamic_type_mapper/CMakeLists.txt b/vespalib/src/tests/datastore/array_store_dynamic_type_mapper/CMakeLists.txt
new file mode 100644
index 00000000000..d6b474a526a
--- /dev/null
+++ b/vespalib/src/tests/datastore/array_store_dynamic_type_mapper/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vespalib_array_store_dynamic_type_mapper_test_app TEST
+ SOURCES
+ array_store_dynamic_type_mapper_test.cpp
+ DEPENDS
+ vespalib
+ GTest::GTest
+)
+vespa_add_test(NAME vespalib_array_store_dynamic_type_mapper_test_app COMMAND vespalib_array_store_dynamic_type_mapper_test_app)
diff --git a/vespalib/src/tests/datastore/array_store_dynamic_type_mapper/array_store_dynamic_type_mapper_test.cpp b/vespalib/src/tests/datastore/array_store_dynamic_type_mapper/array_store_dynamic_type_mapper_test.cpp
new file mode 100644
index 00000000000..86b80aaa695
--- /dev/null
+++ b/vespalib/src/tests/datastore/array_store_dynamic_type_mapper/array_store_dynamic_type_mapper_test.cpp
@@ -0,0 +1,169 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/datastore/array_store_dynamic_type_mapper.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using vespalib::datastore::ArrayStoreDynamicTypeMapper;
+
+constexpr double default_grow_factor = 1.03;
+
+template <typename ElemT>
+class TestBase : public testing::Test
+{
+protected:
+ ArrayStoreDynamicTypeMapper<ElemT> _mapper;
+ TestBase();
+ ~TestBase() override;
+ std::vector<size_t> get_array_sizes(uint32_t num_array_sizes);
+ std::vector<size_t> get_entry_sizes(uint32_t num_entry_sizes);
+ std::vector<size_t> get_large_array_sizes(uint32_t num_large_arrays);
+ void select_type_ids(std::vector<size_t> array_sizes);
+ void setup_mapper(uint32_t max_buffer_type_id, double grow_factor);
+ static uint32_t calc_max_buffer_type_id(double grow_factor);
+};
+
+template <typename ElemT>
+TestBase<ElemT>::TestBase()
+ : testing::Test(),
+ _mapper(5, default_grow_factor)
+{
+}
+
+template <typename ElemT>
+TestBase<ElemT>::~TestBase() = default;
+
+template <typename ElemT>
+void
+TestBase<ElemT>::setup_mapper(uint32_t max_buffer_type_id, double grow_factor)
+{
+ _mapper = ArrayStoreDynamicTypeMapper<ElemT>(max_buffer_type_id, grow_factor);
+}
+
+template <typename ElemT>
+std::vector<size_t>
+TestBase<ElemT>::get_array_sizes(uint32_t num_array_sizes)
+{
+ std::vector<size_t> array_sizes;
+ for (uint32_t type_id = 1; type_id <= num_array_sizes; ++type_id) {
+ array_sizes.emplace_back(_mapper.get_array_size(type_id));
+ }
+ return array_sizes;
+}
+
+template <typename ElemT>
+std::vector<size_t>
+TestBase<ElemT>::get_entry_sizes(uint32_t num_entry_sizes)
+{
+ std::vector<size_t> entry_sizes;
+ for (uint32_t type_id = 1; type_id <= num_entry_sizes; ++type_id) {
+ entry_sizes.emplace_back(_mapper.get_entry_size(type_id));
+ }
+ return entry_sizes;
+}
+
+template <typename ElemT>
+std::vector<size_t>
+TestBase<ElemT>::get_large_array_sizes(uint32_t num_large_array_sizes)
+{
+ setup_mapper(num_large_array_sizes * 100, default_grow_factor);
+ std::vector<size_t> result;
+ for (uint32_t i = 0; i < num_large_array_sizes; ++i) {
+ uint32_t type_id = (i + 1) * 100;
+ auto array_size = _mapper.get_array_size(type_id);
+ result.emplace_back(array_size);
+ EXPECT_EQ(type_id, _mapper.get_type_id(array_size));
+ EXPECT_EQ(type_id, _mapper.get_type_id(array_size - 1));
+ if (i + 1 == num_large_array_sizes) {
+ EXPECT_EQ(0u, _mapper.get_type_id(array_size + 1));
+ } else {
+ EXPECT_EQ(type_id + 1, _mapper.get_type_id(array_size + 1));
+ }
+ }
+ return result;
+}
+
+template <typename ElemT>
+void
+TestBase<ElemT>::select_type_ids(std::vector<size_t> array_sizes)
+{
+ uint32_t type_id = 0;
+ std::optional<size_t> prev_array_size;
+ for (auto array_size : array_sizes) {
+ ++type_id;
+ EXPECT_EQ(type_id, _mapper.get_type_id(array_size));
+ if (!prev_array_size.has_value() || prev_array_size.value() < array_size - 1) {
+ EXPECT_EQ(type_id, _mapper.get_type_id(array_size - 1));
+ } else {
+ EXPECT_EQ(type_id - 1, _mapper.get_type_id(array_size - 1));
+ }
+ prev_array_size = array_size;
+ if (array_size == array_sizes.back()) {
+ // Fallback to indirect storage, using type id 0
+ EXPECT_EQ(0u, _mapper.get_type_id(array_size + 1));
+ } else {
+ EXPECT_EQ(type_id + 1, _mapper.get_type_id(array_size + 1));
+ }
+ }
+}
+
+template <typename ElemT>
+uint32_t
+TestBase<ElemT>::calc_max_buffer_type_id(double grow_factor)
+{
+ ArrayStoreDynamicTypeMapper<ElemT> mapper(1000, grow_factor);
+ return mapper.get_max_small_array_type_id(1000);
+}
+
+using ArrayStoreDynamicTypeMapperCharTest = TestBase<char>;
+
+TEST_F(ArrayStoreDynamicTypeMapperCharTest, array_sizes_are_calculated)
+{
+ EXPECT_EQ((std::vector<size_t>{1, 2, 3, 4, 5}), get_array_sizes(5));
+ EXPECT_EQ((std::vector<size_t>{1, 2, 3, 4, 5}), get_entry_sizes(5));
+ setup_mapper(10, 1.4);
+ EXPECT_EQ((std::vector<size_t>{1, 2, 3, 4, 5, 8, 12, 16, 24, 36}), get_array_sizes(10));
+ EXPECT_EQ((std::vector<size_t>{1, 2, 3, 4, 5, 12, 16, 20, 28, 40}), get_entry_sizes(10));
+}
+
+TEST_F(ArrayStoreDynamicTypeMapperCharTest, type_ids_are_selected)
+{
+ select_type_ids({1, 2, 3, 4, 5});
+ setup_mapper(10, 1.4);
+ select_type_ids({1, 2, 3, 4, 5, 8, 12, 16, 24, 36});
+}
+
+TEST_F(ArrayStoreDynamicTypeMapperCharTest, large_arrays_grows_exponentially)
+{
+ EXPECT_EQ((std::vector<size_t>{232, 5024, 97100, 1866776}), get_large_array_sizes(4));
+}
+
+TEST_F(ArrayStoreDynamicTypeMapperCharTest, avoid_entry_size_overflow)
+{
+ EXPECT_EQ(32, calc_max_buffer_type_id(2.0));
+ EXPECT_EQ(410, calc_max_buffer_type_id(1.05));
+ EXPECT_EQ(507, calc_max_buffer_type_id(1.04));
+ EXPECT_EQ(661, calc_max_buffer_type_id(1.03));
+ EXPECT_EQ(968, calc_max_buffer_type_id(1.02));
+}
+
+using ArrayStoreDynamicTypeMapperInt32Test = TestBase<int32_t>;
+
+TEST_F(ArrayStoreDynamicTypeMapperInt32Test, array_sizes_are_calculated)
+{
+ EXPECT_EQ((std::vector<size_t>{1, 2, 3, 4, 5}), get_array_sizes(5));
+ EXPECT_EQ((std::vector<size_t>{4, 8, 12, 16, 20}), get_entry_sizes(5));
+ setup_mapper(10, 1.4);
+ EXPECT_EQ((std::vector<size_t>{1, 2, 3, 4, 5, 7, 9, 12, 16, 22}), get_array_sizes(10));
+ EXPECT_EQ((std::vector<size_t>{4, 8, 12, 16, 20, 32, 40, 52, 68, 92}), get_entry_sizes(10));
+}
+
+TEST_F(ArrayStoreDynamicTypeMapperInt32Test, avoid_entry_size_overflow)
+{
+ EXPECT_EQ(30, calc_max_buffer_type_id(2.0));
+ EXPECT_EQ(395, calc_max_buffer_type_id(1.05));
+ EXPECT_EQ(487, calc_max_buffer_type_id(1.04));
+ EXPECT_EQ(636, calc_max_buffer_type_id(1.03));
+ EXPECT_EQ(930, calc_max_buffer_type_id(1.02));
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt b/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt
index 8b76c576cf0..38e5cb40abe 100644
--- a/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt
+++ b/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt
@@ -3,6 +3,7 @@ vespa_add_library(vespalib_vespalib_datastore OBJECT
SOURCES
array_store.cpp
array_store_config.cpp
+ array_store_dynamic_type_mapper.cpp
array_store_type_mapper.cpp
atomic_entry_ref.cpp
buffer_free_list.cpp
diff --git a/vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.cpp b/vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.cpp
new file mode 100644
index 00000000000..106a1f037d7
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.cpp
@@ -0,0 +1,16 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "array_store_dynamic_type_mapper.hpp"
+
+namespace vespalib::datastore {
+
+template class ArrayStoreDynamicTypeMapper<char>;
+template class ArrayStoreDynamicTypeMapper<int8_t>;
+template class ArrayStoreDynamicTypeMapper<int16_t>;
+template class ArrayStoreDynamicTypeMapper<int32_t>;
+template class ArrayStoreDynamicTypeMapper<int64_t>;
+template class ArrayStoreDynamicTypeMapper<float>;
+template class ArrayStoreDynamicTypeMapper<double>;
+template class ArrayStoreDynamicTypeMapper<AtomicEntryRef>;
+
+}
diff --git a/vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.h b/vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.h
new file mode 100644
index 00000000000..e02f57eaea3
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.h
@@ -0,0 +1,53 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "array_store_type_mapper.h"
+#include "atomic_entry_ref.h"
+
+namespace vespalib::datastore {
+
+template <typename EntryT> class SmallArrayBufferType;
+template <typename EntryT> class DynamicArrayBufferType;
+template <typename EntryT> class LargeArrayBufferType;
+
+/*
+ * This class provides mapping between type ids and array sizes needed for
+ * storing values.
+ *
+ * Type ids [1;max_static_array_buffer_type_id] use SmallBufferType,
+ * containing small arrays where buffer type specifies array size.
+ *
+ * Type ids [max_static_array_buffer_type_id+1;max_buffer_type_id] use
+ * DynamicBufferType, containing medium sized arrays where the same
+ * buffer type handles a range of array sizes and actual array size is
+ * also stored in the entry.
+ *
+ * Type id 0 uses LargeBufferType, which handles any array size but uses
+ * heap allocation.
+ */
+template <typename ElemT>
+class ArrayStoreDynamicTypeMapper : public vespalib::datastore::ArrayStoreTypeMapper
+{
+ uint32_t _max_static_array_buffer_type_id;
+public:
+ using SmallBufferType = vespalib::datastore::SmallArrayBufferType<ElemT>;
+ using DynamicBufferType = vespalib::datastore::DynamicArrayBufferType<ElemT>;
+ using LargeBufferType = vespalib::datastore::LargeArrayBufferType<ElemT>;
+
+ ArrayStoreDynamicTypeMapper(uint32_t max_buffer_type_id, double grow_factor);
+ ~ArrayStoreDynamicTypeMapper();
+ void setup_array_sizes(uint32_t max_buffer_type_id, double grow_factor);
+ size_t get_entry_size(uint32_t type_id) const;
+};
+
+extern template class ArrayStoreDynamicTypeMapper<char>;
+extern template class ArrayStoreDynamicTypeMapper<int8_t>;
+extern template class ArrayStoreDynamicTypeMapper<int16_t>;
+extern template class ArrayStoreDynamicTypeMapper<int32_t>;
+extern template class ArrayStoreDynamicTypeMapper<int64_t>;
+extern template class ArrayStoreDynamicTypeMapper<float>;
+extern template class ArrayStoreDynamicTypeMapper<double>;
+extern template class ArrayStoreDynamicTypeMapper<AtomicEntryRef>;
+
+}
diff --git a/vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.hpp b/vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.hpp
new file mode 100644
index 00000000000..f529ecccb46
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/datastore/array_store_dynamic_type_mapper.hpp
@@ -0,0 +1,70 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "array_store_dynamic_type_mapper.h"
+#include "dynamic_array_buffer_type.h"
+#include "aligner.h"
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <optional>
+
+namespace vespalib::datastore {
+
+template <typename ElemT>
+ArrayStoreDynamicTypeMapper<ElemT>::ArrayStoreDynamicTypeMapper(uint32_t max_buffer_type_id, double grow_factor)
+ : ArrayStoreTypeMapper(),
+ _max_static_array_buffer_type_id(0)
+{
+ setup_array_sizes(max_buffer_type_id, grow_factor);
+}
+
+template <typename ElemT>
+void
+ArrayStoreDynamicTypeMapper<ElemT>::setup_array_sizes(uint32_t max_buffer_type_id, double grow_factor)
+{
+ _array_sizes.clear();
+ _array_sizes.reserve(max_buffer_type_id + 1);
+ _array_sizes.emplace_back(0); // type id 0 is fallback for large arrays
+ size_t array_size = 1u;
+ size_t entry_size = sizeof(ElemT);
+ bool dynamic_arrays = false;
+ for (uint32_t type_id = 1; type_id <= max_buffer_type_id; ++type_id) {
+ if (type_id > 1) {
+ array_size = std::max(array_size + 1, static_cast<size_t>(std::floor(array_size * grow_factor)));
+ if (array_size > _array_sizes.back() + 1 || dynamic_arrays) {
+ if (!dynamic_arrays) {
+ _max_static_array_buffer_type_id = type_id - 1;
+ dynamic_arrays = true;
+ }
+ entry_size = DynamicBufferType::calc_entry_size(array_size);
+ array_size = DynamicBufferType::calc_array_size(entry_size);
+ } else {
+ entry_size = array_size * sizeof(ElemT);
+ }
+ }
+ if (entry_size > std::numeric_limits<uint32_t>::max()) {
+ break;
+ }
+ _array_sizes.emplace_back(array_size);
+ }
+ if (!dynamic_arrays) {
+ _max_static_array_buffer_type_id = _array_sizes.size() - 1;
+ }
+}
+
+template <typename ElemT>
+ArrayStoreDynamicTypeMapper<ElemT>::~ArrayStoreDynamicTypeMapper() = default;
+
+template <typename ElemT>
+size_t
+ArrayStoreDynamicTypeMapper<ElemT>::get_entry_size(uint32_t type_id) const
+{
+ auto array_size = get_array_size(type_id);
+ if (type_id <= _max_static_array_buffer_type_id) {
+ return array_size * sizeof(ElemT);
+ } else {
+ return DynamicBufferType::calc_entry_size(array_size);
+ }
+}
+
+}
diff --git a/vespalib/src/vespa/vespalib/datastore/dynamic_array_buffer_type.h b/vespalib/src/vespa/vespalib/datastore/dynamic_array_buffer_type.h
index 7e2c70b2374..e314accd664 100644
--- a/vespalib/src/vespa/vespalib/datastore/dynamic_array_buffer_type.h
+++ b/vespalib/src/vespa/vespalib/datastore/dynamic_array_buffer_type.h
@@ -38,7 +38,8 @@ public:
void fallback_copy(void* new_buffer, const void* old_buffer, EntryCount num_entries) override;
void initialize_reserved_entries(void* buffer, EntryCount reserved_entries) override;
void clean_hold(void* buffer, size_t offset, EntryCount num_entries, CleanContext cleanCxt) override;
- static uint32_t calc_entry_size(uint32_t array_size) noexcept;
+ static size_t calc_entry_size(size_t array_size) noexcept;
+ static size_t calc_array_size(size_t entry_size) noexcept;
static ElemType* get_entry(void* buffer, size_t offset, uint32_t entry_size) noexcept { return reinterpret_cast<ElemType*>(static_cast<char*>(buffer) + offset * entry_size); }
static const ElemType* get_entry(const void* buffer, size_t offset, uint32_t entry_size) noexcept { return reinterpret_cast<const ElemType*>(static_cast<const char*>(buffer) + offset * entry_size); }
static uint32_t get_dynamic_array_size(const void *buffer, uint32_t entry_size) noexcept { return *reinterpret_cast<const uint32_t*>(static_cast<const char*>(buffer) + entry_size - sizeof(uint32_t)); }
diff --git a/vespalib/src/vespa/vespalib/datastore/dynamic_array_buffer_type.hpp b/vespalib/src/vespa/vespalib/datastore/dynamic_array_buffer_type.hpp
index 9085e32db6b..514267b0a85 100644
--- a/vespalib/src/vespa/vespalib/datastore/dynamic_array_buffer_type.hpp
+++ b/vespalib/src/vespa/vespalib/datastore/dynamic_array_buffer_type.hpp
@@ -19,14 +19,21 @@ template <typename ElemT>
DynamicArrayBufferType<ElemT>::~DynamicArrayBufferType() = default;
template <typename ElemT>
-uint32_t
-DynamicArrayBufferType<ElemT>::calc_entry_size(uint32_t array_size) noexcept
+size_t
+DynamicArrayBufferType<ElemT>::calc_entry_size(size_t array_size) noexcept
{
Aligner aligner(std::max(alignof(uint32_t), alignof(ElemType)));
return aligner.align(sizeof(ElemType) * array_size + sizeof(uint32_t));
}
template <typename ElemT>
+size_t
+DynamicArrayBufferType<ElemT>::calc_array_size(size_t entry_size) noexcept
+{
+ return (entry_size - sizeof(uint32_t)) / sizeof(ElemType);
+}
+
+template <typename ElemT>
void
DynamicArrayBufferType<ElemT>::destroy_entries(void* buffer, EntryCount num_entries)
{