diff options
author | Tor Egge <Tor.Egge@yahooinc.com> | 2023-06-19 11:57:15 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-19 11:57:15 +0200 |
commit | 840dd538108bf3f12475cebf8d7a6ae3a5d9183d (patch) | |
tree | 45f140f376efa7a44dc1e6051c9f29133e1289e0 | |
parent | 79d21be845c44c08369a81cefb32578006c692d7 (diff) | |
parent | 42b100597c5a7299a8e0cc5c93bc4402f79f6242 (diff) |
Merge pull request #27470 from vespa-engine/toregge/simplify-raw-buffer-store
Simplify RawBufferStore and remove RawBufferTypeMapper.
8 files changed, 11 insertions, 240 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 8959a2dd2e0..07045684d6e 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -92,7 +92,6 @@ vespa_define_module( src/tests/attribute/postinglist src/tests/attribute/postinglistattribute src/tests/attribute/raw_attribute - src/tests/attribute/raw_buffer_type_mapper src/tests/attribute/reference_attribute src/tests/attribute/save_target src/tests/attribute/searchable diff --git a/searchlib/src/tests/attribute/raw_buffer_type_mapper/CMakeLists.txt b/searchlib/src/tests/attribute/raw_buffer_type_mapper/CMakeLists.txt deleted file mode 100644 index c860770536d..00000000000 --- a/searchlib/src/tests/attribute/raw_buffer_type_mapper/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchlib_raw_buffer_type_mapper_test_app TEST - SOURCES - raw_buffer_type_mapper_test.cpp - DEPENDS - searchlib - GTest::GTest -) -vespa_add_test(NAME searchlib_raw_buffer_type_mapper_test_app COMMAND searchlib_raw_buffer_type_mapper_test_app) diff --git a/searchlib/src/tests/attribute/raw_buffer_type_mapper/raw_buffer_type_mapper_test.cpp b/searchlib/src/tests/attribute/raw_buffer_type_mapper/raw_buffer_type_mapper_test.cpp deleted file mode 100644 index 74ec839670e..00000000000 --- a/searchlib/src/tests/attribute/raw_buffer_type_mapper/raw_buffer_type_mapper_test.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/searchlib/attribute/raw_buffer_type_mapper.h> -#include <vespa/vespalib/gtest/gtest.h> - -using search::attribute::RawBufferTypeMapper; - -constexpr double default_grow_factor = 1.03; - -class RawBufferTypeMapperTest : public testing::Test -{ -protected: - RawBufferTypeMapper _mapper; - RawBufferTypeMapperTest(); - ~RawBufferTypeMapperTest() override; - std::vector<size_t> get_array_sizes(uint32_t num_array_sizes); - std::vector<size_t> get_large_array_sizes(uint32_t num_large_arrays); - void select_type_ids(std::vector<size_t> array_sizes); - void setup_mapper(uint32_t max_small_buffer_type_id, double grow_factor); - static uint32_t calc_max_small_array_type_id(double grow_factor); -}; - -RawBufferTypeMapperTest::RawBufferTypeMapperTest() - : testing::Test(), - _mapper(5, default_grow_factor) -{ -} - -RawBufferTypeMapperTest::~RawBufferTypeMapperTest() = default; - -void -RawBufferTypeMapperTest::setup_mapper(uint32_t max_small_buffer_type_id, double grow_factor) -{ - _mapper = RawBufferTypeMapper(max_small_buffer_type_id, grow_factor); -} - -std::vector<size_t> -RawBufferTypeMapperTest::get_array_sizes(uint32_t num_array_sizes) -{ - std::vector<size_t> array_sizes; - for (uint32_t type_id = 1; type_id <= num_array_sizes; ++type_id) { - array_sizes.emplace_back(_mapper.get_array_size(type_id)); - } - return array_sizes; -} - -std::vector<size_t> -RawBufferTypeMapperTest::get_large_array_sizes(uint32_t num_large_array_sizes) -{ - setup_mapper(num_large_array_sizes * 100, default_grow_factor); - std::vector<size_t> result; - for (uint32_t i = 0; i < num_large_array_sizes; ++i) { - uint32_t type_id = (i + 1) * 100; - auto array_size = _mapper.get_array_size(type_id); - result.emplace_back(array_size); - EXPECT_EQ(type_id, _mapper.get_type_id(array_size)); - EXPECT_EQ(type_id, _mapper.get_type_id(array_size - 1)); - if (i + 1 == num_large_array_sizes) { - EXPECT_EQ(0u, _mapper.get_type_id(array_size + 1)); - } else { - EXPECT_EQ(type_id + 1, _mapper.get_type_id(array_size + 1)); - } - } - return result; -} - -void -RawBufferTypeMapperTest::select_type_ids(std::vector<size_t> array_sizes) -{ - uint32_t type_id = 0; - for (auto array_size : array_sizes) { - ++type_id; - EXPECT_EQ(type_id, _mapper.get_type_id(array_size)); - EXPECT_EQ(type_id, _mapper.get_type_id(array_size - 1)); - if (array_size == array_sizes.back()) { - // Fallback to indirect storage, using type id 0 - EXPECT_EQ(0u, _mapper.get_type_id(array_size + 1)); - } else { - EXPECT_EQ(type_id + 1, _mapper.get_type_id(array_size + 1)); - } - } -} - -uint32_t -RawBufferTypeMapperTest::calc_max_small_array_type_id(double grow_factor) -{ - RawBufferTypeMapper mapper(1000, grow_factor); - return mapper.get_max_small_array_type_id(1000); -} - -TEST_F(RawBufferTypeMapperTest, array_sizes_are_calculated) -{ - EXPECT_EQ((std::vector<size_t>{8, 12, 16, 20, 24}), get_array_sizes(5)); -} - -TEST_F(RawBufferTypeMapperTest, type_ids_are_selected) -{ - select_type_ids({8, 12, 16, 20, 24}); -} - -TEST_F(RawBufferTypeMapperTest, large_arrays_grows_exponentially) -{ - EXPECT_EQ((std::vector<size_t>{1148, 22796, 438572, 8429384}), get_large_array_sizes(4)); -} - -TEST_F(RawBufferTypeMapperTest, avoid_array_size_overflow) -{ - EXPECT_EQ(29, calc_max_small_array_type_id(2.0)); - EXPECT_EQ(379, calc_max_small_array_type_id(1.05)); - EXPECT_EQ(468, calc_max_small_array_type_id(1.04)); - EXPECT_EQ(610, calc_max_small_array_type_id(1.03)); - EXPECT_EQ(892, calc_max_small_array_type_id(1.02)); -} - -GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index 6c1f4871161..896226f005d 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -111,7 +111,6 @@ vespa_add_library(searchlib_attribute OBJECT raw_buffer_store.cpp raw_buffer_store_reader.cpp raw_buffer_store_writer.cpp - raw_buffer_type_mapper.cpp raw_multi_value_read_view.cpp readerbase.cpp reference_attribute.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.cpp b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.cpp index 74894728ff4..cd9e0508344 100644 --- a/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.cpp +++ b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.cpp @@ -17,53 +17,20 @@ namespace search::attribute { RawBufferStore::RawBufferStore(std::shared_ptr<vespalib::alloc::MemoryAllocator> allocator, uint32_t max_small_buffer_type_id, double grow_factor) : _array_store(ArrayStoreType::optimizedConfigForHugePage(max_small_buffer_type_id, - RawBufferTypeMapper(max_small_buffer_type_id, grow_factor), + TypeMapper(max_small_buffer_type_id, grow_factor), MemoryAllocator::HUGEPAGE_SIZE, MemoryAllocator::PAGE_SIZE, 8_Ki, ALLOC_GROW_FACTOR), - std::move(allocator), RawBufferTypeMapper(max_small_buffer_type_id, grow_factor)) + std::move(allocator), TypeMapper(max_small_buffer_type_id, grow_factor)) { } RawBufferStore::~RawBufferStore() = default; -vespalib::ConstArrayRef<char> -RawBufferStore::get(EntryRef ref) const -{ - auto array = _array_store.get(ref); - uint32_t size = 0; - assert(array.size() >= sizeof(size)); - memcpy(&size, array.data(), sizeof(size)); - assert(array.size() >= sizeof(size) + size); - return {array.data() + sizeof(size), size}; } -EntryRef -RawBufferStore::set(vespalib::ConstArrayRef<char> raw) -{ - uint32_t size = raw.size(); - if (size == 0) { - return EntryRef(); - } - size_t buffer_size = raw.size() + sizeof(size); - auto& mapper = _array_store.get_mapper(); - auto type_id = mapper.get_type_id(buffer_size); - auto array_size = (type_id != 0) ? mapper.get_array_size(type_id) : buffer_size; - assert(array_size >= buffer_size); - auto ref = _array_store.allocate(array_size); - auto buf = _array_store.get_writable(ref); - memcpy(buf.data(), &size, sizeof(size)); - memcpy(buf.data() + sizeof(size), raw.data(), size); - if (array_size > buffer_size) { - memset(buf.data() + buffer_size, 0, array_size - buffer_size); - } - return ref; -} +namespace vespalib::datastore { -std::unique_ptr<vespalib::datastore::ICompactionContext> -RawBufferStore::start_compact(const vespalib::datastore::CompactionStrategy& compaction_strategy) -{ - return _array_store.compact_worst(compaction_strategy); -} +template class ArrayStore<char, EntryRefT<19>, ArrayStoreDynamicTypeMapper<char>>; } diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.h b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.h index bc3c189b329..a3f5b564846 100644 --- a/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.h +++ b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.h @@ -3,7 +3,8 @@ #pragma once #include <vespa/vespalib/datastore/array_store.h> -#include "raw_buffer_type_mapper.h" +#include <vespa/vespalib/datastore/array_store_dynamic_type_mapper.h> +#include <vespa/vespalib/datastore/dynamic_array_buffer_type.h> namespace search::attribute { @@ -15,20 +16,21 @@ class RawBufferStore { using EntryRef = vespalib::datastore::EntryRef; using RefType = vespalib::datastore::EntryRefT<19>; - using ArrayStoreType = vespalib::datastore::ArrayStore<char, RefType, RawBufferTypeMapper>; + using TypeMapper = vespalib::datastore::ArrayStoreDynamicTypeMapper<char>; + using ArrayStoreType = vespalib::datastore::ArrayStore<char, RefType, TypeMapper>; using generation_t = vespalib::GenerationHandler::generation_t; ArrayStoreType _array_store; public: RawBufferStore(std::shared_ptr<vespalib::alloc::MemoryAllocator> allocator, uint32_t max_small_buffer_type_id, double grow_factor); ~RawBufferStore(); - EntryRef set(vespalib::ConstArrayRef<char> raw); - vespalib::ConstArrayRef<char> get(EntryRef ref) const; + EntryRef set(vespalib::ConstArrayRef<char> raw) { return _array_store.add(raw); }; + vespalib::ConstArrayRef<char> get(EntryRef ref) const { return _array_store.get(ref); } void remove(EntryRef ref) { _array_store.remove(ref); } vespalib::MemoryUsage update_stat(const vespalib::datastore::CompactionStrategy& compaction_strategy) { return _array_store.update_stat(compaction_strategy); } vespalib::AddressSpace get_address_space_usage() const { return _array_store.addressSpaceUsage(); } bool consider_compact() const noexcept { return _array_store.consider_compact(); } - std::unique_ptr<vespalib::datastore::ICompactionContext> start_compact(const vespalib::datastore::CompactionStrategy& compaction_strategy); + std::unique_ptr<vespalib::datastore::ICompactionContext> start_compact(const vespalib::datastore::CompactionStrategy& compaction_strategy) { return _array_store.compact_worst(compaction_strategy); } void reclaim_memory(generation_t oldest_used_gen) { _array_store.reclaim_memory(oldest_used_gen); } void assign_generation(generation_t current_gen) { _array_store.assign_generation(current_gen); } void set_initializing(bool initializing) { _array_store.setInitializing(initializing); } diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.cpp b/searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.cpp deleted file mode 100644 index 29245fb403a..00000000000 --- a/searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.cpp +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "raw_buffer_type_mapper.h" -#include <vespa/vespalib/datastore/aligner.h> -#include <algorithm> -#include <cmath> -#include <limits> - -using vespalib::datastore::Aligner; -using vespalib::datastore::ArrayStoreTypeMapper; - -namespace search::attribute { - -RawBufferTypeMapper::RawBufferTypeMapper() - : ArrayStoreTypeMapper() -{ -} - -RawBufferTypeMapper::RawBufferTypeMapper(uint32_t max_small_buffer_type_id, double grow_factor) - : ArrayStoreTypeMapper() -{ - Aligner<4> aligner; - _array_sizes.reserve(max_small_buffer_type_id + 1); - _array_sizes.emplace_back(0); // type id 0 uses LargeArrayBufferType<char> - size_t array_size = 8u; - for (uint32_t type_id = 1; type_id <= max_small_buffer_type_id; ++type_id) { - if (type_id > 1) { - array_size = std::max(array_size + 4, static_cast<size_t>(std::floor(array_size * grow_factor))); - array_size = aligner.align(array_size); - } - if (array_size > std::numeric_limits<uint32_t>::max()) { - break; - } - _array_sizes.emplace_back(array_size); - } -} - -RawBufferTypeMapper::~RawBufferTypeMapper() = default; - -} diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.h b/searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.h deleted file mode 100644 index a2538fe51e5..00000000000 --- a/searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.h +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <vespa/vespalib/datastore/array_store_type_mapper.h> - -namespace vespalib::datastore { - -template <typename EntryT> class SmallArrayBufferType; -template <typename EntryT> class LargeArrayBufferType; - -} - -namespace search::attribute { - -/* - * This class provides mapping between type ids and array sizes needed for - * storing a raw value. - */ -class RawBufferTypeMapper : public vespalib::datastore::ArrayStoreTypeMapper -{ -public: - using SmallBufferType = vespalib::datastore::SmallArrayBufferType<char>; - using LargeBufferType = vespalib::datastore::LargeArrayBufferType<char>; - - RawBufferTypeMapper(); - RawBufferTypeMapper(uint32_t max_small_buffer_type_id, double grow_factor); - ~RawBufferTypeMapper(); - size_t get_entry_size(uint32_t type_id) const { return get_array_size(type_id); } -}; - -} |