diff options
author | Tor Egge <Tor.Egge@yahooinc.com> | 2023-03-07 16:24:53 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-03-07 16:24:53 +0100 |
commit | 0fbf49026d872bb604ed73b9c7037687f598a05f (patch) | |
tree | 96206a40875f8f3abd7a106a866732d47e13de1b | |
parent | 9b627ff454f85763f7a4c9c95b6ecec7a2ec6484 (diff) | |
parent | 981fdc08daf60b97575d400cf2e589ff5a004a34 (diff) |
Merge pull request #26342 from vespa-engine/toregge/add-raw-buffer-type-mapper
Add RawBufferTypeMapper.
8 files changed, 209 insertions, 3 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 07045684d6e..8959a2dd2e0 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -92,6 +92,7 @@ vespa_define_module( src/tests/attribute/postinglist src/tests/attribute/postinglistattribute src/tests/attribute/raw_attribute + src/tests/attribute/raw_buffer_type_mapper src/tests/attribute/reference_attribute src/tests/attribute/save_target src/tests/attribute/searchable diff --git a/searchlib/src/tests/attribute/raw_buffer_type_mapper/CMakeLists.txt b/searchlib/src/tests/attribute/raw_buffer_type_mapper/CMakeLists.txt new file mode 100644 index 00000000000..c860770536d --- /dev/null +++ b/searchlib/src/tests/attribute/raw_buffer_type_mapper/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_raw_buffer_type_mapper_test_app TEST + SOURCES + raw_buffer_type_mapper_test.cpp + DEPENDS + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_raw_buffer_type_mapper_test_app COMMAND searchlib_raw_buffer_type_mapper_test_app) diff --git a/searchlib/src/tests/attribute/raw_buffer_type_mapper/raw_buffer_type_mapper_test.cpp b/searchlib/src/tests/attribute/raw_buffer_type_mapper/raw_buffer_type_mapper_test.cpp new file mode 100644 index 00000000000..74ec839670e --- /dev/null +++ b/searchlib/src/tests/attribute/raw_buffer_type_mapper/raw_buffer_type_mapper_test.cpp @@ -0,0 +1,115 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/attribute/raw_buffer_type_mapper.h> +#include <vespa/vespalib/gtest/gtest.h> + +using search::attribute::RawBufferTypeMapper; + +constexpr double default_grow_factor = 1.03; + +class RawBufferTypeMapperTest : public testing::Test +{ +protected: + RawBufferTypeMapper _mapper; + RawBufferTypeMapperTest(); + ~RawBufferTypeMapperTest() override; + std::vector<size_t> get_array_sizes(uint32_t num_array_sizes); + std::vector<size_t> get_large_array_sizes(uint32_t num_large_arrays); + void select_type_ids(std::vector<size_t> array_sizes); + void setup_mapper(uint32_t max_small_buffer_type_id, double grow_factor); + static uint32_t calc_max_small_array_type_id(double grow_factor); +}; + +RawBufferTypeMapperTest::RawBufferTypeMapperTest() + : testing::Test(), + _mapper(5, default_grow_factor) +{ +} + +RawBufferTypeMapperTest::~RawBufferTypeMapperTest() = default; + +void +RawBufferTypeMapperTest::setup_mapper(uint32_t max_small_buffer_type_id, double grow_factor) +{ + _mapper = RawBufferTypeMapper(max_small_buffer_type_id, grow_factor); +} + +std::vector<size_t> +RawBufferTypeMapperTest::get_array_sizes(uint32_t num_array_sizes) +{ + std::vector<size_t> array_sizes; + for (uint32_t type_id = 1; type_id <= num_array_sizes; ++type_id) { + array_sizes.emplace_back(_mapper.get_array_size(type_id)); + } + return array_sizes; +} + +std::vector<size_t> +RawBufferTypeMapperTest::get_large_array_sizes(uint32_t num_large_array_sizes) +{ + setup_mapper(num_large_array_sizes * 100, default_grow_factor); + std::vector<size_t> result; + for (uint32_t i = 0; i < num_large_array_sizes; ++i) { + uint32_t type_id = (i + 1) * 100; + auto array_size = _mapper.get_array_size(type_id); + result.emplace_back(array_size); + EXPECT_EQ(type_id, _mapper.get_type_id(array_size)); + EXPECT_EQ(type_id, _mapper.get_type_id(array_size - 1)); + if (i + 1 == num_large_array_sizes) { + EXPECT_EQ(0u, _mapper.get_type_id(array_size + 1)); + } else { + EXPECT_EQ(type_id + 1, _mapper.get_type_id(array_size + 1)); + } + } + return result; +} + +void +RawBufferTypeMapperTest::select_type_ids(std::vector<size_t> array_sizes) +{ + uint32_t type_id = 0; + for (auto array_size : array_sizes) { + ++type_id; + EXPECT_EQ(type_id, _mapper.get_type_id(array_size)); + EXPECT_EQ(type_id, _mapper.get_type_id(array_size - 1)); + if (array_size == array_sizes.back()) { + // Fallback to indirect storage, using type id 0 + EXPECT_EQ(0u, _mapper.get_type_id(array_size + 1)); + } else { + EXPECT_EQ(type_id + 1, _mapper.get_type_id(array_size + 1)); + } + } +} + +uint32_t +RawBufferTypeMapperTest::calc_max_small_array_type_id(double grow_factor) +{ + RawBufferTypeMapper mapper(1000, grow_factor); + return mapper.get_max_small_array_type_id(1000); +} + +TEST_F(RawBufferTypeMapperTest, array_sizes_are_calculated) +{ + EXPECT_EQ((std::vector<size_t>{8, 12, 16, 20, 24}), get_array_sizes(5)); +} + +TEST_F(RawBufferTypeMapperTest, type_ids_are_selected) +{ + select_type_ids({8, 12, 16, 20, 24}); +} + +TEST_F(RawBufferTypeMapperTest, large_arrays_grows_exponentially) +{ + EXPECT_EQ((std::vector<size_t>{1148, 22796, 438572, 8429384}), get_large_array_sizes(4)); +} + +TEST_F(RawBufferTypeMapperTest, avoid_array_size_overflow) +{ + EXPECT_EQ(29, calc_max_small_array_type_id(2.0)); + EXPECT_EQ(379, calc_max_small_array_type_id(1.05)); + EXPECT_EQ(468, calc_max_small_array_type_id(1.04)); + EXPECT_EQ(610, calc_max_small_array_type_id(1.03)); + EXPECT_EQ(892, calc_max_small_array_type_id(1.02)); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index 594afc31795..e3028773b75 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -104,6 +104,7 @@ vespa_add_library(searchlib_attribute OBJECT postinglisttraits.cpp postingstore.cpp predicate_attribute.cpp + raw_buffer_type_mapper.cpp raw_multi_value_read_view.cpp readerbase.cpp reference_attribute.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.cpp b/searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.cpp new file mode 100644 index 00000000000..29245fb403a --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.cpp @@ -0,0 +1,40 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "raw_buffer_type_mapper.h" +#include <vespa/vespalib/datastore/aligner.h> +#include <algorithm> +#include <cmath> +#include <limits> + +using vespalib::datastore::Aligner; +using vespalib::datastore::ArrayStoreTypeMapper; + +namespace search::attribute { + +RawBufferTypeMapper::RawBufferTypeMapper() + : ArrayStoreTypeMapper() +{ +} + +RawBufferTypeMapper::RawBufferTypeMapper(uint32_t max_small_buffer_type_id, double grow_factor) + : ArrayStoreTypeMapper() +{ + Aligner<4> aligner; + _array_sizes.reserve(max_small_buffer_type_id + 1); + _array_sizes.emplace_back(0); // type id 0 uses LargeArrayBufferType<char> + size_t array_size = 8u; + for (uint32_t type_id = 1; type_id <= max_small_buffer_type_id; ++type_id) { + if (type_id > 1) { + array_size = std::max(array_size + 4, static_cast<size_t>(std::floor(array_size * grow_factor))); + array_size = aligner.align(array_size); + } + if (array_size > std::numeric_limits<uint32_t>::max()) { + break; + } + _array_sizes.emplace_back(array_size); + } +} + +RawBufferTypeMapper::~RawBufferTypeMapper() = default; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.h b/searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.h new file mode 100644 index 00000000000..88c213c8979 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.h @@ -0,0 +1,31 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/datastore/array_store_type_mapper.h> + +namespace vespalib::datastore { + +template <typename EntryT> class SmallArrayBufferType; +template <typename EntryT> class LargeArrayBufferType; + +} + +namespace search::attribute { + +/* + * This class provides mapping between type ids and array sizes needed for + * storing a raw value. + */ +class RawBufferTypeMapper : public vespalib::datastore::ArrayStoreTypeMapper +{ +public: + using SmallBufferType = vespalib::datastore::SmallArrayBufferType<char>; + using LargeBufferType = vespalib::datastore::LargeArrayBufferType<char>; + + RawBufferTypeMapper(); + RawBufferTypeMapper(uint32_t max_small_buffer_type_id, double grow_factor); + ~RawBufferTypeMapper(); +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp index 04d99d3a59a..a37400ed88e 100644 --- a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp @@ -11,6 +11,10 @@ namespace { constexpr float ALLOC_GROW_FACTOR = 0.2; +constexpr double mapper_grow_factor = 1.03; + +constexpr uint32_t max_small_buffer_type_id = 500u; + } namespace search::attribute { @@ -18,11 +22,12 @@ namespace search::attribute { SingleRawAttribute::SingleRawAttribute(const vespalib::string& name, const Config& config) : NotImplementedAttribute(name, config), _ref_vector(config.getGrowStrategy(), getGenerationHolder()), - _array_store(ArrayStoreType::optimizedConfigForHugePage(1000u, + _array_store(ArrayStoreType::optimizedConfigForHugePage(max_small_buffer_type_id, + RawBufferTypeMapper(max_small_buffer_type_id, mapper_grow_factor), MemoryAllocator::HUGEPAGE_SIZE, MemoryAllocator::PAGE_SIZE, 8_Ki, ALLOC_GROW_FACTOR), - get_memory_allocator()) + get_memory_allocator(), RawBufferTypeMapper(max_small_buffer_type_id, mapper_grow_factor)) { } @@ -124,6 +129,9 @@ EntryRef SingleRawAttribute::set_raw(vespalib::ConstArrayRef<char> raw) { uint32_t size = raw.size(); + if (size == 0) { + return EntryRef(); + } size_t buffer_size = raw.size() + sizeof(size); auto& mapper = _array_store.get_mapper(); auto type_id = mapper.get_type_id(buffer_size); diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h index 52b81a782b9..876acc9ad58 100644 --- a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h +++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h @@ -3,6 +3,7 @@ #pragma once #include "not_implemented_attribute.h" +#include "raw_buffer_type_mapper.h" #include <vespa/vespalib/datastore/array_store.h> #include <vespa/vespalib/util/rcuvector.h> @@ -17,7 +18,7 @@ class SingleRawAttribute : public NotImplementedAttribute using EntryRef = vespalib::datastore::EntryRef; using RefVector = vespalib::RcuVectorBase<AtomicEntryRef>; using RefType = vespalib::datastore::EntryRefT<19>; - using ArrayStoreType = vespalib::datastore::ArrayStore<char, RefType>; + using ArrayStoreType = vespalib::datastore::ArrayStore<char, RefType, RawBufferTypeMapper>; RefVector _ref_vector; ArrayStoreType _array_store; |