aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@yahooinc.com> 2023-03-07 16:24:53 +0100
committer GitHub <noreply@github.com> 2023-03-07 16:24:53 +0100
commit 0fbf49026d872bb604ed73b9c7037687f598a05f (patch)
tree 96206a40875f8f3abd7a106a866732d47e13de1b
parent 9b627ff454f85763f7a4c9c95b6ecec7a2ec6484 (diff)
parent 981fdc08daf60b97575d400cf2e589ff5a004a34 (diff)
Merge pull request #26342 from vespa-engine/toregge/add-raw-buffer-type-mapper
Add RawBufferTypeMapper.
-rw-r--r-- searchlib/CMakeLists.txt 1
-rw-r--r-- searchlib/src/tests/attribute/raw_buffer_type_mapper/CMakeLists.txt 9
-rw-r--r-- searchlib/src/tests/attribute/raw_buffer_type_mapper/raw_buffer_type_mapper_test.cpp 115
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.cpp 40
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.h 31
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp 12
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h 3
8 files changed, 209 insertions, 3 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 07045684d6e..8959a2dd2e0 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -92,6 +92,7 @@ vespa_define_module(
src/tests/attribute/postinglist
src/tests/attribute/postinglistattribute
src/tests/attribute/raw_attribute
+ src/tests/attribute/raw_buffer_type_mapper
src/tests/attribute/reference_attribute
src/tests/attribute/save_target
src/tests/attribute/searchable
diff --git a/searchlib/src/tests/attribute/raw_buffer_type_mapper/CMakeLists.txt b/searchlib/src/tests/attribute/raw_buffer_type_mapper/CMakeLists.txt
new file mode 100644
index 00000000000..c860770536d
--- /dev/null
+++ b/searchlib/src/tests/attribute/raw_buffer_type_mapper/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_raw_buffer_type_mapper_test_app TEST # Test executable for RawBufferTypeMapper.
+ SOURCES
+ raw_buffer_type_mapper_test.cpp
+ DEPENDS
+ searchlib
+ GTest::GTest
+)
+vespa_add_test(NAME searchlib_raw_buffer_type_mapper_test_app COMMAND searchlib_raw_buffer_type_mapper_test_app) # Registers the executable above as a test.
diff --git a/searchlib/src/tests/attribute/raw_buffer_type_mapper/raw_buffer_type_mapper_test.cpp b/searchlib/src/tests/attribute/raw_buffer_type_mapper/raw_buffer_type_mapper_test.cpp
new file mode 100644
index 00000000000..74ec839670e
--- /dev/null
+++ b/searchlib/src/tests/attribute/raw_buffer_type_mapper/raw_buffer_type_mapper_test.cpp
@@ -0,0 +1,115 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/attribute/raw_buffer_type_mapper.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using search::attribute::RawBufferTypeMapper;
+
+constexpr double default_grow_factor = 1.03;
+
+class RawBufferTypeMapperTest : public testing::Test // Fixture that owns the RawBufferTypeMapper under test.
+{
+protected:
+ RawBufferTypeMapper _mapper; // Mapper under test; the constructor builds it with 5 type ids and default_grow_factor.
+ RawBufferTypeMapperTest();
+ ~RawBufferTypeMapperTest() override;
+ std::vector<size_t> get_array_sizes(uint32_t num_array_sizes); // Array sizes reported for type ids 1..num_array_sizes.
+ std::vector<size_t> get_large_array_sizes(uint32_t num_large_arrays); // Rebuilds _mapper, samples every 100th type id. NOTE(review): the definition names this parameter num_large_array_sizes.
+ void select_type_ids(std::vector<size_t> array_sizes); // Checks get_type_id() at, just below and just above each given size.
+ void setup_mapper(uint32_t max_small_buffer_type_id, double grow_factor); // Replaces _mapper with a newly constructed one.
+ static uint32_t calc_max_small_array_type_id(double grow_factor); // Uses a local mapper with 1000 requested type ids; does not touch _mapper.
+};
+
+RawBufferTypeMapperTest::RawBufferTypeMapperTest() // Sets up the default mapper used by tests that do not call setup_mapper().
+ : testing::Test(),
+ _mapper(5, default_grow_factor) // max_small_buffer_type_id = 5, grow factor = 1.03
+{
+}
+
+RawBufferTypeMapperTest::~RawBufferTypeMapperTest() = default; // Defaulted out of line; the fixture needs no explicit cleanup.
+
+void
+RawBufferTypeMapperTest::setup_mapper(uint32_t max_small_buffer_type_id, double grow_factor) // Swap in a mapper built from the given parameters.
+{
+ _mapper = RawBufferTypeMapper(max_small_buffer_type_id, grow_factor); // Discards the mapper built by the fixture constructor.
+}
+
+std::vector<size_t>
+RawBufferTypeMapperTest::get_array_sizes(uint32_t num_array_sizes) // Returns _mapper's array size for each type id in 1..num_array_sizes, in order.
+{
+ std::vector<size_t> array_sizes;
+ for (uint32_t type_id = 1; type_id <= num_array_sizes; ++type_id) { // Starts at 1; type id 0 is not queried here.
+ array_sizes.emplace_back(_mapper.get_array_size(type_id));
+ }
+ return array_sizes;
+}
+
+std::vector<size_t>
+RawBufferTypeMapperTest::get_large_array_sizes(uint32_t num_large_array_sizes) // Returns the array sizes at type ids 100, 200, ..., num_large_array_sizes * 100.
+{
+ setup_mapper(num_large_array_sizes * 100, default_grow_factor); // Request exactly as many type ids as the highest one sampled below.
+ std::vector<size_t> result;
+ for (uint32_t i = 0; i < num_large_array_sizes; ++i) {
+ uint32_t type_id = (i + 1) * 100; // Sample every 100th type id.
+ auto array_size = _mapper.get_array_size(type_id);
+ result.emplace_back(array_size);
+ EXPECT_EQ(type_id, _mapper.get_type_id(array_size)); // The exact size maps back to its own type id.
+ EXPECT_EQ(type_id, _mapper.get_type_id(array_size - 1)); // One less is expected to map to the same type id.
+ if (i + 1 == num_large_array_sizes) { // Last sample is also the highest type id requested from the mapper.
+ EXPECT_EQ(0u, _mapper.get_type_id(array_size + 1)); // Nothing larger is registered, so type id 0 is expected.
+ } else {
+ EXPECT_EQ(type_id + 1, _mapper.get_type_id(array_size + 1)); // One more is expected to map to the next type id.
+ }
+ }
+ return result;
+}
+
+void
+RawBufferTypeMapperTest::select_type_ids(std::vector<size_t> array_sizes) // Expects array_sizes[k] to be the array size of type id k + 1 in _mapper.
+{
+ uint32_t type_id = 0;
+ for (auto array_size : array_sizes) {
+ ++type_id; // Type ids are 1-based: the first size belongs to type id 1.
+ EXPECT_EQ(type_id, _mapper.get_type_id(array_size));
+ EXPECT_EQ(type_id, _mapper.get_type_id(array_size - 1)); // One less is expected to map to the same type id.
+ if (array_size == array_sizes.back()) { // Compares by value, so the last entry is recognised by its size.
+ // Fallback to indirect storage, using type id 0
+ EXPECT_EQ(0u, _mapper.get_type_id(array_size + 1));
+ } else {
+ EXPECT_EQ(type_id + 1, _mapper.get_type_id(array_size + 1)); // One more is expected to map to the next type id.
+ }
+ }
+}
+
+uint32_t
+RawBufferTypeMapperTest::calc_max_small_array_type_id(double grow_factor) // Returns what a local mapper reports as max small array type id for grow_factor.
+{
+ RawBufferTypeMapper mapper(1000, grow_factor); // Local mapper with 1000 requested type ids; the fixture's _mapper is not used.
+ return mapper.get_max_small_array_type_id(1000); // NOTE(review): presumably caps 1000 at the number of sizes actually registered; confirm in ArrayStoreTypeMapper.
+}
+
+TEST_F(RawBufferTypeMapperTest, array_sizes_are_calculated) // Uses the fixture's default mapper (5 type ids, grow factor 1.03).
+{
+ EXPECT_EQ((std::vector<size_t>{8, 12, 16, 20, 24}), get_array_sizes(5)); // Expected sizes for type ids 1..5.
+}
+
+TEST_F(RawBufferTypeMapperTest, type_ids_are_selected) // Uses the fixture's default mapper (5 type ids, grow factor 1.03).
+{
+ select_type_ids({8, 12, 16, 20, 24}); // Same sizes as expected by array_sizes_are_calculated.
+}
+
+TEST_F(RawBufferTypeMapperTest, large_arrays_grows_exponentially) // get_large_array_sizes() rebuilds the mapper with 400 type ids.
+{
+ EXPECT_EQ((std::vector<size_t>{1148, 22796, 438572, 8429384}), get_large_array_sizes(4)); // Expected sizes at type ids 100, 200, 300 and 400.
+}
+
+TEST_F(RawBufferTypeMapperTest, avoid_array_size_overflow) // Each call below requests 1000 type ids from a local mapper.
+{
+ EXPECT_EQ(29, calc_max_small_array_type_id(2.0)); // Every expected value is below the 1000 requested.
+ EXPECT_EQ(379, calc_max_small_array_type_id(1.05));
+ EXPECT_EQ(468, calc_max_small_array_type_id(1.04));
+ EXPECT_EQ(610, calc_max_small_array_type_id(1.03)); // 1.03 is also default_grow_factor in this file.
+ EXPECT_EQ(892, calc_max_small_array_type_id(1.02));
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
index 594afc31795..e3028773b75 100644
--- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
@@ -104,6 +104,7 @@ vespa_add_library(searchlib_attribute OBJECT
postinglisttraits.cpp
postingstore.cpp
predicate_attribute.cpp
+ raw_buffer_type_mapper.cpp
raw_multi_value_read_view.cpp
readerbase.cpp
reference_attribute.cpp
diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.cpp b/searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.cpp
new file mode 100644
index 00000000000..29245fb403a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.cpp
@@ -0,0 +1,40 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "raw_buffer_type_mapper.h"
+#include <vespa/vespalib/datastore/aligner.h>
+#include <algorithm>
+#include <cmath>
+#include <limits>
+
+using vespalib::datastore::Aligner;
+using vespalib::datastore::ArrayStoreTypeMapper;
+
+namespace search::attribute {
+
+RawBufferTypeMapper::RawBufferTypeMapper() // Default constructor: adds no array sizes of its own.
+ : ArrayStoreTypeMapper()
+{
+}
+
+RawBufferTypeMapper::RawBufferTypeMapper(uint32_t max_small_buffer_type_id, double grow_factor) // Fills _array_sizes for type ids 0..max_small_buffer_type_id, or fewer if a size would exceed uint32_t.
+ : ArrayStoreTypeMapper()
+{
+ Aligner<4> aligner; // NOTE(review): presumably rounds sizes up to a multiple of 4; confirm in aligner.h.
+ _array_sizes.reserve(max_small_buffer_type_id + 1); // +1 for the type id 0 entry added below.
+ _array_sizes.emplace_back(0); // type id 0 uses LargeArrayBufferType<char>
+ size_t array_size = 8u; // Array size for type id 1.
+ for (uint32_t type_id = 1; type_id <= max_small_buffer_type_id; ++type_id) {
+ if (type_id > 1) { // Type id 1 keeps the initial size; later ids grow from the previous size.
+ array_size = std::max(array_size + 4, static_cast<size_t>(std::floor(array_size * grow_factor))); // Grow by at least 4, or by grow_factor when that gives more.
+ array_size = aligner.align(array_size);
+ }
+ if (array_size > std::numeric_limits<uint32_t>::max()) { // Stop before recording a size that does not fit in uint32_t.
+ break; // The table then holds fewer than max_small_buffer_type_id + 1 entries.
+ }
+ _array_sizes.emplace_back(array_size);
+ }
+}
+
+RawBufferTypeMapper::~RawBufferTypeMapper() = default; // Defaulted out of line; declared in raw_buffer_type_mapper.h.
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.h b/searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.h
new file mode 100644
index 00000000000..88c213c8979
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/raw_buffer_type_mapper.h
@@ -0,0 +1,31 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/datastore/array_store_type_mapper.h>
+
+namespace vespalib::datastore {
+
+template <typename EntryT> class SmallArrayBufferType;
+template <typename EntryT> class LargeArrayBufferType;
+
+}
+
+namespace search::attribute {
+
+/*
+ * This class provides mapping between type ids and array sizes needed for
+ * storing a raw value. Type id 0 is used for LargeBufferType.
+ */
+class RawBufferTypeMapper : public vespalib::datastore::ArrayStoreTypeMapper
+{
+public:
+ using SmallBufferType = vespalib::datastore::SmallArrayBufferType<char>; // Only forward-declared in this header.
+ using LargeBufferType = vespalib::datastore::LargeArrayBufferType<char>; // Only forward-declared in this header.
+
+ RawBufferTypeMapper(); // Adds no array sizes of its own.
+ RawBufferTypeMapper(uint32_t max_small_buffer_type_id, double grow_factor); // Registers sizes for type ids up to max_small_buffer_type_id, growing by grow_factor.
+ ~RawBufferTypeMapper();
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp
index 04d99d3a59a..a37400ed88e 100644
--- a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp
@@ -11,6 +11,10 @@ namespace {
constexpr float ALLOC_GROW_FACTOR = 0.2;
+constexpr double mapper_grow_factor = 1.03;
+
+constexpr uint32_t max_small_buffer_type_id = 500u;
+
}
namespace search::attribute {
@@ -18,11 +22,12 @@ namespace search::attribute {
SingleRawAttribute::SingleRawAttribute(const vespalib::string& name, const Config& config)
: NotImplementedAttribute(name, config),
_ref_vector(config.getGrowStrategy(), getGenerationHolder()),
- _array_store(ArrayStoreType::optimizedConfigForHugePage(1000u,
+ _array_store(ArrayStoreType::optimizedConfigForHugePage(max_small_buffer_type_id,
+ RawBufferTypeMapper(max_small_buffer_type_id, mapper_grow_factor),
MemoryAllocator::HUGEPAGE_SIZE,
MemoryAllocator::PAGE_SIZE,
8_Ki, ALLOC_GROW_FACTOR),
- get_memory_allocator())
+ get_memory_allocator(), RawBufferTypeMapper(max_small_buffer_type_id, mapper_grow_factor))
{
}
@@ -124,6 +129,9 @@ EntryRef
SingleRawAttribute::set_raw(vespalib::ConstArrayRef<char> raw)
{
uint32_t size = raw.size();
+ if (size == 0) {
+ return EntryRef();
+ }
size_t buffer_size = raw.size() + sizeof(size);
auto& mapper = _array_store.get_mapper();
auto type_id = mapper.get_type_id(buffer_size);
diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h
index 52b81a782b9..876acc9ad58 100644
--- a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h
@@ -3,6 +3,7 @@
#pragma once
#include "not_implemented_attribute.h"
+#include "raw_buffer_type_mapper.h"
#include <vespa/vespalib/datastore/array_store.h>
#include <vespa/vespalib/util/rcuvector.h>
@@ -17,7 +18,7 @@ class SingleRawAttribute : public NotImplementedAttribute
using EntryRef = vespalib::datastore::EntryRef;
using RefVector = vespalib::RcuVectorBase<AtomicEntryRef>;
using RefType = vespalib::datastore::EntryRefT<19>;
- using ArrayStoreType = vespalib::datastore::ArrayStore<char, RefType>;
+ using ArrayStoreType = vespalib::datastore::ArrayStore<char, RefType, RawBufferTypeMapper>;
RefVector _ref_vector;
ArrayStoreType _array_store;