about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2023-06-27 23:51:25 +0200
committer GitHub <noreply@github.com> 2023-06-27 23:51:25 +0200
commit 8a950d04ee85d5078ae1b0091ca27e7144da208f (patch)
tree c3e41528508eb0fa5df3d074f41c3c0891771522
parent 61502c3c2db3e38a2feeb7bf41e029bb5cc22a85 (diff)
parent 161b7411804e7ce5177902f54524517e3191d4f3 (diff)
Merge pull request #27556 from vespa-engine/toregge/add-max-buffer-size-to-tensor-buffer-type-mapper
Add max buffer size parameter to tensor buffer type mapper.
-rw-r--r-- searchlib/src/tests/tensor/tensor_buffer_type_mapper/tensor_buffer_type_mapper_test.cpp 49
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.cpp 7
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/tensor_buffer_type_mapper.cpp 10
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/tensor_buffer_type_mapper.h 2
5 files changed, 49 insertions, 20 deletions
diff --git a/searchlib/src/tests/tensor/tensor_buffer_type_mapper/tensor_buffer_type_mapper_test.cpp b/searchlib/src/tests/tensor/tensor_buffer_type_mapper/tensor_buffer_type_mapper_test.cpp
index fc574ba9b2c..6f25b9e07c5 100644
--- a/searchlib/src/tests/tensor/tensor_buffer_type_mapper/tensor_buffer_type_mapper_test.cpp
+++ b/searchlib/src/tests/tensor/tensor_buffer_type_mapper/tensor_buffer_type_mapper_test.cpp
@@ -3,10 +3,13 @@
#include <vespa/searchlib/tensor/tensor_buffer_type_mapper.h>
#include <vespa/searchlib/tensor/tensor_buffer_operations.h>
#include <vespa/eval/eval/value_type.h>
+#include <vespa/vespalib/datastore/array_store_config.h>
#include <vespa/vespalib/gtest/gtest.h>
+#include <limits>
using search::tensor::TensorBufferOperations;
using search::tensor::TensorBufferTypeMapper;
+using vespalib::datastore::ArrayStoreConfig;
using vespalib::eval::ValueType;
const vespalib::string tensor_type_sparse_spec("tensor(x{})");
@@ -15,18 +18,26 @@ const vespalib::string tensor_type_2d_mixed_spec("tensor(x{},y[2])");
const vespalib::string float_tensor_type_spec("tensor<float>(y{})");
const vespalib::string tensor_type_dense_spec("tensor(x[2])");
-constexpr double grow_factor = 1.03;
+namespace {
+
+constexpr double default_grow_factor = 1.03;
+constexpr size_t default_max_buffer_size = ArrayStoreConfig::default_max_buffer_size;
+constexpr size_t max_max_buffer_size = std::numeric_limits<uint32_t>::max();
+
+}
struct TestParam
{
vespalib::string _name;
std::vector<size_t> _array_sizes;
std::vector<size_t> _large_array_sizes;
+ std::vector<uint32_t> _type_id_caps;
vespalib::string _tensor_type_spec;
- TestParam(vespalib::string name, std::vector<size_t> array_sizes, std::vector<size_t> large_array_sizes, const vespalib::string& tensor_type_spec)
+ TestParam(vespalib::string name, std::vector<size_t> array_sizes, std::vector<size_t> large_array_sizes, std::vector<uint32_t> type_id_caps, const vespalib::string& tensor_type_spec)
: _name(std::move(name)),
_array_sizes(std::move(array_sizes)),
_large_array_sizes(std::move(large_array_sizes)),
+ _type_id_caps(type_id_caps),
_tensor_type_spec(tensor_type_spec)
{
}
@@ -61,7 +72,7 @@ TensorBufferTypeMapperTest::TensorBufferTypeMapperTest()
: testing::TestWithParam<TestParam>(),
_tensor_type(ValueType::from_spec(GetParam()._tensor_type_spec)),
_ops(_tensor_type),
- _mapper(GetParam()._array_sizes.size(), grow_factor, &_ops)
+ _mapper(GetParam()._array_sizes.size(), default_grow_factor, default_max_buffer_size, &_ops)
{
}
@@ -73,7 +84,7 @@ TensorBufferTypeMapperTest::get_array_sizes()
uint32_t max_small_subspaces_type_id = GetParam()._array_sizes.size();
std::vector<size_t> array_sizes;
for (uint32_t type_id = 1; type_id <= max_small_subspaces_type_id; ++type_id) {
- auto num_subspaces = type_id - 1;
+ auto num_subspaces = _tensor_type.is_dense() ? 1 : (type_id - 1);
array_sizes.emplace_back(_mapper.get_array_size(type_id));
EXPECT_EQ(_ops.get_buffer_size(num_subspaces), array_sizes.back());
}
@@ -85,10 +96,13 @@ TensorBufferTypeMapperTest::get_large_array_sizes()
{
auto& large_array_sizes = GetParam()._large_array_sizes;
uint32_t max_large = large_array_sizes.size();
- TensorBufferTypeMapper mapper(max_large * 100, grow_factor, &_ops);
+ TensorBufferTypeMapper mapper(max_large * 100, default_grow_factor, default_max_buffer_size, &_ops);
std::vector<size_t> result;
for (uint32_t i = 0; i < max_large; ++i) {
uint32_t type_id = (i + 1) * 100;
+ if (type_id > mapper.get_max_type_id(max_large * 100)) {
+ break;
+ }
auto array_size = mapper.get_array_size(type_id);
result.emplace_back(array_size);
EXPECT_EQ(type_id, mapper.get_type_id(array_size));
@@ -128,11 +142,11 @@ TensorBufferTypeMapperTest::select_type_ids()
INSTANTIATE_TEST_SUITE_P(TensorBufferTypeMapperMultiTest,
TensorBufferTypeMapperTest,
- testing::Values(TestParam("1d", {8, 16, 32, 40, 64}, {2768, 49712, 950768, 18268976, 351101184}, tensor_type_sparse_spec),
- TestParam("1dfloat", {4, 12, 20, 28, 36}, {2688, 48896, 937248, 18009808, 346121248}, float_tensor_type_spec),
- TestParam("2d", {8, 24, 40, 56, 80}, {2416, 41392, 790112, 15179616, 291726288}, tensor_type_2d_spec),
- TestParam("2dmixed", {8, 24, 48, 64, 96}, {3008, 51728, 987632, 18974512, 364657856}, tensor_type_2d_mixed_spec),
- TestParam("dense", {8, 24}, {}, tensor_type_dense_spec)),
+ testing::Values(TestParam("1d", {8, 16, 32, 40, 64}, {2768, 49712, 950768, 18268976, 351101184}, {27, 30, 514, 584}, tensor_type_sparse_spec),
+ TestParam("1dfloat", {4, 12, 20, 28, 36}, {2688, 48896, 937248, 18009808, 346121248}, {27, 30, 514, 585}, float_tensor_type_spec),
+ TestParam("2d", {8, 24, 40, 56, 80}, {2416, 41392, 790112, 15179616, 291726288}, {26, 29, 520, 590}, tensor_type_2d_spec),
+ TestParam("2dmixed", {8, 24, 48, 64, 96}, {3008, 51728, 987632, 18974512, 364657856}, {26, 29, 513, 583}, tensor_type_2d_mixed_spec),
+ TestParam("dense", {24}, {}, {1, 1, 1, 1}, tensor_type_dense_spec)),
testing::PrintToStringParamName());
TEST_P(TensorBufferTypeMapperTest, array_sizes_are_calculated)
@@ -150,10 +164,19 @@ TEST_P(TensorBufferTypeMapperTest, large_arrays_grows_exponentially)
EXPECT_EQ(GetParam()._large_array_sizes, get_large_array_sizes());
}
-TEST_P(TensorBufferTypeMapperTest, avoid_array_size_overflow)
+TEST_P(TensorBufferTypeMapperTest, type_id_is_capped)
{
- TensorBufferTypeMapper mapper(300, 2.0, &_ops);
- EXPECT_GE(30, mapper.get_max_type_id(1000));
+ auto& exp_type_id_caps = GetParam()._type_id_caps;
+ std::vector<uint32_t> act_type_id_caps;
+ std::vector<double> grow_factors = { 2.0, default_grow_factor };
+ std::vector<size_t> max_buffer_sizes = { default_max_buffer_size, max_max_buffer_size };
+ for (auto& grow_factor : grow_factors) {
+ for (auto max_buffer_size : max_buffer_sizes) {
+ TensorBufferTypeMapper mapper(1000, grow_factor, max_buffer_size, &_ops);
+ act_type_id_caps.emplace_back(mapper.get_max_type_id(1000));
+ }
+ }
+ EXPECT_EQ(exp_type_id_caps, act_type_id_caps);
}
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h
index 72940cbd6a0..8f0ddfe5800 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h
@@ -118,6 +118,7 @@ public:
vespalib::ConstArrayRef<vespalib::string_id> labels(reinterpret_cast<const vespalib::string_id*>(buf.data() + get_labels_offset()), num_subspaces * _num_mapped_dimensions);
return SerializedTensorRef(VectorBundle(buf.data() + get_cells_offset(num_subspaces, aligner), num_subspaces, _subspace_type), _num_mapped_dimensions, labels);
}
+ bool is_dense() const noexcept { return _num_mapped_dimensions == 0; }
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.cpp
index 8a7d84010cb..e4f54383821 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.cpp
@@ -13,6 +13,7 @@
using document::DeserializeException;
using vespalib::alloc::MemoryAllocator;
+using vespalib::datastore::ArrayStoreConfig;
using vespalib::datastore::CompactionContext;
using vespalib::datastore::CompactionStrategy;
using vespalib::datastore::EntryRef;
@@ -33,12 +34,12 @@ TensorBufferStore::TensorBufferStore(const ValueType& tensor_type, std::shared_p
_tensor_type(tensor_type),
_ops(_tensor_type),
_array_store(ArrayStoreType::optimizedConfigForHugePage(max_small_subspaces_type_id,
- TensorBufferTypeMapper(max_small_subspaces_type_id, array_store_grow_factor, &_ops),
+ TensorBufferTypeMapper(max_small_subspaces_type_id, array_store_grow_factor, ArrayStoreConfig::default_max_buffer_size, &_ops),
MemoryAllocator::HUGEPAGE_SIZE,
MemoryAllocator::PAGE_SIZE,
- vespalib::datastore::ArrayStoreConfig::default_max_buffer_size,
+ ArrayStoreConfig::default_max_buffer_size,
8_Ki, ALLOC_GROW_FACTOR),
- std::move(allocator), TensorBufferTypeMapper(max_small_subspaces_type_id, array_store_grow_factor, &_ops))
+ std::move(allocator), TensorBufferTypeMapper(max_small_subspaces_type_id, array_store_grow_factor, ArrayStoreConfig::default_max_buffer_size, &_ops))
{
}
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_type_mapper.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_type_mapper.cpp
index 3bd9f72c73b..16c2d65d829 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_type_mapper.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_type_mapper.cpp
@@ -14,13 +14,13 @@ TensorBufferTypeMapper::TensorBufferTypeMapper()
{
}
-TensorBufferTypeMapper::TensorBufferTypeMapper(uint32_t max_small_subspaces_type_id, double grow_factor, TensorBufferOperations* ops)
+TensorBufferTypeMapper::TensorBufferTypeMapper(uint32_t max_small_subspaces_type_id, double grow_factor, size_t max_buffer_size, TensorBufferOperations* ops)
: vespalib::datastore::ArrayStoreTypeMapper(),
_ops(ops)
{
_array_sizes.reserve(max_small_subspaces_type_id + 1);
_array_sizes.emplace_back(0); // type id 0 uses LargeSubspacesBufferType
- uint32_t num_subspaces = 0;
+ uint32_t num_subspaces = _ops->is_dense() ? 1 : 0;
size_t prev_array_size = 0u;
size_t array_size = 0u;
for (uint32_t type_id = 1; type_id <= max_small_subspaces_type_id; ++type_id) {
@@ -32,10 +32,14 @@ TensorBufferTypeMapper::TensorBufferTypeMapper(uint32_t max_small_subspaces_type
++num_subspaces;
array_size = _ops->get_buffer_size(num_subspaces);
}
- if (array_size > std::numeric_limits<uint32_t>::max()) {
+ if (array_size > std::numeric_limits<uint32_t>::max() ||
+ array_size >= 2 * max_buffer_size) {
break;
}
_array_sizes.emplace_back(array_size);
+ if (_ops->is_dense()) {
+ break;
+ }
prev_array_size = array_size;
}
}
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_type_mapper.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_type_mapper.h
index 3087ef67c4d..74c3d73badb 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_type_mapper.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_type_mapper.h
@@ -22,7 +22,7 @@ public:
using LargeBufferType = LargeSubspacesBufferType;
TensorBufferTypeMapper();
- TensorBufferTypeMapper(uint32_t max_small_subspaces_type_id, double grow_factor, TensorBufferOperations* ops);
+ TensorBufferTypeMapper(uint32_t max_small_subspaces_type_id, double grow_factor, size_t max_buffer_size, TensorBufferOperations* ops);
~TensorBufferTypeMapper();
TensorBufferOperations& get_tensor_buffer_operations() const noexcept { return *_ops; }