diff options
author | Geir Storli <geirst@oath.com> | 2018-01-19 14:18:57 +0000 |
---|---|---|
committer | Geir Storli <geirst@oath.com> | 2018-01-19 14:18:57 +0000 |
commit | eac3947f1275207660547f040b69127c18c08bff (patch) | |
tree | f403ed62c7f874ebb33dd063d5e03fb56cda6247 /searchlib/src/tests | |
parent | c8f67aeeb1bb0acb996af6cc6abb9becda074272 (diff) |
Add more conservative buffer allocation strategy in datastore buffers.
This should greatly reduce the memory footprint of multi-value attribute vectors
that use an array store for their underlying values.
The size of a new buffer is calculated as (simplified):
*) before: (size of previous buffer) * 2
*) now: (size of all previous buffers) * allocation grow factor (default = 0.2)
In addition, we now support resizing of all buffers, not only the first one.
Diffstat (limited to 'searchlib/src/tests')
8 files changed, 180 insertions, 43 deletions
diff --git a/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp b/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp index 33d5d648feb..c1413d5244e 100644 --- a/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp +++ b/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp @@ -59,6 +59,8 @@ public: } }; +constexpr float ALLOC_GROW_FACTOR = 0.2; + template <typename EntryT> class Fixture { @@ -72,12 +74,16 @@ protected: public: using ConstArrayRef = vespalib::ConstArrayRef<EntryT>; Fixture(uint32_t maxSmallArraySize) - : _mvMapping(ArrayStoreConfig(maxSmallArraySize, ArrayStoreConfig::AllocSpec(0, RefType::offsetSize(), 8 * 1024))), + : _mvMapping(ArrayStoreConfig(maxSmallArraySize, + ArrayStoreConfig::AllocSpec(0, RefType::offsetSize(), 8 * 1024, + ALLOC_GROW_FACTOR))), _attr(_mvMapping) { } Fixture(uint32_t maxSmallArraySize, size_t minClusters, size_t maxClusters, size_t numClustersForNewBuffer) - : _mvMapping(ArrayStoreConfig(maxSmallArraySize, ArrayStoreConfig::AllocSpec(minClusters, maxClusters, numClustersForNewBuffer))), + : _mvMapping(ArrayStoreConfig(maxSmallArraySize, + ArrayStoreConfig::AllocSpec(minClusters, maxClusters, numClustersForNewBuffer, + ALLOC_GROW_FACTOR))), _attr(_mvMapping) { } diff --git a/searchlib/src/tests/datastore/array_store/array_store_test.cpp b/searchlib/src/tests/datastore/array_store/array_store_test.cpp index 08559ed213f..fff4445890b 100644 --- a/searchlib/src/tests/datastore/array_store/array_store_test.cpp +++ b/searchlib/src/tests/datastore/array_store/array_store_test.cpp @@ -13,6 +13,8 @@ using vespalib::ArrayRef; using generation_t = vespalib::GenerationHandler::generation_t; using MemStats = search::datastore::test::MemStats; +constexpr float ALLOC_GROW_FACTOR = 0.2; + template <typename EntryT, typename RefT = EntryRefT<19> > struct Fixture { @@ -28,7 +30,9 @@ struct Fixture ReferenceStore refStore; generation_t 
generation; Fixture(uint32_t maxSmallArraySize) - : store(ArrayStoreConfig(maxSmallArraySize, ArrayStoreConfig::AllocSpec(16, RefT::offsetSize(), 8 * 1024))), + : store(ArrayStoreConfig(maxSmallArraySize, + ArrayStoreConfig::AllocSpec(16, RefT::offsetSize(), 8 * 1024, + ALLOC_GROW_FACTOR))), refStore(), generation(1) {} diff --git a/searchlib/src/tests/datastore/array_store_config/array_store_config_test.cpp b/searchlib/src/tests/datastore/array_store_config/array_store_config_test.cpp index d9fe6f353bf..e0f93761b13 100644 --- a/searchlib/src/tests/datastore/array_store_config/array_store_config_test.cpp +++ b/searchlib/src/tests/datastore/array_store_config/array_store_config_test.cpp @@ -7,6 +7,8 @@ using namespace search::datastore; using AllocSpec = ArrayStoreConfig::AllocSpec; +constexpr float ALLOC_GROW_FACTOR = 0.2; + struct Fixture { using EntryRefType = EntryRefT<18>; @@ -22,28 +24,39 @@ struct Fixture size_t minNumArraysForNewBuffer) : cfg(ArrayStoreConfig::optimizeForHugePage(maxSmallArraySize, hugePageSize, smallPageSize, sizeof(int), EntryRefType::offsetSize(), - minNumArraysForNewBuffer)) { } + minNumArraysForNewBuffer, + ALLOC_GROW_FACTOR)) { } void assertSpec(size_t arraySize, uint32_t numArraysForNewBuffer) { - assertSpec(arraySize, AllocSpec(0, EntryRefType::offsetSize(), numArraysForNewBuffer)); + assertSpec(arraySize, AllocSpec(0, EntryRefType::offsetSize(), + numArraysForNewBuffer, ALLOC_GROW_FACTOR)); } void assertSpec(size_t arraySize, const AllocSpec &expSpec) { const ArrayStoreConfig::AllocSpec &actSpec = cfg.specForSize(arraySize); EXPECT_EQUAL(expSpec.minArraysInBuffer, actSpec.minArraysInBuffer); EXPECT_EQUAL(expSpec.maxArraysInBuffer, actSpec.maxArraysInBuffer); EXPECT_EQUAL(expSpec.numArraysForNewBuffer, actSpec.numArraysForNewBuffer); + EXPECT_EQUAL(expSpec.allocGrowFactor, actSpec.allocGrowFactor); } }; +AllocSpec +makeSpec(size_t minArraysInBuffer, + size_t maxArraysInBuffer, + size_t numArraysForNewBuffer) +{ + return 
AllocSpec(minArraysInBuffer, maxArraysInBuffer, numArraysForNewBuffer, ALLOC_GROW_FACTOR); +} + constexpr size_t KB = 1024; constexpr size_t MB = KB * KB; -TEST_F("require that default allocation spec is given for all array sizes", Fixture(3, AllocSpec(4, 32, 8))) +TEST_F("require that default allocation spec is given for all array sizes", Fixture(3, makeSpec(4, 32, 8))) { EXPECT_EQUAL(3u, f.cfg.maxSmallArraySize()); - TEST_DO(f.assertSpec(0, AllocSpec(4, 32, 8))); - TEST_DO(f.assertSpec(1, AllocSpec(4, 32, 8))); - TEST_DO(f.assertSpec(2, AllocSpec(4, 32, 8))); - TEST_DO(f.assertSpec(3, AllocSpec(4, 32, 8))); + TEST_DO(f.assertSpec(0, makeSpec(4, 32, 8))); + TEST_DO(f.assertSpec(1, makeSpec(4, 32, 8))); + TEST_DO(f.assertSpec(2, makeSpec(4, 32, 8))); + TEST_DO(f.assertSpec(3, makeSpec(4, 32, 8))); } TEST_F("require that we can generate config optimized for a given huge page", Fixture(1024, diff --git a/searchlib/src/tests/datastore/buffer_type/CMakeLists.txt b/searchlib/src/tests/datastore/buffer_type/CMakeLists.txt new file mode 100644 index 00000000000..3c3a6eb6f87 --- /dev/null +++ b/searchlib/src/tests/datastore/buffer_type/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_buffer_type_test_app TEST + SOURCES + buffer_type_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_buffer_type_test_app COMMAND searchlib_buffer_type_test_app) diff --git a/searchlib/src/tests/datastore/buffer_type/FILES b/searchlib/src/tests/datastore/buffer_type/FILES new file mode 100644 index 00000000000..741fd68ebba --- /dev/null +++ b/searchlib/src/tests/datastore/buffer_type/FILES @@ -0,0 +1 @@ +buffer_type_test.cpp diff --git a/searchlib/src/tests/datastore/buffer_type/buffer_type_test.cpp b/searchlib/src/tests/datastore/buffer_type/buffer_type_test.cpp new file mode 100644 index 00000000000..1477a7281e6 --- /dev/null +++ b/searchlib/src/tests/datastore/buffer_type/buffer_type_test.cpp @@ -0,0 +1,116 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/datastore/buffer_type.h> +#include <vespa/vespalib/testkit/testapp.h> + +using namespace search::datastore; + +using IntBufferType = BufferType<int>; +constexpr uint32_t CLUSTER_SIZE(4); +constexpr uint32_t MAX_CLUSTERS(128); +constexpr uint32_t NUM_CLUSTERS_FOR_NEW_BUFFER(0); + +struct Setup { + uint32_t _minClusters; + size_t _usedElems; + size_t _neededElems; + uint32_t _bufferId; + float _allocGrowFactor; + bool _resizing; + Setup() + : _minClusters(0), + _usedElems(0), + _neededElems(0), + _bufferId(1), + _allocGrowFactor(0.5), + _resizing(false) + {} + Setup &minClusters(uint32_t value) { _minClusters = value; return *this; } + Setup &used(size_t value) { _usedElems = value; return *this; } + Setup &needed(size_t value) { _neededElems = value; return *this; } + Setup &bufferId(uint32_t value) { _bufferId = value; return *this; } + Setup &resizing(bool value) { _resizing = value; return *this; } +}; + +struct Fixture { + Setup setup; + IntBufferType bufferType; + size_t deadElems; + int buffer; + Fixture(const Setup &setup_) + : setup(setup_), + 
bufferType(CLUSTER_SIZE, setup._minClusters, MAX_CLUSTERS, NUM_CLUSTERS_FOR_NEW_BUFFER, setup._allocGrowFactor), + deadElems(0), + buffer(0) + {} + ~Fixture() { + bufferType.onHold(&setup._usedElems); + bufferType.onFree(setup._usedElems); + } + void onActive() { + bufferType.onActive(setup._bufferId, &setup._usedElems, deadElems, &buffer); + } + size_t clustersToAlloc() { + return bufferType.calcClustersToAlloc(setup._bufferId, setup._neededElems, setup._resizing); + } +}; + +void +assertClustersToAlloc(size_t exp, const Setup &setup) +{ + Fixture f(setup); + f.onActive(); + EXPECT_EQUAL(exp, f.clustersToAlloc()); +} + +TEST("require that complete clusters are allocated") +{ + TEST_DO(assertClustersToAlloc(1, Setup().needed(1))); + TEST_DO(assertClustersToAlloc(1, Setup().needed(2))); + TEST_DO(assertClustersToAlloc(1, Setup().needed(3))); + TEST_DO(assertClustersToAlloc(1, Setup().needed(4))); + TEST_DO(assertClustersToAlloc(2, Setup().needed(5))); +} + +TEST("require that reserved elements are taken into account when not resizing") +{ + TEST_DO(assertClustersToAlloc(2, Setup().needed(1).bufferId(0))); + TEST_DO(assertClustersToAlloc(2, Setup().needed(4).bufferId(0))); + TEST_DO(assertClustersToAlloc(3, Setup().needed(5).bufferId(0))); +} + +TEST("require that clusters to alloc is based on currently used elements (no resizing)") +{ + TEST_DO(assertClustersToAlloc(2, Setup().used(4 * 4).needed(4))); + TEST_DO(assertClustersToAlloc(4, Setup().used(8 * 4).needed(4))); +} + +TEST("require that clusters to alloc is based on currently used elements (with resizing)") +{ + TEST_DO(assertClustersToAlloc(4 + 2, Setup().used(4 * 4).needed(4).resizing(true))); + TEST_DO(assertClustersToAlloc(8 + 4, Setup().used(8 * 4).needed(4).resizing(true))); + TEST_DO(assertClustersToAlloc(4 + 3, Setup().used(4 * 4).needed(3 * 4).resizing(true))); +} + +TEST("require that clusters to alloc always contain elements needed") +{ + TEST_DO(assertClustersToAlloc(2, Setup().used(4 * 4).needed(2 
* 4))); + TEST_DO(assertClustersToAlloc(3, Setup().used(4 * 4).needed(3 * 4))); + TEST_DO(assertClustersToAlloc(4, Setup().used(4 * 4).needed(4 * 4))); +} + +TEST("require that clusters to alloc is capped to max clusters") +{ + TEST_DO(assertClustersToAlloc(127, Setup().used(254 * 4).needed(4))); + TEST_DO(assertClustersToAlloc(128, Setup().used(256 * 4).needed(4))); + TEST_DO(assertClustersToAlloc(128, Setup().used(258 * 4).needed(8))); +} + +TEST("require that clusters to alloc is capped to min clusters") +{ + TEST_DO(assertClustersToAlloc(16, Setup().used(30 * 4).needed(4).minClusters(16))); + TEST_DO(assertClustersToAlloc(16, Setup().used(32 * 4).needed(4).minClusters(16))); + TEST_DO(assertClustersToAlloc(17, Setup().used(34 * 4).needed(4).minClusters(16))); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/datastore/datastore/datastore_test.cpp b/searchlib/src/tests/datastore/datastore/datastore_test.cpp index 7599f675b58..2463439c47c 100644 --- a/searchlib/src/tests/datastore/datastore/datastore_test.cpp +++ b/searchlib/src/tests/datastore/datastore/datastore_test.cpp @@ -65,6 +65,8 @@ public: using GrowthStats = std::vector<int>; +constexpr float ALLOC_GROW_FACTOR = 0.5; + class GrowStore { using Store = DataStoreT<EntryRefT<22>>; @@ -76,8 +78,8 @@ class GrowStore public: GrowStore(size_t minSize, size_t minSwitch) : _store(), - _firstType(1, 1, 64, 0), - _type(1, minSize, 64, minSwitch), + _firstType(1, 1, 64, 0, ALLOC_GROW_FACTOR), + _type(1, minSize, 64, minSwitch, ALLOC_GROW_FACTOR), _typeId(0) { (void) _store.addType(&_firstType); @@ -90,16 +92,16 @@ public: GrowthStats sizes; int i = 0; int previ = 0; - int prevBuffer = -1; + int prevBufferId = -1; while (sizes.size() < bufs) { RefType iRef(_store.allocator<int>(_typeId).alloc().ref); - int buffer = iRef.bufferId(); - if (buffer != prevBuffer) { - if (prevBuffer >= 0) { + int bufferId = iRef.bufferId(); + if (bufferId != prevBufferId) { + if (prevBufferId >= 0) { sizes.push_back(i 
- previ); previ = i; } - prevBuffer = buffer; + prevBufferId = bufferId; } ++i; } @@ -460,7 +462,7 @@ void assertGrowStats(GrowthStats expSizes, size_t expInitMemUsage, size_t minSize, size_t minSwitch) { - EXPECT_EQUAL(expSizes, GrowStore(minSize, minSwitch).getGrowthStats(9)); + EXPECT_EQUAL(expSizes, GrowStore(minSize, minSwitch).getGrowthStats(expSizes.size())); EXPECT_EQUAL(expFirstBufSizes, GrowStore(minSize, minSwitch).getFirstBufGrowStats()); EXPECT_EQUAL(expInitMemUsage, GrowStore(minSize, minSwitch).getMemoryUsage().allocatedBytes()); } @@ -470,22 +472,22 @@ void assertGrowStats(GrowthStats expSizes, TEST("require that buffer growth works") { // Always switch to new buffer, min size 4 - TEST_DO(assertGrowStats({ 4, 8, 16, 32, 64, 64, 64, 64, 64 }, + TEST_DO(assertGrowStats({ 4, 4, 4, 6, 9, 13, 20, 30, 45, 64 }, { 4 }, 20, 4, 0)); // Resize if buffer size is less than 4, min size 0 - TEST_DO(assertGrowStats({ 4, 8, 16, 32, 64, 64, 64, 64, 64 }, - { 0, 1, 2, 4 }, 4, 0, 4)); + TEST_DO(assertGrowStats({ 3, 3, 3, 4, 6, 9, 14, 21, 31, 47 }, + { 0, 1, 2, 3 }, 4, 0, 4)); // Always switch to new buffer, min size 16 - TEST_DO(assertGrowStats({ 16, 32, 64, 64, 64, 64, 64, 64, 64 }, + TEST_DO(assertGrowStats({ 16, 16, 16, 24, 36, 54, 64, 64, 64 }, { 16 }, 68, 16, 0)); // Resize if buffer size is less than 16, min size 0 - TEST_DO(assertGrowStats({ 16, 32, 64, 64, 64, 64, 64, 64, 64 }, - { 0, 1, 2, 4, 8, 16 }, 4, 0, 16)); + TEST_DO(assertGrowStats({ 19, 19, 19, 28, 42, 63, 64, 64, 64 }, + { 0, 1, 2, 3, 4, 6, 9, 13, 19 }, 4, 0, 16)); // Resize if buffer size is less than 16, min size 4 - TEST_DO(assertGrowStats({ 16, 32, 64, 64, 64, 64, 64, 64, 64 }, - { 4, 8, 16 }, 20, 4, 16)); + TEST_DO(assertGrowStats({ 19, 19, 19, 28, 42, 63, 64, 64, 64 }, + { 4, 6, 9, 13, 19 }, 20, 4, 16)); // Always switch to new buffer, min size 0 - TEST_DO(assertGrowStats({ 1, 1, 2, 4, 8, 16, 32, 64, 64}, + TEST_DO(assertGrowStats({ 1, 1, 1, 1, 2, 3, 4, 6, 9 }, { 0, 1 }, 4, 0, 0)); } diff --git 
a/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp b/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp index 6c4c7801038..c1baff72514 100644 --- a/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp +++ b/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp @@ -49,15 +49,7 @@ Test::requireThatAddWordTriggersChangeOfBuffer() WordStore ws; size_t word = 0; uint32_t lastId = 0; - size_t lastWord = 0; char wordStr[10]; - size_t entrySize = WordStore::RefType::align(6 + 1); - size_t initBufferSpace = 1024u * WordStore::RefType::align(1); - size_t bufferSpace = initBufferSpace; - size_t bufferWords = (bufferSpace - WordStore::RefType::align(1)) / - entrySize; - size_t usedSpace = 0; - size_t sumBufferWords = 0; for (;;++word) { sprintf(wordStr, "%6zu", word); // all words uses 12 bytes (include padding) @@ -68,21 +60,16 @@ Test::requireThatAddWordTriggersChangeOfBuffer() LOG(info, "Changed to bufferId %u after %zu words", bufferId, word); - EXPECT_EQUAL(bufferWords, word - lastWord); lastId = bufferId; - lastWord = word; - usedSpace += bufferWords * entrySize; - sumBufferWords += bufferWords; - bufferSpace = usedSpace + initBufferSpace; - bufferWords = bufferSpace / entrySize; } if (bufferId == 4) { + lastId = bufferId; break; } } - // each buffer can have offsetSize / 12 words - EXPECT_EQUAL(sumBufferWords, word); LOG(info, "Added %zu words in 4 buffers", word); + EXPECT_EQUAL(2047u, word); + EXPECT_EQUAL(4u, lastId); } int |