diff options
Diffstat (limited to 'searchlib')
19 files changed, 243 insertions, 78 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index d77ec346cef..3b321f4a12f 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -108,6 +108,7 @@ vespa_define_module( src/tests/common/summaryfeatures src/tests/datastore/array_store src/tests/datastore/array_store_config + src/tests/datastore/buffer_type src/tests/datastore/datastore src/tests/datastore/unique_store src/tests/diskindex/bitvector diff --git a/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp b/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp index 33d5d648feb..c1413d5244e 100644 --- a/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp +++ b/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp @@ -59,6 +59,8 @@ public: } }; +constexpr float ALLOC_GROW_FACTOR = 0.2; + template <typename EntryT> class Fixture { @@ -72,12 +74,16 @@ protected: public: using ConstArrayRef = vespalib::ConstArrayRef<EntryT>; Fixture(uint32_t maxSmallArraySize) - : _mvMapping(ArrayStoreConfig(maxSmallArraySize, ArrayStoreConfig::AllocSpec(0, RefType::offsetSize(), 8 * 1024))), + : _mvMapping(ArrayStoreConfig(maxSmallArraySize, + ArrayStoreConfig::AllocSpec(0, RefType::offsetSize(), 8 * 1024, + ALLOC_GROW_FACTOR))), _attr(_mvMapping) { } Fixture(uint32_t maxSmallArraySize, size_t minClusters, size_t maxClusters, size_t numClustersForNewBuffer) - : _mvMapping(ArrayStoreConfig(maxSmallArraySize, ArrayStoreConfig::AllocSpec(minClusters, maxClusters, numClustersForNewBuffer))), + : _mvMapping(ArrayStoreConfig(maxSmallArraySize, + ArrayStoreConfig::AllocSpec(minClusters, maxClusters, numClustersForNewBuffer, + ALLOC_GROW_FACTOR))), _attr(_mvMapping) { } diff --git a/searchlib/src/tests/datastore/array_store/array_store_test.cpp b/searchlib/src/tests/datastore/array_store/array_store_test.cpp index 08559ed213f..fff4445890b 100644 --- a/searchlib/src/tests/datastore/array_store/array_store_test.cpp +++ b/searchlib/src/tests/datastore/array_store/array_store_test.cpp @@ -13,6 +13,8 @@ using vespalib::ArrayRef; using generation_t = vespalib::GenerationHandler::generation_t; using MemStats = search::datastore::test::MemStats; +constexpr float ALLOC_GROW_FACTOR = 0.2; + template <typename EntryT, typename RefT = EntryRefT<19> > struct Fixture { @@ -28,7 +30,9 @@ struct Fixture ReferenceStore refStore; generation_t generation; Fixture(uint32_t maxSmallArraySize) - : store(ArrayStoreConfig(maxSmallArraySize, ArrayStoreConfig::AllocSpec(16, RefT::offsetSize(), 8 * 1024))), + : store(ArrayStoreConfig(maxSmallArraySize, + ArrayStoreConfig::AllocSpec(16, RefT::offsetSize(), 8 * 1024, + ALLOC_GROW_FACTOR))), refStore(), generation(1) {} diff --git a/searchlib/src/tests/datastore/array_store_config/array_store_config_test.cpp b/searchlib/src/tests/datastore/array_store_config/array_store_config_test.cpp index d9fe6f353bf..e0f93761b13 100644 --- a/searchlib/src/tests/datastore/array_store_config/array_store_config_test.cpp +++ b/searchlib/src/tests/datastore/array_store_config/array_store_config_test.cpp @@ -7,6 +7,8 @@ using namespace search::datastore; using AllocSpec = ArrayStoreConfig::AllocSpec; +constexpr float ALLOC_GROW_FACTOR = 0.2; + struct Fixture { using EntryRefType = EntryRefT<18>; @@ -22,28 +24,39 @@ struct Fixture size_t minNumArraysForNewBuffer) : cfg(ArrayStoreConfig::optimizeForHugePage(maxSmallArraySize, hugePageSize, smallPageSize, sizeof(int), EntryRefType::offsetSize(), - minNumArraysForNewBuffer)) { } + minNumArraysForNewBuffer, + ALLOC_GROW_FACTOR)) { } void assertSpec(size_t arraySize, uint32_t numArraysForNewBuffer) { - assertSpec(arraySize, AllocSpec(0, EntryRefType::offsetSize(), numArraysForNewBuffer)); + assertSpec(arraySize, AllocSpec(0, EntryRefType::offsetSize(), + numArraysForNewBuffer, ALLOC_GROW_FACTOR)); } void assertSpec(size_t arraySize, const AllocSpec &expSpec) { const ArrayStoreConfig::AllocSpec &actSpec = cfg.specForSize(arraySize); EXPECT_EQUAL(expSpec.minArraysInBuffer, actSpec.minArraysInBuffer); EXPECT_EQUAL(expSpec.maxArraysInBuffer, actSpec.maxArraysInBuffer); EXPECT_EQUAL(expSpec.numArraysForNewBuffer, actSpec.numArraysForNewBuffer); + EXPECT_EQUAL(expSpec.allocGrowFactor, actSpec.allocGrowFactor); } }; +AllocSpec +makeSpec(size_t minArraysInBuffer, + size_t maxArraysInBuffer, + size_t numArraysForNewBuffer) +{ + return AllocSpec(minArraysInBuffer, maxArraysInBuffer, numArraysForNewBuffer, ALLOC_GROW_FACTOR); +} + constexpr size_t KB = 1024; constexpr size_t MB = KB * KB; -TEST_F("require that default allocation spec is given for all array sizes", Fixture(3, AllocSpec(4, 32, 8))) +TEST_F("require that default allocation spec is given for all array sizes", Fixture(3, makeSpec(4, 32, 8))) { EXPECT_EQUAL(3u, f.cfg.maxSmallArraySize()); - TEST_DO(f.assertSpec(0, AllocSpec(4, 32, 8))); - TEST_DO(f.assertSpec(1, AllocSpec(4, 32, 8))); - TEST_DO(f.assertSpec(2, AllocSpec(4, 32, 8))); - TEST_DO(f.assertSpec(3, AllocSpec(4, 32, 8))); + TEST_DO(f.assertSpec(0, makeSpec(4, 32, 8))); + TEST_DO(f.assertSpec(1, makeSpec(4, 32, 8))); + TEST_DO(f.assertSpec(2, makeSpec(4, 32, 8))); + TEST_DO(f.assertSpec(3, makeSpec(4, 32, 8))); } TEST_F("require that we can generate config optimized for a given huge page", Fixture(1024, diff --git a/searchlib/src/tests/datastore/buffer_type/CMakeLists.txt b/searchlib/src/tests/datastore/buffer_type/CMakeLists.txt new file mode 100644 index 00000000000..3c3a6eb6f87 --- /dev/null +++ b/searchlib/src/tests/datastore/buffer_type/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_buffer_type_test_app TEST + SOURCES + buffer_type_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_buffer_type_test_app COMMAND searchlib_buffer_type_test_app) diff --git a/searchlib/src/tests/datastore/buffer_type/FILES b/searchlib/src/tests/datastore/buffer_type/FILES new file mode 100644 index 00000000000..741fd68ebba --- /dev/null +++ b/searchlib/src/tests/datastore/buffer_type/FILES @@ -0,0 +1 @@ +buffer_type_test.cpp diff --git a/searchlib/src/tests/datastore/buffer_type/buffer_type_test.cpp b/searchlib/src/tests/datastore/buffer_type/buffer_type_test.cpp new file mode 100644 index 00000000000..1477a7281e6 --- /dev/null +++ b/searchlib/src/tests/datastore/buffer_type/buffer_type_test.cpp @@ -0,0 +1,116 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/datastore/buffer_type.h> +#include <vespa/vespalib/testkit/testapp.h> + +using namespace search::datastore; + +using IntBufferType = BufferType<int>; +constexpr uint32_t CLUSTER_SIZE(4); +constexpr uint32_t MAX_CLUSTERS(128); +constexpr uint32_t NUM_CLUSTERS_FOR_NEW_BUFFER(0); + +struct Setup { + uint32_t _minClusters; + size_t _usedElems; + size_t _neededElems; + uint32_t _bufferId; + float _allocGrowFactor; + bool _resizing; + Setup() + : _minClusters(0), + _usedElems(0), + _neededElems(0), + _bufferId(1), + _allocGrowFactor(0.5), + _resizing(false) + {} + Setup &minClusters(uint32_t value) { _minClusters = value; return *this; } + Setup &used(size_t value) { _usedElems = value; return *this; } + Setup &needed(size_t value) { _neededElems = value; return *this; } + Setup &bufferId(uint32_t value) { _bufferId = value; return *this; } + Setup &resizing(bool value) { _resizing = value; return *this; } +}; + +struct Fixture { + Setup setup; + IntBufferType bufferType; + size_t deadElems; + int buffer; + Fixture(const Setup &setup_) + : setup(setup_), + bufferType(CLUSTER_SIZE, setup._minClusters, MAX_CLUSTERS, NUM_CLUSTERS_FOR_NEW_BUFFER, setup._allocGrowFactor), + deadElems(0), + buffer(0) + {} + ~Fixture() { + bufferType.onHold(&setup._usedElems); + bufferType.onFree(setup._usedElems); + } + void onActive() { + bufferType.onActive(setup._bufferId, &setup._usedElems, deadElems, &buffer); + } + size_t clustersToAlloc() { + return bufferType.calcClustersToAlloc(setup._bufferId, setup._neededElems, setup._resizing); + } +}; + +void +assertClustersToAlloc(size_t exp, const Setup &setup) +{ + Fixture f(setup); + f.onActive(); + EXPECT_EQUAL(exp, f.clustersToAlloc()); +} + +TEST("require that complete clusters are allocated") +{ + TEST_DO(assertClustersToAlloc(1, Setup().needed(1))); + TEST_DO(assertClustersToAlloc(1, Setup().needed(2))); + TEST_DO(assertClustersToAlloc(1, Setup().needed(3))); + TEST_DO(assertClustersToAlloc(1, Setup().needed(4))); + TEST_DO(assertClustersToAlloc(2, Setup().needed(5))); +} + +TEST("require that reserved elements are taken into account when not resizing") +{ + TEST_DO(assertClustersToAlloc(2, Setup().needed(1).bufferId(0))); + TEST_DO(assertClustersToAlloc(2, Setup().needed(4).bufferId(0))); + TEST_DO(assertClustersToAlloc(3, Setup().needed(5).bufferId(0))); +} + +TEST("require that clusters to alloc is based on currently used elements (no resizing)") +{ + TEST_DO(assertClustersToAlloc(2, Setup().used(4 * 4).needed(4))); + TEST_DO(assertClustersToAlloc(4, Setup().used(8 * 4).needed(4))); +} + +TEST("require that clusters to alloc is based on currently used elements (with resizing)") +{ + TEST_DO(assertClustersToAlloc(4 + 2, Setup().used(4 * 4).needed(4).resizing(true))); + TEST_DO(assertClustersToAlloc(8 + 4, Setup().used(8 * 4).needed(4).resizing(true))); + TEST_DO(assertClustersToAlloc(4 + 3, Setup().used(4 * 4).needed(3 * 4).resizing(true))); +} + +TEST("require that clusters to alloc always contain elements needed") +{ + TEST_DO(assertClustersToAlloc(2, Setup().used(4 * 4).needed(2 * 4))); + TEST_DO(assertClustersToAlloc(3, Setup().used(4 * 4).needed(3 * 4))); + TEST_DO(assertClustersToAlloc(4, Setup().used(4 * 4).needed(4 * 4))); +} + +TEST("require that clusters to alloc is capped to max clusters") +{ + TEST_DO(assertClustersToAlloc(127, Setup().used(254 * 4).needed(4))); + TEST_DO(assertClustersToAlloc(128, Setup().used(256 * 4).needed(4))); + TEST_DO(assertClustersToAlloc(128, Setup().used(258 * 4).needed(8))); +} + +TEST("require that clusters to alloc is capped to min clusters") +{ + TEST_DO(assertClustersToAlloc(16, Setup().used(30 * 4).needed(4).minClusters(16))); + TEST_DO(assertClustersToAlloc(16, Setup().used(32 * 4).needed(4).minClusters(16))); + TEST_DO(assertClustersToAlloc(17, Setup().used(34 * 4).needed(4).minClusters(16))); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/datastore/datastore/datastore_test.cpp b/searchlib/src/tests/datastore/datastore/datastore_test.cpp index 7599f675b58..2463439c47c 100644 --- a/searchlib/src/tests/datastore/datastore/datastore_test.cpp +++ b/searchlib/src/tests/datastore/datastore/datastore_test.cpp @@ -65,6 +65,8 @@ public: using GrowthStats = std::vector<int>; +constexpr float ALLOC_GROW_FACTOR = 0.5; + class GrowStore { using Store = DataStoreT<EntryRefT<22>>; @@ -76,8 +78,8 @@ class GrowStore public: GrowStore(size_t minSize, size_t minSwitch) : _store(), - _firstType(1, 1, 64, 0), - _type(1, minSize, 64, minSwitch), + _firstType(1, 1, 64, 0, ALLOC_GROW_FACTOR), + _type(1, minSize, 64, minSwitch, ALLOC_GROW_FACTOR), _typeId(0) { (void) _store.addType(&_firstType); @@ -90,16 +92,16 @@ public: GrowthStats sizes; int i = 0; int previ = 0; - int prevBuffer = -1; + int prevBufferId = -1; while (sizes.size() < bufs) { RefType iRef(_store.allocator<int>(_typeId).alloc().ref); - int buffer = iRef.bufferId(); - if (buffer != prevBuffer) { - if (prevBuffer >= 0) { + int bufferId = iRef.bufferId(); + if (bufferId != prevBufferId) { + if (prevBufferId >= 0) { sizes.push_back(i - previ); previ = i; } - prevBuffer = buffer; + prevBufferId = bufferId; } ++i; } @@ -460,7 +462,7 @@ void assertGrowStats(GrowthStats expSizes, size_t expInitMemUsage, size_t minSize, size_t minSwitch) { - EXPECT_EQUAL(expSizes, GrowStore(minSize, minSwitch).getGrowthStats(9)); + EXPECT_EQUAL(expSizes, GrowStore(minSize, minSwitch).getGrowthStats(expSizes.size())); EXPECT_EQUAL(expFirstBufSizes, GrowStore(minSize, minSwitch).getFirstBufGrowStats()); EXPECT_EQUAL(expInitMemUsage, GrowStore(minSize, minSwitch).getMemoryUsage().allocatedBytes()); } @@ -470,22 +472,22 @@ void assertGrowStats(GrowthStats expSizes, TEST("require that buffer growth works") { // Always switch to new buffer, min size 4 - TEST_DO(assertGrowStats({ 4, 8, 16, 32, 64, 64, 64, 64, 64 }, + TEST_DO(assertGrowStats({ 4, 4, 4, 6, 9, 13, 20, 30, 45, 64 }, { 4 }, 20, 4, 0)); // Resize if buffer size is less than 4, min size 0 - TEST_DO(assertGrowStats({ 4, 8, 16, 32, 64, 64, 64, 64, 64 }, - { 0, 1, 2, 4 }, 4, 0, 4)); + TEST_DO(assertGrowStats({ 3, 3, 3, 4, 6, 9, 14, 21, 31, 47 }, + { 0, 1, 2, 3 }, 4, 0, 4)); // Always switch to new buffer, min size 16 - TEST_DO(assertGrowStats({ 16, 32, 64, 64, 64, 64, 64, 64, 64 }, + TEST_DO(assertGrowStats({ 16, 16, 16, 24, 36, 54, 64, 64, 64 }, { 16 }, 68, 16, 0)); // Resize if buffer size is less than 16, min size 0 - TEST_DO(assertGrowStats({ 16, 32, 64, 64, 64, 64, 64, 64, 64 }, - { 0, 1, 2, 4, 8, 16 }, 4, 0, 16)); + TEST_DO(assertGrowStats({ 19, 19, 19, 28, 42, 63, 64, 64, 64 }, + { 0, 1, 2, 3, 4, 6, 9, 13, 19 }, 4, 0, 16)); // Resize if buffer size is less than 16, min size 4 - TEST_DO(assertGrowStats({ 16, 32, 64, 64, 64, 64, 64, 64, 64 }, - { 4, 8, 16 }, 20, 4, 16)); + TEST_DO(assertGrowStats({ 19, 19, 19, 28, 42, 63, 64, 64, 64 }, + { 4, 6, 9, 13, 19 }, 20, 4, 16)); // Always switch to new buffer, min size 0 - TEST_DO(assertGrowStats({ 1, 1, 2, 4, 8, 16, 32, 64, 64}, + TEST_DO(assertGrowStats({ 1, 1, 1, 1, 2, 3, 4, 6, 9 }, { 0, 1 }, 4, 0, 0)); } diff --git a/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp b/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp index 6c4c7801038..c1baff72514 100644 --- a/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp +++ b/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp @@ -49,15 +49,7 @@ Test::requireThatAddWordTriggersChangeOfBuffer() WordStore ws; size_t word = 0; uint32_t lastId = 0; - size_t lastWord = 0; char wordStr[10]; - size_t entrySize = WordStore::RefType::align(6 + 1); - size_t initBufferSpace = 1024u * WordStore::RefType::align(1); - size_t bufferSpace = initBufferSpace; - size_t bufferWords = (bufferSpace - WordStore::RefType::align(1)) / - entrySize; - size_t usedSpace = 0; - size_t sumBufferWords = 0; for (;;++word) { sprintf(wordStr, "%6zu", word); // all words uses 12 bytes (include padding) @@ -68,21 +60,16 @@ Test::requireThatAddWordTriggersChangeOfBuffer() LOG(info, "Changed to bufferId %u after %zu words", bufferId, word); - EXPECT_EQUAL(bufferWords, word - lastWord); lastId = bufferId; - lastWord = word; - usedSpace += bufferWords * entrySize; - sumBufferWords += bufferWords; - bufferSpace = usedSpace + initBufferSpace; - bufferWords = bufferSpace / entrySize; } if (bufferId == 4) { + lastId = bufferId; break; } } - // each buffer can have offsetSize / 12 words - EXPECT_EQUAL(sumBufferWords, word); LOG(info, "Added %zu words in 4 buffers", word); + EXPECT_EQUAL(2047u, word); + EXPECT_EQUAL(4u, lastId); } int diff --git a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.h b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.h index 1edbd67af30..435fbb21923 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.h +++ b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.h @@ -53,7 +53,8 @@ public: static datastore::ArrayStoreConfig optimizedConfigForHugePage(size_t maxSmallArraySize, size_t hugePageSize, size_t smallPageSize, - size_t minNumArraysForNewBuffer); + size_t minNumArraysForNewBuffer, + float allocGrowFactor); }; } // namespace search::attribute diff --git a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.hpp b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.hpp index 4931322f169..83886619d0f 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.hpp @@ -75,9 +75,10 @@ datastore::ArrayStoreConfig MultiValueMapping<EntryT, RefT>::optimizedConfigForHugePage(size_t maxSmallArraySize, size_t hugePageSize, size_t smallPageSize, - size_t minNumArraysForNewBuffer) + size_t minNumArraysForNewBuffer, + float allocGrowFactor) { - return ArrayStore::optimizedConfigForHugePage(maxSmallArraySize, hugePageSize, smallPageSize, minNumArraysForNewBuffer); + return ArrayStore::optimizedConfigForHugePage(maxSmallArraySize, hugePageSize, smallPageSize, minNumArraysForNewBuffer, allocGrowFactor); } } // namespace search::attribute diff --git a/searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp index e5cbfe8c7d8..6632ca730c4 100644 --- a/searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp @@ -10,6 +10,7 @@ namespace multivalueattribute { constexpr size_t HUGE_MEMORY_PAGE_SIZE = 2 * 1024 * 1024; constexpr size_t SMALL_MEMORY_PAGE_SIZE = 4 * 1024; +constexpr float ALLOC_GROW_FACTOR = 0.2; } @@ -21,7 +22,9 @@ MultiValueAttribute(const vespalib::string &baseFileName, _mvMapping(MultiValueMapping::optimizedConfigForHugePage(1023, multivalueattribute::HUGE_MEMORY_PAGE_SIZE, multivalueattribute::SMALL_MEMORY_PAGE_SIZE, - 8 * 1024), cfg.getGrowStrategy()) + 8 * 1024, + multivalueattribute::ALLOC_GROW_FACTOR), + cfg.getGrowStrategy()) { } diff --git a/searchlib/src/vespa/searchlib/datastore/array_store.h b/searchlib/src/vespa/searchlib/datastore/array_store.h index c6d3fff1b85..749e567a6ce 100644 --- a/searchlib/src/vespa/searchlib/datastore/array_store.h +++ b/searchlib/src/vespa/searchlib/datastore/array_store.h @@ -105,7 +105,8 @@ public: static ArrayStoreConfig optimizedConfigForHugePage(size_t maxSmallArraySize, size_t hugePageSize, size_t smallPageSize, - size_t minNumArraysForNewBuffer); + size_t minNumArraysForNewBuffer, + float allocGrowFactor); }; } diff --git a/searchlib/src/vespa/searchlib/datastore/array_store.hpp b/searchlib/src/vespa/searchlib/datastore/array_store.hpp index cd45df90899..d76bd173d4a 100644 --- a/searchlib/src/vespa/searchlib/datastore/array_store.hpp +++ b/searchlib/src/vespa/searchlib/datastore/array_store.hpp @@ -13,7 +13,7 @@ constexpr size_t MIN_BUFFER_CLUSTERS = 8192; template <typename EntryT, typename RefT> ArrayStore<EntryT, RefT>::LargeArrayType::LargeArrayType(const AllocSpec &spec) - : BufferType<LargeArray>(1, spec.minArraysInBuffer, spec.maxArraysInBuffer, spec.numArraysForNewBuffer) + : BufferType<LargeArray>(1, spec.minArraysInBuffer, spec.maxArraysInBuffer, spec.numArraysForNewBuffer, spec.allocGrowFactor) { } @@ -38,7 +38,8 @@ ArrayStore<EntryT, RefT>::initArrayTypes(const ArrayStoreConfig &cfg) for (uint32_t arraySize = 1; arraySize <= _maxSmallArraySize; ++arraySize) { const AllocSpec &spec = cfg.specForSize(arraySize); _smallArrayTypes.push_back(std::make_unique<SmallArrayType> - (arraySize, spec.minArraysInBuffer, spec.maxArraysInBuffer, spec.numArraysForNewBuffer)); + (arraySize, spec.minArraysInBuffer, spec.maxArraysInBuffer, + spec.numArraysForNewBuffer, spec.allocGrowFactor)); uint32_t typeId = _store.addType(_smallArrayTypes.back().get()); assert(typeId == arraySize); // Enforce 1-to-1 mapping between type ids and sizes for small arrays } @@ -188,14 +189,16 @@ ArrayStoreConfig ArrayStore<EntryT, RefT>::optimizedConfigForHugePage(size_t maxSmallArraySize, size_t hugePageSize, size_t smallPageSize, - size_t minNumArraysForNewBuffer) + size_t minNumArraysForNewBuffer, + float allocGrowFactor) { return ArrayStoreConfig::optimizeForHugePage(maxSmallArraySize, hugePageSize, smallPageSize, sizeof(EntryT), RefT::offsetSize(), - minNumArraysForNewBuffer); + minNumArraysForNewBuffer, + allocGrowFactor); } } diff --git a/searchlib/src/vespa/searchlib/datastore/array_store_config.cpp b/searchlib/src/vespa/searchlib/datastore/array_store_config.cpp index 0683475930c..0581183f675 100644 --- a/searchlib/src/vespa/searchlib/datastore/array_store_config.cpp +++ b/searchlib/src/vespa/searchlib/datastore/array_store_config.cpp @@ -48,15 +48,16 @@ ArrayStoreConfig::optimizeForHugePage(size_t maxSmallArraySize, size_t smallPageSize, size_t entrySize, size_t maxEntryRefOffset, - size_t minNumArraysForNewBuffer) + size_t minNumArraysForNewBuffer, + float allocGrowFactor) { AllocSpecVector allocSpecs; - allocSpecs.emplace_back(0, maxEntryRefOffset, minNumArraysForNewBuffer); // large array spec; + allocSpecs.emplace_back(0, maxEntryRefOffset, minNumArraysForNewBuffer, allocGrowFactor); // large array spec; for (size_t arraySize = 1; arraySize <= maxSmallArraySize; ++arraySize) { size_t numArraysForNewBuffer = hugePageSize / (entrySize * arraySize); numArraysForNewBuffer = capToLimits(numArraysForNewBuffer, minNumArraysForNewBuffer, maxEntryRefOffset); numArraysForNewBuffer = alignToSmallPageSize(numArraysForNewBuffer, minNumArraysForNewBuffer, smallPageSize); - allocSpecs.emplace_back(0, maxEntryRefOffset, numArraysForNewBuffer); + allocSpecs.emplace_back(0, maxEntryRefOffset, numArraysForNewBuffer, allocGrowFactor); } return ArrayStoreConfig(allocSpecs); } diff --git a/searchlib/src/vespa/searchlib/datastore/array_store_config.h b/searchlib/src/vespa/searchlib/datastore/array_store_config.h index 849ca53f35c..a39c4454308 100644 --- a/searchlib/src/vespa/searchlib/datastore/array_store_config.h +++ b/searchlib/src/vespa/searchlib/datastore/array_store_config.h @@ -23,12 +23,16 @@ public: size_t maxArraysInBuffer; // Number of arrays needed before allocating a new buffer instead of just resizing the first one. size_t numArraysForNewBuffer; + // Grow factor used when allocating a new buffer. + float allocGrowFactor; AllocSpec(size_t minArraysInBuffer_, size_t maxArraysInBuffer_, - size_t numArraysForNewBuffer_) + size_t numArraysForNewBuffer_, + float allocGrowFactor_) : minArraysInBuffer(minArraysInBuffer_), maxArraysInBuffer(maxArraysInBuffer_), - numArraysForNewBuffer(numArraysForNewBuffer_) {} + numArraysForNewBuffer(numArraysForNewBuffer_), + allocGrowFactor(allocGrowFactor_) {} }; using AllocSpecVector = std::vector<AllocSpec>; @@ -61,7 +65,8 @@ public: size_t smallPageSize, size_t entrySize, size_t maxEntryRefOffset, - size_t minNumArraysForNewBuffer); + size_t minNumArraysForNewBuffer, + float allocGrowFactor); }; } diff --git a/searchlib/src/vespa/searchlib/datastore/buffer_type.cpp b/searchlib/src/vespa/searchlib/datastore/buffer_type.cpp index 1a968942c6d..798c930a3e2 100644 --- a/searchlib/src/vespa/searchlib/datastore/buffer_type.cpp +++ b/searchlib/src/vespa/searchlib/datastore/buffer_type.cpp @@ -6,6 +6,12 @@ namespace search::datastore { +namespace { + +constexpr float DEFAULT_ALLOC_GROW_FACTOR = 0.2; + +} + void BufferTypeBase::CleanContext::extraBytesCleaned(uint64_t value) { @@ -16,11 +22,13 @@ BufferTypeBase::CleanContext::extraBytesCleaned(uint64_t value) BufferTypeBase::BufferTypeBase(uint32_t clusterSize, uint32_t minClusters, uint32_t maxClusters, - uint32_t numClustersForNewBuffer) + uint32_t numClustersForNewBuffer, + float allocGrowFactor) : _clusterSize(clusterSize), _minClusters(std::min(minClusters, maxClusters)), _maxClusters(maxClusters), _numClustersForNewBuffer(std::min(numClustersForNewBuffer, maxClusters)), + _allocGrowFactor(allocGrowFactor), _activeBuffers(0), _holdBuffers(0), _activeUsedElems(0), @@ -32,7 +40,7 @@ BufferTypeBase::BufferTypeBase(uint32_t clusterSize, BufferTypeBase::BufferTypeBase(uint32_t clusterSize, uint32_t minClusters, uint32_t maxClusters) - : BufferTypeBase(clusterSize, minClusters, maxClusters, 0u) + : BufferTypeBase(clusterSize, minClusters, maxClusters, 0u, DEFAULT_ALLOC_GROW_FACTOR) { } @@ -107,25 +115,25 @@ size_t BufferTypeBase::calcClustersToAlloc(uint32_t bufferId, size_t sizeNeeded, bool resizing) const { size_t reservedElements = getReservedElements(bufferId); - size_t usedElems = _activeUsedElems; + size_t usedElems = (resizing ? 0 : _activeUsedElems); if (_lastUsedElems != nullptr) { usedElems += *_lastUsedElems; } assert((usedElems % _clusterSize) == 0); size_t usedClusters = usedElems / _clusterSize; size_t needClusters = (sizeNeeded + (resizing ? usedElems : reservedElements) + _clusterSize - 1) / _clusterSize; - size_t minClusters = _minClusters; - size_t numClustersForNewBuffer = _numClustersForNewBuffer; - size_t extraGrowClusters = (usedElems != 0) ? numClustersForNewBuffer : 0; - uint64_t wantClusters = usedClusters + std::max(minClusters, (resizing ? usedClusters : extraGrowClusters)); - if (wantClusters < needClusters) { - wantClusters = needClusters; + size_t growClusters = (usedClusters * _allocGrowFactor); + size_t wantClusters = std::max((resizing ? usedClusters : 0u) + growClusters, + static_cast<size_t>(_minClusters)); + size_t result = wantClusters; + if (result < needClusters) { + result = needClusters; } - if (wantClusters > _maxClusters) { - wantClusters = _maxClusters; + if (result > _maxClusters) { + result = _maxClusters; } - assert(wantClusters >= needClusters); - return wantClusters; + assert(result >= needClusters); + return result; } } diff --git a/searchlib/src/vespa/searchlib/datastore/buffer_type.h b/searchlib/src/vespa/searchlib/datastore/buffer_type.h index b9bedce34f3..321100bb811 100644 --- a/searchlib/src/vespa/searchlib/datastore/buffer_type.h +++ b/searchlib/src/vespa/searchlib/datastore/buffer_type.h @@ -17,9 +17,9 @@ protected: uint32_t _clusterSize; // Number of elements in an allocation unit uint32_t _minClusters; // Minimum number of clusters to allocate uint32_t _maxClusters; // Maximum number of clusters to allocate - // Number of clusters needed before allocating a new buffer - // instead of just resizing the first one + // Number of clusters needed before allocating a new buffer instead of just resizing the first one uint32_t _numClustersForNewBuffer; + float _allocGrowFactor; uint32_t _activeBuffers; uint32_t _holdBuffers; size_t _activeUsedElems; // used elements in all but last active buffer @@ -38,7 +38,8 @@ public: BufferTypeBase(const BufferTypeBase &rhs) = delete; BufferTypeBase & operator=(const BufferTypeBase &rhs) = delete; BufferTypeBase(uint32_t clusterSize, uint32_t minClusters, uint32_t maxClusters); - BufferTypeBase(uint32_t clusterSize, uint32_t minClusters, uint32_t maxClusters, uint32_t numClustersForNewBuffer); + BufferTypeBase(uint32_t clusterSize, uint32_t minClusters, uint32_t maxClusters, + uint32_t numClustersForNewBuffer, float allocGrowFactor); virtual ~BufferTypeBase(); virtual void destroyElements(void *buffer, size_t numElements) = 0; virtual void fallbackCopy(void *newBuffer, const void *oldBuffer, size_t numElements) = 0; @@ -83,7 +84,8 @@ public: BufferType(const BufferType &rhs) = delete; BufferType & operator=(const BufferType &rhs) = delete; BufferType(uint32_t clusterSize, uint32_t minClusters, uint32_t maxClusters); - BufferType(uint32_t clusterSize, uint32_t minClusters, uint32_t maxClusters, uint32_t numClustersForNewBuffer); + BufferType(uint32_t clusterSize, uint32_t minClusters, uint32_t maxClusters, + uint32_t numClustersForNewBuffer, float allocGrowFactor); ~BufferType(); void destroyElements(void *buffer, size_t numElements) override; void fallbackCopy(void *newBuffer, const void *oldBuffer, size_t numElements) override; @@ -99,8 +101,9 @@ BufferType<EntryType>::BufferType(uint32_t clusterSize, uint32_t minClusters, ui { } template <typename EntryType> -BufferType<EntryType>::BufferType(uint32_t clusterSize, uint32_t minClusters, uint32_t maxClusters, uint32_t numClustersForNewBuffer) - : BufferTypeBase(clusterSize, minClusters, maxClusters, numClustersForNewBuffer), +BufferType<EntryType>::BufferType(uint32_t clusterSize, uint32_t minClusters, uint32_t maxClusters, + uint32_t numClustersForNewBuffer, float allocGrowFactor) + : BufferTypeBase(clusterSize, minClusters, maxClusters, numClustersForNewBuffer, allocGrowFactor), _emptyEntry() { } diff --git a/searchlib/src/vespa/searchlib/datastore/unique_store.hpp b/searchlib/src/vespa/searchlib/datastore/unique_store.hpp index 603b6e53a85..ab526ccbfc2 100644 --- a/searchlib/src/vespa/searchlib/datastore/unique_store.hpp +++ b/searchlib/src/vespa/searchlib/datastore/unique_store.hpp @@ -18,11 +18,12 @@ namespace search::datastore { constexpr size_t NUMCLUSTERS_FOR_NEW_UNIQUESTORE_BUFFER = 1024u; +constexpr float ALLOC_GROW_FACTOR = 0.2; template <typename EntryT, typename RefT> UniqueStore<EntryT, RefT>::UniqueStore() : _store(), - _typeHandler(1, 2u, RefT::offsetSize(), NUMCLUSTERS_FOR_NEW_UNIQUESTORE_BUFFER), + _typeHandler(1, 2u, RefT::offsetSize(), NUMCLUSTERS_FOR_NEW_UNIQUESTORE_BUFFER, ALLOC_GROW_FACTOR), _typeId(0), _dict() { |