summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeir Storli <geirst@verizonmedia.com>2021-03-01 15:32:51 +0000
committerGeir Storli <geirst@verizonmedia.com>2021-03-01 16:04:51 +0000
commitf0028b3f478cbea1e5383cf86f610c08f6ba2c96 (patch)
treeb2724700d593e9d6081756cafd2c4c930486282d
parent0165a17314823be279f368f63431790c50f3ca2e (diff)
Take dead elements into account (and subtract them) when calculating how many arrays to allocate in a datastore buffer.
This avoids a problem where allocated memory can accumulate over time in components using an ArrayStore. If all documents in an array attribute vector change from one value class to another, all elements in the buffers of the previous value class are marked dead. Those buffers will eventually be compacted. Without this fix the wanted size of the resulting compacted buffer is calculated too high, and we allocate memory we are not going to use. If we move to yet another value class later, the same problem occurs again and more memory is allocated.
-rw-r--r--searchlib/src/tests/attribute/compaction/attribute_compaction_test.cpp33
-rw-r--r--vespalib/src/tests/datastore/buffer_type/buffer_type_test.cpp33
-rw-r--r--vespalib/src/vespa/vespalib/datastore/buffer_type.cpp40
-rw-r--r--vespalib/src/vespa/vespalib/datastore/buffer_type.h8
-rw-r--r--vespalib/src/vespa/vespalib/datastore/bufferstate.cpp4
5 files changed, 90 insertions, 28 deletions
diff --git a/searchlib/src/tests/attribute/compaction/attribute_compaction_test.cpp b/searchlib/src/tests/attribute/compaction/attribute_compaction_test.cpp
index 36a697eaa12..c3bb9481f0f 100644
--- a/searchlib/src/tests/attribute/compaction/attribute_compaction_test.cpp
+++ b/searchlib/src/tests/attribute/compaction/attribute_compaction_test.cpp
@@ -1,10 +1,11 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/attribute/address_space_usage.h>
#include <vespa/searchlib/attribute/attribute.h>
#include <vespa/searchlib/attribute/attributefactory.h>
#include <vespa/searchlib/attribute/attributeguard.h>
#include <vespa/searchlib/attribute/integerbase.h>
-#include <vespa/searchlib/attribute/address_space_usage.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/log/log.h>
LOG_SETUP("attribute_compaction_test");
@@ -126,6 +127,12 @@ Config compactAddressSpaceAttributeConfig(bool enableAddressSpaceCompact)
}
+double
+calc_alloc_waste(const AttributeStatus& status)
+{
+ return ((double)(status.getAllocated() - status.getUsed())) / status.getAllocated();
+}
+
class Fixture {
public:
AttributePtr _v;
@@ -141,8 +148,9 @@ public:
AttributeStatus getStatus() { _v->commit(true); return _v->getStatus(); }
AttributeStatus getStatus(const vespalib::string &prefix) {
AttributeStatus status(getStatus());
- LOG(info, "status %s: used=%" PRIu64 ", dead=%" PRIu64 ", onHold=%" PRIu64,
- prefix.c_str(), status.getUsed(), status.getDead(), status.getOnHold());
+ LOG(info, "status %s: allocated=%" PRIu64 ", used=%" PRIu64 ", dead=%" PRIu64 ", onHold=%" PRIu64 ", waste=%f",
+ prefix.c_str(), status.getAllocated(), status.getUsed(), status.getDead(), status.getOnHold(),
+ calc_alloc_waste(status));
return status;
}
const Config &getConfig() const { return _v->getConfig(); }
@@ -167,6 +175,23 @@ TEST_F("Test that compaction of integer array attribute reduces memory usage", F
EXPECT_LESS(afterStatus.getUsed(), beforeStatus.getUsed());
}
+TEST_F("Allocated memory is not accumulated in an array attribute when moving between value classes when compaction is active",
+ Fixture({BasicType::INT64, CollectionType::ARRAY}))
+{
+ DocIdRange range = f.addDocs(1000);
+ for (uint32_t i = 0; i < 50; ++i) {
+ uint32_t values = 10 + i;
+ // When moving all documents from one value class to the next,
+ // all elements in the buffers of the previous value class are marked dead.
+ // Those buffers will eventually be compacted. By taking the dead elements into account when
+ // calculating how large the resulting compacted buffer should be,
+ // we don't accumulate allocated memory as part of that process.
+ f.populate(range, values);
+ auto status = f.getStatus(vespalib::make_string("values=%u", values));
+ EXPECT_LESS(calc_alloc_waste(status), 0.68);
+ }
+}
+
void
populate_and_hammer(Fixture& f, bool take_attribute_guard)
{
diff --git a/vespalib/src/tests/datastore/buffer_type/buffer_type_test.cpp b/vespalib/src/tests/datastore/buffer_type/buffer_type_test.cpp
index b5a0212528b..d647a659eb6 100644
--- a/vespalib/src/tests/datastore/buffer_type/buffer_type_test.cpp
+++ b/vespalib/src/tests/datastore/buffer_type/buffer_type_test.cpp
@@ -30,6 +30,7 @@ struct Setup {
Setup &minArrays(uint32_t value) { _minArrays = value; return *this; }
Setup &used(size_t value) { _usedElems = value; return *this; }
Setup &needed(size_t value) { _neededElems = value; return *this; }
+ Setup &dead(size_t value) { _deadElems = value; return *this; }
Setup &bufferId(uint32_t value) { _bufferId = value; return *this; }
Setup &resizing(bool value) { _resizing = value; return *this; }
};
@@ -48,7 +49,7 @@ struct Fixture {
}
~Fixture() {
for (auto& setup : setups) {
- bufferType.onHold(&setup._usedElems);
+ bufferType.onHold(&setup._usedElems, &setup._deadElems);
bufferType.onFree(setup._usedElems);
}
}
@@ -61,7 +62,7 @@ struct Fixture {
setups.push_back(setup_in);
}
void onActive() {
- bufferType.onActive(curr_setup()._bufferId, &curr_setup()._usedElems, curr_setup()._deadElems, &buffer[0]);
+ bufferType.onActive(curr_setup()._bufferId, &curr_setup()._usedElems, &curr_setup()._deadElems, &buffer[0]);
}
size_t arraysToAlloc() {
return bufferType.calcArraysToAlloc(curr_setup()._bufferId, curr_setup()._neededElems, curr_setup()._resizing);
@@ -129,7 +130,7 @@ TEST("require that arrays to alloc is capped to min arrays")
TEST_DO(assertArraysToAlloc(17, Setup().used(34 * 4).needed(4).minArrays(16)));
}
-TEST("arrays to alloc considers used elements across all active buffers")
+TEST("arrays to alloc considers used elements across all active buffers (no resizing)")
{
Fixture f(Setup().used(6 * 4));
f.assertArraysToAlloc(6 * 0.5);
@@ -139,4 +140,30 @@ TEST("arrays to alloc considers used elements across all active buffers")
f.assertArraysToAlloc((6 + 8 + 10) * 0.5);
}
+TEST("arrays to alloc only considers used elements in current buffer when resizing")
+{
+ Fixture f(Setup().used(6 * 4));
+ f.assertArraysToAlloc(6 * 0.5);
+ f.add_setup(Setup().used(8 * 4).resizing(true));
+ f.assertArraysToAlloc(8 + 8 * 0.5);
+}
+
+TEST("arrays to alloc considers (and subtracts) dead elements across all active buffers (no resizing)")
+{
+ Fixture f(Setup().used(6 * 4).dead(2 * 4));
+ f.assertArraysToAlloc((6 - 2) * 0.5);
+ f.add_setup(Setup().used(12 * 4).dead(4 * 4));
+ f.assertArraysToAlloc((6 - 2 + 12 - 4) * 0.5);
+ f.add_setup(Setup().used(20 * 4).dead(6 * 4));
+ f.assertArraysToAlloc((6 - 2 + 12 - 4 + 20 - 6) * 0.5);
+}
+
+TEST("arrays to alloc only considers (and subtracts) dead elements in current buffer when resizing")
+{
+ Fixture f(Setup().used(6 * 4).dead(2 * 4));
+ f.assertArraysToAlloc((6 - 2) * 0.5);
+ f.add_setup(Setup().used(12 * 4).dead(4 * 4).resizing(true));
+ f.assertArraysToAlloc(12 + (12 - 4) * 0.5);
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/vespalib/src/vespa/vespalib/datastore/buffer_type.cpp b/vespalib/src/vespa/vespalib/datastore/buffer_type.cpp
index 965b997a280..22e44fe665d 100644
--- a/vespalib/src/vespa/vespalib/datastore/buffer_type.cpp
+++ b/vespalib/src/vespa/vespalib/datastore/buffer_type.cpp
@@ -60,24 +60,24 @@ BufferTypeBase::getReservedElements(uint32_t bufferId) const
}
void
-BufferTypeBase::onActive(uint32_t bufferId, ElemCount *usedElems, ElemCount &deadElems, void *buffer)
+BufferTypeBase::onActive(uint32_t bufferId, ElemCount* usedElems, ElemCount* deadElems, void* buffer)
{
++_activeBuffers;
- _aggr_counts.add_buffer(usedElems);
+ _aggr_counts.add_buffer(usedElems, deadElems);
size_t reservedElems = getReservedElements(bufferId);
if (reservedElems != 0u) {
initializeReservedElements(buffer, reservedElems);
*usedElems = reservedElems;
- deadElems = reservedElems;
+ *deadElems = reservedElems;
}
}
void
-BufferTypeBase::onHold(const ElemCount *usedElems)
+BufferTypeBase::onHold(const ElemCount* usedElems, const ElemCount* deadElems)
{
--_activeBuffers;
++_holdBuffers;
- _aggr_counts.remove_buffer(usedElems);
+ _aggr_counts.remove_buffer(usedElems, deadElems);
_holdUsedElems += *usedElems;
}
@@ -107,16 +107,22 @@ size_t
BufferTypeBase::calcArraysToAlloc(uint32_t bufferId, ElemCount elemsNeeded, bool resizing) const
{
size_t reservedElems = getReservedElements(bufferId);
- size_t usedElems = 0;
+ BufferCounts bc;
if (resizing) {
- usedElems = _aggr_counts.empty() ? 0 : _aggr_counts.last_buffer().used_elems;
+ if (!_aggr_counts.empty()) {
+ bc = _aggr_counts.last_buffer();
+ }
} else {
- usedElems = _aggr_counts.all_buffers().used_elems;
+ bc = _aggr_counts.all_buffers();
}
- assert((usedElems % _arraySize) == 0);
- size_t usedArrays = usedElems / _arraySize;
- size_t neededArrays = (elemsNeeded + (resizing ? usedElems : reservedElems) + _arraySize - 1) / _arraySize;
- size_t growArrays = (usedArrays * _allocGrowFactor);
+ assert((bc.used_elems % _arraySize) == 0);
+ assert((bc.dead_elems % _arraySize) == 0);
+ assert(bc.used_elems >= bc.dead_elems);
+ size_t neededArrays = (elemsNeeded + (resizing ? bc.used_elems : reservedElems) + _arraySize - 1) / _arraySize;
+
+ size_t liveArrays = (bc.used_elems - bc.dead_elems) / _arraySize;
+ size_t growArrays = (liveArrays * _allocGrowFactor);
+ size_t usedArrays = bc.used_elems / _arraySize;
size_t wantedArrays = std::max((resizing ? usedArrays : 0u) + growArrays,
static_cast<size_t>(_minArrays));
@@ -137,20 +143,22 @@ BufferTypeBase::AggregatedBufferCounts::AggregatedBufferCounts()
}
void
-BufferTypeBase::AggregatedBufferCounts::add_buffer(const ElemCount* used_elems)
+BufferTypeBase::AggregatedBufferCounts::add_buffer(const ElemCount* used_elems, const ElemCount* dead_elems)
{
for (const auto& elem : _counts) {
assert(elem.used_ptr != used_elems);
+ assert(elem.dead_ptr != dead_elems);
}
- _counts.emplace_back(used_elems, nullptr);
+ _counts.emplace_back(used_elems, dead_elems);
}
void
-BufferTypeBase::AggregatedBufferCounts::remove_buffer(const ElemCount* used_elems)
+BufferTypeBase::AggregatedBufferCounts::remove_buffer(const ElemCount* used_elems, const ElemCount* dead_elems)
{
auto itr = std::find_if(_counts.begin(), _counts.end(),
[=](const auto& elem){ return elem.used_ptr == used_elems; });
assert(itr != _counts.end());
+ assert(itr->dead_ptr == dead_elems);
_counts.erase(itr);
}
@@ -161,6 +169,7 @@ BufferTypeBase::AggregatedBufferCounts::last_buffer() const
assert(!_counts.empty());
const auto& last = _counts.back();
result.used_elems += *last.used_ptr;
+ result.dead_elems += *last.dead_ptr;
return result;
}
@@ -170,6 +179,7 @@ BufferTypeBase::AggregatedBufferCounts::all_buffers() const
BufferCounts result;
for (const auto& elem : _counts) {
result.used_elems += *elem.used_ptr;
+ result.dead_elems += *elem.dead_ptr;
}
return result;
}
diff --git a/vespalib/src/vespa/vespalib/datastore/buffer_type.h b/vespalib/src/vespa/vespalib/datastore/buffer_type.h
index 9d261ce26bc..c901025e69c 100644
--- a/vespalib/src/vespa/vespalib/datastore/buffer_type.h
+++ b/vespalib/src/vespa/vespalib/datastore/buffer_type.h
@@ -60,8 +60,8 @@ public:
virtual size_t elementSize() const = 0;
virtual void cleanHold(void *buffer, size_t offset, ElemCount numElems, CleanContext cleanCtx) = 0;
size_t getArraySize() const { return _arraySize; }
- virtual void onActive(uint32_t bufferId, ElemCount *usedElems, ElemCount &deadElems, void *buffer);
- void onHold(const ElemCount *usedElems);
+ virtual void onActive(uint32_t bufferId, ElemCount* usedElems, ElemCount* deadElems, void* buffer);
+ void onHold(const ElemCount* usedElems, const ElemCount* deadElems);
virtual void onFree(ElemCount usedElems);
virtual const alloc::MemoryAllocator* get_memory_allocator() const;
@@ -103,8 +103,8 @@ protected:
public:
AggregatedBufferCounts();
- void add_buffer(const ElemCount* used_elems);
- void remove_buffer(const ElemCount* used_elems);
+ void add_buffer(const ElemCount* used_elems, const ElemCount* dead_elems);
+ void remove_buffer(const ElemCount* used_elems, const ElemCount* dead_elems);
BufferCounts last_buffer() const;
BufferCounts all_buffers() const;
bool empty() const { return _counts.empty(); }
diff --git a/vespalib/src/vespa/vespalib/datastore/bufferstate.cpp b/vespalib/src/vespa/vespalib/datastore/bufferstate.cpp
index d3a5402d7b0..68606c5ea19 100644
--- a/vespalib/src/vespa/vespalib/datastore/bufferstate.cpp
+++ b/vespalib/src/vespa/vespalib/datastore/bufferstate.cpp
@@ -133,7 +133,7 @@ BufferState::onActive(uint32_t bufferId, uint32_t typeId,
assert(typeId <= std::numeric_limits<uint16_t>::max());
_typeId = typeId;
_arraySize = _typeHandler->getArraySize();
- typeHandler->onActive(bufferId, &_usedElems, _deadElems, buffer);
+ typeHandler->onActive(bufferId, &_usedElems, &_deadElems, buffer);
}
@@ -148,7 +148,7 @@ BufferState::onHold()
assert(_holdElems <= (_usedElems - _deadElems));
_deadElems = 0;
_holdElems = _usedElems; // Put everyting on hold
- _typeHandler->onHold(&_usedElems);
+ _typeHandler->onHold(&_usedElems, &_deadElems);
if ( ! isFreeListEmpty()) {
removeFromFreeListList();
FreeList().swap(_freeList);