From 4f8faeef83a649be7ccdf61bf13d4d09cd7d622b Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Thu, 15 Aug 2019 12:30:15 +0200 Subject: Add separate allocator for strings in unique store. --- vespalib/CMakeLists.txt | 1 + .../datastore/array_store/array_store_test.cpp | 2 +- .../unique_store_string_allocator/CMakeLists.txt | 9 + .../unique_store_string_allocator_test.cpp | 204 +++++++++++++++++++++ .../src/vespa/vespalib/datastore/CMakeLists.txt | 1 + .../src/vespa/vespalib/datastore/buffer_type.cpp | 6 +- .../src/vespa/vespalib/datastore/buffer_type.h | 5 +- .../src/vespa/vespalib/datastore/bufferstate.h | 3 +- .../src/vespa/vespalib/datastore/unique_store.hpp | 12 +- .../vespalib/datastore/unique_store_allocator.hpp | 3 +- .../vespalib/datastore/unique_store_dictionary.cpp | 9 +- .../vespalib/datastore/unique_store_dictionary.h | 2 +- .../datastore/unique_store_dictionary_base.h | 4 +- .../vespa/vespalib/datastore/unique_store_entry.h | 9 + .../vespalib/datastore/unique_store_entry_base.h | 3 +- .../datastore/unique_store_string_allocator.cpp | 82 +++++++++ .../datastore/unique_store_string_allocator.h | 126 +++++++++++++ .../datastore/unique_store_string_allocator.hpp | 98 ++++++++++ .../src/vespa/vespalib/test/datastore/memstats.h | 5 + 19 files changed, 560 insertions(+), 24 deletions(-) create mode 100644 vespalib/src/tests/datastore/unique_store_string_allocator/CMakeLists.txt create mode 100644 vespalib/src/tests/datastore/unique_store_string_allocator/unique_store_string_allocator_test.cpp create mode 100644 vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.cpp create mode 100644 vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.h create mode 100644 vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.hpp (limited to 'vespalib') diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt index c1cb113ea40..041b2ccaba0 100644 --- a/vespalib/CMakeLists.txt +++ b/vespalib/CMakeLists.txt @@ -42,6 +42,7 @@ vespa_define_module( src/tests/datastore/buffer_type src/tests/datastore/datastore src/tests/datastore/unique_store + src/tests/datastore/unique_store_string_allocator src/tests/delegatelist src/tests/dotproduct src/tests/drop-file-from-cache diff --git a/vespalib/src/tests/datastore/array_store/array_store_test.cpp b/vespalib/src/tests/datastore/array_store/array_store_test.cpp index 1eb71d36fe7..d4e294dfd8d 100644 --- a/vespalib/src/tests/datastore/array_store/array_store_test.cpp +++ b/vespalib/src/tests/datastore/array_store/array_store_test.cpp @@ -317,7 +317,7 @@ TEST_F("require that used, onHold and dead memory usage is tracked for large arr f.remove({3,3,3}); TEST_DO(f.assertMemoryUsage(exp.hold(f.largeArraySize() + f.entrySize() * 3))); f.trimHoldLists(); - TEST_DO(f.assertMemoryUsage(exp.decHold(f.largeArraySize() + f.entrySize() * 3). + TEST_DO(f.assertMemoryUsage(exp.decUsed(f.entrySize() * 3).decHold(f.largeArraySize() + f.entrySize() * 3). dead(f.largeArraySize()))); } diff --git a/vespalib/src/tests/datastore/unique_store_string_allocator/CMakeLists.txt b/vespalib/src/tests/datastore/unique_store_string_allocator/CMakeLists.txt new file mode 100644 index 00000000000..c2a9999d545 --- /dev/null +++ b/vespalib/src/tests/datastore/unique_store_string_allocator/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_unique_store_string_allocator_test_app TEST + SOURCES + unique_store_string_allocator_test.cpp + DEPENDS + vespalib + gtest +) +vespa_add_test(NAME vespalib_unique_store_string_allocator_test_app COMMAND vespalib_unique_store_string_allocator_test_app) diff --git a/vespalib/src/tests/datastore/unique_store_string_allocator/unique_store_string_allocator_test.cpp b/vespalib/src/tests/datastore/unique_store_string_allocator/unique_store_string_allocator_test.cpp new file mode 100644 index 00000000000..4983f9f42a4 --- /dev/null +++ b/vespalib/src/tests/datastore/unique_store_string_allocator/unique_store_string_allocator_test.cpp @@ -0,0 +1,204 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include +#include +#include +#include + +using namespace search::datastore; +using vespalib::MemoryUsage; +using generation_t = vespalib::GenerationHandler::generation_t; +using MemStats = search::datastore::test::MemStats; + +namespace { + +std::string small("small"); +std::string middle("middle long string"); +std::string spaces1000(1000, ' '); + +} + +template > +struct TestBase : public ::testing::Test { + using EntryRefType = RefT; + + UniqueStoreStringAllocator allocator; + generation_t generation; + TestBase() + : allocator(), + generation(1) + {} + void assert_add(const char *input) { + EntryRef ref = add(input); + assert_get(ref, input); + } + EntryRef add(const char *input) { + return allocator.allocate(input); + } + void assert_get(EntryRef ref, const char *exp) const { + const char *act = allocator.get(ref); + EXPECT_STREQ(exp, act); + } + void remove(EntryRef ref) { + allocator.hold(ref); + } + EntryRef move(EntryRef ref) { + return allocator.move(ref); + } + uint32_t get_buffer_id(EntryRef ref) const { + return EntryRefType(ref).bufferId(); + } + const BufferState &buffer_state(EntryRef ref) const { + return allocator.getDataStore().getBufferState(get_buffer_id(ref)); + } + void assert_buffer_state(EntryRef ref, const MemStats expStats) const { + EXPECT_EQ(expStats._used, buffer_state(ref).size()); + EXPECT_EQ(expStats._hold, buffer_state(ref).getHoldElems()); + EXPECT_EQ(expStats._dead, buffer_state(ref).getDeadElems()); + } + void assert_buffer_extra_state(EntryRef ref, size_t exp_extra_used, size_t exp_extra_hold) const { + EXPECT_EQ(exp_extra_used, buffer_state(ref).getExtraUsedBytes()); + EXPECT_EQ(exp_extra_hold, buffer_state(ref).getExtraHoldBytes()); + } + void trim_hold_lists() { + allocator.getDataStore().transferHoldLists(generation++); + allocator.getDataStore().trimHoldLists(generation); + } +}; + +using StringTest = TestBase>; +using SmallOffsetStringTest = TestBase>; + +TEST_F(StringTest, can_add_and_get_values) +{ + assert_add(small.c_str()); + assert_add(middle.c_str()); + assert_add(spaces1000.c_str()); +} + +TEST_F(StringTest, elements_are_put_on_hold_when_value_is_removed) +{ + EntryRef ref = add(small.c_str()); + assert_buffer_state(ref, MemStats().used(16).hold(0).dead(0)); + remove(ref); + assert_buffer_state(ref, MemStats().used(16).hold(16).dead(0)); + trim_hold_lists(); + assert_buffer_state(ref, MemStats().used(16).hold(0).dead(16)); +} + +TEST_F(StringTest, extra_bytes_used_is_tracked) +{ + EntryRef ref = add(spaces1000.c_str()); + // Note: The first buffer have the first element reserved -> we expect 2 elements used here. + assert_buffer_state(ref, MemStats().used(2).hold(0).dead(1)); + assert_buffer_extra_state(ref, 1001, 0); + remove(ref); + assert_buffer_state(ref, MemStats().used(2).hold(1).dead(1)); + assert_buffer_extra_state(ref, 1001, 1001); + trim_hold_lists(); + assert_buffer_state(ref, MemStats().used(2).hold(0).dead(2)); + assert_buffer_extra_state(ref, 0, 0); + ref = add(spaces1000.c_str()); + assert_buffer_state(ref, MemStats().used(2).hold(0).dead(1)); + assert_buffer_extra_state(ref, 1001, 0); + EntryRef ref2 = move(ref); + assert_get(ref2, spaces1000.c_str()); + assert_buffer_state(ref, MemStats().used(3).hold(0).dead(1)); + assert_buffer_extra_state(ref, 2002, 0); + remove(ref); + remove(ref2); + assert_buffer_state(ref, MemStats().used(3).hold(2).dead(1)); + assert_buffer_extra_state(ref, 2002, 2002); + trim_hold_lists(); + assert_buffer_state(ref, MemStats().used(3).hold(0).dead(3)); + assert_buffer_extra_state(ref, 0, 0); +} + +TEST_F(StringTest, string_length_determines_buffer) +{ + EntryRef ref1 = add(small.c_str()); + EntryRef ref2 = add(middle.c_str()); + EntryRef ref3 = add(spaces1000.c_str()); + EXPECT_NE(get_buffer_id(ref1), get_buffer_id(ref2)); + EXPECT_NE(get_buffer_id(ref1), get_buffer_id(ref3)); + EXPECT_NE(get_buffer_id(ref2), get_buffer_id(ref3)); + EntryRef ref4 = add(small.c_str()); + EXPECT_NE(ref1, ref4); + EXPECT_EQ(get_buffer_id(ref1), get_buffer_id(ref4)); +} + +TEST_F(StringTest, free_list_is_used_when_enabled) +{ + allocator.getDataStore().enableFreeLists(); + EntryRef ref1 = add(small.c_str()); + EntryRef ref2 = add(spaces1000.c_str()); + remove(ref1); + remove(ref2); + trim_hold_lists(); + EntryRef ref3 = add(small.c_str()); + EntryRef ref4 = add(spaces1000.c_str()); + EXPECT_EQ(ref1, ref3); + EXPECT_EQ(ref2, ref4); + assert_buffer_state(ref1, MemStats().used(16).hold(0).dead(0)); + assert_buffer_extra_state(ref1, 0, 0); + assert_buffer_state(ref2, MemStats().used(2).hold(0).dead(1)); + assert_buffer_extra_state(ref2, 1001, 0); +} + +TEST_F(StringTest, free_list_is_not_used_when_disabled) +{ + allocator.getDataStore().disableFreeLists(); + EntryRef ref1 = add(small.c_str()); + EntryRef ref2 = add(spaces1000.c_str()); + remove(ref1); + remove(ref2); + trim_hold_lists(); + EntryRef ref3 = add(small.c_str()); + EntryRef ref4 = add(spaces1000.c_str()); + EXPECT_NE(ref1, ref3); + EXPECT_NE(ref2, ref4); + assert_buffer_state(ref1, MemStats().used(32).hold(0).dead(16)); + assert_buffer_extra_state(ref1, 0, 0); + assert_buffer_state(ref2, MemStats().used(3).hold(0).dead(2)); + assert_buffer_extra_state(ref2, 1001, 0); +} + +TEST_F(StringTest, free_list_is_never_used_for_move) +{ + allocator.getDataStore().enableFreeLists(); + EntryRef ref1 = add(small.c_str()); + EntryRef ref2 = add(spaces1000.c_str()); + EntryRef ref3 = add(small.c_str()); + EntryRef ref4 = add(spaces1000.c_str()); + remove(ref3); + remove(ref4); + trim_hold_lists(); + EntryRef ref5 = move(ref1); + EntryRef ref6 = move(ref2); + EXPECT_NE(ref5, ref3); + EXPECT_NE(ref6, ref4); + assert_buffer_state(ref1, MemStats().used(48).hold(0).dead(16)); + assert_buffer_extra_state(ref1, 0, 0); + assert_buffer_state(ref2, MemStats().used(4).hold(0).dead(2)); + assert_buffer_extra_state(ref2, 2002, 0); +} + +TEST_F(SmallOffsetStringTest, new_underlying_buffer_is_allocated_when_current_is_full) +{ + uint32_t first_buffer_id = get_buffer_id(add(small.c_str())); + for (uint32_t i = 0; i < (SmallOffsetStringTest::EntryRefType::offsetSize() - 1); ++i) { + uint32_t buffer_id = get_buffer_id(add(small.c_str())); + EXPECT_EQ(first_buffer_id, buffer_id); + } + + uint32_t second_buffer_id = get_buffer_id(add(small.c_str())); + EXPECT_NE(first_buffer_id, second_buffer_id); + for (uint32_t i = 0; i < 10u; ++i) { + uint32_t buffer_id = get_buffer_id(add(small.c_str())); + EXPECT_EQ(second_buffer_id, buffer_id); + } +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt b/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt index 836fb33cdab..6f647a78a39 100644 --- a/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt +++ b/vespalib/src/vespa/vespalib/datastore/CMakeLists.txt @@ -8,5 +8,6 @@ vespa_add_library(vespalib_vespalib_datastore OBJECT datastorebase.cpp entryref.cpp unique_store_dictionary.cpp + unique_store_string_allocator.cpp DEPENDS ) diff --git a/vespalib/src/vespa/vespalib/datastore/buffer_type.cpp b/vespalib/src/vespa/vespalib/datastore/buffer_type.cpp index 3955a6cb399..e7b878df3a1 100644 --- a/vespalib/src/vespa/vespalib/datastore/buffer_type.cpp +++ b/vespalib/src/vespa/vespalib/datastore/buffer_type.cpp @@ -15,8 +15,10 @@ constexpr float DEFAULT_ALLOC_GROW_FACTOR = 0.2; void BufferTypeBase::CleanContext::extraBytesCleaned(size_t value) { - assert(_extraBytes >= value); - _extraBytes -= value; + assert(_extraUsedBytes >= value); + assert(_extraHoldBytes >= value); + _extraUsedBytes -= value; + _extraHoldBytes -= value; } BufferTypeBase::BufferTypeBase(uint32_t arraySize, diff --git a/vespalib/src/vespa/vespalib/datastore/buffer_type.h b/vespalib/src/vespa/vespalib/datastore/buffer_type.h index 116b45fe106..eb432e6dc76 100644 --- a/vespalib/src/vespa/vespalib/datastore/buffer_type.h +++ b/vespalib/src/vespa/vespalib/datastore/buffer_type.h @@ -32,9 +32,10 @@ protected: public: class CleanContext { private: - size_t &_extraBytes; + size_t &_extraUsedBytes; + size_t &_extraHoldBytes; public: - CleanContext(size_t &extraBytes) : _extraBytes(extraBytes) {} + CleanContext(size_t &extraUsedBytes, size_t &extraHoldBytes) : _extraUsedBytes(extraUsedBytes), _extraHoldBytes(extraHoldBytes) {} void extraBytesCleaned(size_t value); }; diff --git a/vespalib/src/vespa/vespalib/datastore/bufferstate.h b/vespalib/src/vespa/vespalib/datastore/bufferstate.h index 1cc26d8dd18..3fe32bb59a1 100644 --- a/vespalib/src/vespa/vespalib/datastore/bufferstate.h +++ b/vespalib/src/vespa/vespalib/datastore/bufferstate.h @@ -147,7 +147,7 @@ public: _extraUsedBytes += extraBytes; } void cleanHold(void *buffer, size_t offset, size_t numElems) { - _typeHandler->cleanHold(buffer, offset, numElems, BufferTypeBase::CleanContext(_extraHoldBytes)); + _typeHandler->cleanHold(buffer, offset, numElems, BufferTypeBase::CleanContext(_extraUsedBytes, _extraHoldBytes)); } void dropBuffer(void *&buffer); uint32_t getTypeId() const { return _typeId; } @@ -176,6 +176,7 @@ public: assert(_holdElems >= value); _holdElems -= value; } + void incExtraUsedBytes(size_t value) { _extraUsedBytes += value; } void incExtraHoldBytes(size_t value) { _extraHoldBytes += value; } diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store.hpp index b8092e25255..f43f0116517 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store.hpp @@ -48,14 +48,14 @@ void UniqueStore::remove(EntryRef ref) { auto &wrapped_entry = _allocator.getWrapped(ref); - if (wrapped_entry.get_ref_count() > 1u) { - wrapped_entry.dec_ref_count(); - } else { + auto ref_count = wrapped_entry.get_ref_count(); + assert(ref_count > 0u); + wrapped_entry.dec_ref_count(); + if (ref_count == 1u) { EntryType unused{}; Compare comp(_store, unused); - if (_dict->remove(comp, ref)) { - _allocator.hold(ref); - } + _dict->remove(comp, ref); + _allocator.hold(ref); } } diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_allocator.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store_allocator.hpp index 0849fc5129b..5bee5baf9c8 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_allocator.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_allocator.hpp @@ -19,6 +19,7 @@ UniqueStoreAllocator::UniqueStoreAllocator() auto typeId = _store.addType(&_typeHandler); assert(typeId == 0u); _store.initActiveBuffers(); + _store.enableFreeLists(); } template @@ -32,7 +33,7 @@ template EntryRef UniqueStoreAllocator::allocate(const EntryType& value) { - return _store.template allocator(0).alloc(value).ref; + return _store.template freeListAllocator>(0).alloc(value).ref; } template diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.cpp b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.cpp index b9af208263c..be28f8f7358 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.cpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.cpp @@ -66,16 +66,13 @@ UniqueStoreDictionary::find(const EntryComparator &comp) } } -bool +void UniqueStoreDictionary::remove(const EntryComparator &comp, EntryRef ref) { assert(ref.valid()); auto itr = _dict.lowerBound(ref, comp); - if (itr.valid() && itr.getKey() == ref) { - _dict.remove(itr); - return true; - } - return false; + assert(itr.valid() && itr.getKey() == ref); + _dict.remove(itr); } void diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h index 4fa71c3f9a9..a870d759ce3 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h @@ -32,7 +32,7 @@ public: void trim_hold_lists(generation_t firstUsed) override; UniqueStoreAddResult add(const EntryComparator& comp, std::function insertEntry) override; EntryRef find(const EntryComparator& comp) override; - bool remove(const EntryComparator& comp, EntryRef ref) override; + void remove(const EntryComparator& comp, EntryRef ref) override; void move_entries(ICompactable& compactable) override; uint32_t get_num_uniques() const override; vespalib::MemoryUsage get_memory_usage() const override; diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary_base.h b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary_base.h index 1d378653c4f..82c8687513a 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary_base.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary_base.h @@ -9,7 +9,7 @@ namespace search::datastore { class EntryComparator; -class ICompactable; +struct ICompactable; class UniqueStoreAddResult; /* @@ -25,7 +25,7 @@ public: virtual void trim_hold_lists(generation_t firstUsed) = 0; virtual UniqueStoreAddResult add(const EntryComparator& comp, std::function insertEntry) = 0; virtual EntryRef find(const EntryComparator& comp) = 0; - virtual bool remove(const EntryComparator& comp, EntryRef ref) = 0; + virtual void remove(const EntryComparator& comp, EntryRef ref) = 0; virtual void move_entries(ICompactable& compactable) = 0; virtual uint32_t get_num_uniques() const = 0; virtual vespalib::MemoryUsage get_memory_usage() const = 0; diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_entry.h b/vespalib/src/vespa/vespalib/datastore/unique_store_entry.h index b3468ecf523..b9157901b12 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_entry.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_entry.h @@ -3,9 +3,17 @@ #pragma once #include "unique_store_entry_base.h" +#include namespace search::datastore { +template +struct UniqueStoreEntryReclaimer { + static void reclaim(EntryType *entry) { + assert(entry->get_ref_count() == 0u); + } +}; + /* * Class for entries in unique store. */ @@ -31,6 +39,7 @@ public: } const EntryType& value() const { return _value; } + EntryType& value() { return _value; } }; } diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_entry_base.h b/vespalib/src/vespa/vespalib/datastore/unique_store_entry_base.h index a048abb9126..cda2ea8337e 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_entry_base.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_entry_base.h @@ -12,11 +12,10 @@ namespace search::datastore { class UniqueStoreEntryBase { mutable uint32_t _ref_count; protected: - UniqueStoreEntryBase() + constexpr UniqueStoreEntryBase() : _ref_count(0u) { } - ~UniqueStoreEntryBase() = default; public: uint32_t get_ref_count() const { return _ref_count; } void set_ref_count(uint32_t ref_count) const { _ref_count = ref_count; } diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.cpp b/vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.cpp new file mode 100644 index 00000000000..4c665ee0517 --- /dev/null +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.cpp @@ -0,0 +1,82 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "unique_store_string_allocator.hpp" + +namespace search::datastore { + +namespace string_allocator { + +std::vector array_sizes = { 16, 24, 32, 40, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 256 }; + +const size_t small_string_entry_value_offset = UniqueStoreSmallStringEntry().value_offset(); + +uint32_t +get_type_id(size_t string_len) +{ + auto len = small_string_entry_value_offset + string_len + 1; + auto itr = std::lower_bound(array_sizes.cbegin(), array_sizes.cend(), len); + if (itr != array_sizes.end()) { + return itr - array_sizes.cbegin() + 1; + } else { + return 0; + } +} + +} + +UniqueStoreSmallStringBufferType::UniqueStoreSmallStringBufferType(uint32_t array_size, uint32_t max_arrays) + : BufferType(array_size, 2u, max_arrays, NUM_ARRAYS_FOR_NEW_UNIQUESTORE_BUFFER, ALLOC_GROW_FACTOR) +{ +} + +UniqueStoreSmallStringBufferType::~UniqueStoreSmallStringBufferType() = default; + +void +UniqueStoreSmallStringBufferType::destroyElements(void *, size_t) +{ + static_assert(std::is_trivially_destructible::value, + "UniqueStoreSmallStringEntry must be trivially destructable"); +} + +void +UniqueStoreSmallStringBufferType::fallbackCopy(void *newBuffer, const void *oldBuffer, size_t numElems) +{ + static_assert(std::is_trivially_copyable::value, + "UniqueStoreSmallStringEntry must be trivially copyable"); + memcpy(newBuffer, oldBuffer, numElems); +} + +void +UniqueStoreSmallStringBufferType::cleanHold(void *buffer, size_t offset, size_t numElems, CleanContext) +{ + void *e = static_cast(buffer) + offset; + void *e_end = static_cast(e) + numElems; + size_t array_size = getArraySize(); + while (e < e_end) { + static_cast(e)->clean_hold(array_size); + e = static_cast(e) + array_size; + } + assert(e == e_end); +} + +UniqueStoreExternalStringBufferType::UniqueStoreExternalStringBufferType(uint32_t array_size, uint32_t max_arrays) + : BufferType>(array_size, 2u, max_arrays, NUM_ARRAYS_FOR_NEW_UNIQUESTORE_BUFFER, ALLOC_GROW_FACTOR) +{ +} + +UniqueStoreExternalStringBufferType::~UniqueStoreExternalStringBufferType() = default; + +void +UniqueStoreExternalStringBufferType::cleanHold(void *buffer, size_t offset, size_t numElems, CleanContext cleanCtx) +{ + UniqueStoreEntry *elem = static_cast *>(buffer) + offset; + for (size_t i = 0; i < numElems; ++i) { + cleanCtx.extraBytesCleaned(elem->value().size() + 1); + std::string().swap(elem->value()); + ++elem; + } +} + +template class UniqueStoreStringAllocator>; + +} diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.h b/vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.h new file mode 100644 index 00000000000..e914c4c7f66 --- /dev/null +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.h @@ -0,0 +1,126 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "datastore.h" +#include "entryref.h" +#include "unique_store_add_result.h" +#include "unique_store_entry.h" +#include "i_compactable.h" +#include + +namespace search::datastore { + +namespace string_allocator { + +extern std::vector array_sizes; +uint32_t get_type_id(size_t string_len); + +}; + +/* + * Entry type for small strings. array_size is passed to constructors and + * clean_hold to tell how many bytes are set aside for the entry. + * array_size is supposed to be > sizeof(UniqueStoreSmallStringEntry). + * + * Class is trivially destructable, i.e. no need to call destructor. + * Class is trivially copyable, i.e. memcpy can be used to make a copy. + */ +class UniqueStoreSmallStringEntry : public UniqueStoreEntryBase { + char _value[0]; +public: + constexpr UniqueStoreSmallStringEntry() + : UniqueStoreEntryBase(), + _value() + { } + + UniqueStoreSmallStringEntry(const char *value, size_t value_len, size_t array_size) + : UniqueStoreEntryBase() + { + assert(value_offset() + value_len < array_size); + memcpy(&_value[0], value, value_len); + memset(&_value[0] + value_len, 0, array_size - value_len - value_offset()); + } + + void clean_hold(size_t array_size) { + memset(&_value[0], 0, array_size - value_offset()); + } + + const char *value() const { return &_value[0]; } + size_t value_offset() const { return &_value[0] - reinterpret_cast(this); } +}; + +/* + * Buffer type for small strings in unique store. Each entry uses array_size + * bytes + */ +class UniqueStoreSmallStringBufferType : public BufferType { +public: + UniqueStoreSmallStringBufferType(uint32_t array_size, uint32_t max_arrays); + ~UniqueStoreSmallStringBufferType() override; + void destroyElements(void *, size_t) override; + void fallbackCopy(void *newBuffer, const void *oldBuffer, size_t numElems) override; + void cleanHold(void *buffer, size_t offset, size_t numElems, CleanContext) override; +}; + +/* + * Buffer type for external strings in unique store. + */ +class UniqueStoreExternalStringBufferType : public BufferType> { +public: + UniqueStoreExternalStringBufferType(uint32_t array_size, uint32_t max_arrays); + ~UniqueStoreExternalStringBufferType() override; + void cleanHold(void *buffer, size_t offset, size_t numElems, CleanContext cleanCtx) override; +}; + +/** + * Allocator for unique NUL-terminated strings that is accessed via a + * 32-bit EntryRef. + */ +template > +class UniqueStoreStringAllocator : public ICompactable +{ +public: + using DataStoreType = DataStoreT; + using EntryType = const char *; + using WrappedExternalEntryType = UniqueStoreEntry; + using RefType = RefT; +private: + DataStoreType _store; + std::vector> _type_handlers; + + static uint32_t get_type_id(const char *value); + +public: + UniqueStoreStringAllocator(); + ~UniqueStoreStringAllocator() override; + EntryRef allocate(const char *value); + void hold(EntryRef ref); + EntryRef move(EntryRef ref) override; + const UniqueStoreEntryBase& getWrapped(EntryRef ref) const + { + RefType iRef(ref); + auto &state = _store.getBufferState(iRef.bufferId()); + auto type_id = state.getTypeId(); + if (type_id != 0) { + return *reinterpret_cast(_store.template getEntryArray(iRef, state.getArraySize())); + } else { + return *_store.template getEntry(iRef); + } + } + const char *get(EntryRef ref) const + { + RefType iRef(ref); + auto &state = _store.getBufferState(iRef.bufferId()); + auto type_id = state.getTypeId(); + if (type_id != 0) { + return reinterpret_cast(_store.template getEntryArray(iRef, state.getArraySize()))->value(); + } else { + return _store.template getEntry(iRef)->value().c_str(); + } + } + DataStoreType& getDataStore() { return _store; } + const DataStoreType& getDataStore() const { return _store; } +}; + +} diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.hpp new file mode 100644 index 00000000000..9bd2e050507 --- /dev/null +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.hpp @@ -0,0 +1,98 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "unique_store_string_allocator.h" +#include "datastore.hpp" + +namespace search::datastore { + +constexpr size_t NUM_ARRAYS_FOR_NEW_UNIQUESTORE_BUFFER = 1024u; +constexpr float ALLOC_GROW_FACTOR = 0.2; + +template +UniqueStoreStringAllocator::UniqueStoreStringAllocator() + : ICompactable(), + _store(), + _type_handlers() +{ + _type_handlers.emplace_back(std::make_unique(1, RefT::offsetSize())); + for (auto size : string_allocator::array_sizes) { + _type_handlers.emplace_back(std::make_unique(size, RefT::offsetSize())); + } + uint32_t exp_type_id = 0; + for (auto &type_handler : _type_handlers) { + auto type_id = _store.addType(type_handler.get()); + assert(type_id == exp_type_id); + ++exp_type_id; + } + _store.initActiveBuffers(); + _store.enableFreeLists(); +} + +template +UniqueStoreStringAllocator::~UniqueStoreStringAllocator() +{ + _store.clearHoldLists(); + _store.dropBuffers(); +} + +template +EntryRef +UniqueStoreStringAllocator::allocate(const char *value) +{ + size_t value_len = strlen(value); + uint32_t type_id = string_allocator::get_type_id(value_len); + if (type_id != 0) { + size_t array_size = string_allocator::array_sizes[type_id - 1]; + auto handle = _store.template freeListRawAllocator(type_id).alloc(array_size); + new (static_cast(handle.data)) UniqueStoreSmallStringEntry(value, value_len, array_size); + return handle.ref; + } else { + auto handle = _store.template freeListAllocator>(0).alloc(std::string(value)); + RefT iRef(handle.ref); + auto &state = _store.getBufferState(iRef.bufferId()); + state.incExtraUsedBytes(value_len + 1); + return handle.ref; + } +} + +template +void +UniqueStoreStringAllocator::hold(EntryRef ref) +{ + RefT iRef(ref); + uint32_t type_id = _store.getTypeId(iRef.bufferId()); + if (type_id != 0) { + size_t array_size = string_allocator::array_sizes[type_id - 1]; + _store.holdElem(ref, array_size); + } else { + auto &value = _store.template getEntry(iRef)->value(); + _store.holdElem(ref, 1, value.size() + 1); + } +} + +template +EntryRef +UniqueStoreStringAllocator::move(EntryRef ref) +{ + RefT iRef(ref); + uint32_t type_id = _store.getTypeId(iRef.bufferId()); + if (type_id != 0) { + static_assert(std::is_trivially_copyable::value, + "UniqueStoreSmallStringEntry must be trivially copyable"); + size_t array_size = string_allocator::array_sizes[type_id - 1]; + auto handle = _store.template rawAllocator(type_id).alloc(array_size); + auto orig = _store.template getEntryArray(iRef, array_size); + memcpy(handle.data, orig, array_size); + return handle.ref; + } else { + auto handle = _store.template allocator(0).alloc(*_store.template getEntry(iRef)); + auto &state = _store.getBufferState(RefT(handle.ref).bufferId()); + auto &value = static_cast(handle.data)->value(); + state.incExtraUsedBytes(value.size() + 1); + return handle.ref; + } +} + +} diff --git a/vespalib/src/vespa/vespalib/test/datastore/memstats.h b/vespalib/src/vespa/vespalib/test/datastore/memstats.h index 3486f255358..6f3c8b86482 100644 --- a/vespalib/src/vespa/vespalib/test/datastore/memstats.h +++ b/vespalib/src/vespa/vespalib/test/datastore/memstats.h @@ -28,6 +28,11 @@ struct MemStats _dead += val; return *this; } + MemStats &decUsed(size_t val) { + assert(_used >= val); + _used -= val; + return *this; + } MemStats &decHold(size_t val) { assert(_hold >= val); _hold -= val; -- cgit v1.2.3