summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@broadpark.no>2019-09-01 14:58:53 +0200
committerGeir Storli <geirst@verizonmedia.com>2019-09-02 08:57:40 +0000
commitfa8b8a17bf5b24ac95eaebfaeaa1984e5a017e1c (patch)
treee20eed45409273116a4c25c4bcf6de24f4f5e2e0
parent94ab377491f19e0b4ea80201eb0340d6e4ee55b2 (diff)
Restore enum store compaction support for enum attributes.
-rw-r--r--searchlib/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/attribute/enum_attribute_compaction/CMakeLists.txt10
-rw-r--r--searchlib/src/tests/attribute/enum_attribute_compaction/enum_attribute_compaction_test.cpp229
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributevector.h4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumattribute.h3
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstore.h5
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstore.hpp49
-rw-r--r--searchlib/src/vespa/searchlib/attribute/i_enum_store.h16
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp38
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multienumattribute.h3
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp43
-rw-r--r--searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp8
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp23
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singleenumattribute.h5
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp38
-rw-r--r--vespalib/src/tests/datastore/unique_store/unique_store_test.cpp7
-rw-r--r--vespalib/src/vespa/vespalib/datastore/unique_store.h7
-rw-r--r--vespalib/src/vespa/vespalib/datastore/unique_store.hpp66
-rw-r--r--vespalib/src/vespa/vespalib/datastore/unique_store_remapper.h60
19 files changed, 504 insertions, 111 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 07ac6bb699c..5d3ee9d457d 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -79,6 +79,7 @@ vespa_define_module(
src/tests/attribute/document_weight_iterator
src/tests/attribute/enumeratedsave
src/tests/attribute/enumstore
+ src/tests/attribute/enum_attribute_compaction
src/tests/attribute/extendattributes
src/tests/attribute/guard
src/tests/attribute/imported_attribute_vector
diff --git a/searchlib/src/tests/attribute/enum_attribute_compaction/CMakeLists.txt b/searchlib/src/tests/attribute/enum_attribute_compaction/CMakeLists.txt
new file mode 100644
index 00000000000..6886a161fdf
--- /dev/null
+++ b/searchlib/src/tests/attribute/enum_attribute_compaction/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+find_package(GTest REQUIRED)
+vespa_add_executable(searchlib_enum_attribute_compaction_test_app TEST
+ SOURCES
+ enum_attribute_compaction_test.cpp
+ DEPENDS
+ searchlib
+ GTest::GTest
+)
+vespa_add_test(NAME searchlib_enum_attribute_compaction_test_app COMMAND searchlib_enum_attribute_compaction_test_app)
diff --git a/searchlib/src/tests/attribute/enum_attribute_compaction/enum_attribute_compaction_test.cpp b/searchlib/src/tests/attribute/enum_attribute_compaction/enum_attribute_compaction_test.cpp
new file mode 100644
index 00000000000..4cf46a75827
--- /dev/null
+++ b/searchlib/src/tests/attribute/enum_attribute_compaction/enum_attribute_compaction_test.cpp
@@ -0,0 +1,229 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/gtest/gtest.h>
+#include <vespa/searchcommon/attribute/attributecontent.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/stringbase.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP("enum_attribute_compaction_test");
+
+using search::IntegerAttribute;
+using search::StringAttribute;
+using search::AttributeVector;
+using search::attribute::Config;
+using search::attribute::BasicType;
+using search::attribute::CollectionType;
+using EnumHandle = search::attribute::IAttributeVector::EnumHandle;
+
+template <typename VectorType> struct TestData;
+
+template <>
+struct TestData<IntegerAttribute> {
+ using BufferType = search::attribute::IntegerContent;
+ using CheckType = int32_t;
+ static constexpr BasicType::Type basic_type = BasicType::INT32;
+ static int32_t make_value(uint32_t doc_id, uint32_t idx) { return doc_id * 10 + idx; }
+ static int32_t as_add(int32_t value) { return value; }
+ static int32_t make_undefined_value() { return std::numeric_limits<int32_t>::min(); }
+};
+
+template <>
+struct TestData<StringAttribute> {
+ using BufferType = search::attribute::ConstCharContent;
+ using CheckType = std::string;
+ static constexpr BasicType::Type basic_type = BasicType::STRING;
+ static std::string make_value(uint32_t doc_id, uint32_t idx) {
+ uint32_t combined = doc_id * 10 + idx;
+ vespalib::asciistream s;
+ if (doc_id == 2 && idx == 0) {
+ // Longer string will be stored in a different buffer
+ s << "bb345678901234";
+ } else {
+ s << combined;
+ }
+ return s.str();
+ }
+ static const char *as_add(const std::string &value) { return value.c_str(); }
+ static std::string make_undefined_value() { return std::string(); }
+};
+
+class CompactionTestBase : public ::testing::TestWithParam<CollectionType::Type> {
+public:
+ std::shared_ptr<AttributeVector> _v;
+
+ CompactionTestBase()
+ : _v()
+ {
+ }
+ void SetUp() override;
+ virtual BasicType get_basic_type() const = 0;
+ CollectionType get_collection_type() const noexcept { return GetParam(); }
+ void addDocs(uint32_t num_docs);
+ uint32_t count_changed_enum_handles(const std::vector<EnumHandle> &handles, uint32_t stride);
+};
+
+void
+CompactionTestBase::SetUp()
+{
+ Config cfg(get_basic_type(), get_collection_type());
+ cfg.setFastSearch(true);
+ _v = search::AttributeFactory::createAttribute("test", cfg);
+}
+
+void
+CompactionTestBase::addDocs(uint32_t num_docs)
+{
+ uint32_t start_doc;
+ uint32_t end_doc;
+ _v->addDocs(start_doc, end_doc, num_docs);
+ for (uint32_t doc = start_doc; doc <= end_doc; ++doc) {
+ _v->clearDoc(doc);
+ }
+ _v->commit();
+}
+
+uint32_t
+CompactionTestBase::count_changed_enum_handles(const std::vector<EnumHandle> &handles, uint32_t stride)
+{
+ uint32_t changed = 0;
+ for (uint32_t doc_id = 0; doc_id < handles.size(); doc_id += stride) {
+ if (_v->getEnum(doc_id) != handles[doc_id]) {
+ ++changed;
+ }
+ }
+ return changed;
+}
+
+template <typename VectorType>
+class CompactionTest : public CompactionTestBase
+{
+public:
+ CompactionTest();
+ void set_values(uint32_t doc_id);
+ void check_values(uint32_t doc_id);
+ void check_cleared_values(uint32_t doc_id);
+ void test_enum_store_compaction();
+ BasicType get_basic_type() const override { return TestData<VectorType>::basic_type; }
+};
+
+template <typename VectorType>
+CompactionTest<VectorType>::CompactionTest()
+ : CompactionTestBase()
+{
+}
+
+template <typename VectorType>
+void
+CompactionTest<VectorType>::set_values(uint32_t doc_id)
+{
+ using MyTestData = TestData<VectorType>;
+ auto &typed_v = dynamic_cast<VectorType &>(*_v);
+ _v->clearDoc(doc_id);
+ if (_v->hasMultiValue()) {
+ EXPECT_TRUE(typed_v.append(doc_id, MyTestData::as_add(MyTestData::make_value(doc_id, 0)), 1));
+ EXPECT_TRUE(typed_v.append(doc_id, MyTestData::as_add(MyTestData::make_value(doc_id, 1)), 1));
+ } else {
+ EXPECT_TRUE(typed_v.update(doc_id, MyTestData::as_add(MyTestData::make_value(doc_id, 0))));
+ }
+ _v->commit();
+}
+
+template <typename VectorType>
+void
+CompactionTest<VectorType>::check_values(uint32_t doc_id)
+{
+ using MyTestData = TestData<VectorType>;
+ using CheckType = typename MyTestData::CheckType;
+ typename MyTestData::BufferType buffer;
+ buffer.fill(*_v, doc_id);
+ if (_v->hasMultiValue()) {
+ EXPECT_EQ(2u, buffer.size());
+ EXPECT_EQ(CheckType(buffer[0]), MyTestData::make_value(doc_id, 0));
+ EXPECT_EQ(CheckType(buffer[1]), MyTestData::make_value(doc_id, 1));
+ } else {
+ EXPECT_EQ(1u, buffer.size());
+ EXPECT_EQ(CheckType(buffer[0]), MyTestData::make_value(doc_id, 0));
+ }
+}
+
+template <typename VectorType>
+void
+CompactionTest<VectorType>::check_cleared_values(uint32_t doc_id)
+{
+ using MyTestData = TestData<VectorType>;
+ using CheckType = typename MyTestData::CheckType;
+ typename MyTestData::BufferType buffer;
+ buffer.fill(*_v, doc_id);
+ if (_v->hasMultiValue()) {
+ EXPECT_EQ(0u, buffer.size());
+ } else {
+ EXPECT_EQ(1u, buffer.size());
+ EXPECT_EQ(CheckType(buffer[0]), MyTestData::make_undefined_value());
+ }
+}
+
+template <typename VectorType>
+void
+CompactionTest<VectorType>::test_enum_store_compaction()
+{
+ constexpr size_t DEAD_BYTES_SLACK = 0x10000u;
+ constexpr uint32_t canary_stride = 256;
+ uint32_t dead_limit = DEAD_BYTES_SLACK / 8;
+ uint32_t doc_count = dead_limit * 3;
+ if (_v->hasMultiValue() || std::is_same_v<VectorType,StringAttribute>) {
+ doc_count /= 2;
+ }
+ std::vector<EnumHandle> enum_handles;
+ addDocs(doc_count);
+ enum_handles.emplace_back(_v->getEnum(0));
+ uint32_t doc_id;
+ for (doc_id = 1; doc_id < doc_count; ++doc_id) {
+ set_values(doc_id);
+ enum_handles.emplace_back(_v->getEnum(doc_id));
+ }
+ uint32_t last_cleared_doc_id = 0;
+ for (doc_id = 1; doc_id < doc_count; doc_id += 2) {
+ _v->clearDoc(doc_id);
+ _v->commit(true);
+ enum_handles[doc_id] = enum_handles[0];
+ last_cleared_doc_id = doc_id;
+ if (count_changed_enum_handles(enum_handles, canary_stride) != 0) {
+ LOG(info, "Detected enum store compaction at doc_id %u", doc_id);
+ break;
+ }
+ }
+ EXPECT_LT(doc_id, doc_count);
+ uint32_t changed_enum_handles = count_changed_enum_handles(enum_handles, 1);
+ LOG(info, "%u enum handles changed", changed_enum_handles);
+ EXPECT_LT(0u, changed_enum_handles);
+ for (doc_id = 1; doc_id < doc_count; ++doc_id) {
+ if ((doc_id % 2) == 0 || doc_id > last_cleared_doc_id) {
+ check_values(doc_id);
+ } else {
+ check_cleared_values(doc_id);
+ }
+ }
+}
+
+using IntegerCompactionTest = CompactionTest<IntegerAttribute>;
+
+TEST_P(IntegerCompactionTest, compact)
+{
+ test_enum_store_compaction();
+}
+
+INSTANTIATE_TEST_CASE_P(IntegerCompactionTestSet, IntegerCompactionTest, ::testing::Values(CollectionType::SINGLE, CollectionType::ARRAY, CollectionType::WSET));
+
+using StringCompactionTest = CompactionTest<StringAttribute>;
+
+TEST_P(StringCompactionTest, compact)
+{
+ test_enum_store_compaction();
+}
+
+INSTANTIATE_TEST_CASE_P(StringCompactionTestSet, StringCompactionTest, ::testing::Values(CollectionType::SINGLE, CollectionType::ARRAY, CollectionType::WSET));
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h
index b5474fda9c9..0063e4c407b 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributevector.h
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h
@@ -233,6 +233,7 @@ protected:
mutable AttributeVector * _attr;
};
+public:
class EnumModifier
{
std::unique_lock<std::shared_timed_mutex> _enumLock;
@@ -254,6 +255,7 @@ protected:
};
EnumModifier getEnumModifier();
+protected:
ValueModifier getValueModifier() { return ValueModifier(*this); }
void updateCommittedDocIdLimit() {
@@ -381,11 +383,11 @@ protected:
virtual vespalib::MemoryUsage getEnumStoreValuesMemoryUsage() const;
virtual vespalib::AddressSpace getEnumStoreAddressSpaceUsage() const;
virtual vespalib::AddressSpace getMultiValueAddressSpaceUsage() const;
- void logEnumStoreEvent(const char *reason, const char *stage);
public:
DECLARE_IDENTIFIABLE_ABSTRACT(AttributeVector);
bool isLoaded() const { return _loaded; }
+ void logEnumStoreEvent(const char *reason, const char *stage);
/** Return the fixed length of the attribute. If 0 then you must inquire each document. */
size_t getFixedWidth() const override { return _config.basicType().fixedSize(); }
diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.h b/searchlib/src/vespa/searchlib/attribute/enumattribute.h
index db8952d4f71..d96b0543d71 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.h
@@ -53,7 +53,7 @@ public:
protected:
using EnumIndex = IEnumStore::Index;
- using EnumIndexMap = IEnumStore::EnumIndexMap;
+ using EnumIndexRemapper = IEnumStore::EnumIndexRemapper;
EnumStore _enumStore;
@@ -77,7 +77,6 @@ protected:
*/
void insertNewUniqueValues(EnumStoreBatchUpdater& updater);
virtual void considerAttributeChange(const Change & c, UniqueSet & newUniques) = 0;
- virtual void reEnumerate(const EnumIndexMap &) = 0;
vespalib::MemoryUsage getEnumStoreValuesMemoryUsage() const override;
vespalib::AddressSpace getEnumStoreAddressSpaceUsage() const override;
public:
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h
index 032acfc0ee2..94252239975 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.h
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h
@@ -96,6 +96,8 @@ public:
private:
UniqueStoreType _store;
IEnumStoreDictionary& _dict;
+ vespalib::MemoryUsage _cached_values_memory_usage;
+ vespalib::AddressSpace _cached_values_address_space_usage;
EnumStoreT(const EnumStoreT & rhs) = delete;
EnumStoreT & operator=(const EnumStoreT & rhs) = delete;
@@ -243,6 +245,9 @@ public:
bool findIndex(DataType value, Index &idx) const;
void freeUnusedEnums(bool movePostingidx) override;
void freeUnusedEnums(const IndexSet& toRemove);
+ vespalib::MemoryUsage update_stat() override;
+ std::unique_ptr<EnumIndexRemapper> consider_compact(const CompactionStrategy& compaction_strategy) override;
+ std::unique_ptr<EnumIndexRemapper> compact_worst(bool compact_memory, bool compact_address_space) override;
private:
template <typename Dictionary>
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
index 254f517ada2..574712798c2 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
@@ -18,6 +18,7 @@
#include <vespa/vespalib/datastore/unique_store_string_allocator.hpp>
#include <vespa/vespalib/util/array.hpp>
#include <vespa/vespalib/util/bufferwriter.h>
+#include <vespa/searchcommon/common/compaction_strategy.h>
namespace search {
@@ -34,7 +35,9 @@ void EnumStoreT<EntryType>::freeUnusedEnum(Index idx, IndexSet& unused)
template <typename EntryType>
EnumStoreT<EntryType>::EnumStoreT(bool has_postings)
: _store(make_enum_store_dictionary(*this, has_postings)),
- _dict(static_cast<IEnumStoreDictionary&>(_store.get_dictionary()))
+ _dict(static_cast<IEnumStoreDictionary&>(_store.get_dictionary())),
+ _cached_values_memory_usage(),
+ _cached_values_address_space_usage(0, 0, (1ull << 32))
{
}
@@ -253,4 +256,48 @@ EnumStoreT<EntryType>::addEnum(DataType value, Index& newIdx)
}
}
+template <typename EntryType>
+vespalib::MemoryUsage
+EnumStoreT<EntryType>::update_stat()
+{
+ auto &store = _store.get_allocator().get_data_store();
+ _cached_values_memory_usage = store.getMemoryUsage();
+ _cached_values_address_space_usage = store.getAddressSpaceUsage();
+ auto retval = _cached_values_memory_usage;
+ retval.merge(_dict.get_memory_usage());
+ return retval;
+}
+
+namespace {
+
+// minimum dead bytes in enum store before consider compaction
+constexpr size_t DEAD_BYTES_SLACK = 0x10000u;
+constexpr size_t DEAD_ADDRESS_SPACE_SLACK = 0x10000u;
+
+}
+template <typename EntryType>
+std::unique_ptr<IEnumStore::EnumIndexRemapper>
+EnumStoreT<EntryType>::consider_compact(const CompactionStrategy& compaction_strategy)
+{
+ size_t used_bytes = _cached_values_memory_usage.usedBytes();
+ size_t dead_bytes = _cached_values_memory_usage.deadBytes();
+ size_t used_address_space = _cached_values_address_space_usage.used();
+ size_t dead_address_space = _cached_values_address_space_usage.dead();
+ bool compact_memory = ((dead_bytes >= DEAD_BYTES_SLACK) &&
+ (used_bytes * compaction_strategy.getMaxDeadBytesRatio() < dead_bytes));
+ bool compact_address_space = ((dead_address_space >= DEAD_ADDRESS_SPACE_SLACK) &&
+ (used_address_space * compaction_strategy.getMaxDeadAddressSpaceRatio() < dead_address_space));
+ if (compact_memory || compact_address_space) {
+ return compact_worst(compact_memory, compact_address_space);
+ }
+ return std::unique_ptr<IEnumStore::EnumIndexRemapper>();
+}
+
+template <typename EntryType>
+std::unique_ptr<IEnumStore::EnumIndexRemapper>
+EnumStoreT<EntryType>::compact_worst(bool compact_memory, bool compact_address_space)
+{
+ return _store.compact_worst(compact_memory, compact_address_space);
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
index f79098a67df..2a9842075e6 100644
--- a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
+++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
@@ -12,9 +12,16 @@
namespace search {
-namespace datastore { class DataStoreBase; }
+namespace datastore {
+
+class DataStoreBase;
+
+template <typename> class UniqueStoreRemapper;
+
+}
class BufferWriter;
+class CompactionStrategy;
class IEnumStoreDictionary;
/**
@@ -26,9 +33,7 @@ public:
using IndexVector = vespalib::Array<Index>;
using EnumHandle = attribute::IAttributeVector::EnumHandle;
using EnumVector = vespalib::Array<uint32_t>;
-
- using EnumIndexMap = vespalib::hash_map<Index, Index, vespalib::hash<Index>, std::equal_to<Index>,
- vespalib::hashtable_base::and_modulator>;
+ using EnumIndexRemapper = datastore::UniqueStoreRemapper<Index>;
struct CompareEnumIndex {
using Index = IEnumStore::Index;
@@ -54,6 +59,9 @@ public:
virtual uint32_t getNumUniques() const = 0;
virtual vespalib::MemoryUsage getValuesMemoryUsage() const = 0;
virtual vespalib::MemoryUsage getDictionaryMemoryUsage() const = 0;
+ virtual vespalib::MemoryUsage update_stat() = 0;
+ virtual std::unique_ptr<EnumIndexRemapper> consider_compact(const CompactionStrategy& compaction_strategy) = 0;
+ virtual std::unique_ptr<EnumIndexRemapper> compact_worst(bool compact_memory, bool compact_address_space) = 0;
template <typename TreeT>
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp
index 05e83012421..1071cc0a835 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp
@@ -11,5 +11,41 @@ IWeightedIndexVector::getEnumHandles(uint32_t, const WeightedIndex * &) const {
throw std::runtime_error("IWeightedIndexVector::getEnumHandles() not implmented");
}
-} // namespace search
+}
+
+namespace search::multienumattribute {
+
+using EnumIndex = IEnumStore::Index;
+using EnumIndexRemapper = IEnumStore::EnumIndexRemapper;
+using Value = multivalue::Value<EnumIndex>;
+using WeightedValue = multivalue::WeightedValue<EnumIndex>;
+
+template <typename WeightedIndex>
+void
+remap_enum_store_refs(const EnumIndexRemapper& remapper, AttributeVector& v, attribute::MultiValueMapping<WeightedIndex>& multi_value_mapping)
+{
+ using WeightedIndexVector = std::vector<WeightedIndex>;
+ // update multi_value_mapping with new EnumIndex values after enum store has been compacted.
+ v.logEnumStoreEvent("compactfixup", "drain");
+ {
+ AttributeVector::EnumModifier enum_guard(v.getEnumModifier());
+ v.logEnumStoreEvent("compactfixup", "start");
+ for (uint32_t doc = 0; doc < v.getNumDocs(); ++doc) {
+ vespalib::ConstArrayRef<WeightedIndex> indicesRef(multi_value_mapping.get(doc));
+ WeightedIndexVector indices(indicesRef.cbegin(), indicesRef.cend());
+ for (uint32_t i = 0; i < indices.size(); ++i) {
+ EnumIndex oldIndex = indices[i].value();
+ indices[i] = WeightedIndex(remapper.remap(oldIndex), indices[i].weight());
+ }
+ std::atomic_thread_fence(std::memory_order_release);
+ multi_value_mapping.replace(doc, indices);
+ }
+ }
+ v.logEnumStoreEvent("compactfixup", "complete");
+}
+
+template void remap_enum_store_refs(const EnumIndexRemapper&, AttributeVector&, attribute::MultiValueMapping<Value> &);
+template void remap_enum_store_refs(const EnumIndexRemapper&, AttributeVector&, attribute::MultiValueMapping<WeightedValue> &);
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h
index ac271247e70..66f133c60fa 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h
@@ -51,7 +51,7 @@ protected:
using DocIndices = typename MultiValueAttribute<B, M>::DocumentValues;
using EnumIndex = IEnumStore::Index;
- using EnumIndexMap = IEnumStore::EnumIndexMap;
+ using EnumIndexRemapper = IEnumStore::EnumIndexRemapper;
using EnumIndexVector = IEnumStore::IndexVector;
using EnumStoreBatchUpdater = typename B::EnumStoreBatchUpdater;
using EnumVector = IEnumStore::EnumVector;
@@ -66,7 +66,6 @@ protected:
// from EnumAttribute
void considerAttributeChange(const Change & c, UniqueSet & newUniques) override; // same for both string and numeric
- void reEnumerate(const EnumIndexMap &) override; // same for both string and numeric
virtual void applyValueChanges(const DocIndices& docIndices, EnumStoreBatchUpdater& updater);
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
index 9bdc36e805b..fbfd5516c05 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
@@ -7,9 +7,18 @@
#include "multienumattributesaver.h"
#include "load_utils.h"
#include <vespa/vespalib/stllike/hashtable.hpp>
+#include <vespa/vespalib/datastore/unique_store_remapper.h>
namespace search {
+namespace multienumattribute {
+
+template <typename WeightedIndex>
+void
+remap_enum_store_refs(const IEnumStore::EnumIndexRemapper& remapper, AttributeVector& v, attribute::MultiValueMapping<WeightedIndex>& multi_value_mapping);
+
+}
+
template <typename B, typename M>
bool
MultiValueEnumAttribute<B, M>::extractChangeData(const Change & c, EnumIndex & idx)
@@ -40,29 +49,6 @@ MultiValueEnumAttribute<B, M>::considerAttributeChange(const Change & c, UniqueS
template <typename B, typename M>
void
-MultiValueEnumAttribute<B, M>::reEnumerate(const EnumIndexMap & old2new)
-{
- // update MultiValueMapping with new EnumIndex values.
- this->logEnumStoreEvent("compactfixup", "drain");
- {
- EnumModifier enumGuard(this->getEnumModifier());
- this->logEnumStoreEvent("compactfixup", "start");
- for (DocId doc = 0; doc < this->getNumDocs(); ++doc) {
- vespalib::ConstArrayRef<WeightedIndex> indicesRef(this->_mvMapping.get(doc));
- WeightedIndexVector indices(indicesRef.cbegin(), indicesRef.cend());
- for (uint32_t i = 0; i < indices.size(); ++i) {
- EnumIndex oldIndex = indices[i].value();
- indices[i] = WeightedIndex(old2new[oldIndex], indices[i].weight());
- }
- std::atomic_thread_fence(std::memory_order_release);
- this->_mvMapping.replace(doc, indices);
- }
- }
- this->logEnumStoreEvent("compactfixup", "complete");
-}
-
-template <typename B, typename M>
-void
MultiValueEnumAttribute<B, M>::applyValueChanges(const DocIndices& docIndices, EnumStoreBatchUpdater& updater)
{
// set new set of indices for documents with changes
@@ -175,6 +161,14 @@ MultiValueEnumAttribute<B, M>::onCommit()
this->incGeneration();
this->updateStat(true);
}
+ auto remapper = this->_enumStore.consider_compact(this->getConfig().getCompactionStrategy());
+ if (remapper) {
+ multienumattribute::remap_enum_store_refs(*remapper, *this, this->_mvMapping);
+ remapper->done();
+ remapper.reset();
+ this->incGeneration();
+ this->updateStat(true);
+ }
}
template <typename B, typename M>
@@ -183,8 +177,7 @@ MultiValueEnumAttribute<B, M>::onUpdateStat()
{
// update statistics
vespalib::MemoryUsage total;
- total.merge(this->_enumStore.getValuesMemoryUsage());
- total.merge(this->_enumStore.getDictionaryMemoryUsage());
+ total.merge(this->_enumStore.update_stat());
total.merge(this->_mvMapping.updateStat());
total.merge(this->getChangeVectorMemoryUsage());
mergeMemoryStats(total);
diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
index 406400cef00..c56d9821f66 100644
--- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
@@ -299,10 +299,10 @@ ReferenceAttribute::considerCompact(const CompactionStrategy &compactionStrategy
void
ReferenceAttribute::compactWorst()
{
- datastore::ICompactionContext::UP compactionContext(_store.compactWorst());
- if (compactionContext) {
- compactionContext->compact(vespalib::ArrayRef<EntryRef>(&_indices[0],
- _indices.size()));
+ auto remapper(_store.compact_worst(true, true));
+ if (remapper) {
+ remapper->remap(vespalib::ArrayRef<EntryRef>(&_indices[0], _indices.size()));
+ remapper->done();
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp
index 9e5c9f0bc7b..37ad03eb257 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp
@@ -41,4 +41,27 @@ SingleValueEnumAttributeBase::getIndicesCopy(uint32_t size) const
return EnumIndexCopyVector(&_enumIndices[0], &_enumIndices[0] + size);
}
+void
+SingleValueEnumAttributeBase::remap_enum_store_refs(const EnumIndexRemapper& remapper, AttributeVector& v)
+{
+ // update _enumIndices with new EnumIndex values after enum store has been compacted.
+ v.logEnumStoreEvent("reenumerate", "reserved");
+ auto new_indexes = std::make_unique<vespalib::Array<EnumIndex>>();
+ new_indexes->reserve(_enumIndices.capacity());
+ v.logEnumStoreEvent("reenumerate", "start");
+ for (uint32_t i = 0; i < _enumIndices.size(); ++i) {
+ EnumIndex old_index = _enumIndices[i];
+ EnumIndex new_index = remapper.remap(old_index);
+ new_indexes->push_back_fast(new_index);
+ }
+ v.logEnumStoreEvent("compactfixup", "drain");
+ {
+ AttributeVector::EnumModifier enum_guard(v.getEnumModifier());
+ v.logEnumStoreEvent("compactfixup", "start");
+ _enumIndices.replaceVector(std::move(new_indexes));
+ }
+ v.logEnumStoreEvent("compactfixup", "complete");
+ v.logEnumStoreEvent("reenumerate", "complete");
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h
index 5624ebe6582..6882158a474 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h
@@ -21,6 +21,7 @@ protected:
using EnumHandle = AttributeVector::EnumHandle;
using EnumIndex = IEnumStore::Index;
using EnumIndexVector = vespalib::RcuVectorBase<EnumIndex>;
+ using EnumIndexRemapper = IEnumStore::EnumIndexRemapper;
using GenerationHolder = vespalib::GenerationHolder;
public:
@@ -36,6 +37,7 @@ protected:
EnumIndexVector _enumIndices;
EnumIndexCopyVector getIndicesCopy(uint32_t size) const;
+ void remap_enum_store_refs(const EnumIndexRemapper& remapper, AttributeVector& v);
};
template <typename B>
@@ -45,7 +47,7 @@ protected:
using ChangeVector = typename B::ChangeVector;
using ChangeVectorIterator = typename B::ChangeVector::const_iterator;
using DocId = typename B::DocId;
- using EnumIndexMap = IEnumStore::EnumIndexMap;
+ using EnumIndexRemapper = IEnumStore::EnumIndexRemapper;
using EnumModifier = typename B::EnumModifier;
using EnumStore = typename B::EnumStore;
using EnumStoreBatchUpdater = typename EnumStore::BatchUpdater;
@@ -66,7 +68,6 @@ private:
protected:
// from EnumAttribute
void considerAttributeChange(const Change & c, UniqueSet & newUniques) override;
- void reEnumerate(const EnumIndexMap & old2New) override;
// implemented by single value numeric enum attribute.
virtual void considerUpdateAttributeChange(const Change & c) { (void) c; }
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
index 7f4f7503eff..19035d28875 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
@@ -7,6 +7,7 @@
#include "ipostinglistattributebase.h"
#include "singleenumattributesaver.h"
#include "load_utils.h"
+#include <vespa/vespalib/datastore/unique_store_remapper.h>
namespace search {
@@ -93,6 +94,14 @@ SingleValueEnumAttribute<B>::onCommit()
freezeEnumDictionary();
std::atomic_thread_fence(std::memory_order_release);
this->removeAllOldGenerations();
+ auto remapper = this->_enumStore.consider_compact(this->getConfig().getCompactionStrategy());
+ if (remapper) {
+ remap_enum_store_refs(*remapper, *this);
+ remapper->done();
+ remapper.reset();
+ this->incGeneration();
+ this->updateStat(true);
+ }
}
template <typename B>
@@ -102,8 +111,7 @@ SingleValueEnumAttribute<B>::onUpdateStat()
// update statistics
vespalib::MemoryUsage total = _enumIndices.getMemoryUsage();
total.mergeGenerationHeldBytes(getGenerationHolder().getHeldBytes());
- total.merge(this->_enumStore.getValuesMemoryUsage());
- total.merge(this->_enumStore.getDictionaryMemoryUsage());
+ total.merge(this->_enumStore.update_stat());
total.merge(this->getChangeVectorMemoryUsage());
mergeMemoryStats(total);
this->updateStatistics(_enumIndices.size(), this->_enumStore.getNumUniques(), total.allocatedBytes(),
@@ -137,32 +145,6 @@ SingleValueEnumAttribute<B>::considerAttributeChange(const Change & c, UniqueSet
template <typename B>
void
-SingleValueEnumAttribute<B>::reEnumerate(const EnumIndexMap & old2New)
-{
- this->logEnumStoreEvent("reenumerate", "reserved");
- auto newIndexes = std::make_unique<vespalib::Array<EnumIndex>>();
- newIndexes->reserve(_enumIndices.capacity());
- this->logEnumStoreEvent("reenumerate", "start");
- for (uint32_t i = 0; i < _enumIndices.size(); ++i) {
- EnumIndex oldIdx = _enumIndices[i];
- EnumIndex newIdx;
- if (oldIdx.valid()) {
- newIdx = old2New[oldIdx];
- }
- newIndexes->push_back_fast(newIdx);
- }
- this->logEnumStoreEvent("compactfixup", "drain");
- {
- EnumModifier enumGuard(this->getEnumModifier());
- this->logEnumStoreEvent("compactfixup", "start");
- _enumIndices.replaceVector(std::move(newIndexes));
- }
- this->logEnumStoreEvent("compactfixup", "complete");
- this->logEnumStoreEvent("reenumerate", "complete");
-}
-
-template <typename B>
-void
SingleValueEnumAttribute<B>::applyUpdateValueChange(const Change& c, EnumStoreBatchUpdater& updater)
{
EnumIndex oldIdx = _enumIndices[c._doc];
diff --git a/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp b/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp
index a1a24247b3f..88a5a05738b 100644
--- a/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp
+++ b/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp
@@ -1,5 +1,6 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/vespalib/datastore/unique_store.hpp>
+#include <vespa/vespalib/datastore/unique_store_remapper.h>
#include <vespa/vespalib/datastore/unique_store_string_allocator.hpp>
#include <vespa/vespalib/datastore/unique_store_string_comparator.h>
#include <vespa/vespalib/gtest/gtest.h>
@@ -100,14 +101,16 @@ struct TestBase : public ::testing::Test {
store.trimHoldLists(generation);
}
void compactWorst() {
- ICompactionContext::UP ctx = store.compactWorst();
+ auto remapper = store.compact_worst(true, true);
std::vector<EntryRef> refs;
for (const auto &elem : refStore) {
refs.push_back(elem.first);
}
refs.push_back(EntryRef());
std::vector<EntryRef> compactedRefs = refs;
- ctx->compact(ArrayRef<EntryRef>(compactedRefs));
+ remapper->remap(ArrayRef<EntryRef>(compactedRefs));
+ remapper->done();
+ remapper.reset();
ASSERT_FALSE(refs.back().valid());
refs.pop_back();
ReferenceStore compactedRefStore;
diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store.h b/vespalib/src/vespa/vespalib/datastore/unique_store.h
index 6b85e79d3eb..b4ed3f6d86b 100644
--- a/vespalib/src/vespa/vespalib/datastore/unique_store.h
+++ b/vespalib/src/vespa/vespalib/datastore/unique_store.h
@@ -7,7 +7,6 @@
#include "datastore.h"
#include "entry_comparator_wrapper.h"
#include "entryref.h"
-#include "i_compaction_context.h"
#include "i_unique_store_dictionary.h"
#include "unique_store_add_result.h"
#include "unique_store_allocator.h"
@@ -22,6 +21,9 @@ class UniqueStoreBuilder;
template <typename RefT>
class UniqueStoreEnumerator;
+template <typename RefT>
+class UniqueStoreRemapper;
+
/**
* Datastore for unique values of type EntryT that is accessed via a
* 32-bit EntryRef.
@@ -35,6 +37,7 @@ public:
using RefType = RefT;
using Enumerator = UniqueStoreEnumerator<RefT>;
using Builder = UniqueStoreBuilder<Allocator>;
+ using Remapper = UniqueStoreRemapper<RefT>;
using EntryConstRefType = typename Allocator::EntryConstRefType;
private:
Allocator _allocator;
@@ -50,7 +53,7 @@ public:
EntryRef find(EntryConstRefType value);
EntryConstRefType get(EntryRef ref) const { return _allocator.get(ref); }
void remove(EntryRef ref);
- ICompactionContext::UP compactWorst();
+ std::unique_ptr<Remapper> compact_worst(bool compact_memory, bool compact_address_space);
vespalib::MemoryUsage getMemoryUsage() const;
vespalib::AddressSpace get_address_space_usage() const;
diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store.hpp
index ebd81010612..abac2888a34 100644
--- a/vespalib/src/vespa/vespalib/datastore/unique_store.hpp
+++ b/vespalib/src/vespa/vespalib/datastore/unique_store.hpp
@@ -4,6 +4,7 @@
#include "unique_store.h"
#include "unique_store_dictionary.h"
+#include "unique_store_remapper.h"
#include "datastore.hpp"
#include "unique_store_allocator.hpp"
#include "unique_store_builder.hpp"
@@ -80,28 +81,26 @@ UniqueStore<EntryT, RefT, Compare, Allocator>::remove(EntryRef ref)
namespace uniquestore {
template <typename RefT>
-class CompactionContext : public ICompactionContext, public ICompactable {
+class CompactionContext : public UniqueStoreRemapper<RefT>, public ICompactable {
private:
using DictionaryTraits = btree::BTreeTraits<32, 32, 7, true>;
using Dictionary = btree::BTree<EntryRef, uint32_t,
btree::NoAggregated,
EntryComparatorWrapper,
DictionaryTraits>;
+ using UniqueStoreRemapper<RefT>::_compacting_buffer;
+ using UniqueStoreRemapper<RefT>::_mapping;
DataStoreBase &_dataStore;
IUniqueStoreDictionary &_dict;
ICompactable &_store;
std::vector<uint32_t> _bufferIdsToCompact;
- std::vector<std::vector<EntryRef>> _mapping;
-
- bool compactingBuffer(uint32_t bufferId) {
- return std::find(_bufferIdsToCompact.begin(), _bufferIdsToCompact.end(),
- bufferId) != _bufferIdsToCompact.end();
- }
void allocMapping() {
+ _compacting_buffer.resize(RefT::numBuffers());
_mapping.resize(RefT::numBuffers());
for (const auto bufferId : _bufferIdsToCompact) {
BufferState &state = _dataStore.getBufferState(bufferId);
+ _compacting_buffer[bufferId] = true;
_mapping[bufferId].resize(state.size());
}
}
@@ -109,9 +108,10 @@ private:
EntryRef move(EntryRef oldRef) override {
RefT iRef(oldRef);
assert(iRef.valid());
- if (compactingBuffer(iRef.bufferId())) {
- assert(iRef.offset() < _mapping[iRef.bufferId()].size());
- EntryRef &mappedRef = _mapping[iRef.bufferId()][iRef.offset()];
+ uint32_t buffer_id = iRef.bufferId();
+ if (_compacting_buffer[buffer_id]) {
+ assert(iRef.offset() < _mapping[buffer_id].size());
+ EntryRef &mappedRef = _mapping[buffer_id][iRef.offset()];
assert(!mappedRef.valid());
EntryRef newRef = _store.move(oldRef);
mappedRef = newRef;
@@ -130,48 +130,40 @@ public:
IUniqueStoreDictionary &dict,
ICompactable &store,
std::vector<uint32_t> bufferIdsToCompact)
- : ICompactionContext(),
+ : UniqueStoreRemapper<RefT>(),
ICompactable(),
_dataStore(dataStore),
_dict(dict),
_store(store),
- _bufferIdsToCompact(std::move(bufferIdsToCompact)),
- _mapping()
+ _bufferIdsToCompact(std::move(bufferIdsToCompact))
{
+ if (!_bufferIdsToCompact.empty()) {
+ allocMapping();
+ fillMapping();
+ }
}
- virtual ~CompactionContext() {
+
+ void done() override {
_dataStore.finishCompact(_bufferIdsToCompact);
+ _bufferIdsToCompact.clear();
}
- virtual void compact(vespalib::ArrayRef<EntryRef> refs) override {
- if (!_bufferIdsToCompact.empty()) {
- if (_mapping.empty()) {
- allocMapping();
- fillMapping();
- }
- for (auto &ref : refs) {
- if (ref.valid()) {
- RefT internalRef(ref);
- if (compactingBuffer(internalRef.bufferId())) {
- assert(internalRef.offset() < _mapping[internalRef.bufferId()].size());
- EntryRef newRef = _mapping[internalRef.bufferId()][internalRef.offset()];
- assert(newRef.valid());
- ref = newRef;
- }
- }
- }
- }
+ ~CompactionContext() override {
+ assert(_bufferIdsToCompact.empty());
}
};
}
template <typename EntryT, typename RefT, typename Compare, typename Allocator>
-ICompactionContext::UP
-UniqueStore<EntryT, RefT, Compare, Allocator>::compactWorst()
+std::unique_ptr<typename UniqueStore<EntryT, RefT, Compare, Allocator>::Remapper>
+UniqueStore<EntryT, RefT, Compare, Allocator>::compact_worst(bool compact_memory, bool compact_address_space)
{
- std::vector<uint32_t> bufferIdsToCompact = _store.startCompactWorstBuffers(true, true);
- return std::make_unique<uniquestore::CompactionContext<RefT>>
- (_store, *_dict, _allocator, std::move(bufferIdsToCompact));
+ std::vector<uint32_t> bufferIdsToCompact = _store.startCompactWorstBuffers(compact_memory, compact_address_space);
+ if (bufferIdsToCompact.empty()) {
+ return std::unique_ptr<Remapper>();
+ } else {
+ return std::make_unique<uniquestore::CompactionContext<RefT>>(_store, *_dict, _allocator, std::move(bufferIdsToCompact));
+ }
}
template <typename EntryT, typename RefT, typename Compare, typename Allocator>
diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_remapper.h b/vespalib/src/vespa/vespalib/datastore/unique_store_remapper.h
new file mode 100644
index 00000000000..13a732e6524
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/datastore/unique_store_remapper.h
@@ -0,0 +1,60 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "entryref.h"
+#include <vector>
+
+namespace search::datastore {
+
+/**
+ * Remapper for related UniqueStore class, used for adjusting
+ * references to unique store after compaction.
+ */
+template <typename RefT>
+class UniqueStoreRemapper {
+public:
+ using RefType = RefT;
+
+protected:
+ std::vector<bool> _compacting_buffer;
+ std::vector<std::vector<EntryRef>> _mapping;
+public:
+ UniqueStoreRemapper()
+ : _compacting_buffer(),
+ _mapping()
+ {
+ }
+ virtual ~UniqueStoreRemapper() = default;
+
+ EntryRef remap(EntryRef ref) const {
+ if (ref.valid()) {
+ RefType internal_ref(ref);
+ if (!_compacting_buffer[internal_ref.bufferId()]) {
+ // No remapping for references to buffers not being compacted
+ return ref;
+ } else {
+ auto &inner_mapping = _mapping[internal_ref.bufferId()];
+ assert(internal_ref.unscaled_offset() < inner_mapping.size());
+ EntryRef mapped_ref = inner_mapping[internal_ref.unscaled_offset()];
+ assert(mapped_ref.valid());
+ return mapped_ref;
+ }
+ } else {
+ return EntryRef();
+ }
+ }
+
+ void remap(vespalib::ArrayRef<EntryRef> refs) const {
+ for (auto &ref : refs) {
+ auto mapped_ref = remap(ref);
+ if (mapped_ref != ref) {
+ ref = mapped_ref;
+ }
+ }
+ }
+
+ virtual void done() = 0;
+};
+
+}