diff options
author | Tor Egge <Tor.Egge@online.no> | 2023-04-17 13:37:04 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2023-04-17 13:37:04 +0200 |
commit | 093a4d03abc6b48127bd9b3aca0fd6ed5f6c99a9 (patch) | |
tree | a241237b5309e8d1be3074906a4861292c98d935 /searchlib | |
parent | cdc45b909deb8f9a520ba922a7b88e283e72917e (diff) |
Test move of default values during EnumStore compaction.
Diffstat (limited to 'searchlib')
4 files changed, 75 insertions, 15 deletions
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp index 7341595ba40..e9260a55f57 100644 --- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp +++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp @@ -180,15 +180,35 @@ TYPED_TEST(FloatEnumStoreTest, numbers_can_be_inserted_and_retrieved) } } +TEST(EnumStoreTest, default_value_is_present) +{ + StringEnumStore ses(false, DictionaryConfig::Type::BTREE); + using EntryType = StringEnumStore::EntryType; + EntryType undefined = attribute::getUndefined<EntryType>(); + EnumIndex idx; + EXPECT_TRUE(ses.find_index(undefined, idx)); + EXPECT_TRUE(idx.valid()); + EXPECT_EQ(ses.get_default_value_ref().load_relaxed(), idx); + ses.clear_default_value_ref(); + EXPECT_FALSE(ses.find_index(undefined, idx)); + EXPECT_FALSE(ses.get_default_value_ref().load_relaxed().valid()); + ses.setup_default_value_ref(); + idx = EnumIndex(); + EXPECT_TRUE(ses.find_index(undefined, idx)); + EXPECT_TRUE(idx.valid()); + EXPECT_EQ(ses.get_default_value_ref().load_relaxed(), idx); +} + TEST(EnumStoreTest, test_find_folded_on_string_enum_store) { StringEnumStore ses(false, DictionaryConfig::Type::BTREE); + using EntryType = StringEnumStore::EntryType; std::vector<EnumIndex> indices; std::vector<std::string> unique({"", "one", "two", "TWO", "Two", "three"}); for (std::string &str : unique) { EnumIndex idx = ses.insert(str.c_str()); indices.push_back(idx); - EXPECT_EQ((str == "") ? 2u : 1u, ses.get_ref_count(idx)); + EXPECT_EQ((str == attribute::getUndefined<EntryType>()) ? 2u : 1u, ses.get_ref_count(idx)); } ses.freeze_dictionary(); for (uint32_t i = 0; i < indices.size(); ++i) { @@ -233,13 +253,14 @@ void StringEnumStoreTest::testInsert(bool hasPostings) { StringEnumStore ses(hasPostings, DictionaryConfig::Type::BTREE); + using EntryType = StringEnumStore::EntryType; std::vector<EnumIndex> indices; std::vector<std::string> unique = {"", "add", "enumstore", "unique"}; for (const auto & i : unique) { EnumIndex idx = ses.insert(i.c_str()); - EXPECT_EQ((i == "") ? 2u : 1u, ses.get_ref_count(idx)); + EXPECT_EQ((i == attribute::getUndefined<EntryType>()) ? 2u : 1u, ses.get_ref_count(idx)); indices.push_back(idx); EXPECT_TRUE(ses.find_index(i.c_str(), idx)); } @@ -613,6 +634,7 @@ public: void test_normalize_posting_lists(bool use_filter, bool one_filter); void test_foreach_posting_list(bool one_filter); static EntryRef fake_pidx() { return EntryRef(42); } + EnumIndex check_default_value_ref() const noexcept; }; template <typename EnumStoreTypeAndDictionaryType> @@ -778,6 +800,16 @@ EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::test_foreach_posting_li clear_sample_values(large_population); } +template <typename EnumStoreTypeAndDictionaryType> +EnumIndex +EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::check_default_value_ref() const noexcept +{ + EnumIndex default_value_ref = store.get_default_value_ref().load_relaxed(); + EXPECT_TRUE(default_value_ref.valid()); + EXPECT_EQ(attribute::getUndefined<EntryType>(), store.get_value(default_value_ref)); + return default_value_ref; +} + using EnumStoreDictionaryTestTypes = ::testing::Types<BTreeNumericEnumStore, HybridNumericEnumStore, HashNumericEnumStore>; TYPED_TEST_SUITE(EnumStoreDictionaryTest, EnumStoreDictionaryTestTypes); @@ -878,6 +910,7 @@ TYPED_TEST(EnumStoreDictionaryTest, compact_worst_works) updater.commit(); generation_t gen = 3; inc_generation(gen, this->store); + // Compact dictionary auto& dict = this->store.get_dictionary(); if (dict.get_has_btree_dictionary()) { EXPECT_LT(CompactionStrategy::DEAD_BYTES_SLACK, dict.get_btree_memory_usage().deadBytes()); @@ -905,6 +938,28 @@ TYPED_TEST(EnumStoreDictionaryTest, compact_worst_works) if (dict.get_has_hash_dictionary()) { EXPECT_GT(CompactionStrategy::DEAD_BYTES_SLACK, dict.get_hash_memory_usage().deadBytes()); } + auto old_default_value_ref = this->check_default_value_ref(); + // Compact values + EXPECT_LT(CompactionStrategy::DEAD_BYTES_SLACK, this->store.get_values_memory_usage().deadBytes()); + compaction_strategy = CompactionStrategy::make_compact_all_active_buffers_strategy(); + int compact_values_count = 0; + for (uint32_t i = 0; i < 2; ++i) { + this->store.update_stat(compaction_strategy); + auto remapper = this->store.consider_compact_values(compaction_strategy); + if (remapper) { + remapper->done(); + ++compact_values_count; + } else { + break; + } + EXPECT_FALSE(this->store.consider_compact_values(compaction_strategy)); + inc_generation(gen, this->store); + } + EXPECT_EQ(1, compact_values_count); + auto new_default_value_ref = this->check_default_value_ref(); + EXPECT_NE(old_default_value_ref, new_default_value_ref); + EXPECT_GT(CompactionStrategy::DEAD_BYTES_SLACK, this->store.get_values_memory_usage().deadBytes()); + std::vector<int32_t> exp_values; std::vector<int32_t> values; exp_values.push_back(std::numeric_limits<int32_t>::min()); diff --git a/searchlib/src/vespa/searchcommon/common/undefinedvalues.h b/searchlib/src/vespa/searchcommon/common/undefinedvalues.h index bbe3198a8dc..a080648c054 100644 --- a/searchlib/src/vespa/searchcommon/common/undefinedvalues.h +++ b/searchlib/src/vespa/searchcommon/common/undefinedvalues.h @@ -24,6 +24,10 @@ inline constexpr double getUndefined<double>() { return -std::numeric_limits<double>::quiet_NaN(); } +template <> +inline constexpr const char* getUndefined<const char*>() { + return ""; +} // for all signed integers template <typename T> diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h index e8b18cf3974..dc272a86078 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h @@ -56,7 +56,7 @@ private: ComparatorType _foldedComparator; enumstore::EnumStoreCompactionSpec _compaction_spec; EntryType _default_value; - Index _default_value_ref; + AtomicIndex _default_value_ref; EnumStoreT(const EnumStoreT & rhs) = delete; EnumStoreT & operator=(const EnumStoreT & rhs) = delete; @@ -205,6 +205,7 @@ public: void free_unused_values(IndexList to_remove); void clear_default_value_ref() override; void setup_default_value_ref() override; + const AtomicIndex& get_default_value_ref() const noexcept { return _default_value_ref; } vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy) override; std::unique_ptr<EnumIndexRemapper> consider_compact_values(const CompactionStrategy& compaction_strategy) override; std::unique_ptr<EnumIndexRemapper> compact_worst_values(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) override; diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp index 98f82ade535..373102076d5 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -83,9 +83,6 @@ EnumStoreT<EntryT>::EnumStoreT(bool has_postings, const DictionaryConfig& dict_c _default_value(attribute::getUndefined<EntryT>()), _default_value_ref() { - if constexpr (std::is_same_v<const char *, EntryT>) { - _default_value = ""; - } _store.set_dictionary(make_enum_store_dictionary(*this, has_postings, dict_cfg, allocate_comparator(), allocate_optionally_folded_comparator(is_folded()))); @@ -227,10 +224,11 @@ template <typename EntryT> void EnumStoreT<EntryT>::clear_default_value_ref() { - if (_default_value_ref.valid()) { + auto ref = _default_value_ref.load_relaxed(); + if (ref.valid()) { auto updater = make_batch_updater(); - updater.dec_ref_count(_default_value_ref); - _default_value_ref = Index(); + updater.dec_ref_count(ref); + _default_value_ref.store_relaxed(Index()); updater.commit(); } } @@ -239,10 +237,11 @@ template <typename EntryT> void EnumStoreT<EntryT>::setup_default_value_ref() { - if (!_default_value_ref.valid()) { + if (!_default_value_ref.load_relaxed().valid()) { auto updater = make_batch_updater(); - _default_value_ref = updater.insert(_default_value); - updater.inc_ref_count(_default_value_ref); + auto ref = updater.insert(_default_value); + updater.inc_ref_count(ref); + _default_value_ref.store_relaxed(ref); updater.commit(); } } @@ -269,9 +268,10 @@ std::unique_ptr<IEnumStore::EnumIndexRemapper> EnumStoreT<EntryT>::compact_worst_values(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) { auto remapper = _store.compact_worst(compaction_spec, compaction_strategy); - if (remapper && _default_value_ref.valid()) { - if (remapper->get_entry_ref_filter().has(_default_value_ref)) { - _default_value_ref = remapper->remap(_default_value_ref); + if (remapper) { + auto ref = _default_value_ref.load_relaxed(); + if (ref.valid() && remapper->get_entry_ref_filter().has(ref)) { + _default_value_ref.store_release(remapper->remap(ref)); } } return remapper; |