summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2023-04-17 13:37:04 +0200
committerTor Egge <Tor.Egge@online.no>2023-04-17 13:37:04 +0200
commit093a4d03abc6b48127bd9b3aca0fd6ed5f6c99a9 (patch)
treea241237b5309e8d1be3074906a4861292c98d935 /searchlib
parentcdc45b909deb8f9a520ba922a7b88e283e72917e (diff)
Test move of default values during EnumStore compaction.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/attribute/enumstore/enumstore_test.cpp59
-rw-r--r--searchlib/src/vespa/searchcommon/common/undefinedvalues.h4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstore.h3
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstore.hpp24
4 files changed, 75 insertions, 15 deletions
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
index 7341595ba40..e9260a55f57 100644
--- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
+++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
@@ -180,15 +180,35 @@ TYPED_TEST(FloatEnumStoreTest, numbers_can_be_inserted_and_retrieved)
}
}
+TEST(EnumStoreTest, default_value_is_present)
+{
+ StringEnumStore ses(false, DictionaryConfig::Type::BTREE);
+ using EntryType = StringEnumStore::EntryType;
+ EntryType undefined = attribute::getUndefined<EntryType>();
+ EnumIndex idx;
+ EXPECT_TRUE(ses.find_index(undefined, idx));
+ EXPECT_TRUE(idx.valid());
+ EXPECT_EQ(ses.get_default_value_ref().load_relaxed(), idx);
+ ses.clear_default_value_ref();
+ EXPECT_FALSE(ses.find_index(undefined, idx));
+ EXPECT_FALSE(ses.get_default_value_ref().load_relaxed().valid());
+ ses.setup_default_value_ref();
+ idx = EnumIndex();
+ EXPECT_TRUE(ses.find_index(undefined, idx));
+ EXPECT_TRUE(idx.valid());
+ EXPECT_EQ(ses.get_default_value_ref().load_relaxed(), idx);
+}
+
TEST(EnumStoreTest, test_find_folded_on_string_enum_store)
{
StringEnumStore ses(false, DictionaryConfig::Type::BTREE);
+ using EntryType = StringEnumStore::EntryType;
std::vector<EnumIndex> indices;
std::vector<std::string> unique({"", "one", "two", "TWO", "Two", "three"});
for (std::string &str : unique) {
EnumIndex idx = ses.insert(str.c_str());
indices.push_back(idx);
- EXPECT_EQ((str == "") ? 2u : 1u, ses.get_ref_count(idx));
+ EXPECT_EQ((str == attribute::getUndefined<EntryType>()) ? 2u : 1u, ses.get_ref_count(idx));
}
ses.freeze_dictionary();
for (uint32_t i = 0; i < indices.size(); ++i) {
@@ -233,13 +253,14 @@ void
StringEnumStoreTest::testInsert(bool hasPostings)
{
StringEnumStore ses(hasPostings, DictionaryConfig::Type::BTREE);
+ using EntryType = StringEnumStore::EntryType;
std::vector<EnumIndex> indices;
std::vector<std::string> unique = {"", "add", "enumstore", "unique"};
for (const auto & i : unique) {
EnumIndex idx = ses.insert(i.c_str());
- EXPECT_EQ((i == "") ? 2u : 1u, ses.get_ref_count(idx));
+ EXPECT_EQ((i == attribute::getUndefined<EntryType>()) ? 2u : 1u, ses.get_ref_count(idx));
indices.push_back(idx);
EXPECT_TRUE(ses.find_index(i.c_str(), idx));
}
@@ -613,6 +634,7 @@ public:
void test_normalize_posting_lists(bool use_filter, bool one_filter);
void test_foreach_posting_list(bool one_filter);
static EntryRef fake_pidx() { return EntryRef(42); }
+ EnumIndex check_default_value_ref() const noexcept;
};
template <typename EnumStoreTypeAndDictionaryType>
@@ -778,6 +800,16 @@ EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::test_foreach_posting_li
clear_sample_values(large_population);
}
+template <typename EnumStoreTypeAndDictionaryType>
+EnumIndex
+EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::check_default_value_ref() const noexcept
+{
+ EnumIndex default_value_ref = store.get_default_value_ref().load_relaxed();
+ EXPECT_TRUE(default_value_ref.valid());
+ EXPECT_EQ(attribute::getUndefined<EntryType>(), store.get_value(default_value_ref));
+ return default_value_ref;
+}
+
using EnumStoreDictionaryTestTypes = ::testing::Types<BTreeNumericEnumStore, HybridNumericEnumStore, HashNumericEnumStore>;
TYPED_TEST_SUITE(EnumStoreDictionaryTest, EnumStoreDictionaryTestTypes);
@@ -878,6 +910,7 @@ TYPED_TEST(EnumStoreDictionaryTest, compact_worst_works)
updater.commit();
generation_t gen = 3;
inc_generation(gen, this->store);
+ // Compact dictionary
auto& dict = this->store.get_dictionary();
if (dict.get_has_btree_dictionary()) {
EXPECT_LT(CompactionStrategy::DEAD_BYTES_SLACK, dict.get_btree_memory_usage().deadBytes());
@@ -905,6 +938,28 @@ TYPED_TEST(EnumStoreDictionaryTest, compact_worst_works)
if (dict.get_has_hash_dictionary()) {
EXPECT_GT(CompactionStrategy::DEAD_BYTES_SLACK, dict.get_hash_memory_usage().deadBytes());
}
+ auto old_default_value_ref = this->check_default_value_ref();
+ // Compact values
+ EXPECT_LT(CompactionStrategy::DEAD_BYTES_SLACK, this->store.get_values_memory_usage().deadBytes());
+ compaction_strategy = CompactionStrategy::make_compact_all_active_buffers_strategy();
+ int compact_values_count = 0;
+ for (uint32_t i = 0; i < 2; ++i) {
+ this->store.update_stat(compaction_strategy);
+ auto remapper = this->store.consider_compact_values(compaction_strategy);
+ if (remapper) {
+ remapper->done();
+ ++compact_values_count;
+ } else {
+ break;
+ }
+ EXPECT_FALSE(this->store.consider_compact_values(compaction_strategy));
+ inc_generation(gen, this->store);
+ }
+ EXPECT_EQ(1, compact_values_count);
+ auto new_default_value_ref = this->check_default_value_ref();
+ EXPECT_NE(old_default_value_ref, new_default_value_ref);
+ EXPECT_GT(CompactionStrategy::DEAD_BYTES_SLACK, this->store.get_values_memory_usage().deadBytes());
+
std::vector<int32_t> exp_values;
std::vector<int32_t> values;
exp_values.push_back(std::numeric_limits<int32_t>::min());
diff --git a/searchlib/src/vespa/searchcommon/common/undefinedvalues.h b/searchlib/src/vespa/searchcommon/common/undefinedvalues.h
index bbe3198a8dc..a080648c054 100644
--- a/searchlib/src/vespa/searchcommon/common/undefinedvalues.h
+++ b/searchlib/src/vespa/searchcommon/common/undefinedvalues.h
@@ -24,6 +24,10 @@ inline constexpr double getUndefined<double>() {
return -std::numeric_limits<double>::quiet_NaN();
}
+template <>
+inline constexpr const char* getUndefined<const char*>() {
+ return "";
+}
// for all signed integers
template <typename T>
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h
index e8b18cf3974..dc272a86078 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.h
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h
@@ -56,7 +56,7 @@ private:
ComparatorType _foldedComparator;
enumstore::EnumStoreCompactionSpec _compaction_spec;
EntryType _default_value;
- Index _default_value_ref;
+ AtomicIndex _default_value_ref;
EnumStoreT(const EnumStoreT & rhs) = delete;
EnumStoreT & operator=(const EnumStoreT & rhs) = delete;
@@ -205,6 +205,7 @@ public:
void free_unused_values(IndexList to_remove);
void clear_default_value_ref() override;
void setup_default_value_ref() override;
+ const AtomicIndex& get_default_value_ref() const noexcept { return _default_value_ref; }
vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy) override;
std::unique_ptr<EnumIndexRemapper> consider_compact_values(const CompactionStrategy& compaction_strategy) override;
std::unique_ptr<EnumIndexRemapper> compact_worst_values(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) override;
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
index 98f82ade535..373102076d5 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
@@ -83,9 +83,6 @@ EnumStoreT<EntryT>::EnumStoreT(bool has_postings, const DictionaryConfig& dict_c
_default_value(attribute::getUndefined<EntryT>()),
_default_value_ref()
{
- if constexpr (std::is_same_v<const char *, EntryT>) {
- _default_value = "";
- }
_store.set_dictionary(make_enum_store_dictionary(*this, has_postings, dict_cfg,
allocate_comparator(),
allocate_optionally_folded_comparator(is_folded())));
@@ -227,10 +224,11 @@ template <typename EntryT>
void
EnumStoreT<EntryT>::clear_default_value_ref()
{
- if (_default_value_ref.valid()) {
+ auto ref = _default_value_ref.load_relaxed();
+ if (ref.valid()) {
auto updater = make_batch_updater();
- updater.dec_ref_count(_default_value_ref);
- _default_value_ref = Index();
+ updater.dec_ref_count(ref);
+ _default_value_ref.store_relaxed(Index());
updater.commit();
}
}
@@ -239,10 +237,11 @@ template <typename EntryT>
void
EnumStoreT<EntryT>::setup_default_value_ref()
{
- if (!_default_value_ref.valid()) {
+ if (!_default_value_ref.load_relaxed().valid()) {
auto updater = make_batch_updater();
- _default_value_ref = updater.insert(_default_value);
- updater.inc_ref_count(_default_value_ref);
+ auto ref = updater.insert(_default_value);
+ updater.inc_ref_count(ref);
+ _default_value_ref.store_relaxed(ref);
updater.commit();
}
}
@@ -269,9 +268,10 @@ std::unique_ptr<IEnumStore::EnumIndexRemapper>
EnumStoreT<EntryT>::compact_worst_values(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy)
{
auto remapper = _store.compact_worst(compaction_spec, compaction_strategy);
- if (remapper && _default_value_ref.valid()) {
- if (remapper->get_entry_ref_filter().has(_default_value_ref)) {
- _default_value_ref = remapper->remap(_default_value_ref);
+ if (remapper) {
+ auto ref = _default_value_ref.load_relaxed();
+ if (ref.valid() && remapper->get_entry_ref_filter().has(ref)) {
+ _default_value_ref.store_release(remapper->remap(ref));
}
}
return remapper;