diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2019-09-25 11:16:16 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-09-25 11:16:16 +0200 |
commit | 3800cdc8f1200ede667229eceef02792b060194a (patch) | |
tree | e8a3d3cdbe69804a371efb16c6ce6beeeb712e72 /searchlib | |
parent | 2c1a51763f6e48439f50301958f4a270892eb238 (diff) | |
parent | a7ef5454b000ca06f0756d4d374e5e24c704c007 (diff) |
Merge pull request #10770 from vespa-engine/toregge/detect-enum-store-entry-ref-count-overflow
Detect enum store entry reference count overflow
Diffstat (limited to 'searchlib')
7 files changed, 76 insertions, 3 deletions
diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp index 98caf39dace..2f3df28cc6f 100644 --- a/searchlib/src/tests/attribute/attribute_test.cpp +++ b/searchlib/src/tests/attribute/attribute_test.cpp @@ -15,6 +15,7 @@ #include <vespa/searchlib/attribute/multistringattribute.h> #include <vespa/searchlib/attribute/predicate_attribute.h> #include <vespa/searchlib/attribute/singlenumericattribute.h> +#include <vespa/searchlib/attribute/singlenumericpostattribute.h> #include <vespa/searchlib/attribute/singlestringattribute.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> #include <vespa/searchlib/util/randomgenerator.h> @@ -241,6 +242,8 @@ private: void testCompactLidSpace(); + void test_default_value_ref_count_is_updated_after_shrink_lid_space(); + template <typename AttributeType> void requireThatAddressSpaceUsageIsReported(const Config &config, bool fastSearch); template <typename AttributeType> @@ -2019,6 +2022,43 @@ AttributeTest::testCompactLidSpace() TEST_DO(testCompactLidSpace(Config(BasicType::PREDICATE, CollectionType::SINGLE))); } +namespace { + +uint32_t +get_default_value_ref_count(AttributeVector &attr) +{ + auto *enum_store_base = attr.getEnumStoreBase(); + auto &enum_store = dynamic_cast<EnumStoreT<int32_t> &>(*enum_store_base); + IAttributeVector::EnumHandle default_value_handle(0); + if (enum_store.find_enum(attr.getDefaultValue(), default_value_handle)) { + datastore::EntryRef default_value_ref(default_value_handle); + assert(default_value_ref.valid()); + return enum_store.get_ref_count(default_value_ref); + } else { + return 0u; + } +} + +} + + +void +AttributeTest::test_default_value_ref_count_is_updated_after_shrink_lid_space() +{ + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + vespalib::string name = "shrink"; + AttributePtr attr = AttributeFactory::createAttribute(name, cfg); + attr->addReservedDoc(); + attr->addDocs(10); + EXPECT_EQUAL(11u, get_default_value_ref_count(*attr)); + attr->compactLidSpace(6); + EXPECT_EQUAL(11u, get_default_value_ref_count(*attr)); + attr->shrinkLidSpace(); + EXPECT_EQUAL(6u, attr->getNumDocs()); + EXPECT_EQUAL(6u, get_default_value_ref_count(*attr)); +} + template <typename AttributeType> void AttributeTest::requireThatAddressSpaceUsageIsReported(const Config &config, bool fastSearch) @@ -2223,6 +2263,7 @@ int AttributeTest::Main() testCreateSerialNum(); testPredicateHeaderTags(); TEST_DO(testCompactLidSpace()); + TEST_DO(test_default_value_ref_count_is_updated_after_shrink_lid_space()); TEST_DO(requireThatAddressSpaceUsageIsReported()); testReaderDuringLastUpdate(); TEST_DO(testPendingCompaction()); diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp index 2a544814710..cda2bea9fb1 100644 --- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp @@ -78,6 +78,19 @@ EnumStoreDictionary<DictionaryT>::free_unused_values(const IndexSet& to_remove, } template <typename DictionaryT> +void +EnumStoreDictionary<DictionaryT>::remove(const EntryComparator &comp, EntryRef ref) +{ + assert(ref.valid()); + auto itr = this->_dict.lowerBound(ref, comp); + assert(itr.valid() && itr.getKey() == ref); + if constexpr (std::is_same_v<DictionaryT, EnumPostingTree>) { + assert(EntryRef(itr.getData()) == EntryRef()); + } + this->_dict.remove(itr); +} + +template <typename DictionaryT> bool EnumStoreDictionary<DictionaryT>::find_index(const datastore::EntryComparator& cmp, Index& idx) const @@ -184,9 +197,13 @@ EnumStoreFoldedDictionary::remove(const EntryComparator& comp, EntryRef ref) EntryRef posting_list_ref(it.getData()); _dict.remove(it); // Maybe copy posting list reference to next entry - if (posting_list_ref.valid() && it.valid() && !EntryRef(it.getData()).valid() && !(*_folded_compare)(ref, it.getKey())) { - this->_dict.thaw(it); - it.writeData(posting_list_ref.ref()); + if (posting_list_ref.valid()) { + if (it.valid() && !EntryRef(it.getData()).valid() && !(*_folded_compare)(ref, it.getKey())) { + this->_dict.thaw(it); + it.writeData(posting_list_ref.ref()); + } else { + LOG_ABORT("Posting list not cleared for removed unique value"); + } } } diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h index 5e7a63e68b4..c2c4c96c2d9 100644 --- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h @@ -41,6 +41,7 @@ public: void free_unused_values(const IndexSet& to_remove, const datastore::EntryComparator& cmp) override; + void remove(const datastore::EntryComparator& comp, datastore::EntryRef ref) override; bool find_index(const datastore::EntryComparator& cmp, Index& idx) const override; bool find_frozen_index(const datastore::EntryComparator& cmp, Index& idx) const override; std::vector<attribute::IAttributeVector::EnumHandle> diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp index 897d8a10ec6..b527f89b224 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp @@ -41,6 +41,7 @@ void EnumAttribute<B>::load_enum_store(LoadedVector& loaded) prev = value.getValue(); prevRefCount = 1; } else { + assert(prevRefCount < std::numeric_limits<uint32_t>::max()); prevRefCount++; } value.setEidx(index); diff --git a/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.h b/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.h index a8d6fc3ba68..275fadd1e7f 100644 --- a/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.h +++ b/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.h @@ -138,6 +138,7 @@ public: (void) docId; (void) weight; assert(e < _hist.size()); + assert(_hist[e] < std::numeric_limits<uint32_t>::max()); ++_hist[e]; } }; diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp index 21bbec729df..19ef92c9356 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp @@ -93,6 +93,7 @@ PostingListAttributeBase<P>::handle_load_posting_lists_and_update_enum_store(enu postings.clear(); } } + assert(refCount < std::numeric_limits<uint32_t>::max()); ++refCount; assert(elem.getDocId() < docIdLimit); (void) docIdLimit; diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp index 265116f3bd1..5ad1629e673 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp @@ -284,6 +284,17 @@ SingleValueEnumAttribute<B>::onShrinkLidSpace() if (pab != nullptr) { pab->clearPostings(e, committedDocIdLimit, _enumIndices.size()); } + uint32_t shrink_docs = _enumIndices.size() - committedDocIdLimit; + if (shrink_docs > 0u) { + datastore::EntryRef default_value_ref(e); + assert(default_value_ref.valid()); + uint32_t default_value_ref_count = this->_enumStore.get_ref_count(default_value_ref); + assert(default_value_ref_count >= shrink_docs); + this->_enumStore.set_ref_count(default_value_ref, default_value_ref_count - shrink_docs); + IEnumStore::IndexSet possibly_unused; + possibly_unused.insert(default_value_ref); + this->_enumStore.free_unused_values(possibly_unused); + } _enumIndices.shrink(committedDocIdLimit); this->setNumDocs(committedDocIdLimit); } |