summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@broadpark.no>2019-09-25 11:16:16 +0200
committerGitHub <noreply@github.com>2019-09-25 11:16:16 +0200
commit3800cdc8f1200ede667229eceef02792b060194a (patch)
treee8a3d3cdbe69804a371efb16c6ce6beeeb712e72 /searchlib
parent2c1a51763f6e48439f50301958f4a270892eb238 (diff)
parenta7ef5454b000ca06f0756d4d374e5e24c704c007 (diff)
Merge pull request #10770 from vespa-engine/toregge/detect-enum-store-entry-ref-count-overflow
Detect enum store entry reference count overflow
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/attribute/attribute_test.cpp41
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp23
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumattribute.hpp1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/loadedenumvalue.h1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp11
7 files changed, 76 insertions, 3 deletions
diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp
index 98caf39dace..2f3df28cc6f 100644
--- a/searchlib/src/tests/attribute/attribute_test.cpp
+++ b/searchlib/src/tests/attribute/attribute_test.cpp
@@ -15,6 +15,7 @@
#include <vespa/searchlib/attribute/multistringattribute.h>
#include <vespa/searchlib/attribute/predicate_attribute.h>
#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/searchlib/attribute/singlenumericpostattribute.h>
#include <vespa/searchlib/attribute/singlestringattribute.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
#include <vespa/searchlib/util/randomgenerator.h>
@@ -241,6 +242,8 @@ private:
void testCompactLidSpace();
+ void test_default_value_ref_count_is_updated_after_shrink_lid_space();
+
template <typename AttributeType>
void requireThatAddressSpaceUsageIsReported(const Config &config, bool fastSearch);
template <typename AttributeType>
@@ -2019,6 +2022,43 @@ AttributeTest::testCompactLidSpace()
TEST_DO(testCompactLidSpace(Config(BasicType::PREDICATE, CollectionType::SINGLE)));
}
+namespace {
+
+uint32_t
+get_default_value_ref_count(AttributeVector &attr)
+{
+ auto *enum_store_base = attr.getEnumStoreBase();
+ auto &enum_store = dynamic_cast<EnumStoreT<int32_t> &>(*enum_store_base);
+ IAttributeVector::EnumHandle default_value_handle(0);
+ if (enum_store.find_enum(attr.getDefaultValue(), default_value_handle)) {
+ datastore::EntryRef default_value_ref(default_value_handle);
+ assert(default_value_ref.valid());
+ return enum_store.get_ref_count(default_value_ref);
+ } else {
+ return 0u;
+ }
+}
+
+}
+
+
+void
+AttributeTest::test_default_value_ref_count_is_updated_after_shrink_lid_space()
+{
+ Config cfg(BasicType::INT32, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ vespalib::string name = "shrink";
+ AttributePtr attr = AttributeFactory::createAttribute(name, cfg);
+ attr->addReservedDoc();
+ attr->addDocs(10);
+ EXPECT_EQUAL(11u, get_default_value_ref_count(*attr));
+ attr->compactLidSpace(6);
+ EXPECT_EQUAL(11u, get_default_value_ref_count(*attr));
+ attr->shrinkLidSpace();
+ EXPECT_EQUAL(6u, attr->getNumDocs());
+ EXPECT_EQUAL(6u, get_default_value_ref_count(*attr));
+}
+
template <typename AttributeType>
void
AttributeTest::requireThatAddressSpaceUsageIsReported(const Config &config, bool fastSearch)
@@ -2223,6 +2263,7 @@ int AttributeTest::Main()
testCreateSerialNum();
testPredicateHeaderTags();
TEST_DO(testCompactLidSpace());
+ TEST_DO(test_default_value_ref_count_is_updated_after_shrink_lid_space());
TEST_DO(requireThatAddressSpaceUsageIsReported());
testReaderDuringLastUpdate();
TEST_DO(testPendingCompaction());
diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp
index 2a544814710..cda2bea9fb1 100644
--- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp
@@ -78,6 +78,19 @@ EnumStoreDictionary<DictionaryT>::free_unused_values(const IndexSet& to_remove,
}
template <typename DictionaryT>
+void
+EnumStoreDictionary<DictionaryT>::remove(const EntryComparator &comp, EntryRef ref)
+{
+ assert(ref.valid());
+ auto itr = this->_dict.lowerBound(ref, comp);
+ assert(itr.valid() && itr.getKey() == ref);
+ if constexpr (std::is_same_v<DictionaryT, EnumPostingTree>) {
+ assert(EntryRef(itr.getData()) == EntryRef());
+ }
+ this->_dict.remove(itr);
+}
+
+template <typename DictionaryT>
bool
EnumStoreDictionary<DictionaryT>::find_index(const datastore::EntryComparator& cmp,
Index& idx) const
@@ -184,9 +197,13 @@ EnumStoreFoldedDictionary::remove(const EntryComparator& comp, EntryRef ref)
EntryRef posting_list_ref(it.getData());
_dict.remove(it);
// Maybe copy posting list reference to next entry
- if (posting_list_ref.valid() && it.valid() && !EntryRef(it.getData()).valid() && !(*_folded_compare)(ref, it.getKey())) {
- this->_dict.thaw(it);
- it.writeData(posting_list_ref.ref());
+ if (posting_list_ref.valid()) {
+ if (it.valid() && !EntryRef(it.getData()).valid() && !(*_folded_compare)(ref, it.getKey())) {
+ this->_dict.thaw(it);
+ it.writeData(posting_list_ref.ref());
+ } else {
+ LOG_ABORT("Posting list not cleared for removed unique value");
+ }
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h
index 5e7a63e68b4..c2c4c96c2d9 100644
--- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h
+++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h
@@ -41,6 +41,7 @@ public:
void free_unused_values(const IndexSet& to_remove,
const datastore::EntryComparator& cmp) override;
+ void remove(const datastore::EntryComparator& comp, datastore::EntryRef ref) override;
bool find_index(const datastore::EntryComparator& cmp, Index& idx) const override;
bool find_frozen_index(const datastore::EntryComparator& cmp, Index& idx) const override;
std::vector<attribute::IAttributeVector::EnumHandle>
diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp
index 897d8a10ec6..b527f89b224 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp
@@ -41,6 +41,7 @@ void EnumAttribute<B>::load_enum_store(LoadedVector& loaded)
prev = value.getValue();
prevRefCount = 1;
} else {
+ assert(prevRefCount < std::numeric_limits<uint32_t>::max());
prevRefCount++;
}
value.setEidx(index);
diff --git a/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.h b/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.h
index a8d6fc3ba68..275fadd1e7f 100644
--- a/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.h
+++ b/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.h
@@ -138,6 +138,7 @@ public:
(void) docId;
(void) weight;
assert(e < _hist.size());
+ assert(_hist[e] < std::numeric_limits<uint32_t>::max());
++_hist[e];
}
};
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp
index 21bbec729df..19ef92c9356 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp
@@ -93,6 +93,7 @@ PostingListAttributeBase<P>::handle_load_posting_lists_and_update_enum_store(enu
postings.clear();
}
}
+ assert(refCount < std::numeric_limits<uint32_t>::max());
++refCount;
assert(elem.getDocId() < docIdLimit);
(void) docIdLimit;
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
index 265116f3bd1..5ad1629e673 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
@@ -284,6 +284,17 @@ SingleValueEnumAttribute<B>::onShrinkLidSpace()
if (pab != nullptr) {
pab->clearPostings(e, committedDocIdLimit, _enumIndices.size());
}
+ uint32_t shrink_docs = _enumIndices.size() - committedDocIdLimit;
+ if (shrink_docs > 0u) {
+ datastore::EntryRef default_value_ref(e);
+ assert(default_value_ref.valid());
+ uint32_t default_value_ref_count = this->_enumStore.get_ref_count(default_value_ref);
+ assert(default_value_ref_count >= shrink_docs);
+ this->_enumStore.set_ref_count(default_value_ref, default_value_ref_count - shrink_docs);
+ IEnumStore::IndexSet possibly_unused;
+ possibly_unused.insert(default_value_ref);
+ this->_enumStore.free_unused_values(possibly_unused);
+ }
_enumIndices.shrink(committedDocIdLimit);
this->setNumDocs(committedDocIdLimit);
}