diff options
Diffstat (limited to 'searchlib/src/tests/attribute/postinglist/postinglist_test.cpp')
-rw-r--r-- | searchlib/src/tests/attribute/postinglist/postinglist_test.cpp | 698 |
1 files changed, 698 insertions, 0 deletions
diff --git a/searchlib/src/tests/attribute/postinglist/postinglist_test.cpp b/searchlib/src/tests/attribute/postinglist/postinglist_test.cpp new file mode 100644 index 00000000000..39e31b23498 --- /dev/null +++ b/searchlib/src/tests/attribute/postinglist/postinglist_test.cpp @@ -0,0 +1,698 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/btree/btreenodeallocator.hpp> +#include <vespa/vespalib/btree/btreenode.hpp> +#include <vespa/vespalib/btree/btreenodestore.hpp> +#include <vespa/vespalib/btree/btreeiterator.hpp> +#include <vespa/vespalib/btree/btreeroot.hpp> +#include <vespa/vespalib/btree/btreestore.hpp> +#include <vespa/vespalib/datastore/datastore.h> +#include <vespa/vespalib/util/rand48.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <set> +#include <map> +#include <cinttypes> + +#include <vespa/log/log.h> +LOG_SETUP("postinglist_test"); + +namespace search { + +using vespalib::GenerationHandler; + +/* + * TODO: Make it pass MALLOC_OPTIONS=AJ on freebsd and valgrind on Linux. + */ + +class AttributePostingListTest : public vespalib::TestApp +{ +private: + /* Limited STL version for validation of full version */ + using STLPostingList = std::set<uint32_t>; + using STLValueTree = std::map<int, STLPostingList>; + + class RandomValue + { + public: + uint32_t _docId; + int _value; + uint32_t _order; + + RandomValue() + : _docId(0), + _value(0u), + _order(0u) + { + } + + RandomValue(uint32_t docId, uint32_t value, uint32_t order) + : _docId(docId), + _value(value), + _order(order) + { + } + + bool + operator<(const RandomValue &rhs) const + { + return (_value < rhs._value || + (_value == rhs._value && + (_docId < rhs._docId || + (_docId == rhs._docId && + _order < rhs._order)))); + } + + bool + operator>(const RandomValue &rhs) const + { + return (_value > rhs._value || + (_value == rhs._value && + (_docId > rhs._docId || + (_docId == rhs._docId && + _order > rhs._order)))); + } + + bool + operator==(const RandomValue &rhs) const + { + return (_value == rhs._value && + _docId == rhs._docId && + _order == rhs._order); + } + }; + + class CompareOrder + { + public: + bool + operator()(const RandomValue &a, const RandomValue &b) + { + return (a._order < b._order || + (a._order == b._order && + (a._value < b._value || + (a._value == b._value && + a._docId < b._docId)))); + } + }; + std::vector<RandomValue> _randomValues; + +public: + using IntKeyStore = vespalib::datastore::DataStore<int>; + using AttributePosting = vespalib::btree::BTreeKeyData<uint32_t, vespalib::btree::BTreeNoLeafData>; + using PostingList = vespalib::btree::BTreeStore<uint32_t, + vespalib::btree::BTreeNoLeafData, + vespalib::btree::NoAggregated, + std::less<uint32_t>, + vespalib::btree::BTreeDefaultTraits>; + using PostingListNodeAllocator = PostingList::NodeAllocatorType; + using PostingIdx = vespalib::datastore::EntryRef; + using StoreIndex = vespalib::datastore::EntryRef; + + class IntComp { + private: + const IntKeyStore & _store; + int _value; + int getValue(const StoreIndex & idx) const { + if (idx.valid()) { + return _store.getEntry(idx); + } + return _value; + } + public: + IntComp(const IntKeyStore & store) : _store(store), _value(0) {} + IntComp(const IntKeyStore & store, int value) : _store(store), _value(value) {} + bool operator() (const StoreIndex & lhs, const StoreIndex & rhs) const { + return getValue(lhs) < getValue(rhs); + } + }; + + using IntEnumTree = vespalib::btree::BTreeRoot<StoreIndex, PostingIdx, + vespalib::btree::NoAggregated, + const IntComp &>; + using IntEnumNodeAllocator = IntEnumTree::NodeAllocatorType; + using Tree = IntEnumTree; + using TreeManager = IntEnumNodeAllocator; + using ValueHandle = IntKeyStore; + using RandomValuesVector = std::vector<RandomValue>; +private: + GenerationHandler _handler; + IntKeyStore *_intKeyStore; + IntEnumNodeAllocator *_intNodeAlloc; + IntEnumTree *_intTree; + PostingList *_intPostings; + STLValueTree *_stlTree; + + vespalib::Rand48 _randomGenerator; + uint32_t _generation; + + void + allocTree(); + + void + freeTree(bool verbose); + + void + fillRandomValues(unsigned int count, + unsigned int mvcount); + + void + insertRandomValues(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values); + + void + removeRandomValues(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values); + + void + lookupRandomValues(Tree &tree, + TreeManager &treeMgr, + const ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values); + + void + sortRandomValues(); + + void + doCompactEnumStore(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle); + + void + doCompactPostingList(Tree &tree, + TreeManager &treeMgr, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc); + + void + bumpGeneration(Tree &tree, + ValueHandle &valueHandle, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc); + + void + reclaim_memory(Tree &tree, + ValueHandle &valueHandle, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc); + + static const char * + frozenName(bool frozen) + { + return frozen ? "frozen" : "thawed"; + } +public: + AttributePostingListTest(); + ~AttributePostingListTest(); + + int Main() override; +}; + +AttributePostingListTest::AttributePostingListTest() + : vespalib::TestApp(), + _randomValues(), + _handler(), + _intKeyStore(NULL), + _intNodeAlloc(NULL), + _intTree(NULL), + _intPostings(NULL), + _stlTree(NULL), + _randomGenerator() +{} +AttributePostingListTest::~AttributePostingListTest() {} + +void +AttributePostingListTest::allocTree() +{ + _intKeyStore = new IntKeyStore; + _intNodeAlloc = new IntEnumNodeAllocator(); + _intTree = new IntEnumTree(); + _intPostings = new PostingList(); + _stlTree = new STLValueTree; +} + + +void +AttributePostingListTest::freeTree(bool verbose) +{ + (void) verbose; + LOG(info, + "freeTree before clear: %" PRIu64 " (%" PRIu64 " held)" + ", %zu leaves", + static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytes()), + static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold()), + _intTree->size(*_intNodeAlloc)); + _intTree->clear(*_intNodeAlloc); + LOG(info, + "freeTree before unhold: %" PRIu64 " (%" PRIu64 " held)", + static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytes()), + static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold())); + _intNodeAlloc->freeze(); + _intPostings->freeze(); + _intNodeAlloc->assign_generation(_handler.getCurrentGeneration()); + _intPostings->clearBuilder(); + _intPostings->assign_generation(_handler.getCurrentGeneration()); + _handler.incGeneration(); + _intNodeAlloc->reclaim_memory(_handler.get_oldest_used_generation()); + _intPostings->reclaim_memory(_handler.get_oldest_used_generation()); + LOG(info, + "freeTree after unhold: %" PRIu64 " (%" PRIu64 " held)", + static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytes()), + static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold())); + delete _stlTree; + _stlTree = NULL; + delete _intTree; + _intTree = NULL; + delete _intNodeAlloc; + _intNodeAlloc = NULL; + delete _intKeyStore; + _intKeyStore = NULL; + delete _intPostings; + _intPostings = NULL; +} + + +void +AttributePostingListTest:: +fillRandomValues(unsigned int count, + unsigned int mvcount) +{ + unsigned int i; + unsigned int j; + unsigned int mv; + unsigned int mvmax; + unsigned int mvcount2; + unsigned int mvcount3; + + mvmax = 100; + mvcount2 = mvcount * (mvmax * (mvmax - 1)) / 2; + LOG(info, + "Filling %u+%u random values", count, mvcount2); + _randomValues.clear(); + _randomValues.reserve(count); + _randomGenerator.srand48(42); + for (i = 0; i <count; i++) { + uint32_t docId = _randomGenerator.lrand48(); + uint32_t val = _randomGenerator.lrand48(); + uint32_t order = _randomGenerator.lrand48(); + _randomValues.push_back(RandomValue(docId, val, order)); + } + for (mv = 1; mv < mvmax; mv++) { + for (i = 0; i < mvcount; i++) { + for (j = 0; j < mv; j++) { + uint32_t docId = _randomGenerator.lrand48(); + uint32_t val = _randomGenerator.lrand48(); + uint32_t order = _randomGenerator.lrand48(); + _randomValues.push_back(RandomValue(docId, val, order)); + } + } + } + mvcount3 = 0; + for (mv = 10; mv < 4000; mv = mv * 3) + { + mvcount3 += mv * 2; + for (j = 0; j < mv; j++) { + uint32_t val = _randomGenerator.lrand48(); + uint32_t docId = _randomGenerator.lrand48(); + uint32_t order = _randomGenerator.lrand48(); + _randomValues.push_back(RandomValue(docId, val, order)); + val = _randomGenerator.lrand48(); + docId = _randomGenerator.lrand48(); + order = _randomGenerator.lrand48(); + _randomValues.push_back(RandomValue(docId, val, order)); + } + } + std::sort(_randomValues.begin(), + _randomValues.end(), + CompareOrder()); + + EXPECT_TRUE(_randomValues.size() == count + mvcount2 + mvcount3); +} + + +void +AttributePostingListTest:: +insertRandomValues(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector & + values) +{ + RandomValuesVector::iterator i; + RandomValuesVector::iterator ie; + + LOG(info, "insertRandomValues start"); + ie = values.end(); + for (i = values.begin(); i != ie; ++i) { + Tree::Iterator itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value)); + if (!itr.valid()) { +#if 0 + if (valueHandle.needResize()) + doCompactEnumStore(tree, treeMgr, valueHandle); +#endif + StoreIndex idx = valueHandle.addEntry(i->_value); + if (tree.insert(idx, PostingIdx(), treeMgr, IntComp(valueHandle))) { + itr = tree.find(idx, treeMgr, IntComp(valueHandle)); + } + } else { + } + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(i->_value, valueHandle.getEntry(itr.getKey())); + + /* TODO: Insert docid to postinglist */ + PostingIdx oldIdx = itr.getData(); + PostingIdx newIdx = oldIdx; + AttributePosting newPosting(i->_docId, + vespalib::btree::BTreeNoLeafData()); + std::vector<AttributePosting> additions; + std::vector<uint32_t> removals; + additions.push_back(newPosting); + postings.apply(newIdx, additions.data(), additions.data() + additions.size(), + removals.data(), removals.data() + removals.size()); + std::atomic_thread_fence(std::memory_order_release); + itr.writeData(newIdx); + + if (stlTree != NULL) { + STLValueTree::iterator it; + it = stlTree->find(i->_value); + if (it == stlTree->end()) { + std::pair<STLValueTree::iterator,bool> ir = + stlTree->insert(std::make_pair(i->_value, + STLPostingList())); + ASSERT_TRUE(ir.second && ir.first != stlTree->end() && + ir.first->first == i->_value); + it = ir.first; + } + ASSERT_TRUE(it != stlTree->end() && it->first == i->_value); + it->second.insert(i->_docId); + + if (it->second.empty()) { + stlTree->erase(it); + ASSERT_TRUE(!itr.valid()); + } else { + size_t postingsize; + + ASSERT_TRUE(itr.valid()); + postingsize = postings.size(newIdx); + ASSERT_TRUE(postingsize > 0 && + postingsize == it->second.size()); + STLPostingList::iterator it3; + STLPostingList::iterator it3b; + STLPostingList::iterator it3e; + + PostingList::Iterator it0; + + it3b = it->second.begin(); + it3e = it->second.end(); + it0 = postings.begin(newIdx); + it3 = it3b; + + while (it3 != it3e) { + ASSERT_TRUE(it0.valid()); + ASSERT_TRUE(*it3 == it0.getKey()); + ++it3; + ++it0; + } + ASSERT_TRUE(!it0.valid()); + } + } + } + ASSERT_TRUE(tree.isValid(treeMgr, IntComp(valueHandle))); + LOG(info, "insertRandomValues done"); +} + + +void +AttributePostingListTest:: +removeRandomValues(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values) +{ + RandomValuesVector::iterator i; + RandomValuesVector::iterator ie; + + LOG(info, "removeRandomValues start"); + ie = values.end(); + for (i = values.begin(); i != ie; ++i) { + Tree::Iterator itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value)); + PostingIdx newIdx; + /* + * TODO: Remove docid from postinglist, and only remove + * value from tree if postinglist is empty + */ + if (itr.valid()) { + PostingIdx oldIdx = itr.getData(); + newIdx = oldIdx; + std::vector<AttributePosting> additions; + std::vector<uint32_t> removals; + removals.push_back(i->_docId); + postings.apply(newIdx, additions.data(), additions.data() + additions.size(), + removals.data(), removals.data() + removals.size()); + if (newIdx != oldIdx) { + std::atomic_thread_fence(std::memory_order_release); + itr.writeData(newIdx); + } + if (!newIdx.valid()) { + if (tree.remove(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value))) { + itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value)); + } + } + } + if (stlTree != NULL) { + STLValueTree::iterator it; + it = stlTree->find(i->_value); + ASSERT_TRUE(it != stlTree->end() && it->first == i->_value); + STLPostingList::iterator it2; + it2 = it->second.find(i->_docId); + ASSERT_TRUE(it2 != it->second.end() && + *it2 == i->_docId); + it->second.erase(it2); + + if (it->second.empty()) { + stlTree->erase(it); + ASSERT_TRUE(!itr.valid()); + } else { + size_t postingsize; + + ASSERT_TRUE(itr.valid()); + postingsize = postings.size(newIdx); + ASSERT_TRUE(postingsize > 0 && + postingsize == it->second.size()); + STLPostingList::iterator it3; + STLPostingList::iterator it3b; + STLPostingList::iterator it3e; + + PostingList::Iterator it0; + + it3b = it->second.begin(); + it3e = it->second.end(); + it0 = postings.begin(newIdx); + it3 = it3b; + + while (it3 != it3e) { + ASSERT_TRUE(it0.valid()); + ASSERT_TRUE(*it3 == it0.getKey()); + ++it3; + ++it0; + } + ASSERT_TRUE(!it0.valid()); + } + } + } + ASSERT_TRUE(tree.isValid(treeMgr, IntComp(valueHandle))); + LOG(info, "removeRandomValues done"); +} + + +void +AttributePostingListTest:: +lookupRandomValues(Tree &tree, + TreeManager &treeMgr, + const ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values) +{ + RandomValuesVector::iterator i; + RandomValuesVector::iterator ie; + + LOG(info, "lookupRandomValues start"); + ie = values.end(); + for (i = values.begin(); i != ie; ++i) { + Tree::Iterator itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value)); + ASSERT_TRUE(itr.valid() && + valueHandle.getEntry(itr.getKey()) == i->_value); + if (stlTree != NULL) { + STLValueTree::iterator it; + it = stlTree->find(i->_value); + ASSERT_TRUE(it != stlTree->end() && it->first == i->_value); + + if (it->second.empty()) { + stlTree->erase(it); + ASSERT_TRUE(!itr.valid()); + } else { + size_t postingsize; + + ASSERT_TRUE(itr.valid()); + postingsize = postings.size(itr.getData()); + ASSERT_TRUE(postingsize > 0 && + postingsize == it->second.size()); + STLPostingList::iterator it3; + STLPostingList::iterator it3b; + STLPostingList::iterator it3e; + + PostingList::Iterator it0; + + it3b = it->second.begin(); + it3e = it->second.end(); + it0 = postings.begin(itr.getData()); + it3 = it3b; + + while (it3 != it3e) { + ASSERT_TRUE(it0.valid()); + ASSERT_TRUE(*it3 == it0.getKey()); + ++it3; + ++it0; + } + ASSERT_TRUE(!it0.valid()); + } + } + } + LOG(info, "lookupRandomValues done"); +} + + +void +AttributePostingListTest::doCompactEnumStore(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle) +{ + LOG(info,"doCompactEnumStore start"); + + Tree::Iterator i = tree.begin(treeMgr); + + std::vector<uint32_t> toHold; + valueHandle.for_each_active_buffer([&toHold](uint32_t buffer_id, const vespalib::datastore::BufferState &) { + toHold.push_back(buffer_id); + }); + + valueHandle.switch_primary_buffer(0, 0u); + + for (; i.valid(); ++i) + { + StoreIndex ov = i.getKey(); + StoreIndex nv = valueHandle.addEntry(valueHandle.getEntry(ov)); + + std::atomic_thread_fence(std::memory_order_release); + i.writeKey(nv); + } + using generation_t = GenerationHandler::generation_t; + for (std::vector<uint32_t>::const_iterator + it = toHold.begin(), ite = toHold.end(); it != ite; ++it) { + valueHandle.holdBuffer(*it); + } + generation_t generation = _handler.getCurrentGeneration(); + valueHandle.assign_generation(generation); + _handler.incGeneration(); + valueHandle.reclaim_memory(_handler.get_oldest_used_generation()); + + LOG(info, + "doCompactEnumStore done"); +} + + +void +AttributePostingListTest:: +doCompactPostingList(Tree &tree, + TreeManager &treeMgr, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc) +{ + LOG(info, + "doCompactPostingList start"); + +#if 0 + Tree::Iterator i(tree.begin(treeMgr)); + + postings.performCompaction(i, capacityNeeded); +#else + (void) tree; + (void) treeMgr; + (void) postings; + (void) postingsAlloc; +#endif + + LOG(info, + "doCompactPostingList done"); +} + + +void +AttributePostingListTest:: +bumpGeneration(Tree &tree, + ValueHandle &valueHandle, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc) +{ + (void) tree; + (void) valueHandle; + postingsAlloc.freeze(); + postingsAlloc.assign_generation(_handler.getCurrentGeneration()); + postings.assign_generation(_handler.getCurrentGeneration()); + _handler.incGeneration(); +} + +void +AttributePostingListTest:: +reclaim_memory(Tree &tree, + ValueHandle &valueHandle, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc) +{ + (void) tree; + (void) valueHandle; + postingsAlloc.reclaim_memory(_handler.get_oldest_used_generation()); + postings.reclaim_memory(_handler.get_oldest_used_generation()); +} + +int +AttributePostingListTest::Main() +{ + TEST_INIT("postinglist_test"); + + fillRandomValues(1000, 10); + + allocTree(); + insertRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, + _stlTree, _randomValues); + lookupRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, + _stlTree, _randomValues); + _intNodeAlloc->freeze(); + _intNodeAlloc->assign_generation(_handler.getCurrentGeneration()); + doCompactEnumStore(*_intTree, *_intNodeAlloc, *_intKeyStore); + removeRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, + _stlTree, _randomValues); + insertRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, + _stlTree, _randomValues); + freeTree(true); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::AttributePostingListTest); |