diff options
Diffstat (limited to 'searchlib/src/tests/memoryindex')
48 files changed, 6200 insertions, 0 deletions
diff --git a/searchlib/src/tests/memoryindex/btree/.gitignore b/searchlib/src/tests/memoryindex/btree/.gitignore new file mode 100644 index 00000000000..94440affa90 --- /dev/null +++ b/searchlib/src/tests/memoryindex/btree/.gitignore @@ -0,0 +1,6 @@ +.depend +Makefile +btree_test +frozenbtree_test +searchlib_btree_test_app +searchlib_frozenbtree_test_app diff --git a/searchlib/src/tests/memoryindex/btree/CMakeLists.txt b/searchlib/src/tests/memoryindex/btree/CMakeLists.txt new file mode 100644 index 00000000000..8b523030cab --- /dev/null +++ b/searchlib/src/tests/memoryindex/btree/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_btree_test_app + SOURCES + btree_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_btree_test_app COMMAND searchlib_btree_test_app) +vespa_add_executable(searchlib_frozenbtree_test_app + SOURCES + frozenbtree_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_frozenbtree_test_app COMMAND searchlib_frozenbtree_test_app) diff --git a/searchlib/src/tests/memoryindex/btree/DESC b/searchlib/src/tests/memoryindex/btree/DESC new file mode 100644 index 00000000000..02739da7527 --- /dev/null +++ b/searchlib/src/tests/memoryindex/btree/DESC @@ -0,0 +1 @@ +btree test. Take a look at btree_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/btree/FILES b/searchlib/src/tests/memoryindex/btree/FILES new file mode 100644 index 00000000000..e63a2f68eb4 --- /dev/null +++ b/searchlib/src/tests/memoryindex/btree/FILES @@ -0,0 +1 @@ +btree_test.cpp diff --git a/searchlib/src/tests/memoryindex/btree/btree_test.cpp b/searchlib/src/tests/memoryindex/btree/btree_test.cpp new file mode 100644 index 00000000000..5fb6761ba57 --- /dev/null +++ b/searchlib/src/tests/memoryindex/btree/btree_test.cpp @@ -0,0 +1,1282 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("btree_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <string> +#include <vespa/searchlib/btree/btreeroot.h> +#include <vespa/searchlib/btree/btreebuilder.h> +#include <vespa/searchlib/btree/btreenodeallocator.h> +#include <vespa/searchlib/btree/btree.h> +#include <vespa/searchlib/btree/btreestore.h> +#include <vespa/searchlib/util/rand48.h> + +#include <vespa/searchlib/btree/btreenodeallocator.hpp> +#include <vespa/searchlib/btree/btreenode.hpp> +#include <vespa/searchlib/btree/btreenodestore.hpp> +#include <vespa/searchlib/btree/btreeiterator.hpp> +#include <vespa/searchlib/btree/btreeroot.hpp> +#include <vespa/searchlib/btree/btreebuilder.hpp> +#include <vespa/searchlib/btree/btree.hpp> +#include <vespa/searchlib/btree/btreestore.hpp> + +using vespalib::GenerationHandler; + +namespace search { +namespace btree { + +namespace { + +template <typename T> +std::string +toStr(const T & v) +{ + std::stringstream ss; + ss << v; + return ss.str(); +} + +} + +typedef BTreeTraits<4, 4, 31, false> MyTraits; + +#define KEYWRAP + +#ifdef KEYWRAP + +// Force use of functor to compare keys. 
+class WrapInt +{ +public: + int _val; + WrapInt(int val) : _val(val) {} + WrapInt(void) : _val(0) {} + bool operator==(const WrapInt & rhs) const { return _val == rhs._val; } +}; + +std::ostream & +operator<<(std::ostream &s, const WrapInt &i) +{ + s << i._val; + return s; +} + +typedef WrapInt MyKey; +class MyComp +{ +public: + bool + operator()(const WrapInt &a, const WrapInt &b) const + { + return a._val < b._val; + } +}; + +#define UNWRAP(key) (key._val) +#else +typedef int MyKey; +typedef std::less<int> MyComp; +#define UNWRAP(key) (key) +#endif + +typedef BTree<MyKey, std::string, + btree::NoAggregated, + MyComp, MyTraits> MyTree; +typedef BTreeStore<MyKey, std::string, + btree::NoAggregated, + MyComp, MyTraits> MyTreeStore; +typedef MyTree::Builder MyTreeBuilder; +typedef MyTree::LeafNodeType MyLeafNode; +typedef MyTree::InternalNodeType MyInternalNode; +typedef MyTree::NodeAllocatorType MyNodeAllocator; +typedef std::pair<MyKey, std::string> LeafPair; +typedef MyTreeStore::KeyDataType MyKeyData; +typedef MyTreeStore::KeyDataTypeRefPair MyKeyDataRefPair; + +typedef BTree<int, BTreeNoLeafData, btree::NoAggregated> SetTreeB; + +typedef BTreeTraits<16, 16, 10, false> LSeekTraits; +typedef BTree<int, BTreeNoLeafData, btree::NoAggregated, + std::less<int>, LSeekTraits> SetTreeL; + +struct LeafPairLess { + bool operator()(const LeafPair & lhs, const LeafPair & rhs) const { + return UNWRAP(lhs.first) < UNWRAP(rhs.first); + } +}; + +template <typename ManagerType> +void +cleanup(GenerationHandler & g, ManagerType & m) +{ + m.freeze(); + m.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + m.trimHoldLists(g.getFirstUsedGeneration()); +} + +template <typename ManagerType, typename NodeType> +void +cleanup(GenerationHandler & g, + ManagerType & m, + BTreeNode::Ref n1Ref, NodeType * n1, + BTreeNode::Ref n2Ref = BTreeNode::Ref(), NodeType * n2 = NULL) +{ + assert(ManagerType::isValidRef(n1Ref)); + m.holdNode(n1Ref, n1); + if (n2 != NULL) { + 
assert(ManagerType::isValidRef(n2Ref)); + m.holdNode(n2Ref, n2); + } else { + assert(!ManagerType::isValidRef(n2Ref)); + } + cleanup(g, m); +} + +class Test : public vespalib::TestApp { +private: + template <typename LeafNodeType> + bool assertLeafNode(const std::string & exp, const LeafNodeType & n); + bool assertSeek(int skey, int ekey, const MyTree & tree); + bool assertSeek(int skey, int ekey, MyTree::Iterator & itr); + bool assertMemoryUsage(const MemoryUsage & exp, const MemoryUsage & act); + + void + buildSubTree(const std::vector<LeafPair> &sub, + size_t numEntries); + + void requireThatNodeInsertWorks(); + void requireThatNodeSplitInsertWorks(); + void requireThatNodeStealWorks(); + void requireThatNodeRemoveWorks(); + void requireThatNodeLowerBoundWorks(); + void requireThatWeCanInsertAndRemoveFromTree(); + void requireThatSortedTreeInsertWorks(); + void requireThatCornerCaseTreeFindWorks(); + void requireThatBasicTreeIteratorWorks(); + void requireThatTreeIteratorSeekWorks(); + void requireThatTreeIteratorAssignWorks(); + void requireThatMemoryUsageIsCalculated(); + template <typename TreeType> + void requireThatLowerBoundWorksT(); + void requireThatLowerBoundWorks(); + template <typename TreeType> + void requireThatUpperBoundWorksT(); + void requireThatUpperBoundWorks(); + void requireThatUpdateOfKeyWorks(); + + void + requireThatSmallNodesWorks(); + + void + requireThatApplyWorks(); + + void + requireThatIteratorDistanceWorks(int numEntries); + + void + requireThatIteratorDistanceWorks(); +public: + int Main(); +}; + +template <typename LeafNodeType> +bool +Test::assertLeafNode(const std::string & exp, const LeafNodeType & n) +{ + std::stringstream ss; + ss << "["; + for (uint32_t i = 0; i < n.validSlots(); ++i) { + if (i > 0) ss << ","; + ss << n.getKey(i) << ":" << n.getData(i); + } + ss << "]"; + if (!EXPECT_EQUAL(exp, ss.str())) return false; + return true; +} + +bool +Test::assertSeek(int skey, int ekey, const MyTree & tree) +{ + MyTree::Iterator 
itr = tree.begin(); + return assertSeek(skey, ekey, itr); +} + +bool +Test::assertSeek(int skey, int ekey, MyTree::Iterator & itr) +{ + MyTree::Iterator bseekItr = itr; + MyTree::Iterator lseekItr = itr; + bseekItr.binarySeek(skey); + lseekItr.linearSeek(skey); + if (!EXPECT_EQUAL(ekey, UNWRAP(bseekItr.getKey()))) return false; + if (!EXPECT_EQUAL(ekey, UNWRAP(lseekItr.getKey()))) return false; + itr = bseekItr; + return true; +} + +bool +Test::assertMemoryUsage(const MemoryUsage & exp, const MemoryUsage & act) +{ + if (!EXPECT_EQUAL(exp.allocatedBytes(), act.allocatedBytes())) return false; + if (!EXPECT_EQUAL(exp.usedBytes(), act.usedBytes())) return false; + if (!EXPECT_EQUAL(exp.deadBytes(), act.deadBytes())) return false; + if (!EXPECT_EQUAL(exp.allocatedBytesOnHold(), act.allocatedBytesOnHold())) return false; + return true; +} + +void +Test::requireThatNodeInsertWorks() +{ + GenerationHandler g; + MyNodeAllocator m; + MyLeafNode::RefPair nPair = m.allocLeafNode(); + MyLeafNode *n = nPair.second; + EXPECT_TRUE(n->isLeaf()); + EXPECT_EQUAL(0u, n->validSlots()); + n->insert(0, 20, "b"); + EXPECT_TRUE(!n->isFull()); + EXPECT_TRUE(!n->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[20:b]", *n)); + n->insert(0, 10, "a"); + EXPECT_TRUE(!n->isFull()); + EXPECT_TRUE(n->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[10:a,20:b]", *n)); + EXPECT_EQUAL(20, UNWRAP(n->getLastKey())); + EXPECT_EQUAL("b", n->getLastData()); + n->insert(2, 30, "c"); + EXPECT_TRUE(!n->isFull()); + n->insert(3, 40, "d"); + EXPECT_TRUE(n->isFull()); + EXPECT_TRUE(n->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[10:a,20:b,30:c,40:d]", *n)); + cleanup(g, m, nPair.first, n); +} + +MyLeafNode::RefPair +getLeafNode(MyNodeAllocator &allocator) +{ + MyLeafNode::RefPair nPair = allocator.allocLeafNode(); + MyLeafNode *n = nPair.second; + n->insert(0, 1, "a"); + n->insert(1, 3, "c"); + n->insert(2, 5, "e"); + n->insert(3, 7, "g"); + return nPair; +} + +void 
+Test::requireThatNodeSplitInsertWorks() +{ + { // new entry in current node + GenerationHandler g; + MyNodeAllocator m; + MyLeafNode::RefPair nPair = getLeafNode(m); + MyLeafNode *n = nPair.second; + MyLeafNode::RefPair sPair = m.allocLeafNode(); + MyLeafNode *s = sPair.second; + n->splitInsert(s, 2, 4, "d"); + EXPECT_TRUE(assertLeafNode("[1:a,3:c,4:d]", *n)); + EXPECT_TRUE(assertLeafNode("[5:e,7:g]", *s)); + cleanup(g, m, nPair.first, n, sPair.first, s); + } + { // new entry in split node + GenerationHandler g; + MyNodeAllocator m; + MyLeafNode::RefPair nPair = getLeafNode(m); + MyLeafNode *n = nPair.second; + MyLeafNode::RefPair sPair = m.allocLeafNode(); + MyLeafNode *s = sPair.second; + n->splitInsert(s, 3, 6, "f"); + EXPECT_TRUE(assertLeafNode("[1:a,3:c,5:e]", *n)); + EXPECT_TRUE(assertLeafNode("[6:f,7:g]", *s)); + cleanup(g, m, nPair.first, n, sPair.first, s); + } + { // new entry at end + GenerationHandler g; + MyNodeAllocator m; + MyLeafNode::RefPair nPair = getLeafNode(m); + MyLeafNode *n = nPair.second; + MyLeafNode::RefPair sPair = m.allocLeafNode(); + MyLeafNode *s = sPair.second; + n->splitInsert(s, 4, 8, "h"); + EXPECT_TRUE(assertLeafNode("[1:a,3:c,5:e]", *n)); + EXPECT_TRUE(assertLeafNode("[7:g,8:h]", *s)); + cleanup(g, m, nPair.first, n, sPair.first, s); + } +} + +struct BTreeStealTraits +{ + static const size_t LEAF_SLOTS = 6; + static const size_t INTERNAL_SLOTS = 6; +}; + +void +Test::requireThatNodeStealWorks() +{ + typedef BTreeLeafNode<int, std::string, + btree::NoAggregated, 6> MyStealNode; + typedef BTreeNodeAllocator<int, std::string, + btree::NoAggregated, + BTreeStealTraits::INTERNAL_SLOTS, BTreeStealTraits::LEAF_SLOTS> + MyStealManager; + { // steal all from left + GenerationHandler g; + MyStealManager m; + MyStealNode::RefPair nPair = m.allocLeafNode(); + MyStealNode *n = nPair.second; + n->insert(0, 4, "d"); + n->insert(1, 5, "e"); + EXPECT_TRUE(!n->isAtLeastHalfFull()); + MyStealNode::RefPair vPair = m.allocLeafNode(); + MyStealNode 
*v = vPair.second; + v->insert(0, 1, "a"); + v->insert(1, 2, "b"); + v->insert(2, 3, "c"); + n->stealAllFromLeftNode(v); + EXPECT_TRUE(n->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[1:a,2:b,3:c,4:d,5:e]", *n)); + cleanup(g, m, nPair.first, n, vPair.first, v); + } + { // steal all from right + GenerationHandler g; + MyStealManager m; + MyStealNode::RefPair nPair = m.allocLeafNode(); + MyStealNode *n = nPair.second; + n->insert(0, 1, "a"); + n->insert(1, 2, "b"); + EXPECT_TRUE(!n->isAtLeastHalfFull()); + MyStealNode::RefPair vPair = m.allocLeafNode(); + MyStealNode *v = vPair.second; + v->insert(0, 3, "c"); + v->insert(1, 4, "d"); + v->insert(2, 5, "e"); + n->stealAllFromRightNode(v); + EXPECT_TRUE(n->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[1:a,2:b,3:c,4:d,5:e]", *n)); + cleanup(g, m, nPair.first, n, vPair.first, v); + } + { // steal some from left + GenerationHandler g; + MyStealManager m; + MyStealNode::RefPair nPair = m.allocLeafNode(); + MyStealNode *n = nPair.second; + n->insert(0, 5, "e"); + n->insert(1, 6, "f"); + EXPECT_TRUE(!n->isAtLeastHalfFull()); + MyStealNode::RefPair vPair = m.allocLeafNode(); + MyStealNode *v = vPair.second; + v->insert(0, 1, "a"); + v->insert(1, 2, "b"); + v->insert(2, 3, "c"); + v->insert(3, 4, "d"); + n->stealSomeFromLeftNode(v); + EXPECT_TRUE(n->isAtLeastHalfFull()); + EXPECT_TRUE(v->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[4:d,5:e,6:f]", *n)); + EXPECT_TRUE(assertLeafNode("[1:a,2:b,3:c]", *v)); + cleanup(g, m, nPair.first, n, vPair.first, v); + } + { // steal some from right + GenerationHandler g; + MyStealManager m; + MyStealNode::RefPair nPair = m.allocLeafNode(); + MyStealNode *n = nPair.second; + n->insert(0, 1, "a"); + n->insert(1, 2, "b"); + EXPECT_TRUE(!n->isAtLeastHalfFull()); + MyStealNode::RefPair vPair = m.allocLeafNode(); + MyStealNode *v = vPair.second; + v->insert(0, 3, "c"); + v->insert(1, 4, "d"); + v->insert(2, 5, "e"); + v->insert(3, 6, "f"); + n->stealSomeFromRightNode(v); + 
EXPECT_TRUE(n->isAtLeastHalfFull()); + EXPECT_TRUE(v->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[1:a,2:b,3:c]", *n)); + EXPECT_TRUE(assertLeafNode("[4:d,5:e,6:f]", *v)); + cleanup(g, m, nPair.first, n, vPair.first, v); + } +} + +void +Test::requireThatNodeRemoveWorks() +{ + GenerationHandler g; + MyNodeAllocator m; + MyLeafNode::RefPair nPair = getLeafNode(m); + MyLeafNode *n = nPair.second; + n->remove(1); + EXPECT_TRUE(assertLeafNode("[1:a,5:e,7:g]", *n)); + cleanup(g, m, nPair.first, n); +} + +void +Test::requireThatNodeLowerBoundWorks() +{ + GenerationHandler g; + MyNodeAllocator m; + MyLeafNode::RefPair nPair = getLeafNode(m); + MyLeafNode *n = nPair.second; + EXPECT_EQUAL(1u, n->lower_bound(3, MyComp())); + EXPECT_FALSE(MyComp()(3, n->getKey(1u))); + EXPECT_EQUAL(0u, n->lower_bound(0, MyComp())); + EXPECT_TRUE(MyComp()(0, n->getKey(0u))); + EXPECT_EQUAL(1u, n->lower_bound(2, MyComp())); + EXPECT_TRUE(MyComp()(2, n->getKey(1u))); + EXPECT_EQUAL(3u, n->lower_bound(6, MyComp())); + EXPECT_TRUE(MyComp()(6, n->getKey(3u))); + EXPECT_EQUAL(4u, n->lower_bound(8, MyComp())); + cleanup(g, m, nPair.first, n); +} + +void +generateData(std::vector<LeafPair> & data, size_t numEntries) +{ + data.reserve(numEntries); + Rand48 rnd; + rnd.srand48(10); + for (size_t i = 0; i < numEntries; ++i) { + int num = rnd.lrand48() % 10000000; + std::string str = toStr(num); + data.push_back(std::make_pair(num, str)); + } +} + + +void +Test::buildSubTree(const std::vector<LeafPair> &sub, + size_t numEntries) +{ + GenerationHandler g; + MyTree tree; + MyTreeBuilder builder(tree.getAllocator()); + + std::vector<LeafPair> sorted(sub.begin(), sub.begin() + numEntries); + std::sort(sorted.begin(), sorted.end(), LeafPairLess()); + for (size_t i = 0; i < numEntries; ++i) { + int num = UNWRAP(sorted[i].first); + const std::string & str = sorted[i].second; + builder.insert(num, str); + } + tree.assign(builder); + assert(numEntries == tree.size()); + assert(tree.isValid()); + 
EXPECT_EQUAL(numEntries, tree.size()); + EXPECT_TRUE(tree.isValid()); + MyTree::Iterator itr = tree.begin(); + MyTree::Iterator ritr = itr; + if (numEntries > 0) { + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(numEntries, ritr.position()); + --ritr; + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(numEntries - 1, ritr.position()); + } else { + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + } + for (size_t i = 0; i < numEntries; ++i) { + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(sorted[i].first, itr.getKey()); + EXPECT_EQUAL(sorted[i].second, itr.getData()); + ++itr; + } + EXPECT_TRUE(!itr.valid()); + ritr = itr; + EXPECT_TRUE(!ritr.valid()); + --ritr; + for (size_t i = 0; i < numEntries; ++i) { + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(sorted[numEntries - 1 - i].first, ritr.getKey()); + EXPECT_EQUAL(sorted[numEntries - 1 - i].second, ritr.getData()); + --ritr; + } + EXPECT_TRUE(!ritr.valid()); +} + +void +Test::requireThatWeCanInsertAndRemoveFromTree() +{ + GenerationHandler g; + MyTree tree; + std::vector<LeafPair> exp; + std::vector<LeafPair> sorted; + size_t numEntries = 1000; + generateData(exp, numEntries); + sorted = exp; + std::sort(sorted.begin(), sorted.end(), LeafPairLess()); + // insert entries + for (size_t i = 0; i < numEntries; ++i) { + int num = UNWRAP(exp[i].first); + const std::string & str = exp[i].second; + EXPECT_TRUE(!tree.find(num).valid()); + //LOG(info, "insert[%zu](%d, %s)", i, num, str.c_str()); + EXPECT_TRUE(tree.insert(num, str)); + EXPECT_TRUE(!tree.insert(num, str)); + for (size_t j = 0; j <= i; ++j) { + //LOG(info, "find[%zu](%d)", j, exp[j].first._val); + MyTree::Iterator itr = tree.find(exp[j].first); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(exp[j].first, itr.getKey()); + EXPECT_EQUAL(exp[j].second, itr.getData()); + } + EXPECT_EQUAL(i + 1u, tree.size()); + 
EXPECT_TRUE(tree.isValid()); + buildSubTree(exp, i + 1); + } + //std::cout << "tree: " << tree.toString() << std::endl; + + { + MyTree::Iterator itr = tree.begin(); + MyTree::Iterator itre = itr; + MyTree::Iterator itre2; + MyTree::Iterator ritr = itr; + while (itre.valid()) + ++itre; + if (numEntries > 0) { + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(numEntries, ritr.position()); + --ritr; + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(numEntries - 1, ritr.position()); + } else { + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + } + MyTree::Iterator pitr = itr; + for (size_t i = 0; i < numEntries; ++i) { + ssize_t si = i; + ssize_t sileft = numEntries - i; + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(i, itr.position()); + EXPECT_EQUAL(sileft, itre - itr); + EXPECT_EQUAL(-sileft, itr - itre); + EXPECT_EQUAL(sileft, itre2 - itr); + EXPECT_EQUAL(-sileft, itr - itre2); + EXPECT_EQUAL(si, itr - tree.begin()); + EXPECT_EQUAL(-si, tree.begin() - itr); + EXPECT_EQUAL(i != 0, itr - pitr); + EXPECT_EQUAL(-(i != 0), pitr - itr); + EXPECT_EQUAL(sorted[i].first, itr.getKey()); + EXPECT_EQUAL(sorted[i].second, itr.getData()); + pitr = itr; + ++itr; + ritr = itr; + --ritr; + EXPECT_TRUE(ritr.valid()); + EXPECT_TRUE(ritr == pitr); + } + EXPECT_TRUE(!itr.valid()); + EXPECT_EQUAL(numEntries, itr.position()); + ssize_t sNumEntries = numEntries; + EXPECT_EQUAL(sNumEntries, itr - tree.begin()); + EXPECT_EQUAL(-sNumEntries, tree.begin() - itr); + EXPECT_EQUAL(1, itr - pitr); + EXPECT_EQUAL(-1, pitr - itr); + } + // compact full tree by calling incremental compaction methods in a loop + { + MyTree::NodeAllocatorType &manager = tree.getAllocator(); + std::vector<uint32_t> toHold = manager.startCompact(); + MyTree::Iterator itr = tree.begin(); + tree.setRoot(itr.moveFirstLeafNode(tree.getRoot())); + while (itr.valid()) { + 
// LOG(info, "Leaf moved to %d", UNWRAP(itr.getKey())); + itr.moveNextLeafNode(); + } + manager.finishCompact(toHold); + manager.freeze(); + manager.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + manager.trimHoldLists(g.getFirstUsedGeneration()); + } + // remove entries + for (size_t i = 0; i < numEntries; ++i) { + int num = UNWRAP(exp[i].first); + //LOG(info, "remove[%zu](%d)", i, num); + //std::cout << "tree: " << tree.toString() << std::endl; + EXPECT_TRUE(tree.remove(num)); + EXPECT_TRUE(!tree.find(num).valid()); + EXPECT_TRUE(!tree.remove(num)); + EXPECT_TRUE(tree.isValid()); + for (size_t j = i + 1; j < numEntries; ++j) { + MyTree::Iterator itr = tree.find(exp[j].first); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(exp[j].first, itr.getKey()); + EXPECT_EQUAL(exp[j].second, itr.getData()); + } + EXPECT_EQUAL(numEntries - 1 - i, tree.size()); + } +} + +void +Test::requireThatSortedTreeInsertWorks() +{ + { + GenerationHandler g; + MyTree tree; + for (int i = 0; i < 1000; ++i) { + EXPECT_TRUE(tree.insert(i, toStr(i))); + MyTree::Iterator itr = tree.find(i); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(toStr(i), itr.getData()); + EXPECT_TRUE(tree.isValid()); + } + } + { + GenerationHandler g; + MyTree tree; + for (int i = 1000; i > 0; --i) { + EXPECT_TRUE(tree.insert(i, toStr(i))); + MyTree::Iterator itr = tree.find(i); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(toStr(i), itr.getData()); + EXPECT_TRUE(tree.isValid()); + } + } +} + +void +Test::requireThatCornerCaseTreeFindWorks() +{ + GenerationHandler g; + MyTree tree; + for (int i = 1; i < 100; ++i) { + tree.insert(i, toStr(i)); + } + EXPECT_TRUE(!tree.find(0).valid()); // lower than lowest + EXPECT_TRUE(!tree.find(1000).valid()); // higher than highest +} + +void +Test::requireThatBasicTreeIteratorWorks() +{ + GenerationHandler g; + MyTree tree; + EXPECT_TRUE(!tree.begin().valid()); + std::vector<LeafPair> exp; + size_t numEntries = 1000; + generateData(exp, numEntries); + for (size_t i = 0; i 
< numEntries; ++i) { + tree.insert(exp[i].first, exp[i].second); + } + std::sort(exp.begin(), exp.end(), LeafPairLess()); + size_t ei = 0; + MyTree::Iterator itr = tree.begin(); + MyTree::Iterator ritr; + EXPECT_EQUAL(1000u, itr.size()); + for (; itr.valid(); ++itr) { + //LOG(info, "itr(%d, %s)", itr.getKey(), itr.getData().c_str()); + EXPECT_EQUAL(UNWRAP(exp[ei].first), UNWRAP(itr.getKey())); + EXPECT_EQUAL(exp[ei].second, itr.getData()); + ei++; + ritr = itr; + } + EXPECT_EQUAL(numEntries, ei); + for (; ritr.valid(); --ritr) { + --ei; + //LOG(info, "itr(%d, %s)", itr.getKey(), itr.getData().c_str()); + EXPECT_EQUAL(UNWRAP(exp[ei].first), UNWRAP(ritr.getKey())); + EXPECT_EQUAL(exp[ei].second, ritr.getData()); + } +} + +void +Test::requireThatTreeIteratorSeekWorks() +{ + GenerationHandler g; + MyTree tree; + for (int i = 0; i < 40; i += 2) { + tree.insert(i, toStr(i)); + } + //std::cout << tree.toString() << std::endl; + EXPECT_TRUE(assertSeek(2, 2, tree)); // next key + EXPECT_TRUE(assertSeek(10, 10, tree)); // skip to existing + EXPECT_TRUE(assertSeek(26, 26, tree)); // skip to existing + EXPECT_TRUE(assertSeek(11, 12, tree)); // skip to non-existing + EXPECT_TRUE(assertSeek(23, 24, tree)); // skip to non-existing + { + MyTree::Iterator itr = tree.begin(); + EXPECT_TRUE(assertSeek(4, 4, itr)); + EXPECT_TRUE(assertSeek(14, 14, itr)); + EXPECT_TRUE(assertSeek(18, 18, itr)); + EXPECT_TRUE(assertSeek(36, 36, itr)); + } + { + MyTree::Iterator itr = tree.begin(); + EXPECT_TRUE(assertSeek(3, 4, itr)); + EXPECT_TRUE(assertSeek(13, 14, itr)); + EXPECT_TRUE(assertSeek(17, 18, itr)); + EXPECT_TRUE(assertSeek(35, 36, itr)); + } + { + MyTree::Iterator itr = tree.begin(); + MyTree::Iterator itr2 = tree.begin(); + itr.binarySeek(40); // outside + itr2.linearSeek(40); // outside + EXPECT_TRUE(!itr.valid()); + EXPECT_TRUE(!itr2.valid()); + } + { + MyTree::Iterator itr = tree.begin(); + EXPECT_TRUE(assertSeek(8, 8, itr)); + for (int i = 10; i < 40; i += 2) { + ++itr; + 
EXPECT_EQUAL(i, UNWRAP(itr.getKey())); + } + } + { + MyTree::Iterator itr = tree.begin(); + EXPECT_TRUE(assertSeek(26, 26, itr)); + for (int i = 28; i < 40; i += 2) { + ++itr; + EXPECT_EQUAL(i, UNWRAP(itr.getKey())); + } + } + GenerationHandler g2; + MyTree tree2; // only leaf node + tree2.insert(0, "0"); + tree2.insert(2, "2"); + tree2.insert(4, "4"); + EXPECT_TRUE(assertSeek(1, 2, tree2)); + EXPECT_TRUE(assertSeek(2, 2, tree2)); + { + MyTree::Iterator itr = tree2.begin(); + MyTree::Iterator itr2 = tree2.begin(); + itr.binarySeek(5); // outside + itr2.linearSeek(5); // outside + EXPECT_TRUE(!itr.valid()); + EXPECT_TRUE(!itr2.valid()); + } +} + +void +Test::requireThatTreeIteratorAssignWorks() +{ + GenerationHandler g; + MyTree tree; + for (int i = 0; i < 1000; ++i) { + tree.insert(i, toStr(i)); + } + for (int i = 0; i < 1000; ++i) { + MyTree::Iterator itr = tree.find(i); + MyTree::Iterator itr2 = itr; + EXPECT_TRUE(itr == itr2); + int expNum = i; + for (; itr2.valid(); ++itr2) { + EXPECT_EQUAL(expNum++, UNWRAP(itr2.getKey())); + } + EXPECT_EQUAL(1000, expNum); + } +} + +void +Test::requireThatMemoryUsageIsCalculated() +{ + typedef BTreeNodeAllocator<int32_t, int8_t, + btree::NoAggregated, + MyTraits::INTERNAL_SLOTS, MyTraits::LEAF_SLOTS> NodeAllocator; + typedef NodeAllocator::InternalNodeType INode; + typedef NodeAllocator::LeafNodeType LNode; + typedef NodeAllocator::InternalNodeTypeRefPair IRef; + typedef NodeAllocator::LeafNodeTypeRefPair LRef; + LOG(info, "sizeof(BTreeNode)=%zu, sizeof(INode)=%zu, sizeof(LNode)=%zu", + sizeof(BTreeNode), sizeof(INode), sizeof(LNode)); + EXPECT_GREATER(sizeof(INode), sizeof(LNode)); + GenerationHandler gh; + gh.incGeneration(); + NodeAllocator tm; + MemoryUsage mu; + const uint32_t initialInternalNodes = 128u; + const uint32_t initialLeafNodes = 128u; + mu.incAllocatedBytes(sizeof(INode) * initialInternalNodes); + mu.incAllocatedBytes(sizeof(LNode) * initialLeafNodes); + mu.incUsedBytes(sizeof(INode)); + 
mu.incDeadBytes(sizeof(INode)); + EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage())); + + // add internal node + IRef ir = tm.allocInternalNode(1); + mu.incUsedBytes(sizeof(INode)); + EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage())); + + // add leaf node + LRef lr = tm.allocLeafNode(); + mu.incUsedBytes(sizeof(LNode)); + EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage())); + + // move nodes to hold list + tm.freeze(); // mark allocated nodes as frozen so we can hold them later on + tm.holdNode(ir.first, ir.second); + mu.incAllocatedBytesOnHold(sizeof(INode)); + EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage())); + tm.holdNode(lr.first, lr.second); + mu.incAllocatedBytesOnHold(sizeof(LNode)); + EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage())); + + // trim hold lists + tm.transferHoldLists(gh.getCurrentGeneration()); + gh.incGeneration(); + tm.trimHoldLists(gh.getFirstUsedGeneration()); + mu = MemoryUsage(); + mu.incAllocatedBytes(sizeof(INode) * initialInternalNodes); + mu.incAllocatedBytes(sizeof(LNode) * initialLeafNodes); + mu.incUsedBytes(sizeof(INode) * 2); + mu.incDeadBytes(sizeof(INode) * 2); + mu.incUsedBytes(sizeof(LNode)); + mu.incDeadBytes(sizeof(LNode)); + EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage())); +} + +template <typename TreeType> +void +Test::requireThatLowerBoundWorksT() +{ + GenerationHandler g; + TreeType t; + EXPECT_TRUE(t.insert(10, BTreeNoLeafData())); + EXPECT_TRUE(t.insert(20, BTreeNoLeafData())); + EXPECT_TRUE(t.insert(30, BTreeNoLeafData())); + EXPECT_EQUAL(10, t.lowerBound(9).getKey()); + EXPECT_EQUAL(20, t.lowerBound(20).getKey()); + EXPECT_EQUAL(30, t.lowerBound(21).getKey()); + EXPECT_EQUAL(30, t.lowerBound(30).getKey()); + EXPECT_TRUE(!t.lowerBound(31).valid()); + for (int i = 40; i < 1000; i+=10) { + EXPECT_TRUE(t.insert(i, BTreeNoLeafData())); + } + for (int i = 9; i < 990; i+=10) { + EXPECT_EQUAL(i + 1, t.lowerBound(i).getKey()); + EXPECT_EQUAL(i + 1, t.lowerBound(i + 1).getKey()); + } + 
EXPECT_TRUE(!t.lowerBound(991).valid()); +} + +void +Test::requireThatLowerBoundWorks() +{ + requireThatLowerBoundWorksT<SetTreeB>(); + requireThatLowerBoundWorksT<SetTreeL>(); +} + +template <typename TreeType> +void +Test::requireThatUpperBoundWorksT() +{ + GenerationHandler g; + TreeType t; + EXPECT_TRUE(t.insert(10, BTreeNoLeafData())); + EXPECT_TRUE(t.insert(20, BTreeNoLeafData())); + EXPECT_TRUE(t.insert(30, BTreeNoLeafData())); + EXPECT_EQUAL(10, t.upperBound(9).getKey()); + EXPECT_EQUAL(30, t.upperBound(20).getKey()); + EXPECT_EQUAL(30, t.upperBound(21).getKey()); + EXPECT_TRUE(!t.upperBound(30).valid()); + for (int i = 40; i < 1000; i+=10) { + EXPECT_TRUE(t.insert(i, BTreeNoLeafData())); + } + for (int i = 9; i < 980; i+=10) { + EXPECT_EQUAL(i + 1, t.upperBound(i).getKey()); + EXPECT_EQUAL(i + 11, t.upperBound(i + 1).getKey()); + } + EXPECT_TRUE(!t.upperBound(990).valid()); +} + +void +Test::requireThatUpperBoundWorks() +{ + requireThatUpperBoundWorksT<SetTreeB>(); + requireThatUpperBoundWorksT<SetTreeL>(); +} + +struct UpdKeyComp { + int _remainder; + mutable size_t _numErrors; + UpdKeyComp(int remainder) : _remainder(remainder), _numErrors(0) {} + bool operator() (const int & lhs, const int & rhs) const { + if (lhs % 2 != _remainder) ++_numErrors; + if (rhs % 2 != _remainder) ++_numErrors; + return lhs < rhs; + } +}; + +void +Test::requireThatUpdateOfKeyWorks() +{ + typedef BTree<int, BTreeNoLeafData, + btree::NoAggregated, + UpdKeyComp &> UpdKeyTree; + typedef UpdKeyTree::Iterator UpdKeyTreeIterator; + GenerationHandler g; + UpdKeyTree t; + UpdKeyComp cmp1(0); + for (int i = 0; i < 1000; i+=2) { + EXPECT_TRUE(t.insert(i, BTreeNoLeafData(), cmp1)); + } + EXPECT_EQUAL(0u, cmp1._numErrors); + for (int i = 0; i < 1000; i+=2) { + UpdKeyTreeIterator itr = t.find(i, cmp1); + itr.writeKey(i + 1); + } + UpdKeyComp cmp2(1); + for (int i = 1; i < 1000; i+=2) { + UpdKeyTreeIterator itr = t.find(i, cmp2); + EXPECT_TRUE(itr.valid()); + } + EXPECT_EQUAL(0u, 
cmp2._numErrors); +} + + +void +Test::requireThatSmallNodesWorks(void) +{ + typedef BTreeStore<MyKey, std::string, btree::NoAggregated, MyComp, + BTreeDefaultTraits> TreeStore; + GenerationHandler g; + TreeStore s; + + EntryRef root; + EXPECT_EQUAL(0u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + EXPECT_TRUE(s.insert(root, 40, "fourty")); + EXPECT_TRUE(!s.insert(root, 40, "fourty.not")); + EXPECT_EQUAL(1u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + EXPECT_TRUE(s.insert(root, 20, "twenty")); + EXPECT_TRUE(!s.insert(root, 20, "twenty.not")); + EXPECT_TRUE(!s.insert(root, 40, "fourty.not")); + EXPECT_EQUAL(2u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + EXPECT_TRUE(s.insert(root, 60, "sixty")); + EXPECT_TRUE(!s.insert(root, 60, "sixty.not")); + EXPECT_TRUE(!s.insert(root, 20, "twenty.not")); + EXPECT_TRUE(!s.insert(root, 40, "fourty.not")); + EXPECT_EQUAL(3u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + EXPECT_TRUE(s.insert(root, 50, "fifty")); + EXPECT_TRUE(!s.insert(root, 50, "fifty.not")); + EXPECT_TRUE(!s.insert(root, 60, "sixty.not")); + EXPECT_TRUE(!s.insert(root, 20, "twenty.not")); + EXPECT_TRUE(!s.insert(root, 40, "fourty.not")); + EXPECT_EQUAL(4u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + for (uint32_t i = 0; i < 100; ++i) { + EXPECT_TRUE(s.insert(root, 1000 + i, "big")); + if (i > 0) { + EXPECT_TRUE(!s.insert(root, 1000 + i - 1, "big")); + } + EXPECT_EQUAL(5u + i, s.size(root)); + EXPECT_EQUAL(5u + i <= 8u, s.isSmallArray(root)); + } + EXPECT_TRUE(s.remove(root, 40)); + EXPECT_TRUE(!s.remove(root, 40)); + EXPECT_EQUAL(103u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + EXPECT_TRUE(s.remove(root, 20)); + EXPECT_TRUE(!s.remove(root, 20)); + EXPECT_EQUAL(102u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + EXPECT_TRUE(s.remove(root, 50)); + EXPECT_TRUE(!s.remove(root, 50)); + EXPECT_EQUAL(101u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + for (uint32_t i = 0; i < 100; ++i) { + 
EXPECT_TRUE(s.remove(root, 1000 + i)); + if (i > 0) { + EXPECT_TRUE(!s.remove(root, 1000 + i - 1)); + } + EXPECT_EQUAL(100 - i, s.size(root)); + EXPECT_EQUAL(100 - i <= 8u, s.isSmallArray(root)); + } + EXPECT_EQUAL(1u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + s.clear(root); + s.clearBuilder(); + s.freeze(); + s.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + s.trimHoldLists(g.getFirstUsedGeneration()); +} + + +void +Test::requireThatApplyWorks(void) +{ + typedef BTreeStore<MyKey, std::string, btree::NoAggregated, MyComp, + BTreeDefaultTraits> TreeStore; + typedef TreeStore::KeyType KeyType; + typedef TreeStore::KeyDataType KeyDataType; + GenerationHandler g; + TreeStore s; + std::vector<KeyDataType> additions; + std::vector<KeyType> removals; + + EntryRef root; + EXPECT_EQUAL(0u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + additions.push_back(KeyDataType(40, "fourty")); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(1u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + additions.push_back(KeyDataType(20, "twenty")); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(2u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + additions.push_back(KeyDataType(60, "sixty")); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(3u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + additions.push_back(KeyDataType(50, "fifty")); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(4u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + for 
(uint32_t i = 0; i < 100; ++i) { + additions.clear(); + removals.clear(); + additions.push_back(KeyDataType(1000 + i, "big")); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(5u + i, s.size(root)); + EXPECT_EQUAL(5u + i <= 8u, s.isSmallArray(root)); + } + + additions.clear(); + removals.clear(); + removals.push_back(40); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(103u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + removals.push_back(20); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(102u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + removals.push_back(50); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(101u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + for (uint32_t i = 0; i < 100; ++i) { + additions.clear(); + removals.clear(); + removals.push_back(1000 +i); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(100 - i, s.size(root)); + EXPECT_EQUAL(100 - i <= 8u, s.isSmallArray(root)); + } + EXPECT_EQUAL(1u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + for (uint32_t i = 0; i < 20; ++i) + additions.push_back(KeyDataType(1000 + i, "big")); + removals.push_back(60); + removals.push_back(1002); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(20u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + + additions.clear(); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + 
removals.size()); + EXPECT_EQUAL(19u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + for (uint32_t i = 0; i < 20; ++i) + additions.push_back(KeyDataType(1100 + i, "big")); + for (uint32_t i = 0; i < 10; ++i) + removals.push_back(1000 + i); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(30u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + + s.clear(root); + s.clearBuilder(); + s.freeze(); + s.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + s.trimHoldLists(g.getFirstUsedGeneration()); +} + +class MyTreeTestIterator : public MyTree::Iterator +{ +public: + MyTreeTestIterator(const MyTree::Iterator &rhs) + : MyTree::Iterator(rhs) + { + } + + int + getPathSize(void) const + { + return _pathSize; + } +}; + + +void +Test::requireThatIteratorDistanceWorks(int numEntries) +{ + GenerationHandler g; + MyTree tree; + typedef MyTree::Iterator Iterator; + for (int i = 0; i < numEntries; ++i) { + tree.insert(i, toStr(i)); + } + MyTreeTestIterator tit = tree.begin(); + LOG(info, + "numEntries=%d, iterator pathSize=%d", + numEntries, tit.getPathSize()); + Iterator it = tree.begin(); + for (int i = 0; i <= numEntries; ++i) { + Iterator iit = tree.lowerBound(i); + Iterator iitn = tree.lowerBound(i + 1); + Iterator iitu = tree.upperBound(i); + Iterator iitls = tree.begin(); + Iterator iitbs = tree.begin(); + Iterator iitlsp = tree.begin(); + Iterator iitbsp = tree.begin(); + Iterator iitlb(tree.getRoot(), tree.getAllocator()); + iitlb.lower_bound(i); + Iterator iitlb2(BTreeNode::Ref(), tree.getAllocator()); + iitlb2.lower_bound(tree.getRoot(), i); + if (i > 0) { + iitls.linearSeek(i); + iitbs.binarySeek(i); + ++it; + } + iitlsp.linearSeekPast(i); + iitbsp.binarySeekPast(i); + Iterator iitlsp2 = iitls; + Iterator iitbsp2 = iitbs; + Iterator iitnr = i < numEntries ? 
iitn : tree.begin(); + --iitnr; + if (i < numEntries) { + iitlsp2.linearSeekPast(i); + iitbsp2.binarySeekPast(i); + } + EXPECT_EQUAL(i, static_cast<int>(iit.position())); + EXPECT_EQUAL(i < numEntries, iit.valid()); + EXPECT_TRUE(iit.identical(it)); + EXPECT_TRUE(iit.identical(iitls)); + EXPECT_TRUE(iit.identical(iitbs)); + EXPECT_TRUE(iit.identical(iitnr)); + EXPECT_TRUE(iit.identical(iitlb)); + EXPECT_TRUE(iit.identical(iitlb2)); + EXPECT_TRUE(iitn.identical(iitu)); + EXPECT_TRUE(iitn.identical(iitlsp)); + EXPECT_TRUE(iitn.identical(iitbsp)); + EXPECT_TRUE(iitn.identical(iitlsp2)); + EXPECT_TRUE(iitn.identical(iitbsp2)); + if (i < numEntries) { + EXPECT_EQUAL(i + 1, static_cast<int>(iitn.position())); + EXPECT_EQUAL(i + 1 < numEntries, iitn.valid()); + } + for (int j = 0; j <= numEntries; ++j) { + Iterator jit = tree.lowerBound(j); + EXPECT_EQUAL(j, static_cast<int>(jit.position())); + EXPECT_EQUAL(j < numEntries, jit.valid()); + EXPECT_EQUAL(i - j, iit - jit); + EXPECT_EQUAL(j - i, jit - iit); + + Iterator jit2 = jit; + jit2.setupEnd(); + EXPECT_EQUAL(numEntries - j, jit2 - jit); + EXPECT_EQUAL(numEntries - i, jit2 - iit); + EXPECT_EQUAL(j - numEntries, jit - jit2); + EXPECT_EQUAL(i - numEntries, iit - jit2); + } + } +} + + +void +Test::requireThatIteratorDistanceWorks() +{ + requireThatIteratorDistanceWorks(1); + requireThatIteratorDistanceWorks(3); + requireThatIteratorDistanceWorks(8); + requireThatIteratorDistanceWorks(20); + requireThatIteratorDistanceWorks(100); + requireThatIteratorDistanceWorks(400); +} + + +int +Test::Main() +{ + TEST_INIT("btree_test"); + + requireThatNodeInsertWorks(); + requireThatNodeSplitInsertWorks(); + requireThatNodeStealWorks(); + requireThatNodeRemoveWorks(); + requireThatNodeLowerBoundWorks(); + requireThatWeCanInsertAndRemoveFromTree(); + requireThatSortedTreeInsertWorks(); + requireThatCornerCaseTreeFindWorks(); + requireThatBasicTreeIteratorWorks(); + requireThatTreeIteratorSeekWorks(); + 
requireThatTreeIteratorAssignWorks(); + requireThatMemoryUsageIsCalculated(); + requireThatLowerBoundWorks(); + requireThatUpperBoundWorks(); + requireThatUpdateOfKeyWorks(); + requireThatSmallNodesWorks(); + requireThatApplyWorks(); + requireThatIteratorDistanceWorks(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::btree::Test); diff --git a/searchlib/src/tests/memoryindex/btree/frozenbtree_test.cpp b/searchlib/src/tests/memoryindex/btree/frozenbtree_test.cpp new file mode 100644 index 00000000000..817d024c60f --- /dev/null +++ b/searchlib/src/tests/memoryindex/btree/frozenbtree_test.cpp @@ -0,0 +1,513 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("frozenbtree_test"); +#define DEBUG_FROZENBTREE +#define LOG_FROZENBTREEXX +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/util/rand48.h> +#include <vespa/searchlib/btree/btreeroot.h> +#include <vespa/searchlib/btree/btreenodeallocator.h> +#include <vespa/searchlib/btree/btreeiterator.hpp> +#include <vespa/searchlib/btree/btreeroot.hpp> +#include <vespa/searchlib/btree/btreenodeallocator.hpp> +#include <vespa/searchlib/btree/btreenode.hpp> +#include <vespa/searchlib/btree/btreenodestore.hpp> +#include <algorithm> +#include <limits> +#include <map> + +using search::btree::BTreeRoot; +using search::btree::BTreeNode; +using search::btree::BTreeInternalNode; +using search::btree::BTreeLeafNode; +using search::btree::BTreeDefaultTraits; +using vespalib::GenerationHandler; + +namespace search { + + +class FrozenBTreeTest : public vespalib::TestApp +{ +public: + typedef int KeyType; +private: + std::vector<KeyType> _randomValues; + std::vector<KeyType> _sortedRandomValues; + +public: + typedef int DataType; + typedef BTreeRoot<KeyType, DataType, + btree::NoAggregated, + std::less<KeyType>, + BTreeDefaultTraits> Tree; + typedef Tree::NodeAllocatorType 
NodeAllocator; + typedef Tree::InternalNodeType InternalNodeType; + typedef Tree::LeafNodeType LeafNodeType; + typedef Tree::Iterator Iterator; + typedef Tree::ConstIterator ConstIterator; +private: + GenerationHandler *_generationHandler; + NodeAllocator *_allocator; + Tree *_tree; + + Rand48 _randomGenerator; + + void + allocTree(void); + + void + freeTree(bool verbose); + + void + fillRandomValues(unsigned int count); + + void + insertRandomValues(Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &values); + + void + removeRandomValues(Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &values); + + void + lookupRandomValues(const Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &values); + + void + lookupGoneRandomValues(const Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &values); + + void + lookupFrozenRandomValues(const Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &values); + + void + sortRandomValues(void); + + void + traverseTreeIterator(const Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &sorted, + bool frozen); + + void + printSubEnumTree(BTreeNode::Ref node, + NodeAllocator &allocator, + int indent) const; + + void + printEnumTree(const Tree *tree, + NodeAllocator &allocator); + + static const char * + frozenName(bool frozen) + { + return frozen ? 
"frozen" : "thawed"; + } +public: + FrozenBTreeTest(void) + : vespalib::TestApp(), + _randomValues(), + _sortedRandomValues(), + _generationHandler(NULL), + _allocator(NULL), + _tree(NULL), + _randomGenerator() + { + } + + int Main(void); +}; + + + +void +FrozenBTreeTest::allocTree(void) +{ + assert(_generationHandler == NULL); + assert(_allocator == NULL); + assert(_tree == NULL); + _generationHandler = new GenerationHandler; + _allocator = new NodeAllocator(); + _tree = new Tree; +} + + +void +FrozenBTreeTest::freeTree(bool verbose) +{ +#if 0 + LOG(info, + "freeTree before clear: %" PRIu64 " (%" PRIu64 " held)" + ", %" PRIu32 " leaves", + static_cast<uint64_t>(_intTree->getUsedMemory()), + static_cast<uint64_t>(_intTree->getHeldMemory()), + _intTree->validLeaves()); + _intTree->clear(); + LOG(info, + "freeTree before unhold: %" PRIu64 " (%" PRIu64 " held)", + static_cast<uint64_t>(_intTree->getUsedMemory()), + static_cast<uint64_t>(_intTree->getHeldMemory())); + _intTree->dropFrozen(); + _intTree->removeOldGenerations(_intTree->getGeneration() + 1); + LOG(info, + "freeTree after unhold: %" PRIu64 " (%" PRIu64 " held)", + static_cast<uint64_t>(_intTree->getUsedMemory()), + static_cast<uint64_t>(_intTree->getHeldMemory())); + if (verbose) + LOG(info, + "%d+%d leftover tree nodes", + _intTree->getNumInternalNodes(), + _intTree->getNumLeafNodes()); + EXPECT_TRUE(_intTree->getNumInternalNodes() == 0 && + _intTree->getNumLeafNodes() == 0); + delete _intTree; + _intTree = NULL; + delete _intKeyStore; + _intKeyStore = NULL; +#endif + (void) verbose; + _tree->clear(*_allocator); + _allocator->freeze(); + _allocator->transferHoldLists(_generationHandler->getCurrentGeneration()); + _generationHandler->incGeneration(); + _allocator->trimHoldLists(_generationHandler->getFirstUsedGeneration()); + delete _tree; + _tree = NULL; + delete _allocator; + _allocator = NULL; + delete _generationHandler; + _generationHandler = NULL; +} + + +void 
+FrozenBTreeTest::fillRandomValues(unsigned int count) +{ + unsigned int i; + + LOG(info, + "Filling %u random values", count); + _randomValues.clear(); + _randomValues.reserve(count); + _randomGenerator.srand48(42); + for (i = 0; i <count; i++) + _randomValues.push_back(_randomGenerator.lrand48()); + + EXPECT_TRUE(_randomValues.size() == count); +} + + +void +FrozenBTreeTest:: +insertRandomValues(Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &values) +{ + std::vector<KeyType>::const_iterator i(values.begin()); + std::vector<KeyType>::const_iterator ie(values.end()); + Iterator p; + + LOG(info, "insertRandomValues start"); + for (; i != ie; ++i) { +#ifdef LOG_FROZENBTREE + LOG(info, "Try lookup %d before insert", *i); +#endif + p = tree.find(*i, allocator); + if (!p.valid()) { + DataType val = *i + 42; + if (tree.insert(*i, val, allocator)) + p = tree.find(*i, allocator); + } + ASSERT_TRUE(p.valid() && p.getKey() == *i && p.getData() == *i + 42); +#ifdef DEBUG_FROZENBTREEX + printEnumTree(&tree); +#endif + } + ASSERT_TRUE(tree.isValid(allocator)); + ASSERT_TRUE(tree.isValidFrozen(allocator)); + LOG(info, "insertRandomValues done"); +} + + +void +FrozenBTreeTest:: +removeRandomValues(Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> & values) +{ + std::vector<KeyType>::const_iterator i(values.begin()); + std::vector<KeyType>::const_iterator ie(values.end()); + Iterator p; + + LOG(info, "removeRandomValues start"); + for (; i != ie; ++i) { +#ifdef LOG_FROZENBTREE + LOG(info, "Try lookup %d before remove", *i); +#endif + p = tree.find(*i, allocator); + if (p.valid()) { + if (tree.remove(*i, allocator)) + p = tree.find(*i, allocator); + } + ASSERT_TRUE(!p.valid()); +#ifdef DEBUG_FROZENBTREEX + tree.printTree(); +#endif + } + ASSERT_TRUE(tree.isValid(allocator)); + ASSERT_TRUE(tree.isValidFrozen(allocator)); + LOG(info, "removeRandomValues done"); +} + + +void +FrozenBTreeTest:: +lookupRandomValues(const Tree &tree, + 
NodeAllocator &allocator, + const std::vector<KeyType> &values) +{ + std::vector<KeyType>::const_iterator i(values.begin()); + std::vector<KeyType>::const_iterator ie(values.end()); + Iterator p; + + LOG(info, "lookupRandomValues start"); + for (; i != ie; ++i) { + p = tree.find(*i, allocator); + ASSERT_TRUE(p.valid() && p.getKey() == *i); + } + LOG(info, "lookupRandomValues done"); +} + + +void +FrozenBTreeTest:: +lookupGoneRandomValues(const Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &values) +{ + std::vector<KeyType>::const_iterator i(values.begin()); + std::vector<KeyType>::const_iterator ie(values.end()); + Iterator p; + + LOG(info, "lookupGoneRandomValues start"); + for (; i != ie; ++i) { + p = tree.find(*i, allocator); + ASSERT_TRUE(!p.valid()); + } + LOG(info, "lookupGoneRandomValues done"); +} + + +void +FrozenBTreeTest:: +lookupFrozenRandomValues(const Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &values) +{ + std::vector<KeyType>::const_iterator i(values.begin()); + std::vector<KeyType>::const_iterator ie(values.end()); + ConstIterator p; + + LOG(info, "lookupFrozenRandomValues start"); + for (; i != ie; ++i) { + p = tree.getFrozenView(allocator).find(*i, std::less<int>()); + ASSERT_TRUE(p.valid() && p.getKey() == *i && p.getData() == *i + 42); + } + LOG(info, "lookupFrozenRandomValues done"); +} + + +void +FrozenBTreeTest::sortRandomValues(void) +{ + std::vector<KeyType>::iterator i; + std::vector<KeyType>::iterator ie; + uint32_t okcnt; + int prevVal; + std::vector<KeyType> sorted; + + LOG(info, "sortRandomValues start"); + sorted = _randomValues; + std::sort(sorted.begin(), sorted.end()); + _sortedRandomValues.clear(); + _sortedRandomValues.reserve(sorted.size()); + + okcnt = 0; + prevVal = 0; + ie = sorted.end(); + for (i = sorted.begin(); i != ie; ++i) { + if (i == _sortedRandomValues.begin() || *i > prevVal) { + okcnt++; + _sortedRandomValues.push_back(*i); + } else if (*i == prevVal) + okcnt++; + 
else + abort(); + prevVal = *i; + } + EXPECT_TRUE(okcnt == sorted.size()); + LOG(info, "sortRandomValues done"); +} + + +void +FrozenBTreeTest:: +traverseTreeIterator(const Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &sorted, + bool frozen) +{ + LOG(info, + "traverseTreeIterator %s start", + frozenName(frozen)); + + std::vector<KeyType>::const_iterator i; + + i = sorted.begin(); + if (frozen) { + ConstIterator ai; + ai = tree.getFrozenView(allocator).begin(); + for (;ai.valid(); ++ai, ++i) + { + ASSERT_TRUE(ai.getKey() == *i); + } + } else { + Iterator ai; + ai = tree.begin(allocator); + for (;ai.valid(); ++ai, ++i) + { + ASSERT_TRUE(ai.getKey() == *i); + } + } + + + ASSERT_TRUE(i == sorted.end()); + + LOG(info, + "traverseTreeIterator %s done", + frozenName(frozen)); +} + + +void +FrozenBTreeTest:: +printSubEnumTree(BTreeNode::Ref node, + NodeAllocator &allocator, + int indent) const +{ + // typedef BTreeNode Node; + typedef LeafNodeType LeafNode; + typedef InternalNodeType InternalNode; + BTreeNode::Ref subNode; + unsigned int i; + + if (allocator.isLeafRef(node)) { + const LeafNode *lnode = allocator.mapLeafRef(node); + printf("%*s LeafNode %s valid=%d\n", + indent, "", + lnode->getFrozen() ? "frozen" : "thawed", + lnode->validSlots()); + for (i = 0; i < lnode->validSlots(); i++) { + + KeyType k = lnode->getKey(i); + DataType d = lnode->getData(i); + printf("leaf value %3d %d %d\n", + (int) i, + (int) k, + (int) d); + } + return; + } + const InternalNode *inode = allocator.mapInternalRef(node); + printf("%*s IntermediteNode %s valid=%d\n", + indent, "", + inode->getFrozen() ? 
"frozen" : "thawed", + inode->validSlots()); + for (i = 0; i < inode->validSlots(); i++) { + subNode = inode->getChild(i); + assert(subNode != BTreeNode::Ref()); + printSubEnumTree(subNode, allocator, indent + 4); + } +} + + +void +FrozenBTreeTest::printEnumTree(const Tree *tree, + NodeAllocator &allocator) +{ + printf("Tree Dump start\n"); + if (!NodeAllocator::isValidRef(tree->getRoot())) { + printf("EMPTY\n"); + } else { + printSubEnumTree(tree->getRoot(), allocator, 0); + } + printf("Tree Dump done\n"); +} + + + +int +FrozenBTreeTest::Main() +{ + TEST_INIT("frozenbtree_test"); + + fillRandomValues(1000); + sortRandomValues(); + + allocTree(); + insertRandomValues(*_tree, *_allocator, _randomValues); + lookupRandomValues(*_tree, *_allocator, _randomValues); + _allocator->freeze(); + _allocator->transferHoldLists(_generationHandler->getCurrentGeneration()); + lookupFrozenRandomValues(*_tree, *_allocator, _randomValues); + traverseTreeIterator(*_tree, + *_allocator, + _sortedRandomValues, + false); + traverseTreeIterator(*_tree, + *_allocator, + _sortedRandomValues, + true); + traverseTreeIterator(*_tree, + *_allocator, + _sortedRandomValues, + false); + traverseTreeIterator(*_tree, + *_allocator, + _sortedRandomValues, + true); + removeRandomValues(*_tree, *_allocator, _randomValues); + lookupGoneRandomValues(*_tree, *_allocator, _randomValues); + lookupFrozenRandomValues(*_tree, *_allocator,_randomValues); + traverseTreeIterator(*_tree, + *_allocator, + _sortedRandomValues, + true); + insertRandomValues(*_tree, *_allocator, _randomValues); + freeTree(true); + + fillRandomValues(1000000); + sortRandomValues(); + + allocTree(); + insertRandomValues(*_tree, *_allocator, _randomValues); + traverseTreeIterator(*_tree, + *_allocator, + _sortedRandomValues, + false); + freeTree(false); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::FrozenBTreeTest); diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/.gitignore 
b/searchlib/src/tests/memoryindex/compact_document_words_store/.gitignore new file mode 100644 index 00000000000..3ad290f1731 --- /dev/null +++ b/searchlib/src/tests/memoryindex/compact_document_words_store/.gitignore @@ -0,0 +1 @@ +searchlib_compact_document_words_store_test_app diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/CMakeLists.txt b/searchlib/src/tests/memoryindex/compact_document_words_store/CMakeLists.txt new file mode 100644 index 00000000000..666639f20ba --- /dev/null +++ b/searchlib/src/tests/memoryindex/compact_document_words_store/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_compact_document_words_store_test_app + SOURCES + compact_document_words_store_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_compact_document_words_store_test_app COMMAND searchlib_compact_document_words_store_test_app) diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/DESC b/searchlib/src/tests/memoryindex/compact_document_words_store/DESC new file mode 100644 index 00000000000..ee9c4b346a2 --- /dev/null +++ b/searchlib/src/tests/memoryindex/compact_document_words_store/DESC @@ -0,0 +1 @@ +compact_document_words_store test. Take a look at compact_document_words_store_test.cpp for details. 
diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/FILES b/searchlib/src/tests/memoryindex/compact_document_words_store/FILES new file mode 100644 index 00000000000..fb2fb1d637b --- /dev/null +++ b/searchlib/src/tests/memoryindex/compact_document_words_store/FILES @@ -0,0 +1 @@ +compact_document_words_store_test.cpp diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/compact_document_words_store_test.cpp b/searchlib/src/tests/memoryindex/compact_document_words_store/compact_document_words_store_test.cpp new file mode 100644 index 00000000000..2a3bffb2fe6 --- /dev/null +++ b/searchlib/src/tests/memoryindex/compact_document_words_store/compact_document_words_store_test.cpp @@ -0,0 +1,157 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP(".memoryindex.compact_document_words_store_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/btree/entryref.h> +#include <vespa/searchlib/memoryindex/compact_document_words_store.h> +#include <vespa/vespalib/stllike/string.h> +#include <iostream> +#include <map> + +using namespace search; +using namespace search::btree; +using namespace search::memoryindex; + +typedef CompactDocumentWordsStore::Builder Builder; +typedef CompactDocumentWordsStore::Iterator Iterator; +typedef Builder::WordRefVector WordRefVector; + +const EntryRef w1(1); +const EntryRef w2(2); +const EntryRef w3(3); +const EntryRef w4(4); +const uint32_t d1(111); +const uint32_t d2(222); +const uint32_t d3(333); +const uint32_t d4(444); + +WordRefVector +build(Iterator itr) +{ + WordRefVector words; + for (; itr.valid(); ++itr) { + words.push_back(itr.wordRef()); + } + return words; +} + +vespalib::string +toStr(Iterator itr) +{ + WordRefVector words = build(itr); + std::ostringstream oss; + oss << "["; + bool firstWord = true; + for (auto word : words) { + 
if (!firstWord) oss << ","; + oss << word.ref(); + firstWord = false; + } + oss << "]"; + return oss.str(); +} + +struct SingleFixture +{ + CompactDocumentWordsStore _store; + SingleFixture() : _store() { + _store.insert(Builder(d1).insert(w1).insert(w2).insert(w3)); + } +}; + +struct MultiFixture +{ + CompactDocumentWordsStore _store; + MultiFixture() : _store() { + _store.insert(Builder(d1).insert(w1)); + _store.insert(Builder(d2).insert(w2)); + _store.insert(Builder(d3).insert(w3)); + } +}; + + +TEST_F("require that fields and words can be added for a document", SingleFixture) +{ + EXPECT_EQUAL("[1,2,3]", toStr(f._store.get(d1))); +} + +TEST_F("require that multiple documents can be added", MultiFixture) +{ + EXPECT_EQUAL("[1]", toStr(f._store.get(d1))); + EXPECT_EQUAL("[2]", toStr(f._store.get(d2))); + EXPECT_EQUAL("[3]", toStr(f._store.get(d3))); + EXPECT_FALSE(f._store.get(d4).valid()); +} + +TEST_F("require that documents can be removed", MultiFixture) +{ + f._store.remove(d2); + EXPECT_TRUE(f._store.get(d1).valid()); + EXPECT_FALSE(f._store.get(d2).valid()); + EXPECT_TRUE(f._store.get(d3).valid()); +} + +TEST_F("require that documents can be removed and re-inserted", MultiFixture) +{ + f._store.remove(d2); + f._store.insert(Builder(d2).insert(w4)); + EXPECT_EQUAL("[4]", toStr(f._store.get(d2))); +} + +TEST("require that a lot of words can be inserted, retrieved and removed") +{ + CompactDocumentWordsStore store; + for (uint32_t docId = 0; docId < 50; ++docId) { + Builder b(docId); + for (uint32_t wordRef = 0; wordRef < 20000; ++wordRef) { + b.insert(wordRef); + } + store.insert(b); + MemoryUsage usage = store.getMemoryUsage(); + std::cout << "memory usage (insert): docId=" << docId << ", alloc=" << usage.allocatedBytes() << ", used=" << usage.usedBytes() << std::endl; + } + for (uint32_t docId = 0; docId < 50; ++docId) { + WordRefVector words = build(store.get(docId)); + EXPECT_EQUAL(20000u, words.size()); + uint32_t wordRef = 0; + for (auto word : words) { 
+ EXPECT_EQUAL(wordRef++, word.ref()); + } + store.remove(docId); + MemoryUsage usage = store.getMemoryUsage(); + std::cout << "memory usage (remove): docId=" << docId << ", alloc=" << usage.allocatedBytes() << ", used=" << usage.usedBytes() << std::endl; + } +} + +TEST("require that initial memory usage is reported") +{ + CompactDocumentWordsStore store; + CompactDocumentWordsStore::DocumentWordsMap docs; + CompactDocumentWordsStore::Store internalStore; + MemoryUsage initExp; + initExp.incAllocatedBytes(docs.getMemoryConsumption()); + initExp.incUsedBytes(docs.getMemoryUsed()); + initExp.merge(internalStore.getMemoryUsage()); + MemoryUsage init = store.getMemoryUsage(); + EXPECT_EQUAL(initExp.allocatedBytes(), init.allocatedBytes()); + EXPECT_EQUAL(initExp.usedBytes(), init.usedBytes()); + EXPECT_GREATER(init.allocatedBytes(), init.usedBytes()); + EXPECT_GREATER(init.allocatedBytes(), 0u); + EXPECT_GREATER(init.usedBytes(), 0u); +} + +TEST("require that memory usage is updated after insert") +{ + CompactDocumentWordsStore store; + MemoryUsage init = store.getMemoryUsage(); + + store.insert(Builder(d1).insert(w1)); + MemoryUsage after = store.getMemoryUsage(); + EXPECT_GREATER_EQUAL(after.allocatedBytes(), init.allocatedBytes()); + EXPECT_GREATER(after.usedBytes(), init.usedBytes()); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } + diff --git a/searchlib/src/tests/memoryindex/datastore/.gitignore b/searchlib/src/tests/memoryindex/datastore/.gitignore new file mode 100644 index 00000000000..98f4acc70a8 --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/.gitignore @@ -0,0 +1,8 @@ +.depend +Makefile +datastore_test +featurestore_test +wordstore_test +searchlib_datastore_test_app +searchlib_featurestore_test_app +searchlib_wordstore_test_app diff --git a/searchlib/src/tests/memoryindex/datastore/CMakeLists.txt b/searchlib/src/tests/memoryindex/datastore/CMakeLists.txt new file mode 100644 index 00000000000..da45288fe5e --- /dev/null +++ 
b/searchlib/src/tests/memoryindex/datastore/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_datastore_test_app + SOURCES + datastore_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_datastore_test_app COMMAND searchlib_datastore_test_app) +vespa_add_executable(searchlib_featurestore_test_app + SOURCES + featurestore_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_featurestore_test_app COMMAND searchlib_featurestore_test_app) +vespa_add_executable(searchlib_wordstore_test_app + SOURCES + wordstore_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_wordstore_test_app COMMAND searchlib_wordstore_test_app) diff --git a/searchlib/src/tests/memoryindex/datastore/DESC b/searchlib/src/tests/memoryindex/datastore/DESC new file mode 100644 index 00000000000..56725396b65 --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/DESC @@ -0,0 +1 @@ +datastore test. Take a look at datastore_test.cpp and wordstore_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/datastore/FILES b/searchlib/src/tests/memoryindex/datastore/FILES new file mode 100644 index 00000000000..6cbbaf6a328 --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/FILES @@ -0,0 +1,2 @@ +datastore_test.cpp +wordstore_test.cpp diff --git a/searchlib/src/tests/memoryindex/datastore/datastore_test.cpp b/searchlib/src/tests/memoryindex/datastore/datastore_test.cpp new file mode 100644 index 00000000000..be55dd7ee1e --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/datastore_test.cpp @@ -0,0 +1,432 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("datastore_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/btree/datastore.h> +#include <vespa/searchlib/btree/datastore.hpp> + +namespace search { +namespace btree { + +class MyStore : public DataStore<int, EntryRefT<3, 2> > { +private: + typedef DataStore<int, EntryRefT<3, 2> > ParentType; + using ParentType::_buffers; + using ParentType::_states; + using ParentType::_activeBufferIds; +public: + MyStore() {} + + void + holdBuffer(uint32_t bufferId) + { + ParentType::holdBuffer(bufferId); + } + + void + holdElem(EntryRef ref, uint64_t len) + { + ParentType::holdElem(ref, len); + } + + void + transferHoldLists(generation_t generation) + { + ParentType::transferHoldLists(generation); + } + + void trimElemHoldList(generation_t usedGen) { + ParentType::trimElemHoldList(usedGen); + } + void incDead(EntryRef ref, uint64_t dead) { + ParentType::incDead(ref, dead); + } + void ensureBufferCapacity(size_t sizeNeeded) { + ParentType::ensureBufferCapacity(0, sizeNeeded); + } + void enableFreeLists() { + ParentType::enableFreeLists(); + } + + void + switchActiveBuffer(void) + { + ParentType::switchActiveBuffer(0, 0u); + } + std::vector<void *> & buffers() { return _buffers; } + std::vector<BufferState> &statesVec() { return _states; } + size_t activeBufferId() const { return _activeBufferIds[0]; } +}; + +typedef MyStore::RefType MyRef; + +class Test : public vespalib::TestApp { +private: + bool assertMemStats(const DataStoreBase::MemStats & exp, + const DataStoreBase::MemStats & act); + void requireThatEntryRefIsWorking(); + void requireThatAlignedEntryRefIsWorking(); + void requireThatEntriesCanBeAddedAndRetrieved(); + void requireThatAddEntryTriggersChangeOfBuffer(); + void requireThatWeCanHoldAndTrimBuffers(); + void requireThatWeCanHoldAndTrimElements(); + void requireThatWeCanUseFreeLists(); + void requireThatMemoryStatsAreCalculated(); + void 
requireThatMemoryUsageIsCalculated(); + + void + requireThatWecanDisableElemHoldList(void); +public: + int Main(); +}; + +bool +Test::assertMemStats(const DataStoreBase::MemStats & exp, + const DataStoreBase::MemStats & act) +{ + if (!EXPECT_EQUAL(exp._allocElems, act._allocElems)) return false; + if (!EXPECT_EQUAL(exp._usedElems, act._usedElems)) return false; + if (!EXPECT_EQUAL(exp._deadElems, act._deadElems)) return false; + if (!EXPECT_EQUAL(exp._holdElems, act._holdElems)) return false; + if (!EXPECT_EQUAL(exp._freeBuffers, act._freeBuffers)) return false; + if (!EXPECT_EQUAL(exp._activeBuffers, act._activeBuffers)) return false; + if (!EXPECT_EQUAL(exp._holdBuffers, act._holdBuffers)) return false; + return true; +} + +void +Test::requireThatEntryRefIsWorking() +{ + typedef EntryRefT<22> MyRefType; + EXPECT_EQUAL(4194304u, MyRefType::offsetSize()); + EXPECT_EQUAL(1024u, MyRefType::numBuffers()); + { + MyRefType r(0, 0); + EXPECT_EQUAL(0u, r.offset()); + EXPECT_EQUAL(0u, r.bufferId()); + } + { + MyRefType r(237, 13); + EXPECT_EQUAL(237u, r.offset()); + EXPECT_EQUAL(13u, r.bufferId()); + } + { + MyRefType r(4194303, 1023); + EXPECT_EQUAL(4194303u, r.offset()); + EXPECT_EQUAL(1023u, r.bufferId()); + } + { + MyRefType r1(6498, 76); + MyRefType r2(r1); + EXPECT_EQUAL(r1.offset(), r2.offset()); + EXPECT_EQUAL(r1.bufferId(), r2.bufferId()); + } +} + +void +Test::requireThatAlignedEntryRefIsWorking() +{ + typedef AlignedEntryRefT<22, 2> MyRefType; // 4 byte alignement + EXPECT_EQUAL(4 * 4194304u, MyRefType::offsetSize()); + EXPECT_EQUAL(1024u, MyRefType::numBuffers()); + EXPECT_EQUAL(0u, MyRefType::align(0)); + EXPECT_EQUAL(4u, MyRefType::align(1)); + EXPECT_EQUAL(4u, MyRefType::align(2)); + EXPECT_EQUAL(4u, MyRefType::align(3)); + EXPECT_EQUAL(4u, MyRefType::align(4)); + EXPECT_EQUAL(8u, MyRefType::align(5)); + { + MyRefType r(0, 0); + EXPECT_EQUAL(0u, r.offset()); + EXPECT_EQUAL(0u, r.bufferId()); + } + { + MyRefType r(237, 13); + 
EXPECT_EQUAL(MyRefType::align(237), r.offset()); + EXPECT_EQUAL(13u, r.bufferId()); + } + { + MyRefType r(MyRefType::offsetSize() - 4, 1023); + EXPECT_EQUAL(MyRefType::align(MyRefType::offsetSize() - 4), r.offset()); + EXPECT_EQUAL(1023u, r.bufferId()); + } +} + +void +Test::requireThatEntriesCanBeAddedAndRetrieved() +{ + typedef DataStore<int> IntStore; + IntStore ds; + EntryRef r1 = ds.addEntry(10); + EntryRef r2 = ds.addEntry(20); + EntryRef r3 = ds.addEntry(30); + EXPECT_EQUAL(1u, IntStore::RefType(r1).offset()); + EXPECT_EQUAL(2u, IntStore::RefType(r2).offset()); + EXPECT_EQUAL(3u, IntStore::RefType(r3).offset()); + EXPECT_EQUAL(0u, IntStore::RefType(r1).bufferId()); + EXPECT_EQUAL(0u, IntStore::RefType(r2).bufferId()); + EXPECT_EQUAL(0u, IntStore::RefType(r3).bufferId()); + EXPECT_EQUAL(10, ds.getEntry(r1)); + EXPECT_EQUAL(20, ds.getEntry(r2)); + EXPECT_EQUAL(30, ds.getEntry(r3)); +} + +void +Test::requireThatAddEntryTriggersChangeOfBuffer() +{ + typedef DataStore<uint64_t, EntryRefT<10, 10> > Store; + Store s; + uint64_t num = 0; + uint32_t lastId = 0; + uint64_t lastNum = 0; + for (;;++num) { + EntryRef r = s.addEntry(num); + EXPECT_EQUAL(num, s.getEntry(r)); + uint32_t bufferId = Store::RefType(r).bufferId(); + if (bufferId > lastId) { + LOG(info, "Changed to bufferId %u after %" PRIu64 " nums", bufferId, num); + EXPECT_EQUAL(Store::RefType::offsetSize() - (lastId == 0), + num - lastNum); + lastId = bufferId; + lastNum = num; + } + if (bufferId == 2) { + break; + } + } + EXPECT_EQUAL(Store::RefType::offsetSize() * 2 - 1, num); + LOG(info, "Added %" PRIu64 " nums in 2 buffers", num); +} + +void +Test::requireThatWeCanHoldAndTrimBuffers() +{ + MyStore s; + EXPECT_EQUAL(0u, MyRef(s.addEntry(1)).bufferId()); + s.switchActiveBuffer(); + EXPECT_EQUAL(1u, s.activeBufferId()); + s.holdBuffer(0); // hold last buffer + s.transferHoldLists(10); + + EXPECT_EQUAL(1u, MyRef(s.addEntry(2)).bufferId()); + s.switchActiveBuffer(); + EXPECT_EQUAL(2u, s.activeBufferId()); + 
s.holdBuffer(1); // hold last buffer + s.transferHoldLists(20); + + EXPECT_EQUAL(2u, MyRef(s.addEntry(3)).bufferId()); + s.switchActiveBuffer(); + EXPECT_EQUAL(3u, s.activeBufferId()); + s.holdBuffer(2); // hold last buffer + s.transferHoldLists(30); + + EXPECT_EQUAL(3u, MyRef(s.addEntry(4)).bufferId()); + s.holdBuffer(3); // hold current buffer + s.transferHoldLists(40); + + EXPECT_TRUE(s.statesVec()[0].size() != 0); + EXPECT_TRUE(s.statesVec()[1].size() != 0); + EXPECT_TRUE(s.statesVec()[2].size() != 0); + EXPECT_TRUE(s.statesVec()[3].size() != 0); + s.trimHoldLists(11); + EXPECT_TRUE(s.statesVec()[0].size() == 0); + EXPECT_TRUE(s.statesVec()[1].size() != 0); + EXPECT_TRUE(s.statesVec()[2].size() != 0); + EXPECT_TRUE(s.statesVec()[3].size() != 0); + + s.switchActiveBuffer(); + EXPECT_EQUAL(0u, s.activeBufferId()); + EXPECT_EQUAL(0u, MyRef(s.addEntry(5)).bufferId()); + s.trimHoldLists(41); + EXPECT_TRUE(s.statesVec()[0].size() != 0); + EXPECT_TRUE(s.statesVec()[1].size() == 0); + EXPECT_TRUE(s.statesVec()[2].size() == 0); + EXPECT_TRUE(s.statesVec()[3].size() == 0); +} + +void +Test::requireThatWeCanHoldAndTrimElements() +{ + MyStore s; + MyRef r1 = s.addEntry(1); + s.holdElem(r1, 1); + s.transferHoldLists(10); + MyRef r2 = s.addEntry(2); + s.holdElem(r2, 1); + s.transferHoldLists(20); + MyRef r3 = s.addEntry(3); + s.holdElem(r3, 1); + s.transferHoldLists(30); + EXPECT_EQUAL(1, s.getEntry(r1)); + EXPECT_EQUAL(2, s.getEntry(r2)); + EXPECT_EQUAL(3, s.getEntry(r3)); + s.trimElemHoldList(11); + EXPECT_EQUAL(0, s.getEntry(r1)); + EXPECT_EQUAL(2, s.getEntry(r2)); + EXPECT_EQUAL(3, s.getEntry(r3)); + s.trimElemHoldList(31); + EXPECT_EQUAL(0, s.getEntry(r1)); + EXPECT_EQUAL(0, s.getEntry(r2)); + EXPECT_EQUAL(0, s.getEntry(r3)); +} + +void +Test::requireThatWeCanUseFreeLists() +{ + MyStore s; + s.enableFreeLists(); + MyRef r1 = s.addEntry2(1); + s.holdElem(r1, 1); + s.transferHoldLists(10); + MyRef r2 = s.addEntry2(2); + s.holdElem(r2, 1); + s.transferHoldLists(20); + 
s.trimElemHoldList(11); + MyRef r3 = s.addEntry2(3); // reuse r1 + EXPECT_EQUAL(r1.offset(), r3.offset()); + EXPECT_EQUAL(r1.bufferId(), r3.bufferId()); + MyRef r4 = s.addEntry2(4); + EXPECT_EQUAL(r2.offset() + 1, r4.offset()); + s.trimElemHoldList(21); + MyRef r5 = s.addEntry2(5); // reuse r2 + EXPECT_EQUAL(r2.offset(), r5.offset()); + EXPECT_EQUAL(r2.bufferId(), r5.bufferId()); + MyRef r6 = s.addEntry2(6); + EXPECT_EQUAL(r4.offset() + 1, r6.offset()); + EXPECT_EQUAL(3, s.getEntry(r1)); + EXPECT_EQUAL(5, s.getEntry(r2)); + EXPECT_EQUAL(3, s.getEntry(r3)); + EXPECT_EQUAL(4, s.getEntry(r4)); + EXPECT_EQUAL(5, s.getEntry(r5)); + EXPECT_EQUAL(6, s.getEntry(r6)); +} + +void +Test::requireThatMemoryStatsAreCalculated() +{ + MyStore s; + DataStoreBase::MemStats m; + m._allocElems = MyRef::offsetSize(); + m._usedElems = 1; // ref = 0 is reserved + m._deadElems = 1; // ref = 0 is reserved + m._holdElems = 0; + m._activeBuffers = 1; + m._freeBuffers = MyRef::numBuffers() - 1; + m._holdBuffers = 0; + EXPECT_TRUE(assertMemStats(m, s.getMemStats())); + + // add entry + MyRef r = s.addEntry(10); + m._usedElems++; + EXPECT_TRUE(assertMemStats(m, s.getMemStats())); + + // inc dead + s.incDead(r, 1); + m._deadElems++; + EXPECT_TRUE(assertMemStats(m, s.getMemStats())); + + // hold buffer + s.addEntry(20); + s.addEntry(30); + s.holdBuffer(r.bufferId()); + s.transferHoldLists(100); + m._usedElems += 2; + m._holdElems += 2; // used - dead + m._activeBuffers--; + m._holdBuffers++; + EXPECT_TRUE(assertMemStats(m, s.getMemStats())); + + // new active buffer + s.switchActiveBuffer(); + s.addEntry(40); + m._allocElems += MyRef::offsetSize(); + m._usedElems++; + m._activeBuffers++; + m._freeBuffers--; + + // trim hold buffer + s.trimHoldLists(101); + m._allocElems -= MyRef::offsetSize(); + m._usedElems = 1; + m._deadElems = 0; + m._holdElems = 0; + m._freeBuffers = MyRef::numBuffers() - 1; + m._holdBuffers = 0; + EXPECT_TRUE(assertMemStats(m, s.getMemStats())); +} + +void 
+Test::requireThatMemoryUsageIsCalculated() +{ + MyStore s; + MyRef r = s.addEntry(10); + s.addEntry(20); + s.addEntry(30); + s.addEntry(40); + s.incDead(r, 1); + s.holdBuffer(r.bufferId()); + s.transferHoldLists(100); + MemoryUsage m = s.getMemoryUsage(); + EXPECT_EQUAL(MyRef::offsetSize() * sizeof(int), m.allocatedBytes()); + EXPECT_EQUAL(5 * sizeof(int), m.usedBytes()); + EXPECT_EQUAL(2 * sizeof(int), m.deadBytes()); + EXPECT_EQUAL(3 * sizeof(int), m.allocatedBytesOnHold()); + s.trimHoldLists(101); +} + + +void +Test::requireThatWecanDisableElemHoldList(void) +{ + MyStore s; + MyRef r1 = s.addEntry(10); + MyRef r2 = s.addEntry(20); + MyRef r3 = s.addEntry(30); + (void) r3; + MemoryUsage m = s.getMemoryUsage(); + EXPECT_EQUAL(MyRef::offsetSize() * sizeof(int), m.allocatedBytes()); + EXPECT_EQUAL(4 * sizeof(int), m.usedBytes()); + EXPECT_EQUAL(1 * sizeof(int), m.deadBytes()); + EXPECT_EQUAL(0 * sizeof(int), m.allocatedBytesOnHold()); + s.holdElem(r1, 1); + m = s.getMemoryUsage(); + EXPECT_EQUAL(MyRef::offsetSize() * sizeof(int), m.allocatedBytes()); + EXPECT_EQUAL(4 * sizeof(int), m.usedBytes()); + EXPECT_EQUAL(1 * sizeof(int), m.deadBytes()); + EXPECT_EQUAL(1 * sizeof(int), m.allocatedBytesOnHold()); + s.disableElemHoldList(); + s.holdElem(r2, 1); + m = s.getMemoryUsage(); + EXPECT_EQUAL(MyRef::offsetSize() * sizeof(int), m.allocatedBytes()); + EXPECT_EQUAL(4 * sizeof(int), m.usedBytes()); + EXPECT_EQUAL(2 * sizeof(int), m.deadBytes()); + EXPECT_EQUAL(1 * sizeof(int), m.allocatedBytesOnHold()); + s.transferHoldLists(100); + s.trimHoldLists(101); +} + +int +Test::Main() +{ + TEST_INIT("datastore_test"); + + requireThatEntryRefIsWorking(); + requireThatAlignedEntryRefIsWorking(); + requireThatEntriesCanBeAddedAndRetrieved(); + requireThatAddEntryTriggersChangeOfBuffer(); + requireThatWeCanHoldAndTrimBuffers(); + requireThatWeCanHoldAndTrimElements(); + requireThatWeCanUseFreeLists(); + requireThatMemoryStatsAreCalculated(); + requireThatMemoryUsageIsCalculated(); + 
requireThatWecanDisableElemHoldList(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::btree::Test); + diff --git a/searchlib/src/tests/memoryindex/datastore/featurestore_test.cpp b/searchlib/src/tests/memoryindex/datastore/featurestore_test.cpp new file mode 100644 index 00000000000..87d32c90b78 --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/featurestore_test.cpp @@ -0,0 +1,245 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("featurestore_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/memoryindex/featurestore.h> + +using namespace search::btree; +using namespace search::index; + +namespace search +{ + + +namespace memoryindex +{ + + +class Test : public vespalib::TestApp +{ +private: + Schema _schema; + + const Schema & + getSchema(void) const + { + return _schema; + } + + bool + assertFeatures(const DocIdAndFeatures &exp, + const DocIdAndFeatures &act); + + void + requireThatFeaturesCanBeAddedAndRetrieved(void); + + void + requireThatNextWordsAreWorking(void); + void + requireThatAddFeaturesTriggersChangeOfBuffer(void); + +public: + Test(void); + + int + Main(void); +}; + + +bool +Test::assertFeatures(const DocIdAndFeatures &exp, + const DocIdAndFeatures &act) +{ + // docid is not encoded as part of features + if (!EXPECT_EQUAL(exp._elements.size(), + act._elements.size())) + return false; + for (size_t i = 0; i < exp._elements.size(); ++i) { + if (!EXPECT_EQUAL(exp._elements[i]._elementId, + act._elements[i]._elementId)) + return false; + if (!EXPECT_EQUAL(exp._elements[i]._numOccs, + act._elements[i]._numOccs)) + return false; + if (!EXPECT_EQUAL(exp._elements[i]._weight, act._elements[i]._weight)) + return false; + if (!EXPECT_EQUAL(exp._elements[i]._elementLen, + act._elements[i]._elementLen)) + return false; + } + if (!EXPECT_EQUAL(exp._wordPositions.size(), 
act._wordPositions.size())) + return false; + for (size_t i = 0; i < exp._wordPositions.size(); ++i) { + if (!EXPECT_EQUAL(exp._wordPositions[i]._wordPos, + act._wordPositions[i]._wordPos)) return false; + } + return true; +} + + +DocIdAndFeatures +getFeatures(uint32_t numOccs, + int32_t weight, + uint32_t elemLen) +{ + DocIdAndFeatures f; + f._docId = 0; + f._elements.push_back(WordDocElementFeatures(0)); + f._elements.back().setNumOccs(numOccs); + f._elements.back().setWeight(weight); + f._elements.back().setElementLen(elemLen); + for (uint32_t i = 0; i < numOccs; ++i) { + f._wordPositions.push_back(WordDocElementWordPosFeatures(i)); + } + return f; +} + + +void +Test::requireThatFeaturesCanBeAddedAndRetrieved(void) +{ + FeatureStore fs(getSchema()); + DocIdAndFeatures act; + EntryRef r1; + EntryRef r2; + std::pair<EntryRef, uint64_t> r; + { + DocIdAndFeatures f = getFeatures(2, 4, 8); + r = fs.addFeatures(0, f); + r1 = r.first; + EXPECT_TRUE(r.second > 0); + EXPECT_EQUAL(FeatureStore::RefType::align(1u), + FeatureStore::RefType(r1).offset()); + EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId()); + LOG(info, + "bits(%" PRIu64 "), ref.offset(%" PRIu64 "), ref.bufferId(%u)", + r.second, + FeatureStore::RefType(r1).offset(), + FeatureStore::RefType(r1).bufferId()); + fs.getFeatures(0, r1, act); + // weight not encoded for single value + EXPECT_TRUE(assertFeatures(getFeatures(2, 1, 8), act)); + } + { + DocIdAndFeatures f = getFeatures(4, 8, 16); + r = fs.addFeatures(1, f); + r2 = r.first; + EXPECT_TRUE(r.second > 0); + EXPECT_TRUE(FeatureStore::RefType(r2).offset() > + FeatureStore::RefType(r1).offset()); + EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId()); + LOG(info, + "bits(%" PRIu64 "), ref.offset(%" PRIu64 "), ref.bufferId(%u)", + r.second, + FeatureStore::RefType(r2).offset(), + FeatureStore::RefType(r2).bufferId()); + fs.getFeatures(1, r2, act); + EXPECT_TRUE(assertFeatures(f, act)); + } +} + + +void +Test::requireThatNextWordsAreWorking(void) +{ + 
FeatureStore fs(getSchema()); + DocIdAndFeatures act; + EntryRef r1; + EntryRef r2; + std::pair<EntryRef, uint64_t> r; + { + DocIdAndFeatures f = getFeatures(2, 4, 8); + r = fs.addFeatures(0, f); + r1 = r.first; + EXPECT_TRUE(r.second > 0); + EXPECT_EQUAL(FeatureStore::RefType::align(1u), + FeatureStore::RefType(r1).offset()); + EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId()); + LOG(info, + "bits(%" PRIu64 "), ref.offset(%" PRIu64 "), ref.bufferId(%u)", + r.second, + FeatureStore::RefType(r1).offset(), + FeatureStore::RefType(r1).bufferId()); + fs.getFeatures(0, r1, act); + // weight not encoded for single value + EXPECT_TRUE(assertFeatures(getFeatures(2, 1, 8), act)); + } + { + DocIdAndFeatures f = getFeatures(4, 8, 16); + r = fs.addFeatures(1, f); + r2 = r.first; + EXPECT_TRUE(r.second > 0); + EXPECT_TRUE(FeatureStore::RefType(r2).offset() > + FeatureStore::RefType(r1).offset()); + EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId()); + LOG(info, + "bits(%" PRIu64 "), ref.offset(%" PRIu64 "), ref.bufferId(%u)", + r.second, + FeatureStore::RefType(r2).offset(), + FeatureStore::RefType(r2).bufferId()); + fs.getFeatures(1, r2, act); + EXPECT_TRUE(assertFeatures(f, act)); + } +} + + +void +Test::requireThatAddFeaturesTriggersChangeOfBuffer(void) +{ + FeatureStore fs(getSchema()); + size_t cnt = 1; + DocIdAndFeatures act; + uint32_t lastId = 0; + for (;;++cnt) { + uint32_t numOccs = (cnt % 100) + 1; + DocIdAndFeatures f = getFeatures(numOccs, 1, numOccs + 1); + std::pair<EntryRef, uint64_t> r = fs.addFeatures(0, f); + fs.getFeatures(0, r.first, act); + EXPECT_TRUE(assertFeatures(f, act)); + uint32_t bufferId = FeatureStore::RefType(r.first).bufferId(); + if (bufferId > lastId) { + LOG(info, + "Changed to bufferId %u after %zu feature sets", + bufferId, cnt); + lastId = bufferId; + } + if (bufferId == 1) { + break; + } + } + EXPECT_EQUAL(1u, lastId); + LOG(info, "Added %zu feature sets in 1 buffer", cnt); +} + + +Test::Test() + : _schema() +{ + 
_schema.addIndexField(Schema::IndexField("f0", Schema::STRING)); + _schema.addIndexField(Schema::IndexField("f1", + Schema::STRING, + Schema::WEIGHTEDSET)); +} + + +int +Test::Main() +{ + TEST_INIT("featurestore_test"); + + requireThatFeaturesCanBeAddedAndRetrieved(); + requireThatNextWordsAreWorking(); + requireThatAddFeaturesTriggersChangeOfBuffer(); + + TEST_DONE(); +} + + +} + + +} + + +TEST_APPHOOK(search::memoryindex::Test); diff --git a/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp b/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp new file mode 100644 index 00000000000..825992b3b4f --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp @@ -0,0 +1,104 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("wordstore_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/memoryindex/wordstore.h> + +using namespace search::btree; + +namespace search { +namespace memoryindex { + +class Test : public vespalib::TestApp { +private: + void requireThatWordsCanBeAddedAndRetrieved(); + void requireThatAddWordTriggersChangeOfBuffer(); +public: + int Main(); +}; + +void +Test::requireThatWordsCanBeAddedAndRetrieved() +{ + std::string w1 = "require"; + std::string w2 = "that"; + std::string w3 = "words"; + WordStore ws; + EntryRef r1 = ws.addWord(w1); + EntryRef r2 = ws.addWord(w2); + EntryRef r3 = ws.addWord(w3); + uint32_t invp = WordStore::RefType::align(1); // Reserved as invalid + uint32_t w1s = w1.size() + 1; + uint32_t w1p = WordStore::RefType::pad(w1s); + uint32_t w2s = w2.size() + 1; + uint32_t w2p = WordStore::RefType::pad(w2s); + EXPECT_EQUAL(invp, WordStore::RefType(r1).offset()); + EXPECT_EQUAL(invp + w1s + w1p, WordStore::RefType(r2).offset()); + EXPECT_EQUAL(invp + w1s + w1p + w2s + w2p, WordStore::RefType(r3).offset()); + EXPECT_EQUAL(0u, 
WordStore::RefType(r1).bufferId()); + EXPECT_EQUAL(0u, WordStore::RefType(r2).bufferId()); + EXPECT_EQUAL(0u, WordStore::RefType(r3).bufferId()); + EXPECT_EQUAL(std::string("require"), ws.getWord(r1)); + EXPECT_EQUAL(std::string("that"), ws.getWord(r2)); + EXPECT_EQUAL(std::string("words"), ws.getWord(r3)); +} + +void +Test::requireThatAddWordTriggersChangeOfBuffer() +{ + WordStore ws; + size_t word = 0; + uint32_t lastId = 0; + size_t lastWord = 0; + char wordStr[10]; + size_t entrySize = WordStore::RefType::align(6 + 1); + size_t initBufferSpace = 1024u * WordStore::RefType::align(1); + size_t bufferSpace = initBufferSpace; + size_t bufferWords = (bufferSpace - WordStore::RefType::align(1)) / + entrySize; + size_t usedSpace = 0; + size_t sumBufferWords = 0; + for (;;++word) { + sprintf(wordStr, "%6zu", word); + // all words uses 12 bytes (include padding) + EntryRef r = ws.addWord(std::string(wordStr)); + EXPECT_EQUAL(std::string(wordStr), ws.getWord(r)); + uint32_t bufferId = WordStore::RefType(r).bufferId(); + if (bufferId > lastId) { + LOG(info, + "Changed to bufferId %u after %zu words", + bufferId, word); + EXPECT_EQUAL(bufferWords, word - lastWord); + lastId = bufferId; + lastWord = word; + usedSpace += bufferWords * entrySize; + sumBufferWords += bufferWords; + bufferSpace = usedSpace + initBufferSpace; + bufferWords = bufferSpace / entrySize; + } + if (bufferId == 4) { + break; + } + } + // each buffer can have offsetSize / 12 words + EXPECT_EQUAL(sumBufferWords, word); + LOG(info, "Added %zu words in 4 buffers", word); +} + +int +Test::Main() +{ + TEST_INIT("wordstore_test"); + + requireThatWordsCanBeAddedAndRetrieved(); + requireThatAddWordTriggersChangeOfBuffer(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::memoryindex::Test); + diff --git a/searchlib/src/tests/memoryindex/dictionary/.gitignore b/searchlib/src/tests/memoryindex/dictionary/.gitignore new file mode 100644 index 00000000000..d404d7d7063 --- /dev/null +++ 
b/searchlib/src/tests/memoryindex/dictionary/.gitignore @@ -0,0 +1,6 @@ +.depend +Makefile +dictionary_test +dump +/urldump +searchlib_dictionary_test_app diff --git a/searchlib/src/tests/memoryindex/dictionary/CMakeLists.txt b/searchlib/src/tests/memoryindex/dictionary/CMakeLists.txt new file mode 100644 index 00000000000..9520b37d267 --- /dev/null +++ b/searchlib/src/tests/memoryindex/dictionary/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_dictionary_test_app + SOURCES + dictionary_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_dictionary_test_app COMMAND searchlib_dictionary_test_app) diff --git a/searchlib/src/tests/memoryindex/dictionary/DESC b/searchlib/src/tests/memoryindex/dictionary/DESC new file mode 100644 index 00000000000..ff559f42641 --- /dev/null +++ b/searchlib/src/tests/memoryindex/dictionary/DESC @@ -0,0 +1 @@ +dictionary test. Take a look at dictionary_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/dictionary/FILES b/searchlib/src/tests/memoryindex/dictionary/FILES new file mode 100644 index 00000000000..1f3a8ebef87 --- /dev/null +++ b/searchlib/src/tests/memoryindex/dictionary/FILES @@ -0,0 +1 @@ +dictionary_test.cpp diff --git a/searchlib/src/tests/memoryindex/dictionary/dictionary_test.cpp b/searchlib/src/tests/memoryindex/dictionary/dictionary_test.cpp new file mode 100644 index 00000000000..ef8383b23c7 --- /dev/null +++ b/searchlib/src/tests/memoryindex/dictionary/dictionary_test.cpp @@ -0,0 +1,1528 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* -*- mode: C++; coding: utf-8; -*- */ + +/* $Id$ + * + * Copyright (C) 2011 Yahoo! 
Technologies Norway AS + * + * All Rights Reserved + * + */ + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +#include <vespa/searchlib/diskindex/checkpointfile.h> +#include <vespa/searchlib/diskindex/fusion.h> +#include <vespa/searchlib/diskindex/indexbuilder.h> +#include <vespa/searchlib/diskindex/zcposoccrandread.h> +#include <vespa/searchlib/fef/fieldpositionsiterator.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/index/indexbuilder.h> +#include <vespa/searchlib/index/schemautil.h> +#include <vespa/searchlib/btree/btreeroot.hpp> +#include <vespa/searchlib/btree/btreeiterator.hpp> +#include <vespa/searchlib/btree/btreenodeallocator.hpp> +#include <vespa/searchlib/btree/btreenode.hpp> +#include <vespa/searchlib/btree/btreenodestore.hpp> +#include <vespa/searchlib/memoryindex/dictionary.h> +#include <vespa/searchlib/memoryindex/documentinverter.h> +#include <vespa/searchlib/memoryindex/fieldinverter.h> +#include <vespa/searchlib/memoryindex/document_remover.h> +#include <vespa/searchlib/memoryindex/featurestore.h> +#include <vespa/searchlib/memoryindex/postingiterator.h> +#include <vespa/searchlib/memoryindex/ordereddocumentinserter.h> +#include <vespa/searchlib/common/sequencedtaskexecutor.h> +#include <vespa/searchlib/test/initrange.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/vespalib/testkit/testapp.h> + +LOG_SETUP("dictionary_test"); + +namespace search +{ + +using namespace btree; +using namespace fef; +using namespace index; +using queryeval::SearchIterator; +using document::Document; +using diskindex::CheckPointFile; +using vespalib::GenerationHandler; +using test::InitRangeVerifier; + +namespace memoryindex +{ + +typedef Dictionary::PostingList PostingList; +typedef PostingList::Iterator PostingItr; +typedef 
PostingList::ConstIterator PostingConstItr; + +class MyBuilder : public IndexBuilder { +private: + std::stringstream _ss; + bool _insideWord; + bool _insideField; + bool _insideDoc; + bool _insideElem; + bool _firstWord; + bool _firstField; + bool _firstDoc; + bool _firstElem; + bool _firstPos; +public: + + MyBuilder(const Schema &schema) + : IndexBuilder(schema), + _ss(), + _insideWord(false), + _insideField(false), + _insideDoc(false), + _insideElem(false), + _firstWord(true), + _firstField(true), + _firstDoc(true), + _firstElem(true), + _firstPos(true) + { + } + + virtual void + startWord(const vespalib::stringref &word) + { + assert(_insideField); + assert(!_insideWord); + if (!_firstWord) + _ss << ","; + _ss << "w=" << word << "["; + _firstDoc = true; + _insideWord = true; + } + + virtual void + endWord(void) + { + assert(_insideWord); + assert(!_insideDoc); + _ss << "]"; + _firstWord = false; + _insideWord = false; + } + + virtual void + startField(uint32_t fieldId) + { + assert(!_insideField); + if (!_firstField) _ss << ","; + _ss << "f=" << fieldId << "["; + _firstWord = true; + _insideField = true; + } + + virtual void + endField() + { + assert(_insideField); + assert(!_insideWord); + _ss << "]"; + _firstField = false; + _insideField = false; + } + + virtual void + startDocument(uint32_t docId) + { + assert(_insideWord); + assert(!_insideDoc); + if (!_firstDoc) _ss << ","; + _ss << "d=" << docId << "["; + _firstElem = true; + _insideDoc = true; + } + + virtual void + endDocument(void) + { + assert(_insideDoc); + assert(!_insideElem); + _ss << "]"; + _firstDoc = false; + _insideDoc = false; + } + + virtual void + startElement(uint32_t elementId, + int32_t weight, + uint32_t elementLen) + { + assert(_insideDoc); + assert(!_insideElem); + if (!_firstElem) + _ss << ","; + _ss << "e=" << elementId << + ",w=" << weight << ",l=" << elementLen << "["; + _firstPos = true; + _insideElem = true; + } + + virtual void + endElement(void) + { + assert(_insideElem); + _ss 
<< "]"; + _firstElem = false; + _insideElem = false; + } + + virtual void + addOcc(const WordDocElementWordPosFeatures &features) + { + assert(_insideElem); + if (!_firstPos) _ss << ","; + _ss << features.getWordPos(); + _firstPos = false; + } + + std::string + toStr(void) const + { + return _ss.str(); + } +}; + +std::string +toString(FieldPositionsIterator posItr, + bool hasElements = false, + bool hasWeights = false) +{ + std::stringstream ss; + ss << "{"; + ss << posItr.getFieldLength() << ":"; + bool first = true; + for (; posItr.valid(); posItr.next()) { + if (!first) ss << ","; + ss << posItr.getPosition(); + first = false; + if (hasElements) { + ss << "[e=" << posItr.getElementId(); + if (hasWeights) + ss << ",w=" << posItr.getElementWeight(); + ss << ",l=" << posItr.getElementLen() << "]"; + } + } + ss << "}"; + return ss.str(); +} + +bool +assertPostingList(const std::string &exp, + PostingConstItr itr, + const FeatureStore *store = NULL) +{ + std::stringstream ss; + FeatureStore::DecodeContextCooked decoder(NULL); + TermFieldMatchData tfmd; + TermFieldMatchDataArray matchData; + matchData.add(&tfmd); + ss << "["; + for (size_t i = 0; itr.valid(); ++itr, ++i) { + if (i > 0) ss << ","; + uint32_t docId = itr.getKey(); + ss << docId; + if (store != NULL) { // consider features as well + EntryRef ref(itr.getData()); + store->setupForField(0, decoder); + store->setupForUnpackFeatures(ref, decoder); + decoder.unpackFeatures(matchData, docId); + ss << toString(tfmd.getIterator()); + } + } + ss << "]"; + return EXPECT_EQUAL(exp, ss.str()); +} + +bool +assertPostingList(std::vector<uint32_t> &exp, PostingConstItr itr) +{ + std::stringstream ss; + ss << "["; + for (size_t i = 0; i < exp.size(); ++i) { + if (i > 0) ss << ","; + ss << exp[i]; + } + ss << "]"; + return assertPostingList(ss.str(), itr); +} + + +namespace +{ + +/** + * MockDictionary is a simple mockup of memory index, used to verify + * that we get correct posting lists from real memory index. 
+ */ +class MockDictionary +{ + std::map<std::pair<vespalib::string, uint32_t>, std::set<uint32_t>> _dict; + vespalib::string _word; + uint32_t _fieldId; + +public: + void + setNextWord(const vespalib::string &word) + { + _word = word; + } + + void + setNextField(uint32_t fieldId) + { + _fieldId = fieldId; + } + + void + add(uint32_t docId) + { + _dict[std::make_pair(_word, _fieldId)].insert(docId); + } + + void + remove(uint32_t docId) + { + _dict[std::make_pair(_word, _fieldId)].erase(docId); + } + + std::vector<uint32_t> + find(const vespalib::string &word, uint32_t fieldId) + { + std::vector<uint32_t> res; + for (auto docId : _dict[std::make_pair(word, fieldId)] ) { + res.push_back(docId); + } + return res; + } + + auto begin() + { + return _dict.begin(); + } + + auto end() + { + return _dict.end(); + } +}; + + +/** + * MockWordStoreScan is a helper class to ensure that previous word is + * still stored safely in memory, to satisfy OrderedDocumentInserter + * needs. + */ +class MockWordStoreScan +{ + vespalib::string _word0; + vespalib::string _word1; + vespalib::string *_prevWord; + vespalib::string *_word; + +public: + MockWordStoreScan() + : _word0(), + _word1(), + _prevWord(&_word0), + _word(&_word1) + { + } + + const vespalib::string & + getWord() const + { + return *_word; + } + + const vespalib::string & + setWord(const vespalib::string &word) + { + std::swap(_prevWord, _word); + *_word = word; + return *_word; + } +}; + +/** + * MyInserter performs insertions on both a mockup version of memory index + * and a real memory index. Mockup version is used to calculate expected + * answers. 
+ */ +class MyInserter +{ + MockWordStoreScan _wordStoreScan; + MockDictionary _mock; + Dictionary _d; + DocIdAndPosOccFeatures _features; + IOrderedDocumentInserter *_documentInserter; + +public: + MyInserter(const Schema &schema) + : _wordStoreScan(), + _mock(), + _d(schema), + _features(), + _documentInserter(nullptr) + { + _features.addNextOcc(0, 0, 1, 1); + } + + void + setNextWord(const vespalib::string &word) + { + const vespalib::string &w = _wordStoreScan.setWord(word); + _documentInserter->setNextWord(w); + _mock.setNextWord(w); + } + + void + setNextField(uint32_t fieldId) + { + if (_documentInserter != nullptr) { + _documentInserter->flush(); + } + _documentInserter = &_d.getFieldIndex(fieldId)->getInserter(); + _documentInserter->rewind(); + _mock.setNextField(fieldId); + } + + void + add(uint32_t docId) + { + _documentInserter->add(docId, _features); + _mock.add(docId); + } + + void + remove(uint32_t docId) + { + _documentInserter->remove(docId); + _mock.remove(docId); + } + + bool + assertPosting(const vespalib::string &word, + uint32_t fieldId) + { + std::vector<uint32_t> exp = _mock.find(word, fieldId); + PostingConstItr itr = _d.find(word, fieldId); + return EXPECT_TRUE(assertPostingList(exp, itr)); + } + + bool + assertPostings() + { + if (_documentInserter != nullptr) { + _documentInserter->flush(); + } + for (auto wfp : _mock) { + auto &wf = wfp.first; + auto &word = wf.first; + auto fieldId = wf.second; + if (!EXPECT_TRUE(assertPosting(word, fieldId))) { + return false; + } + } + return true; + } + + void + rewind() + { + if (_documentInserter != nullptr) { + _documentInserter->flush(); + _documentInserter = nullptr; + } + } + + uint32_t + getNumUniqueWords() + { + return _d.getNumUniqueWords(); + } + + Dictionary &getDict() { return _d; } +}; + +void +myremove(uint32_t docId, DocumentInverter &inv, Dictionary &d, + ISequencedTaskExecutor &invertThreads) +{ + inv.removeDocument(docId); + invertThreads.sync(); + inv.pushDocuments(d, 
std::shared_ptr<IDestructorCallback>()); +} + + +class WrapInserter +{ + OrderedDocumentInserter &_inserter; +public: + WrapInserter(Dictionary &d, uint32_t fieldId) + : _inserter(d.getFieldIndex(fieldId)->getInserter()) + { + } + + WrapInserter &word(const vespalib::stringref &word_) + { + _inserter.setNextWord(word_); + return *this; + } + + WrapInserter &add(uint32_t docId, const index::DocIdAndFeatures &features) + { + _inserter.add(docId, features); + return *this; + } + + WrapInserter &add(uint32_t docId) + { + DocIdAndPosOccFeatures features; + features.addNextOcc(0, 0, 1, 1); + return add(docId, features); + } + + WrapInserter &remove(uint32_t docId) + { + _inserter.remove(docId); + return *this; + } + + WrapInserter &flush() + { + _inserter.flush(); + return *this; + } + + WrapInserter &rewind() + { + _inserter.rewind(); + return *this; + } + + btree::EntryRef + getWordRef() + { + return _inserter.getWordRef(); + } +}; + + +class MyDrainRemoves : IDocumentRemoveListener +{ + DocumentRemover &_remover; +public: + virtual void remove(const vespalib::stringref, uint32_t) override { } + + MyDrainRemoves(Dictionary &d, uint32_t fieldId) + : _remover(d.getFieldIndex(fieldId)->getDocumentRemover()) + { + } + + void drain(uint32_t docId) + { + _remover.remove(docId, *this); + } +}; + +void +myPushDocument(DocumentInverter &inv, Dictionary &d) +{ + inv.pushDocuments(d, std::shared_ptr<IDestructorCallback>()); +} + + +const FeatureStore * +featureStorePtr(const Dictionary &d, uint32_t fieldId) +{ + return &d.getFieldIndex(fieldId)->getFeatureStore(); +} + +const FeatureStore & +featureStoreRef(const Dictionary &d, uint32_t fieldId) +{ + return d.getFieldIndex(fieldId)->getFeatureStore(); +} + + +DataStoreBase::MemStats +getFeatureStoreMemStats(const Dictionary &d) +{ + DataStoreBase::MemStats res; + uint32_t numFields = d.getNumFields(); + for (uint32_t fieldId = 0; fieldId < numFields; ++fieldId) { + DataStoreBase::MemStats stats = + 
d.getFieldIndex(fieldId)->getFeatureStore().getMemStats(); + res += stats; + } + return res; +} + + +void myCommit(Dictionary &d, ISequencedTaskExecutor &pushThreads) +{ + uint32_t fieldId = 0; + for (auto &fieldIndex : d.getFieldIndexes()) { + pushThreads.execute(fieldId, + [fieldIndex(fieldIndex.get())]() + { fieldIndex->commit(); }); + ++fieldId; + } + pushThreads.sync(); +} + + +void +myCompactFeatures(Dictionary &d, ISequencedTaskExecutor &pushThreads) +{ + uint32_t fieldId = 0; + for (auto &fieldIndex : d.getFieldIndexes()) { + pushThreads.execute(fieldId, + [fieldIndex(fieldIndex.get())]() + { fieldIndex->compactFeatures(); }); + ++fieldId; + } +} + +} + + +struct Fixture +{ + Schema _schema; + Fixture() : _schema() { + _schema.addIndexField(Schema::IndexField("f0", Schema::STRING)); + _schema.addIndexField(Schema::IndexField("f1", Schema::STRING)); + _schema.addIndexField(Schema::IndexField("f2", Schema::STRING, + Schema::ARRAY)); + _schema.addIndexField(Schema::IndexField("f3", Schema::STRING, + Schema::WEIGHTEDSET)); + } + const Schema & getSchema() const { return _schema; } +}; + +TEST_F("requireThatFreshInsertWorks", Fixture) +{ + Dictionary d(f.getSchema()); + SequencedTaskExecutor pushThreads(2); + EXPECT_TRUE(assertPostingList("[]", d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0))); + EXPECT_EQUAL(0u, d.getNumUniqueWords()); + WrapInserter(d, 0).word("a").add(10).flush(); + EXPECT_TRUE(assertPostingList("[10]", d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0))); + myCommit(d, pushThreads); + EXPECT_TRUE(assertPostingList("[10]", d.findFrozen("a", 0))); + EXPECT_EQUAL(1u, d.getNumUniqueWords()); +} + +TEST_F("requireThatAppendInsertWorks", Fixture) +{ + Dictionary d(f.getSchema()); + SequencedTaskExecutor pushThreads(2); + WrapInserter(d, 0).word("a").add(10).flush().rewind(). 
+ word("a").add(5).flush(); + EXPECT_TRUE(assertPostingList("[5,10]", d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0))); + WrapInserter(d, 0).rewind().word("a").add(20).flush(); + EXPECT_TRUE(assertPostingList("[5,10,20]", d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0))); + myCommit(d, pushThreads); + EXPECT_TRUE(assertPostingList("[5,10,20]", d.findFrozen("a", 0))); +} + +TEST_F("requireThatMultiplePostingListsCanExist", Fixture) +{ + Dictionary d(f.getSchema()); + WrapInserter(d, 0).word("a").add(10).word("b").add(11).add(15).flush(); + WrapInserter(d, 1).word("a").add(5).word("b").add(12).flush(); + EXPECT_EQUAL(4u, d.getNumUniqueWords()); + EXPECT_TRUE(assertPostingList("[10]", d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[5]", d.find("a", 1))); + EXPECT_TRUE(assertPostingList("[11,15]", d.find("b", 0))); + EXPECT_TRUE(assertPostingList("[12]", d.find("b", 1))); + EXPECT_TRUE(assertPostingList("[]", d.find("a", 2))); + EXPECT_TRUE(assertPostingList("[]", d.find("c", 0))); +} + +TEST_F("requireThatRemoveWorks", Fixture) +{ + Dictionary d(f.getSchema()); + WrapInserter(d, 0).word("a").remove(10).flush(); + EXPECT_TRUE(assertPostingList("[]", d.find("a", 0))); + WrapInserter(d, 0).add(10).add(20).add(30).flush(); + EXPECT_TRUE(assertPostingList("[10,20,30]", d.find("a", 0))); + WrapInserter(d, 0).rewind().word("a").remove(10).flush(); + EXPECT_TRUE(assertPostingList("[20,30]", d.find("a", 0))); + WrapInserter(d, 0).remove(20).flush(); + EXPECT_TRUE(assertPostingList("[30]", d.find("a", 0))); + WrapInserter(d, 0).remove(30).flush(); + EXPECT_TRUE(assertPostingList("[]", d.find("a", 0))); + EXPECT_EQUAL(1u, d.getNumUniqueWords()); + MyDrainRemoves(d, 0).drain(10); + WrapInserter(d, 0).rewind().word("a").add(10).flush(); + EXPECT_TRUE(assertPostingList("[10]", d.find("a", 0))); +} + +TEST_F("requireThatMultipleInsertAndRemoveWorks", Fixture) +{ + MyInserter inserter(f.getSchema()); + uint32_t numFields 
= 4; + for (uint32_t fi = 0; fi < numFields; ++fi) { + inserter.setNextField(fi); + for (char w = 'a'; w <= 'z'; ++w) { + std::string word(&w, 1); + inserter.setNextWord(word); + for (uint32_t di = 0; di < (uint32_t) w; ++di) { // insert + inserter.add(di * 3); + } + EXPECT_EQUAL((w - 'a' + 1u) + ('z' - 'a' +1u) * fi, + inserter.getNumUniqueWords()); + } + } + EXPECT_TRUE(inserter.assertPostings()); + inserter.rewind(); + for (uint32_t fi = 0; fi < numFields; ++fi) { + MyDrainRemoves drainRemoves(inserter.getDict(), fi); + for (uint32_t di = 0; di < 'z' * 2 + 1; ++di) { + drainRemoves.drain(di); + } + } + for (uint32_t fi = 0; fi < numFields; ++fi) { + inserter.setNextField(fi); + for (char w = 'a'; w <= 'z'; ++w) { + std::string word(&w, 1); + inserter.setNextWord(word); + for (uint32_t di = 0; di < (uint32_t) w; ++di) { + // remove half of the docs + if ((di % 2) == 0) { + inserter.remove(di * 2); + } else { + inserter.add(di * 2 + 1); + } + } + } + } + EXPECT_TRUE(inserter.assertPostings()); +} + +void +addElement(DocIdAndFeatures &f, + uint32_t elemLen, + uint32_t numOccs, + int32_t weight = 1) +{ + f._elements.push_back(WordDocElementFeatures(f._elements.size())); + f._elements.back().setElementLen(elemLen); + f._elements.back().setWeight(weight); + f._elements.back().setNumOccs(numOccs); + for (uint32_t i = 0; i < numOccs; ++i) { + f._wordPositions.push_back(WordDocElementWordPosFeatures(i)); + } +} + +DocIdAndFeatures +getFeatures(uint32_t elemLen, uint32_t numOccs, int32_t weight = 1) +{ + DocIdAndFeatures f; + addElement(f, elemLen, numOccs, weight); + return f; +} + +TEST_F("requireThatFeaturesAreInPostingLists", Fixture) +{ + Dictionary d(f.getSchema()); + WrapInserter(d, 0).word("a").add(1, getFeatures(4, 2)).flush(); + EXPECT_TRUE(assertPostingList("[1{4:0,1}]", + d.find("a", 0), + featureStorePtr(d, 0))); + WrapInserter(d, 0).word("b").add(2, getFeatures(5, 1)). 
+ add(3, getFeatures(6, 2)).flush(); + EXPECT_TRUE(assertPostingList("[2{5:0},3{6:0,1}]", + d.find("b", 0), + featureStorePtr(d, 0))); + WrapInserter(d, 1).word("c").add(4, getFeatures(7, 2)).flush(); + EXPECT_TRUE(assertPostingList("[4{7:0,1}]", + d.find("c", 1), + featureStorePtr(d, 1))); +} + +TEST_F("require that initRange conforms", Fixture) { + Dictionary d(f.getSchema()); + InitRangeVerifier ir; + WrapInserter inserter(d, 0); + inserter.word("a"); + for (uint32_t docId : ir.getExpectedDocIds()) { + inserter.add(docId); + } + inserter.flush(); + + TermFieldMatchData tfmd; + TermFieldMatchDataArray matchData; + matchData.add(&tfmd); + PostingIterator itr(d.find("a", 0), featureStoreRef(d, 0), 0, matchData); + ir.verify(itr); +} + +TEST_F("requireThatPostingIteratorIsWorking", Fixture) +{ + Dictionary d(f.getSchema()); + WrapInserter(d, 0).word("a").add(10, getFeatures(4, 1)). + add(20, getFeatures(5, 2)). + add(30, getFeatures(6, 1)). + add(40, getFeatures(7, 2)).flush(); + TermFieldMatchData tfmd; + TermFieldMatchDataArray matchData; + matchData.add(&tfmd); + { + PostingIterator itr(d.find("not", 0), + featureStoreRef(d, 0), + 0, matchData); + itr.initFullRange(); + EXPECT_TRUE(itr.isAtEnd()); + } + { + PostingIterator itr(d.find("a", 0), + featureStoreRef(d, 0), + 0, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{4:0}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_EQUAL(30u, itr.getDocId()); + itr.unpack(30); + EXPECT_EQUAL("{6:0}", toString(tfmd.getIterator())); + EXPECT_TRUE(itr.seek(40)); + EXPECT_EQUAL(40u, itr.getDocId()); + itr.unpack(40); + EXPECT_EQUAL("{7:0,1}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(41)); + EXPECT_TRUE(itr.isAtEnd()); + } +} + +TEST_F("requireThatDumpingToIndexBuilderIsWorking", Fixture) +{ + { + MyBuilder b(f.getSchema()); + WordDocElementWordPosFeatures wpf; + b.startField(4); + b.startWord("a"); + b.startDocument(2); + 
b.startElement(0, 10, 20); + wpf.setWordPos(1); + b.addOcc(wpf); + wpf.setWordPos(3); + b.addOcc(wpf); + b.endElement(); + b.endDocument(); + b.endWord(); + b.endField(); + EXPECT_EQUAL("f=4[w=a[d=2[e=0,w=10,l=20[1,3]]]]", b.toStr()); + } + { + Dictionary d(f.getSchema()); + MyBuilder b(f.getSchema()); + DocIdAndFeatures df; + WrapInserter(d, 1).word("a").add(5, getFeatures(2, 1)). + add(7, getFeatures(3, 2)). + word("b").add(5, getFeatures(12, 2)).flush(); + + df = getFeatures(4, 1); + addElement(df, 5, 2); + WrapInserter(d, 2).word("a").add(5, df); + df = getFeatures(6, 1); + addElement(df, 7, 2); + WrapInserter(d, 2).add(7, df).flush(); + + df = getFeatures(8, 1, 12); + addElement(df, 9, 2, 13); + WrapInserter(d, 3).word("a").add(5, df); + df = getFeatures(10, 1, 14); + addElement(df, 11, 2, 15); + WrapInserter(d, 3).add(7, df).flush(); + + d.dump(b); + + EXPECT_EQUAL("f=0[]," + "f=1[w=a[d=5[e=0,w=1,l=2[0]],d=7[e=0,w=1,l=3[0,1]]]," + "w=b[d=5[e=0,w=1,l=12[0,1]]]]," + "f=2[w=a[d=5[e=0,w=1,l=4[0],e=1,w=1,l=5[0,1]]," + "d=7[e=0,w=1,l=6[0],e=1,w=1,l=7[0,1]]]]," + "f=3[w=a[d=5[e=0,w=12,l=8[0],e=1,w=13,l=9[0,1]]," + "d=7[e=0,w=14,l=10[0],e=1,w=15,l=11[0,1]]]]", + b.toStr()); + } + { // test word with no docs + Dictionary d(f.getSchema()); + WrapInserter(d, 0).word("a").add(2, getFeatures(2, 1)). + word("b").add(4, getFeatures(4, 1)).flush().rewind(). 
+ word("a").remove(2).flush(); + { + MyBuilder b(f.getSchema()); + d.dump(b); + EXPECT_EQUAL("f=0[w=b[d=4[e=0,w=1,l=4[0]]]],f=1[],f=2[],f=3[]", + b.toStr()); + } + { + search::diskindex::IndexBuilder b(f.getSchema()); + b.setPrefix("dump"); + TuneFileIndexing tuneFileIndexing; + DummyFileHeaderContext fileHeaderContext; + b.open(5, 2, tuneFileIndexing, fileHeaderContext); + d.dump(b); + b.close(); + } + } +} + + +template <typename FixtureBase> +class DictionaryFixture : public FixtureBase +{ +public: + using FixtureBase::getSchema; + Dictionary _d; + DocBuilder _b; + SequencedTaskExecutor _invertThreads; + SequencedTaskExecutor _pushThreads; + DocumentInverter _inv; + + DictionaryFixture() + : FixtureBase(), + _d(getSchema()), + _b(getSchema()), + _invertThreads(2), + _pushThreads(2), + _inv(getSchema(), _invertThreads, _pushThreads) + { + } +}; + + +TEST_F("requireThatInversionIsWorking", DictionaryFixture<Fixture>) +{ + Document::UP doc; + + f._b.startDocument("doc::10"); + f._b.startIndexField("f0"). + addStr("a").addStr("b").addStr("c").addStr("d"). + endField(); + doc = f._b.endDocument(); + f._inv.invertDocument(10, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + + f._b.startDocument("doc::20"); + f._b.startIndexField("f0"). + addStr("a").addStr("a").addStr("b").addStr("c").addStr("d"). + endField(); + doc = f._b.endDocument(); + f._inv.invertDocument(20, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + + f._b.startDocument("doc::30"); + f._b.startIndexField("f0"). + addStr("a").addStr("b").addStr("c").addStr("d"). + addStr("e").addStr("f"). + endField(); + f._b.startIndexField("f1"). + addStr("\nw2").addStr("w").addStr("x"). + addStr("\nw3").addStr("y").addStr("z"). + endField(); + f._b.startIndexField("f2"). + startElement(4). + addStr("w").addStr("x"). + endElement(). + startElement(5). + addStr("y").addStr("z"). + endElement(). 
+ endField(); + f._b.startIndexField("f3"). + startElement(6). + addStr("w").addStr("x"). + endElement(). + startElement(7). + addStr("y").addStr("z"). + endElement(). + endField(); + doc = f._b.endDocument(); + f._inv.invertDocument(30, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + + f._b.startDocument("doc::40"); + f._b.startIndexField("f0"). + addStr("a").addStr("a").addStr("b").addStr("c").addStr("a"). + addStr("e").addStr("f"). + endField(); + doc = f._b.endDocument(); + f._inv.invertDocument(40, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + + f._b.startDocument("doc::999"); + f._b.startIndexField("f0"). + addStr("this").addStr("is").addStr("_a_").addStr("test"). + addStr("for").addStr("insertion").addStr("speed").addStr("with"). + addStr("more").addStr("than").addStr("just").addStr("__a__"). + addStr("few").addStr("words").addStr("present").addStr("in"). + addStr("some").addStr("of").addStr("the").addStr("fields"). + endField(); + f._b.startIndexField("f1"). + addStr("the").addStr("other").addStr("field").addStr("also"). + addStr("has").addStr("some").addStr("content"). + endField(); + f._b.startIndexField("f2"). + startElement(1). + addStr("strange").addStr("things").addStr("here"). + addStr("has").addStr("some").addStr("content"). + endElement(). + endField(); + f._b.startIndexField("f3"). + startElement(3). + addStr("not").addStr("a").addStr("weighty").addStr("argument"). + endElement(). 
+ endField(); + doc = f._b.endDocument(); + for (uint32_t docId = 10000; docId < 20000; ++docId) { + f._inv.invertDocument(docId, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + } + + f._pushThreads.sync(); + DataStoreBase::MemStats beforeStats = getFeatureStoreMemStats(f._d); + LOG(info, + "Before feature compaction: allocElems=%" PRIu64 ", usedElems=%" PRIu64 + ", deadElems=%" PRIu64 ", holdElems=%" PRIu64 + ", freeBuffers=%" PRIu32 ", activeBuffers=%" PRIu32 + ", holdBuffers=%" PRIu32, + beforeStats._allocElems, + beforeStats._usedElems, + beforeStats._deadElems, + beforeStats._holdElems, + beforeStats._freeBuffers, + beforeStats._activeBuffers, + beforeStats._holdBuffers); + myCompactFeatures(f._d, f._pushThreads); + std::vector<std::unique_ptr<GenerationHandler::Guard>> guards; + for (auto &fieldIndex : f._d.getFieldIndexes()) { + guards.push_back(std::make_unique<GenerationHandler::Guard> + (fieldIndex->takeGenerationGuard())); + } + myCommit(f._d, f._pushThreads); + DataStoreBase::MemStats duringStats = getFeatureStoreMemStats(f._d); + LOG(info, + "During feature compaction: allocElems=%" PRIu64 ", usedElems=%" PRIu64 + ", deadElems=%" PRIu64 ", holdElems=%" PRIu64 + ", freeBuffers=%" PRIu32 ", activeBuffers=%" PRIu32 + ", holdBuffers=%" PRIu32, + duringStats._allocElems, + duringStats._usedElems, + duringStats._deadElems, + duringStats._holdElems, + duringStats._freeBuffers, + duringStats._activeBuffers, + duringStats._holdBuffers); + guards.clear(); + myCommit(f._d, f._pushThreads); + DataStoreBase::MemStats afterStats = getFeatureStoreMemStats(f._d); + LOG(info, + "After feature compaction: allocElems=%" PRIu64 ", usedElems=%" PRIu64 + ", deadElems=%" PRIu64 ", holdElems=%" PRIu64 + ", freeBuffers=%" PRIu32 ", activeBuffers=%" PRIu32 + ", holdBuffers=%" PRIu32, + afterStats._allocElems, + afterStats._usedElems, + afterStats._deadElems, + afterStats._holdElems, + afterStats._freeBuffers, + 
afterStats._activeBuffers, + afterStats._holdBuffers); + + TermFieldMatchData tfmd; + TermFieldMatchDataArray matchData; + matchData.add(&tfmd); + { + PostingIterator itr(f._d.findFrozen("not", 0), featureStoreRef(f._d, 0), + 0, matchData); + itr.initFullRange(); + EXPECT_TRUE(itr.isAtEnd()); + } + { + PostingIterator itr(f._d.findFrozen("a", 0), featureStoreRef(f._d, 0), + 0, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{4:0}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_EQUAL(30u, itr.getDocId()); + itr.unpack(30); + EXPECT_EQUAL("{6:0}", toString(tfmd.getIterator())); + EXPECT_TRUE(itr.seek(40)); + EXPECT_EQUAL(40u, itr.getDocId()); + itr.unpack(40); + EXPECT_EQUAL("{7:0,1,4}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(41)); + EXPECT_TRUE(itr.isAtEnd()); + } + { + PostingIterator itr(f._d.findFrozen("x", 0), featureStoreRef(f._d, 0), + 0, matchData); + itr.initFullRange(); + EXPECT_TRUE(itr.isAtEnd()); + } + { + PostingIterator itr(f._d.findFrozen("x", 1), featureStoreRef(f._d, 1), + 1, matchData); + itr.initFullRange(); + EXPECT_EQUAL(30u, itr.getDocId()); + itr.unpack(30); + EXPECT_EQUAL("{6:2[e=0,w=1,l=6]}", + toString(tfmd.getIterator(), true, true)); + } + { + PostingIterator itr(f._d.findFrozen("x", 2), featureStoreRef(f._d, 2), + 2, matchData); + itr.initFullRange(); + EXPECT_EQUAL(30u, itr.getDocId()); + itr.unpack(30); + // weight is hardcoded to 1 for new style il doc array field + EXPECT_EQUAL("{2:1[e=0,w=1,l=2]}", + toString(tfmd.getIterator(), true, true)); + } + { + PostingIterator itr(f._d.findFrozen("x", 3), featureStoreRef(f._d, 3), + 3, matchData); + itr.initFullRange(); + EXPECT_EQUAL(30u, itr.getDocId()); + itr.unpack(30); + EXPECT_EQUAL("{2:1[e=0,w=6,l=2]}", + toString(tfmd.getIterator(), true, true)); + } +} + +TEST_F("requireThatInverterHandlesRemoveViaDocumentRemover", + DictionaryFixture<Fixture>) +{ + Document::UP doc; + + 
f._b.startDocument("doc::1"); + f._b.startIndexField("f0").addStr("a").addStr("b").endField(); + f._b.startIndexField("f1").addStr("a").addStr("c").endField(); + Document::UP doc1 = f._b.endDocument(); + f._inv.invertDocument(1, *doc1.get()); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + + f._b.startDocument("doc::2"); + f._b.startIndexField("f0").addStr("b").addStr("c").endField(); + Document::UP doc2 = f._b.endDocument(); + f._inv.invertDocument(2, *doc2.get()); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + + EXPECT_TRUE(assertPostingList("[1]", f._d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[1,2]", f._d.find("b", 0))); + EXPECT_TRUE(assertPostingList("[2]", f._d.find("c", 0))); + EXPECT_TRUE(assertPostingList("[1]", f._d.find("a", 1))); + EXPECT_TRUE(assertPostingList("[1]", f._d.find("c", 1))); + + myremove(1, f._inv, f._d, f._invertThreads); + f._pushThreads.sync(); + + EXPECT_TRUE(assertPostingList("[]", f._d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[2]", f._d.find("b", 0))); + EXPECT_TRUE(assertPostingList("[2]", f._d.find("c", 0))); + EXPECT_TRUE(assertPostingList("[]", f._d.find("a", 1))); + EXPECT_TRUE(assertPostingList("[]", f._d.find("c", 1))); +} + +class UriFixture +{ +public: + Schema _schema; + UriFixture() + : _schema() + { + _schema.addUriIndexFields(Schema::IndexField("iu", + Schema::STRING)); + _schema.addUriIndexFields(Schema::IndexField("iau", + Schema::STRING, + Schema::ARRAY)); + _schema.addUriIndexFields(Schema::IndexField("iwu", + Schema::STRING, + Schema::WEIGHTEDSET)); + } + const Schema & getSchema() const { return _schema; } +}; + + +TEST_F("requireThatUriIndexingIsWorking", DictionaryFixture<UriFixture>) +{ + Document::UP doc; + + f._b.startDocument("doc::10"); + f._b.startIndexField("iu"). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:81/fluke?ab=2#4"). + endSubField(). + startSubField("scheme"). 
+ addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("81"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("4"). + endSubField(). + endField(); + f._b.startIndexField("iau"). + startElement(1). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:82/fluke?ab=2#8"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("82"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("8"). + endSubField(). + endElement(). + startElement(1). + startSubField("all"). + addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.flickr.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("82"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("9"). + endSubField(). + endElement(). + endField(); + f._b.startIndexField("iwu"). + startElement(4). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:83/fluke?ab=2#12"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). 
+ addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("83"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("12"). + endSubField(). + endElement(). + startElement(7). + startSubField("all"). + addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.flickr.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("85"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("13"). + endSubField(). + endElement(). + endField(); + doc = f._b.endDocument(); + f._inv.invertDocument(10, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + + f._pushThreads.sync(); + + TermFieldMatchData tfmd; + TermFieldMatchDataArray matchData; + matchData.add(&tfmd); + { + uint32_t fieldId = f.getSchema().getIndexFieldId("iu"); + PostingIterator itr(f._d.findFrozen("not", fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_TRUE(itr.isAtEnd()); + } + { + uint32_t fieldId = f.getSchema().getIndexFieldId("iu"); + PostingIterator itr(f._d.findFrozen("yahoo", fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{9:2}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_TRUE(itr.isAtEnd()); + } + { + uint32_t fieldId = f.getSchema().getIndexFieldId("iau"); + PostingIterator itr(f._d.findFrozen("yahoo", fieldId), + featureStoreRef(f._d, fieldId), + 
fieldId, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{9:2[e=0,l=9]}", + toString(tfmd.getIterator(), true, false)); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_TRUE(itr.isAtEnd()); + } + { + uint32_t fieldId = f.getSchema().getIndexFieldId("iwu"); + PostingIterator itr(f._d.findFrozen("yahoo", fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{9:2[e=0,w=4,l=9]}", + toString(tfmd.getIterator(), true, true)); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_TRUE(itr.isAtEnd()); + } + { + search::diskindex::IndexBuilder dib(f.getSchema()); + dib.setPrefix("urldump"); + TuneFileIndexing tuneFileIndexing; + DummyFileHeaderContext fileHeaderContext; + dib.open(11, f._d.getNumUniqueWords(), tuneFileIndexing, + fileHeaderContext); + f._d.dump(dib); + dib.close(); + } +} + + +class SingleFieldFixture +{ +public: + Schema _schema; + SingleFieldFixture() + : _schema() + { + _schema.addIndexField(Schema::IndexField("i", Schema::STRING)); + } + const Schema & getSchema() const { return _schema; } +}; + +TEST_F("requireThatCjkIndexingIsWorking", DictionaryFixture<SingleFieldFixture>) +{ + Document::UP doc; + + f._b.startDocument("doc::10"); + f._b.startIndexField("i"). + addStr("我就是那个"). + setAutoSpace(false). + addStr("大灰狼"). + setAutoSpace(true). 
+ endField(); + doc = f._b.endDocument(); + f._inv.invertDocument(10, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + + f._pushThreads.sync(); + + TermFieldMatchData tfmd; + TermFieldMatchDataArray matchData; + matchData.add(&tfmd); + { + uint32_t fieldId = f.getSchema().getIndexFieldId("i"); + PostingIterator itr(f._d.findFrozen("not", fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_TRUE(itr.isAtEnd()); + } + { + uint32_t fieldId = f.getSchema().getIndexFieldId("i"); + PostingIterator itr(f._d.findFrozen("我就" + "是那个", + fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{2:0}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_TRUE(itr.isAtEnd()); + } + { + uint32_t fieldId = f.getSchema().getIndexFieldId("i"); + PostingIterator itr(f._d.findFrozen("大灰" + "狼", + fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{2:1}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_TRUE(itr.isAtEnd()); + } +} + +void +insertAndAssertTuple(const vespalib::string &word, uint32_t fieldId, uint32_t docId, + Dictionary &dict) +{ + EntryRef wordRef = WrapInserter(dict, fieldId).rewind().word(word). 
+ add(docId).flush().getWordRef(); + EXPECT_EQUAL(word, + dict.getFieldIndex(fieldId)->getWordStore().getWord(wordRef)); + MyDrainRemoves(dict, fieldId).drain(docId); +} + +TEST_F("require that insert tells which word ref that was inserted", Fixture) +{ + Dictionary d(f.getSchema()); + insertAndAssertTuple("a", 1, 11, d); + insertAndAssertTuple("b", 1, 11, d); + insertAndAssertTuple("a", 2, 11, d); + + insertAndAssertTuple("a", 1, 22, d); + insertAndAssertTuple("b", 2, 22, d); + insertAndAssertTuple("c", 2, 22, d); +} + +struct RemoverFixture : public Fixture +{ + Dictionary _d; + SequencedTaskExecutor _invertThreads; + SequencedTaskExecutor _pushThreads; + + RemoverFixture() + : + Fixture(), + _d(getSchema()), + _invertThreads(2), + _pushThreads(2) + { + } + void assertPostingLists(const vespalib::string &e1, + const vespalib::string &e2, + const vespalib::string &e3) { + EXPECT_TRUE(assertPostingList(e1, _d.find("a", 1))); + EXPECT_TRUE(assertPostingList(e2, _d.find("a", 2))); + EXPECT_TRUE(assertPostingList(e3, _d.find("b", 1))); + } + void remove(uint32_t docId) { + DocumentInverter inv(getSchema(), _invertThreads, _pushThreads); + myremove(docId, inv, _d, _invertThreads); + _pushThreads.sync(); + EXPECT_FALSE(_d.getFieldIndex(0u)->getDocumentRemover(). + getStore().get(docId).valid()); + } +}; + +TEST_F("require that document remover can remove several documents", RemoverFixture) +{ + WrapInserter(f._d, 1).word("a").add(11).add(13).add(15). 
+ word("b").add(11).add(15).flush(); + WrapInserter(f._d, 2).word("a").add(11).add(13).flush(); + f.assertPostingLists("[11,13,15]", "[11,13]", "[11,15]"); + + f.remove(13); + f.assertPostingLists("[11,15]", "[11]", "[11,15]"); + + f.remove(11); + f.assertPostingLists("[15]", "[]", "[15]"); + + f.remove(15); + f.assertPostingLists("[]", "[]", "[]"); +} + +TEST_F("require that removal of non-existing document does not do anything", RemoverFixture) +{ + WrapInserter(f._d, 1).word("a").add(11).word("b").add(11).flush(); + WrapInserter(f._d, 2).word("a").add(11).flush(); + f.assertPostingLists("[11]", "[11]", "[11]"); + f.remove(13); + f.assertPostingLists("[11]", "[11]", "[11]"); +} + +} // namespace memoryindex +} // namespace search + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/memoryindex/document_remover/.gitignore b/searchlib/src/tests/memoryindex/document_remover/.gitignore new file mode 100644 index 00000000000..2126f9147bd --- /dev/null +++ b/searchlib/src/tests/memoryindex/document_remover/.gitignore @@ -0,0 +1 @@ +searchlib_document_remover_test_app diff --git a/searchlib/src/tests/memoryindex/document_remover/CMakeLists.txt b/searchlib/src/tests/memoryindex/document_remover/CMakeLists.txt new file mode 100644 index 00000000000..e918d0400b2 --- /dev/null +++ b/searchlib/src/tests/memoryindex/document_remover/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_document_remover_test_app + SOURCES + document_remover_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_document_remover_test_app COMMAND searchlib_document_remover_test_app) diff --git a/searchlib/src/tests/memoryindex/document_remover/DESC b/searchlib/src/tests/memoryindex/document_remover/DESC new file mode 100644 index 00000000000..7fe35ab896f --- /dev/null +++ b/searchlib/src/tests/memoryindex/document_remover/DESC @@ -0,0 +1 @@ +document remover test. Take a look at document_remover_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/document_remover/FILES b/searchlib/src/tests/memoryindex/document_remover/FILES new file mode 100644 index 00000000000..9b7cb9a8cfa --- /dev/null +++ b/searchlib/src/tests/memoryindex/document_remover/FILES @@ -0,0 +1 @@ +document_remover_test.cpp diff --git a/searchlib/src/tests/memoryindex/document_remover/document_remover_test.cpp b/searchlib/src/tests/memoryindex/document_remover/document_remover_test.cpp new file mode 100644 index 00000000000..8c6751adbeb --- /dev/null +++ b/searchlib/src/tests/memoryindex/document_remover/document_remover_test.cpp @@ -0,0 +1,144 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("document_remover_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/searchlib/memoryindex/document_remover.h> +#include <vespa/searchlib/memoryindex/wordstore.h> +#include <vespa/searchlib/memoryindex/i_document_remove_listener.h> +#include <vespa/vespalib/test/insertion_operators.h> +#include <map> + +using namespace search; +using namespace search::memoryindex; + +struct WordFieldPair +{ + vespalib::string _word; + uint32_t _fieldId; + WordFieldPair(const vespalib::stringref &word, uint32_t fieldId) + : _word(word), _fieldId(fieldId) + {} + bool operator<(const WordFieldPair &rhs) { + if (_word != rhs._word) { + return _word < rhs._word; + } + return _fieldId < rhs._fieldId; + } +}; + +typedef std::vector<WordFieldPair> WordFieldVector; + +std::ostream & +operator<<(std::ostream &os, const WordFieldPair &val) +{ + os << "{" << val._word << "," << val._fieldId << "}"; + return os; +} + +struct MockRemoveListener : public IDocumentRemoveListener +{ + WordFieldVector _words; + uint32_t _expDocId; + uint32_t _fieldId; + virtual void remove(const vespalib::stringref word, uint32_t docId) override { + EXPECT_EQUAL(_expDocId, docId); + _words.emplace_back(word, _fieldId); + } + void reset(uint32_t expDocId) { + _words.clear(); + _expDocId = expDocId; + } + vespalib::string getWords() { + std::sort(_words.begin(), _words.end()); + std::ostringstream oss; + oss << _words; + return oss.str(); + } + void setFieldId(uint32_t fieldId) { _fieldId = fieldId; } +}; + +struct Fixture +{ + MockRemoveListener _listener; + std::vector<std::unique_ptr<WordStore>> _wordStores; + std::vector<std::map<vespalib::string, btree::EntryRef>> _wordToRefMaps; + std::vector<std::unique_ptr<DocumentRemover>> _removers; + Fixture() + : _listener(), + _wordStores(), + _wordToRefMaps(), + _removers() + { + uint32_t numFields = 4; + for (uint32_t fieldId = 0; fieldId < numFields; ++fieldId) { + 
_wordStores.push_back(std::make_unique<WordStore>()); + _removers.push_back(std::make_unique<DocumentRemover> + (*_wordStores.back())); + } + _wordToRefMaps.resize(numFields); + } + btree::EntryRef getWordRef(const vespalib::string &word, uint32_t fieldId) { + auto &wordToRefMap = _wordToRefMaps[fieldId]; + WordStore &wordStore = *_wordStores[fieldId]; + auto itr = wordToRefMap.find(word); + if (itr == wordToRefMap.end()) { + btree::EntryRef ref = wordStore.addWord(word); + wordToRefMap[word] = ref; + return ref; + } + return itr->second; + } + Fixture &insert(const vespalib::string &word, uint32_t fieldId, uint32_t docId) { + assert(fieldId < _wordStores.size()); + _removers[fieldId]->insert(getWordRef(word, fieldId), docId); + return *this; + } + void flush() { + for (auto &remover : _removers) { + remover->flush(); + } + } + vespalib::string remove(uint32_t docId) { + _listener.reset(docId); + uint32_t fieldId = 0; + for (auto &remover : _removers) { + _listener.setFieldId(fieldId); + remover->remove(docId, _listener); + ++fieldId; + } + return _listener.getWords(); + } +}; + +TEST_F("require that {word,fieldId} pairs for multiple doc ids can be inserted", Fixture) +{ + f.insert("a", 1, 10).insert("a", 1, 20).insert("a", 1, 30); + f.insert("a", 2, 10).insert("a", 2, 20); + f.insert("b", 1, 20).insert("b", 1, 30); + f.insert("b", 2, 10).insert("b", 2, 30); + f.insert("c", 1, 10); + f.insert("c", 2, 20); + f.insert("c", 3, 30); + f.flush(); + + EXPECT_EQUAL("[{a,1},{a,2},{b,2},{c,1}]", f.remove(10)); + EXPECT_EQUAL("[{a,1},{a,2},{b,1},{c,2}]", f.remove(20)); + EXPECT_EQUAL("[{a,1},{b,1},{b,2},{c,3}]", f.remove(30)); +} + +TEST_F("require that we can insert after flush", Fixture) +{ + f.insert("a", 1, 10).insert("b", 1, 10); + f.flush(); + f.insert("b", 1, 20).insert("b", 2, 20); + f.flush(); + + EXPECT_EQUAL("[{a,1},{b,1}]", f.remove(10)); + EXPECT_EQUAL("[{b,1},{b,2}]", f.remove(20)); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git 
a/searchlib/src/tests/memoryindex/documentinverter/.gitignore b/searchlib/src/tests/memoryindex/documentinverter/.gitignore new file mode 100644 index 00000000000..1e9666b2d63 --- /dev/null +++ b/searchlib/src/tests/memoryindex/documentinverter/.gitignore @@ -0,0 +1 @@ +searchlib_documentinverter_test_app diff --git a/searchlib/src/tests/memoryindex/documentinverter/CMakeLists.txt b/searchlib/src/tests/memoryindex/documentinverter/CMakeLists.txt new file mode 100644 index 00000000000..85a77fad361 --- /dev/null +++ b/searchlib/src/tests/memoryindex/documentinverter/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_documentinverter_test_app + SOURCES + documentinverter_test.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_documentinverter_test_app COMMAND searchlib_documentinverter_test_app) diff --git a/searchlib/src/tests/memoryindex/documentinverter/DESC b/searchlib/src/tests/memoryindex/documentinverter/DESC new file mode 100644 index 00000000000..5dc610c2a24 --- /dev/null +++ b/searchlib/src/tests/memoryindex/documentinverter/DESC @@ -0,0 +1 @@ +Document inverter test. Take a look at documentinverter_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/documentinverter/FILES b/searchlib/src/tests/memoryindex/documentinverter/FILES new file mode 100644 index 00000000000..c54817b9df1 --- /dev/null +++ b/searchlib/src/tests/memoryindex/documentinverter/FILES @@ -0,0 +1 @@ +documentinverter_test.cpp diff --git a/searchlib/src/tests/memoryindex/documentinverter/documentinverter_test.cpp b/searchlib/src/tests/memoryindex/documentinverter/documentinverter_test.cpp new file mode 100644 index 00000000000..d3ad1f54e95 --- /dev/null +++ b/searchlib/src/tests/memoryindex/documentinverter/documentinverter_test.cpp @@ -0,0 +1,294 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* -*- mode: C++; coding: utf-8; -*- */ + + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("documentinverter_test"); +#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/memoryindex/documentinverter.h> +#include <vespa/searchlib/memoryindex/fieldinverter.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/searchlib/test/memoryindex/ordereddocumentinserter.h> +#include <vespa/searchlib/common/sequencedtaskexecutor.h> +#include <vespa/vespalib/testkit/testapp.h> + +namespace search +{ + + +using document::Document; +using index::DocBuilder; +using index::Schema; + +namespace memoryindex +{ + + +namespace +{ + + +Document::UP +makeDoc10(DocBuilder &b) +{ + b.startDocument("doc::10"); + b.startIndexField("f0"). + addStr("a").addStr("b").addStr("c").addStr("d"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc11(DocBuilder &b) +{ + b.startDocument("doc::11"); + b.startIndexField("f0"). + addStr("a").addStr("b").addStr("e").addStr("f"). + endField(); + b.startIndexField("f1"). + addStr("a").addStr("g"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc12(DocBuilder &b) +{ + b.startDocument("doc::12"); + b.startIndexField("f0"). + addStr("h").addStr("doc12"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc13(DocBuilder &b) +{ + b.startDocument("doc::13"); + b.startIndexField("f0"). + addStr("i").addStr("doc13"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc14(DocBuilder &b) +{ + b.startDocument("doc::14"); + b.startIndexField("f0"). + addStr("j").addStr("doc14"). 
+ endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc15(DocBuilder &b) +{ + b.startDocument("doc::15"); + return b.endDocument(); +} + +} + +struct Fixture +{ + Schema _schema; + DocBuilder _b; + SequencedTaskExecutor _invertThreads; + SequencedTaskExecutor _pushThreads; + DocumentInverter _inv; + test::OrderedDocumentInserter _inserter; + + static Schema + makeSchema() + { + Schema schema; + schema.addIndexField(Schema::IndexField("f0", Schema::STRING)); + schema.addIndexField(Schema::IndexField("f1", Schema::STRING)); + schema.addIndexField(Schema::IndexField("f2", Schema::STRING, + Schema::ARRAY)); + schema.addIndexField(Schema::IndexField("f3", Schema::STRING, + Schema::WEIGHTEDSET)); + return schema; + } + + Fixture() + : _schema(makeSchema()), + _b(_schema), + _invertThreads(2), + _pushThreads(2), + _inv(_schema, _invertThreads, _pushThreads), + _inserter() + { + } + + void + pushDocuments() + { + _invertThreads.sync(); + uint32_t fieldId = 0; + for (auto &inverter : _inv.getInverters()) { + _inserter.setFieldId(fieldId); + inverter->pushDocuments(_inserter); + ++fieldId; + } + _pushThreads.sync(); + } +}; + + +TEST_F("requireThatFreshInsertWorks", Fixture) +{ + f._inv.invertDocument(10, *makeDoc10(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=c,a=10," + "w=d,a=10", + f._inserter.toStr()); +} + + +TEST_F("requireThatMultipleDocsWork", Fixture) +{ + f._inv.invertDocument(10, *makeDoc10(f._b)); + f._inv.invertDocument(11, *makeDoc11(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10,a=11," + "w=b,a=10,a=11," + "w=c,a=10,w=d,a=10," + "w=e,a=11," + "w=f,a=11," + "f=1,w=a,a=11," + "w=g,a=11", + f._inserter.toStr()); +} + + +TEST_F("requireThatRemoveWorks", Fixture) +{ + f._inv.getInverter(0)->remove("b", 10); + f._inv.getInverter(0)->remove("a", 10); + f._inv.getInverter(0)->remove("b", 11); + f._inv.getInverter(2)->remove("c", 12); + f._inv.getInverter(1)->remove("a", 10); + f.pushDocuments(); + 
EXPECT_EQUAL("f=0,w=a,r=10," + "w=b,r=10,r=11," + "f=1,w=a,r=10," + "f=2,w=c,r=12", + f._inserter.toStr()); +} + + +TEST_F("requireThatReputWorks", Fixture) +{ + f._inv.invertDocument(10, *makeDoc10(f._b)); + f._inv.invertDocument(10, *makeDoc11(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=e,a=10," + "w=f,a=10," + "f=1,w=a,a=10," + "w=g,a=10", + f._inserter.toStr()); +} + + +TEST_F("requireThatAbortPendingDocWorks", Fixture) +{ + Document::UP doc10 = makeDoc10(f._b); + Document::UP doc11 = makeDoc11(f._b); + Document::UP doc12 = makeDoc12(f._b); + Document::UP doc13 = makeDoc13(f._b); + Document::UP doc14 = makeDoc14(f._b); + + f._inv.invertDocument(10, *doc10); + f._inv.invertDocument(11, *doc11); + f._inv.removeDocument(10); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=11," + "w=b,a=11," + "w=e,a=11," + "w=f,a=11," + "f=1,w=a,a=11," + "w=g,a=11", + f._inserter.toStr()); + + f._inv.invertDocument(10, *doc10); + f._inv.invertDocument(11, *doc11); + f._inv.invertDocument(12, *doc12); + f._inv.invertDocument(13, *doc13); + f._inv.invertDocument(14, *doc14); + f._inv.removeDocument(11); + f._inv.removeDocument(13); + f._inserter.reset(); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=c,a=10," + "w=d,a=10," + "w=doc12,a=12," + "w=doc14,a=14," + "w=h,a=12," + "w=j,a=14", + f._inserter.toStr()); + + f._inv.invertDocument(10, *doc10); + f._inv.invertDocument(11, *doc11); + f._inv.invertDocument(12, *doc12); + f._inv.invertDocument(13, *doc13); + f._inv.invertDocument(14, *doc14); + f._inv.removeDocument(11); + f._inv.removeDocument(12); + f._inv.removeDocument(13); + f._inv.removeDocument(14); + f._inserter.reset(); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=c,a=10," + "w=d,a=10", + f._inserter.toStr()); + + +} + + +TEST_F("requireThatMixOfAddAndRemoveWorks", Fixture) +{ + f._inv.getInverter(0)->remove("a", 11); + f._inv.getInverter(0)->remove("c", 9); + 
f._inv.getInverter(0)->remove("d", 10); + f._inv.getInverter(0)->remove("z", 12); + f._inv.invertDocument(10, *makeDoc10(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10,r=11," + "w=b,a=10," + "w=c,r=9,a=10," + "w=d,r=10,a=10," + "w=z,r=12", + f._inserter.toStr()); +} + + +TEST_F("require that empty document can be inverted", Fixture) +{ + f._inv.invertDocument(15, *makeDoc15(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + + +} // namespace memoryindex +} // namespace search + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/memoryindex/fieldinverter/.gitignore b/searchlib/src/tests/memoryindex/fieldinverter/.gitignore new file mode 100644 index 00000000000..482663dd92e --- /dev/null +++ b/searchlib/src/tests/memoryindex/fieldinverter/.gitignore @@ -0,0 +1 @@ +searchlib_fieldinverter_test_app diff --git a/searchlib/src/tests/memoryindex/fieldinverter/CMakeLists.txt b/searchlib/src/tests/memoryindex/fieldinverter/CMakeLists.txt new file mode 100644 index 00000000000..9d81ebbb57c --- /dev/null +++ b/searchlib/src/tests/memoryindex/fieldinverter/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_fieldinverter_test_app + SOURCES + fieldinverter_test.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_fieldinverter_test_app COMMAND searchlib_fieldinverter_test_app) diff --git a/searchlib/src/tests/memoryindex/fieldinverter/DESC b/searchlib/src/tests/memoryindex/fieldinverter/DESC new file mode 100644 index 00000000000..a40890fdc3d --- /dev/null +++ b/searchlib/src/tests/memoryindex/fieldinverter/DESC @@ -0,0 +1 @@ +Field inverter test. Take a look at fieldinverter_test.cpp for details. 
diff --git a/searchlib/src/tests/memoryindex/fieldinverter/FILES b/searchlib/src/tests/memoryindex/fieldinverter/FILES new file mode 100644 index 00000000000..892febd1c50 --- /dev/null +++ b/searchlib/src/tests/memoryindex/fieldinverter/FILES @@ -0,0 +1 @@ +fieldinverter_test.cpp diff --git a/searchlib/src/tests/memoryindex/fieldinverter/fieldinverter_test.cpp b/searchlib/src/tests/memoryindex/fieldinverter/fieldinverter_test.cpp new file mode 100644 index 00000000000..6216ba9eb3c --- /dev/null +++ b/searchlib/src/tests/memoryindex/fieldinverter/fieldinverter_test.cpp @@ -0,0 +1,338 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* -*- mode: C++; coding: utf-8; -*- */ + + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("fieldinverter_test"); +#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/memoryindex/fieldinverter.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/searchlib/test/memoryindex/ordereddocumentinserter.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/document/repo/fixedtyperepo.h> + +namespace search +{ + + +using document::Document; +using index::DocBuilder; +using index::Schema; + +namespace memoryindex +{ + + +namespace +{ + + +Document::UP +makeDoc10(DocBuilder &b) +{ + b.startDocument("doc::10"); + b.startIndexField("f0"). + addStr("a").addStr("b").addStr("c").addStr("d"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc11(DocBuilder &b) +{ + b.startDocument("doc::11"); + b.startIndexField("f0"). + addStr("a").addStr("b").addStr("e").addStr("f"). + endField(); + b.startIndexField("f1"). + addStr("a").addStr("g"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc12(DocBuilder &b) +{ + b.startDocument("doc::12"); + b.startIndexField("f0"). + addStr("h").addStr("doc12"). 
+ endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc13(DocBuilder &b) +{ + b.startDocument("doc::13"); + b.startIndexField("f0"). + addStr("i").addStr("doc13"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc14(DocBuilder &b) +{ + b.startDocument("doc::14"); + b.startIndexField("f0"). + addStr("j").addStr("doc14"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc15(DocBuilder &b) +{ + b.startDocument("doc::15"); + return b.endDocument(); +} + + +Document::UP +makeDoc16(DocBuilder &b) +{ + b.startDocument("doc::16"); + b.startIndexField("f0").addStr("foo").addStr("bar").addStr("baz"). + addTermAnnotation("altbaz").addStr("y").addTermAnnotation("alty"). + addStr("z").endField(); + return b.endDocument(); +} + +} + +struct Fixture +{ + Schema _schema; + DocBuilder _b; + std::vector<std::unique_ptr<FieldInverter> > _inverters; + test::OrderedDocumentInserter _inserter; + + static Schema + makeSchema() + { + Schema schema; + schema.addIndexField(Schema::IndexField("f0", Schema::STRING)); + schema.addIndexField(Schema::IndexField("f1", Schema::STRING)); + schema.addIndexField(Schema::IndexField("f2", Schema::STRING, + Schema::ARRAY)); + schema.addIndexField(Schema::IndexField("f3", Schema::STRING, + Schema::WEIGHTEDSET)); + return schema; + } + + Fixture() + : _schema(makeSchema()), + _b(_schema), + _inverters(), + _inserter() + { + for (uint32_t fieldId = 0; fieldId < _schema.getNumIndexFields(); + ++fieldId) { + _inverters.push_back(std::make_unique<FieldInverter>(_schema, + fieldId)); + } + } + + void + invertDocument(uint32_t docId, const Document &doc) + { + uint32_t fieldId = 0; + for (auto &inverter : _inverters) { + vespalib::stringref fieldName = + _schema.getIndexField(fieldId).getName(); + inverter->invertField(docId, doc.getValue(fieldName)); + ++fieldId; + } + } + + void + pushDocuments() + { + uint32_t fieldId = 0; + for (auto &inverter : _inverters) { + _inserter.setFieldId(fieldId); + 
inverter->pushDocuments(_inserter); + ++fieldId; + } + } + + void + removeDocument(uint32_t docId) { + for (auto &inverter : _inverters) { + inverter->removeDocument(docId); + } + } +}; + + +TEST_F("requireThatFreshInsertWorks", Fixture) +{ + f.invertDocument(10, *makeDoc10(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=c,a=10," + "w=d,a=10", + f._inserter.toStr()); +} + + +TEST_F("requireThatMultipleDocsWork", Fixture) +{ + f.invertDocument(10, *makeDoc10(f._b)); + f.invertDocument(11, *makeDoc11(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10,a=11," + "w=b,a=10,a=11," + "w=c,a=10,w=d,a=10," + "w=e,a=11," + "w=f,a=11," + "f=1,w=a,a=11," + "w=g,a=11", + f._inserter.toStr()); +} + + +TEST_F("requireThatRemoveWorks", Fixture) +{ + f._inverters[0]->remove("b", 10); + f._inverters[0]->remove("a", 10); + f._inverters[0]->remove("b", 11); + f._inverters[2]->remove("c", 12); + f._inverters[1]->remove("a", 10); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,r=10," + "w=b,r=10,r=11," + "f=1,w=a,r=10," + "f=2,w=c,r=12", + f._inserter.toStr()); +} + + +TEST_F("requireThatReputWorks", Fixture) +{ + f.invertDocument(10, *makeDoc10(f._b)); + f.invertDocument(10, *makeDoc11(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=e,a=10," + "w=f,a=10," + "f=1,w=a,a=10," + "w=g,a=10", + f._inserter.toStr()); +} + + +TEST_F("requireThatAbortPendingDocWorks", Fixture) +{ + Document::UP doc10 = makeDoc10(f._b); + Document::UP doc11 = makeDoc11(f._b); + Document::UP doc12 = makeDoc12(f._b); + Document::UP doc13 = makeDoc13(f._b); + Document::UP doc14 = makeDoc14(f._b); + + f.invertDocument(10, *doc10); + f.invertDocument(11, *doc11); + f.removeDocument(10); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=11," + "w=b,a=11," + "w=e,a=11," + "w=f,a=11," + "f=1,w=a,a=11," + "w=g,a=11", + f._inserter.toStr()); + + f.invertDocument(10, *doc10); + f.invertDocument(11, *doc11); + f.invertDocument(12, *doc12); + f.invertDocument(13, 
*doc13); + f.invertDocument(14, *doc14); + f.removeDocument(11); + f.removeDocument(13); + f._inserter.reset(); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=c,a=10," + "w=d,a=10," + "w=doc12,a=12," + "w=doc14,a=14," + "w=h,a=12," + "w=j,a=14", + f._inserter.toStr()); + + f.invertDocument(10, *doc10); + f.invertDocument(11, *doc11); + f.invertDocument(12, *doc12); + f.invertDocument(13, *doc13); + f.invertDocument(14, *doc14); + f.removeDocument(11); + f.removeDocument(12); + f.removeDocument(13); + f.removeDocument(14); + f._inserter.reset(); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=c,a=10," + "w=d,a=10", + f._inserter.toStr()); + + +} + + +TEST_F("requireThatMixOfAddAndRemoveWorks", Fixture) +{ + f._inverters[0]->remove("a", 11); + f._inverters[0]->remove("c", 9); + f._inverters[0]->remove("d", 10); + f._inverters[0]->remove("z", 12); + f.invertDocument(10, *makeDoc10(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10,r=11," + "w=b,a=10," + "w=c,r=9,a=10," + "w=d,r=10,a=10," + "w=z,r=12", + f._inserter.toStr()); +} + + +TEST_F("require that empty document can be inverted", Fixture) +{ + f.invertDocument(15, *makeDoc15(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +TEST_F("require that multiple words at same position works", Fixture) +{ + f.invertDocument(16, *makeDoc16(f._b)); + f._inserter.setVerbose(); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=altbaz,a=16(e=0,w=1,l=5[2])," + "w=alty,a=16(e=0,w=1,l=5[3])," + "w=bar,a=16(e=0,w=1,l=5[1])," + "w=baz,a=16(e=0,w=1,l=5[2])," + "w=foo,a=16(e=0,w=1,l=5[0])," + "w=y,a=16(e=0,w=1,l=5[3])," + "w=z,a=16(e=0,w=1,l=5[4])", + f._inserter.toStr()); +} + + +} // namespace memoryindex +} // namespace search + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/memoryindex/memoryindex/.gitignore b/searchlib/src/tests/memoryindex/memoryindex/.gitignore new file mode 100644 index 00000000000..174d0a494e2 --- /dev/null +++ 
b/searchlib/src/tests/memoryindex/memoryindex/.gitignore @@ -0,0 +1,5 @@ +.depend +Makefile +memoryindex_test +sourceselectorwriter_test +searchlib_memoryindex_test_app diff --git a/searchlib/src/tests/memoryindex/memoryindex/CMakeLists.txt b/searchlib/src/tests/memoryindex/memoryindex/CMakeLists.txt new file mode 100644 index 00000000000..f25089e85bb --- /dev/null +++ b/searchlib/src/tests/memoryindex/memoryindex/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_memoryindex_test_app + SOURCES + memoryindex_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_memoryindex_test_app COMMAND searchlib_memoryindex_test_app) diff --git a/searchlib/src/tests/memoryindex/memoryindex/DESC b/searchlib/src/tests/memoryindex/memoryindex/DESC new file mode 100644 index 00000000000..87b69181803 --- /dev/null +++ b/searchlib/src/tests/memoryindex/memoryindex/DESC @@ -0,0 +1 @@ +memoryindex test. Take a look at memoryindex_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/memoryindex/FILES b/searchlib/src/tests/memoryindex/memoryindex/FILES new file mode 100644 index 00000000000..4faa7668dfc --- /dev/null +++ b/searchlib/src/tests/memoryindex/memoryindex/FILES @@ -0,0 +1 @@ +memoryindex_test.cpp diff --git a/searchlib/src/tests/memoryindex/memoryindex/memoryindex_test.cpp b/searchlib/src/tests/memoryindex/memoryindex/memoryindex_test.cpp new file mode 100644 index 00000000000..7d2afc151d5 --- /dev/null +++ b/searchlib/src/tests/memoryindex/memoryindex/memoryindex_test.cpp @@ -0,0 +1,438 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("memoryindex_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/searchlib/memoryindex/memoryindex.h> +#include <vespa/searchlib/fef/matchdata.h> +#include <vespa/searchlib/fef/matchdatalayout.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h> +#include <vespa/searchlib/queryeval/fake_search.h> +#include <vespa/searchlib/queryeval/fake_searchable.h> +#include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/searchlib/common/sequencedtaskexecutor.h> +#include <vespa/searchlib/common/scheduletaskcallback.h> +#include <vespa/vespalib/util/threadstackexecutor.h> + +using document::Document; +using document::FieldValue; +using search::query::Node; +using search::query::SimplePhrase; +using search::query::SimpleStringTerm; +using search::makeLambdaTask; +using search::ScheduleTaskCallback; +using namespace search::fef; +using namespace search::index; +using namespace search::memoryindex; +using namespace search::queryeval; + +//----------------------------------------------------------------------------- + +struct Setup { + Schema schema; + Setup &field(const std::string &name) { + schema.addIndexField(Schema::IndexField(name, + Schema::STRING)); + return *this; + } +}; + +//----------------------------------------------------------------------------- + +struct Index { + Schema schema; + vespalib::ThreadStackExecutor _executor; + search::SequencedTaskExecutor _invertThreads; + search::SequencedTaskExecutor _pushThreads; + MemoryIndex index; + DocBuilder builder; + uint32_t docid; + std::string currentField; + + Index(const Setup &setup) + : schema(setup.schema), + _executor(1, 128 * 1024), + 
_invertThreads(2), + _pushThreads(2), + index(schema, _invertThreads, _pushThreads), + builder(schema), + docid(1), + currentField() + { + } + void closeField() { + if (!currentField.empty()) { + builder.endField(); + currentField.clear(); + } + } + Index &doc(uint32_t id) { + docid = id; + builder.startDocument(vespalib::make_string("doc::%u", id)); + return *this; + } + Index &field(const std::string &name) { + closeField(); + builder.startIndexField(name); + currentField = name; + return *this; + } + Index &add(const std::string &token) { + builder.addStr(token); + return *this; + } + void internalSyncCommit() { + vespalib::Gate gate; + index.commit(std::make_shared<ScheduleTaskCallback> + (_executor, + makeLambdaTask([&]() { gate.countDown(); }))); + gate.await(); + } + Document::UP commit() { + closeField(); + Document::UP d = builder.endDocument(); + index.insertDocument(docid, *d); + internalSyncCommit(); + return d; + } + Index &remove(uint32_t id) { + index.removeDocument(id); + internalSyncCommit(); + return *this; + } + +private: + Index(const Index &index); + Index &operator=(const Index &index); +}; + +//----------------------------------------------------------------------------- + +std::string toString(SearchIterator & search) +{ + std::ostringstream oss; + bool first = true; + for (search.seek(1); ! 
search.isAtEnd(); search.seek(search.getDocId() + 1)) { + if (!first) oss << ","; + oss << search.getDocId(); + first = false; + } + return oss.str(); +} + +//----------------------------------------------------------------------------- + +const std::string title("title"); +const std::string body("body"); +const std::string foo("foo"); +const std::string bar("bar"); + +//----------------------------------------------------------------------------- + +bool +verifyResult(const FakeResult &expect, + Searchable &index, + std::string fieldName, + const Node &term) +{ + uint32_t fieldId = 0; + FakeRequestContext requestContext; + + MatchDataLayout mdl; + TermFieldHandle handle = mdl.allocTermField(fieldId); + MatchData::UP match_data = mdl.createMatchData(); + + FieldSpec field(fieldName, fieldId, handle); + FieldSpecList fields; + fields.add(field); + + Blueprint::UP result = index.createBlueprint(requestContext, fields, term); + if (!EXPECT_TRUE(result.get() != 0)) { + return false; + } + EXPECT_EQUAL(expect.inspect().size(), result->getState().estimate().estHits); + EXPECT_EQUAL(expect.inspect().empty(), result->getState().estimate().empty); + + result->fetchPostings(true); + SearchIterator::UP search = result->createSearch(*match_data, true); + if (!EXPECT_TRUE(search.get() != 0)) { + return false; + } + TermFieldMatchData &tmd = *match_data->resolveTermField(handle); + + FakeResult actual; + search->initFullRange(); + for (search->seek(1); !search->isAtEnd(); search->seek(search->getDocId() + 1)) { + actual.doc(search->getDocId()); + search->unpack(search->getDocId()); + EXPECT_EQUAL(search->getDocId(), tmd.getDocId()); + FieldPositionsIterator p = tmd.getIterator(); + actual.len(p.getFieldLength()); + for (; p.valid(); p.next()) { + actual.pos(p.getPosition()); + } + } + return EXPECT_EQUAL(expect, actual); +} + +namespace { +SimpleStringTerm makeTerm(const std::string &term) { + return SimpleStringTerm(term, "field", 0, search::query::Weight(0)); +} + +Node::UP 
makePhrase(const std::string &term1, const std::string &term2) { + SimplePhrase * phrase = new SimplePhrase("field", 0, search::query::Weight(0)); + Node::UP node(phrase); + phrase->append(Node::UP(new SimpleStringTerm(makeTerm(term1)))); + phrase->append(Node::UP(new SimpleStringTerm(makeTerm(term2)))); + return node; +} +} // namespace + +// tests basic usage; index some documents in docid order and perform +// some searches. +TEST("testIndexAndSearch") +{ + Index index(Setup().field(title).field(body)); + index.doc(1) + .field(title).add(foo).add(bar).add(foo) + .field(body).add(foo).add(foo).add(foo) + .commit(); + index.doc(2) + .field(title).add(bar).add(foo) + .field(body).add(bar).add(bar).add(bar).add(bar) + .commit(); + + // search for "foo" in "title" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(0).pos(2) + .doc(2).len(2).pos(1), + index.index, title, makeTerm(foo))); + + // search for "bar" in "title" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(1) + .doc(2).len(2).pos(0), + index.index, title, makeTerm(bar))); + + // search for "foo" in "body" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(0).pos(1).pos(2), + index.index, body, makeTerm(foo))); + + // search for "bar" in "body" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(2).len(4).pos(0).pos(1).pos(2).pos(3), + index.index, body, makeTerm(bar))); + + // search for "bogus" in "title" + EXPECT_TRUE(verifyResult(FakeResult(), + index.index, title, makeTerm("bogus"))); + + // search for "foo" in "bogus" + EXPECT_TRUE(verifyResult(FakeResult(), + index.index, "bogus", makeTerm(foo))); + + // search for "bar foo" in "title" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(1) + .doc(2).len(2).pos(0), + index.index, title, *makePhrase(bar, foo))); + +} + +// tests index update behavior; remove/update and unordered docid +// indexing. 
+TEST("require that documents can be removed and updated") +{ + Index index(Setup().field(title)); + + // add unordered + index.doc(3).field(title).add(foo).add(foo).add(foo).commit(); + Document::UP doc1 = index.doc(1).field(title).add(foo).commit(); + Document::UP doc2 = index.doc(2).field(title).add(foo).add(foo).commit(); + + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(1).pos(0) + .doc(2).len(2).pos(0).pos(1) + .doc(3).len(3).pos(0).pos(1).pos(2), + index.index, title, makeTerm(foo))); + + // remove document + index.remove(2); + + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(1).pos(0) + .doc(3).len(3).pos(0).pos(1).pos(2), + index.index, title, makeTerm(foo))); + + // update document + index.doc(1).field(title).add(bar).add(foo).add(foo).commit(); + + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(1).pos(2) + .doc(3).len(3).pos(0).pos(1).pos(2), + index.index, title, makeTerm(foo))); +} + +// test the fake field source here, to make sure it acts similar to +// the memory index field source. 
+TEST("testFakeSearchable") +{ + Index index(Setup().field(title).field(body)); + + // setup fake field source with predefined results + FakeSearchable fakeSource; + fakeSource.addResult(title, foo, + FakeResult() + .doc(1).len(3).pos(0).pos(2) + .doc(2).len(2).pos(1)); + fakeSource.addResult(title, bar, + FakeResult() + .doc(1).len(3).pos(1) + .doc(2).len(2).pos(0)); + fakeSource.addResult(body, foo, + FakeResult() + .doc(1).len(3).pos(0).pos(1).pos(2)); + fakeSource.addResult(body, bar, + FakeResult() + .doc(2).len(4).pos(0).pos(1).pos(2).pos(3)); + + // search for "foo" in "title" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(0).pos(2) + .doc(2).len(2).pos(1), + fakeSource, title, makeTerm(foo))); + + // search for "bar" in "title" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(1) + .doc(2).len(2).pos(0), + fakeSource, title, makeTerm(bar))); + + // search for "foo" in "body" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(0).pos(1).pos(2), + fakeSource, body, makeTerm(foo))); + + // search for "bar" in "body" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(2).len(4).pos(0).pos(1).pos(2).pos(3), + fakeSource, body, makeTerm(bar))); + + // search for "bogus" in "title" + EXPECT_TRUE(verifyResult(FakeResult(), + fakeSource, title, makeTerm("bogus"))); + + // search for foo in "bogus" + EXPECT_TRUE(verifyResult(FakeResult(), + fakeSource, "bogus", makeTerm(foo))); +} + +TEST("requireThatFrozenIndexIgnoresUpdates") +{ + Index index(Setup().field(title)); + Document::UP doc1 = index.doc(1).field(title).add(foo).add(bar).commit(); + FakeResult ffr = FakeResult().doc(1).len(2).pos(0); + EXPECT_TRUE(verifyResult(ffr, index.index, title, makeTerm(foo))); + EXPECT_TRUE(!index.index.isFrozen()); + index.index.freeze(); + EXPECT_TRUE(index.index.isFrozen()); + index.doc(2).field(title).add(bar).add(foo).commit(); // not added + EXPECT_TRUE(verifyResult(ffr, index.index, title, makeTerm(foo))); + index.remove(1); // not removed + 
EXPECT_TRUE(verifyResult(ffr, index.index, title, makeTerm(foo))); +} + +TEST("requireThatNumDocsAndDocIdLimitIsReturned") +{ + Index index(Setup().field(title)); + EXPECT_EQUAL(0u, index.index.getNumDocs()); + EXPECT_EQUAL(1u, index.index.getDocIdLimit()); + Document::UP doc1 = index.doc(1).field(title).add(foo).commit(); + EXPECT_EQUAL(1u, index.index.getNumDocs()); + EXPECT_EQUAL(2u, index.index.getDocIdLimit()); + Document::UP doc4 = index.doc(4).field(title).add(foo).commit(); + EXPECT_EQUAL(2u, index.index.getNumDocs()); + EXPECT_EQUAL(5u, index.index.getDocIdLimit()); + Document::UP doc2 = index.doc(2).field(title).add(foo).commit(); + EXPECT_EQUAL(3u, index.index.getNumDocs()); + EXPECT_EQUAL(5u, index.index.getDocIdLimit()); + // re-add doc4 + index.doc(4).field(title).add(bar).commit(); + EXPECT_EQUAL(3u, index.index.getNumDocs()); + EXPECT_EQUAL(5u, index.index.getDocIdLimit()); + // remove doc2 + index.remove(2); + EXPECT_EQUAL(2u, index.index.getNumDocs()); + EXPECT_EQUAL(5u, index.index.getDocIdLimit()); +} + +TEST("requireThatWeUnderstandTheMemoryFootprint") +{ + { + Setup setup; + Index index(setup); + EXPECT_EQUAL(0u, index.index.getStaticMemoryFootprint()); + EXPECT_EQUAL(index.index.getStaticMemoryFootprint(), index.index.getMemoryUsage().allocatedBytes()); + } + { + Index index(Setup().field("f1")); + EXPECT_EQUAL(118852u, index.index.getStaticMemoryFootprint()); + EXPECT_EQUAL(index.index.getStaticMemoryFootprint(), index.index.getMemoryUsage().allocatedBytes()); + } + { + Index index(Setup().field("f1").field("f2")); + EXPECT_EQUAL(2*118852u, index.index.getStaticMemoryFootprint()); + EXPECT_EQUAL(index.index.getStaticMemoryFootprint(), index.index.getMemoryUsage().allocatedBytes()); + } +} + +TEST("requireThatNumWordsIsReturned") +{ + Index index(Setup().field(title)); + EXPECT_EQUAL(0u, index.index.getNumWords()); + index.doc(1).field(title).add(foo).commit(); + EXPECT_EQUAL(1u, index.index.getNumWords()); + 
index.doc(2).field(title).add(foo).add(bar).add(body).commit(); + EXPECT_EQUAL(3u, index.index.getNumWords()); +} + +TEST("requireThatWeCanFakeBitVector") +{ + Index index(Setup().field(title)); + index.doc(1).field(title).add(foo).commit(); + index.doc(3).field(title).add(foo).commit(); + { + uint32_t fieldId = 0; + + MatchDataLayout mdl; + FakeRequestContext requestContext; + TermFieldHandle handle = mdl.allocTermField(fieldId); + MatchData::UP match_data = mdl.createMatchData(); + + // filter field + FieldSpec field(title, fieldId, handle, true); + FieldSpecList fields; + fields.add(field); + + Searchable &searchable = index.index; + Blueprint::UP res = searchable.createBlueprint(requestContext, fields, makeTerm(foo)); + EXPECT_TRUE(res.get() != NULL); + + res->fetchPostings(true); + SearchIterator::UP search = res->createSearch(*match_data, true); + EXPECT_TRUE(search.get() != NULL); + EXPECT_TRUE(dynamic_cast<BooleanMatchIteratorWrapper *>(search.get()) != NULL); + search->initFullRange(); + EXPECT_EQUAL("1,3", toString(*search)); + } +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore b/searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore new file mode 100644 index 00000000000..b2636fe5e81 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore @@ -0,0 +1 @@ +searchlib_urlfieldinverter_test_app diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt b/searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt new file mode 100644 index 00000000000..c5a0374fad9 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_urlfieldinverter_test_app + SOURCES + urlfieldinverter_test.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_urlfieldinverter_test_app COMMAND searchlib_urlfieldinverter_test_app) diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/DESC b/searchlib/src/tests/memoryindex/urlfieldinverter/DESC new file mode 100644 index 00000000000..00115ada607 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/DESC @@ -0,0 +1 @@ +UrlField inverter test. Take a look at urlfieldinverter_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/FILES b/searchlib/src/tests/memoryindex/urlfieldinverter/FILES new file mode 100644 index 00000000000..ac08b0a3e90 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/FILES @@ -0,0 +1 @@ +urlfieldinverter_test.cpp diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp b/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp new file mode 100644 index 00000000000..30b5883f153 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp @@ -0,0 +1,579 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
/* -*- mode: C++; coding: utf-8; -*- */

// Tests for UrlFieldInverter: a URI index field is inverted for its three
// collection types (SINGLE, ARRAY, WEIGHTEDSET), with and without term
// annotations, and the postings pushed to a test OrderedDocumentInserter are
// compared against expected flat-string dumps.

#include <vespa/fastos/fastos.h>
#include <vespa/log/log.h>
LOG_SETUP("urlfieldinverter_test");
#include <vespa/searchlib/index/docbuilder.h>
#include <vespa/searchlib/memoryindex/fieldinverter.h>
#include <vespa/searchlib/memoryindex/urlfieldinverter.h>
#include <vespa/vespalib/objects/nbostream.h>
#include <vespa/searchlib/test/memoryindex/ordereddocumentinserter.h>
#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/document/repo/fixedtyperepo.h>

namespace search
{


using document::Document;
using index::DocBuilder;
using index::DocTypeBuilder;
using index::Schema;

namespace memoryindex
{

namespace {
// Name of the URI index field used by every test document and schema.
const vespalib::string url = "url";
}


namespace
{

// Builds document doc::10 with a SINGLE-value url field.  The "path"
// sub-field carries the extra term annotation "altfluke", which only shows
// up in the inverted output when annotations are enabled on the inverter.
Document::UP
makeDoc10Single(DocBuilder &b)
{
    b.startDocument("doc::10");
    b.startIndexField("url").
        startSubField("all").
        addUrlTokenizedString("http://www.yahoo.com:81/fluke?ab=2#4").
        endSubField().
        startSubField("scheme").
        addUrlTokenizedString("http").
        endSubField().
        startSubField("host").
        addUrlTokenizedString("www.yahoo.com").
        endSubField().
        startSubField("port").
        addUrlTokenizedString("81").
        endSubField().
        startSubField("path").
        addUrlTokenizedString("/fluke").
        addTermAnnotation("altfluke").
        endSubField().
        startSubField("query").
        addUrlTokenizedString("ab=2").
        endSubField().
        startSubField("fragment").
        addUrlTokenizedString("4").
        endSubField().
        endField();
    return b.endDocument();
}


// Builds doc::10 with an ARRAY url field holding two URLs (yahoo and
// flickr), both elements with weight 1.  First element's path is annotated
// with "altfluke".
Document::UP
makeDoc10Array(DocBuilder &b)
{
    b.startDocument("doc::10");
    b.startIndexField("url").
        startElement(1).
        startSubField("all").
        addUrlTokenizedString("http://www.yahoo.com:82/fluke?ab=2#8").
        endSubField().
        startSubField("scheme").
        addUrlTokenizedString("http").
        endSubField().
        startSubField("host").
        addUrlTokenizedString("www.yahoo.com").
        endSubField().
        startSubField("port").
        addUrlTokenizedString("82").
        endSubField().
        startSubField("path").
        addUrlTokenizedString("/fluke").
        addTermAnnotation("altfluke").
        endSubField().
        startSubField("query").
        addUrlTokenizedString("ab=2").
        endSubField().
        startSubField("fragment").
        addUrlTokenizedString("8").
        endSubField().
        endElement().
        startElement(1).
        startSubField("all").
        addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9").
        endSubField().
        startSubField("scheme").
        addUrlTokenizedString("http").
        endSubField().
        startSubField("host").
        addUrlTokenizedString("www.flickr.com").
        endSubField().
        startSubField("port").
        addUrlTokenizedString("82").
        endSubField().
        startSubField("path").
        addUrlTokenizedString("/fluke").
        endSubField().
        startSubField("query").
        addUrlTokenizedString("ab=2").
        endSubField().
        startSubField("fragment").
        addUrlTokenizedString("9").
        endSubField().
        endElement().
        endField();
    return b.endDocument();
}

// Builds doc::10 with a WEIGHTEDSET url field: yahoo URL with weight 4 and
// flickr URL with weight 7.  The weights reappear as "w=..." in the verbose
// inserter dump of the annotated weighted-set test.
Document::UP
makeDoc10WeightedSet(DocBuilder &b)
{
    b.startDocument("doc::10");
    b.startIndexField("url").
        startElement(4).
        startSubField("all").
        addUrlTokenizedString("http://www.yahoo.com:83/fluke?ab=2#12").
        endSubField().
        startSubField("scheme").
        addUrlTokenizedString("http").
        endSubField().
        startSubField("host").
        addUrlTokenizedString("www.yahoo.com").
        endSubField().
        startSubField("port").
        addUrlTokenizedString("83").
        endSubField().
        startSubField("path").
        addUrlTokenizedString("/fluke").
        addTermAnnotation("altfluke").
        endSubField().
        startSubField("query").
        addUrlTokenizedString("ab=2").
        endSubField().
        startSubField("fragment").
        addUrlTokenizedString("12").
        endSubField().
        endElement().
        startElement(7).
        startSubField("all").
        addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13").
        endSubField().
        startSubField("scheme").
        addUrlTokenizedString("http").
        endSubField().
        startSubField("host").
        addUrlTokenizedString("www.flickr.com").
        endSubField().
        startSubField("port").
        addUrlTokenizedString("85").
        endSubField().
        startSubField("path").
        addUrlTokenizedString("/fluke").
        endSubField().
        startSubField("query").
        addUrlTokenizedString("ab=2").
        endSubField().
        startSubField("fragment").
        addUrlTokenizedString("13").
        endSubField().
        endElement().
        endField();
    return b.endDocument();
}


// Builds doc::10 with no url field at all; inverting it must produce no
// postings.
Document::UP
makeDoc10Empty(DocBuilder &b)
{
    b.startDocument("doc::10");
    return b.endDocument();
}

}

// Test fixture.  Builds a schema containing one URI index field of the given
// collection type, creates a FieldInverter per resulting index field, and
// wires a UrlFieldInverter to the per-sub-field inverters (all, scheme,
// host, port, path, query, fragment, hostname).
struct Fixture
{
    Schema _schema;
    DocBuilder _b;
    std::vector<std::unique_ptr<FieldInverter> > _inverters;   // one per index field
    std::unique_ptr<UrlFieldInverter> _urlInverter;
    test::OrderedDocumentInserter _inserter;   // captures pushed postings as text
    DocTypeBuilder::SchemaIndexFields _schemaIndexFields;

    // Creates a schema with a single URI index field named "url" of the
    // requested collection type.
    static Schema
    makeSchema(Schema::CollectionType collectionType)
    {
        Schema schema;
        schema.addUriIndexFields(Schema::IndexField("url", Schema::STRING,
                                                    collectionType));
        return schema;
    }

    Fixture(Schema::CollectionType collectionType)
        : _schema(makeSchema(collectionType)),
          _b(_schema),
          _inverters(),
          _urlInverter(),
          _inserter(),
          _schemaIndexFields()
    {
        _schemaIndexFields.setup(_schema);
        // The URI field expands to several index fields; create an inverter
        // for each of them.
        for (uint32_t fieldId = 0; fieldId < _schema.getNumIndexFields();
             ++fieldId) {
            _inverters.push_back(std::make_unique<FieldInverter>(_schema,
                                                                 fieldId));
        }
        DocTypeBuilder::UriField &urlField =
            _schemaIndexFields._uriFields.front();
        _urlInverter = std::make_unique<UrlFieldInverter>
                       (collectionType,
                        _inverters[urlField._all].get(),
                        _inverters[urlField._scheme].get(),
                        _inverters[urlField._host].get(),
                        _inverters[urlField._port].get(),
                        _inverters[urlField._path].get(),
                        _inverters[urlField._query].get(),
                        _inverters[urlField._fragment].get(),
                        _inverters[urlField._hostname].get());
    }

    // Inverts the "url" field value of doc for the given local docId.
    void
    invertDocument(uint32_t docId, const Document &doc)
    {
        _urlInverter->invertField(docId, doc.getValue(url));
    }

    // Pushes accumulated postings from every sub-field inverter into
    // _inserter, tagging each with its field id (the "f=N," markers in the
    // expected dumps below).
    void
    pushDocuments()
    {
        uint32_t fieldId = 0;
        for (auto &inverter : _inverters) {
            _inserter.setFieldId(fieldId);
            inverter->pushDocuments(_inserter);
            ++fieldId;
        }
    }

    // Makes the inverter index term annotations (e.g. "altfluke") in
    // addition to the raw tokens.
    void
    enableAnnotations()
    {
        _urlInverter->setUseAnnotations(true);
    }
};


// Expected-dump format: "f=<fieldId>," starts a field section; "w=<word>,
// a=<docId>" is one posting.  Field 7 is the hostname field, which gets
// synthetic StArThOsT/EnDhOsT boundary tokens around the host tokens.
TEST_F("requireThatSingleUrlFieldWorks", Fixture(Schema::SINGLE))
{
    f.invertDocument(10, *makeDoc10Single(f._b));
    f.pushDocuments();
    EXPECT_EQUAL("f=0,"
                 "w=2,a=10,"
                 "w=4,a=10,"
                 "w=81,a=10,"
                 "w=ab,a=10,"
                 "w=com,a=10,"
                 "w=fluke,a=10,"
                 "w=http,a=10,"
                 "w=www,a=10,"
                 "w=yahoo,a=10,"
                 "f=1,"
                 "w=http,a=10,"
                 "f=2,"
                 "w=com,a=10,"
                 "w=www,a=10,"
                 "w=yahoo,a=10,"
                 "f=3,"
                 "w=81,a=10,"
                 "f=4,"
                 "w=fluke,a=10,"
                 "f=5,"
                 "w=2,a=10,"
                 "w=ab,a=10,"
                 "f=6,"
                 "w=4,a=10,"
                 "f=7,"
                 "w=EnDhOsT,a=10,"
                 "w=StArThOsT,a=10,"
                 "w=com,a=10,"
                 "w=www,a=10,"
                 "w=yahoo,a=10",
                 f._inserter.toStr());
}


// ARRAY collection: tokens from both elements (yahoo and flickr) are merged
// per field; without annotations "altfluke" does not appear.
TEST_F("requireThatArrayUrlFieldWorks", Fixture(Schema::ARRAY))
{
    f.invertDocument(10, *makeDoc10Array(f._b));
    f.pushDocuments();
    EXPECT_EQUAL("f=0,"
                 "w=2,a=10,"
                 "w=8,a=10,"
                 "w=82,a=10,"
                 "w=9,a=10,"
                 "w=ab,a=10,"
                 "w=com,a=10,"
                 "w=flickr,a=10,"
                 "w=fluke,a=10,"
                 "w=http,a=10,"
                 "w=www,a=10,"
                 "w=yahoo,a=10,"
                 "f=1,"
                 "w=http,a=10,"
                 "f=2,"
                 "w=com,a=10,"
                 "w=flickr,a=10,"
                 "w=www,a=10,"
                 "w=yahoo,a=10,"
                 "f=3,"
                 "w=82,a=10,"
                 "f=4,"
                 "w=fluke,a=10,"
                 "f=5,"
                 "w=2,a=10,"
                 "w=ab,a=10,"
                 "f=6,"
                 "w=8,a=10,"
                 "w=9,a=10,"
                 "f=7,"
                 "w=EnDhOsT,a=10,"
                 "w=StArThOsT,a=10,"
                 "w=com,a=10,"
                 "w=flickr,a=10,"
                 "w=www,a=10,"
                 "w=yahoo,a=10",
                 f._inserter.toStr());
}

// WEIGHTEDSET collection: non-verbose dump, so element weights (4 and 7) are
// not shown here.
TEST_F("requireThatWeightedSetFieldWorks", Fixture(Schema::WEIGHTEDSET))
{
    f.invertDocument(10, *makeDoc10WeightedSet(f._b));
    f.pushDocuments();
    EXPECT_EQUAL("f=0,"
                 "w=12,a=10,"
                 "w=13,a=10,"
                 "w=2,a=10,"
                 "w=83,a=10,"
                 "w=85,a=10,"
                 "w=ab,a=10,"
                 "w=com,a=10,"
                 "w=flickr,a=10,"
                 "w=fluke,a=10,"
                 "w=http,a=10,"
                 "w=www,a=10,"
                 "w=yahoo,a=10,"
                 "f=1,"
                 "w=http,a=10,"
                 "f=2,"
                 "w=com,a=10,"
                 "w=flickr,a=10,"
                 "w=www,a=10,"
                 "w=yahoo,a=10,"
                 "f=3,"
                 "w=83,a=10,"
                 "w=85,a=10,"
                 "f=4,"
                 "w=fluke,a=10,"
                 "f=5,"
                 "w=2,a=10,"
                 "w=ab,a=10,"
                 "f=6,"
                 "w=12,a=10,"
                 "w=13,a=10,"
                 "f=7,"
                 "w=EnDhOsT,a=10,"
                 "w=StArThOsT,a=10,"
                 "w=com,a=10,"
                 "w=flickr,a=10,"
                 "w=www,a=10,"
                 "w=yahoo,a=10",
                 f._inserter.toStr());
}

// With annotations enabled, the path annotation "altfluke" is indexed as an
// extra term in field 4 alongside "fluke".
TEST_F("requireThatAnnotatedSingleUrlFieldWorks", Fixture(Schema::SINGLE))
{
    f.enableAnnotations();
    f.invertDocument(10, *makeDoc10Single(f._b));
    f.pushDocuments();
    EXPECT_EQUAL("f=0,"
                 "w=2,a=10,"
                 "w=4,a=10,"
                 "w=81,a=10,"
                 "w=ab,a=10,"
                 "w=com,a=10,"
                 "w=fluke,a=10,"
                 "w=http,a=10,"
                 "w=www,a=10,"
                 "w=yahoo,a=10,"
                 "f=1,"
                 "w=http,a=10,"
                 "f=2,"
                 "w=com,a=10,"
                 "w=www,a=10,"
                 "w=yahoo,a=10,"
                 "f=3,"
                 "w=81,a=10,"
                 "f=4,"
                 "w=altfluke,a=10,"
                 "w=fluke,a=10,"
                 "f=5,"
                 "w=2,a=10,"
                 "w=ab,a=10,"
                 "f=6,"
                 "w=4,a=10,"
                 "f=7,"
                 "w=EnDhOsT,a=10,"
                 "w=StArThOsT,a=10,"
                 "w=com,a=10,"
                 "w=www,a=10,"
                 "w=yahoo,a=10",
                 f._inserter.toStr());
}


TEST_F("requireThatAnnotatedArrayUrlFieldWorks", Fixture(Schema::ARRAY))
{
    f.enableAnnotations();
    f.invertDocument(10, *makeDoc10Array(f._b));
    f.pushDocuments();
    EXPECT_EQUAL("f=0,"
                 "w=2,a=10,"
                 "w=8,a=10,"
                 "w=82,a=10,"
                 "w=9,a=10,"
                 "w=ab,a=10,"
                 "w=com,a=10,"
                 "w=flickr,a=10,"
                 "w=fluke,a=10,"
                 "w=http,a=10,"
                 "w=www,a=10,"
                 "w=yahoo,a=10,"
                 "f=1,"
                 "w=http,a=10,"
                 "f=2,"
                 "w=com,a=10,"
                 "w=flickr,a=10,"
                 "w=www,a=10,"
                 "w=yahoo,a=10,"
                 "f=3,"
                 "w=82,a=10,"
                 "f=4,"
                 "w=altfluke,a=10,"
                 "w=fluke,a=10,"
                 "f=5,"
                 "w=2,a=10,"
                 "w=ab,a=10,"
                 "f=6,"
                 "w=8,a=10,"
                 "w=9,a=10,"
                 "f=7,"
                 "w=EnDhOsT,a=10,"
                 "w=StArThOsT,a=10,"
                 "w=com,a=10,"
                 "w=flickr,a=10,"
                 "w=www,a=10,"
                 "w=yahoo,a=10",
                 f._inserter.toStr());
}

// Verbose inserter mode: each posting also lists per-element detail
// "(e=<elementId>,w=<elementWeight>,l=...[...])".  The e/w values match the
// startElement(4)/startElement(7) weights above; l= appears to encode the
// element length with the token position in brackets — NOTE(review):
// confirm against OrderedDocumentInserter's dump format.
TEST_F("requireThatAnnotatedWeightedSetFieldWorks",
       Fixture(Schema::WEIGHTEDSET))
{
    f.enableAnnotations();
    f._inserter.setVerbose();
    f.invertDocument(10, *makeDoc10WeightedSet(f._b));
    f.pushDocuments();
    EXPECT_EQUAL("f=0,"
                 "w=12,a=10(e=0,w=4,l=9[8]),"
                 "w=13,a=10(e=1,w=7,l=9[8]),"
                 "w=2,a=10(e=0,w=4,l=9[7],e=1,w=7,l=9[7]),"
                 "w=83,a=10(e=0,w=4,l=9[4]),"
                 "w=85,a=10(e=1,w=7,l=9[4]),"
                 "w=ab,a=10(e=0,w=4,l=9[6],e=1,w=7,l=9[6]),"
                 "w=com,a=10(e=0,w=4,l=9[3],e=1,w=7,l=9[3]),"
                 "w=flickr,a=10(e=1,w=7,l=9[2]),"
                 "w=fluke,a=10(e=0,w=4,l=9[5],e=1,w=7,l=9[5]),"
                 "w=http,a=10(e=0,w=4,l=9[0],e=1,w=7,l=9[0]),"
                 "w=www,a=10(e=0,w=4,l=9[1],e=1,w=7,l=9[1]),"
                 "w=yahoo,a=10(e=0,w=4,l=9[2]),"
                 "f=1,"
                 "w=http,a=10(e=0,w=4,l=1[0],e=1,w=7,l=1[0]),"
                 "f=2,"
                 "w=com,a=10(e=0,w=4,l=3[2],e=1,w=7,l=3[2]),"
                 "w=flickr,a=10(e=1,w=7,l=3[1]),"
                 "w=www,a=10(e=0,w=4,l=3[0],e=1,w=7,l=3[0]),"
                 "w=yahoo,a=10(e=0,w=4,l=3[1]),"
                 "f=3,"
                 "w=83,a=10(e=0,w=4,l=1[0]),"
                 "w=85,a=10(e=1,w=7,l=1[0]),"
                 "f=4,"
                 "w=altfluke,a=10(e=0,w=4,l=1[0]),"
                 "w=fluke,a=10(e=0,w=4,l=1[0],e=1,w=7,l=1[0]),"
                 "f=5,"
                 "w=2,a=10(e=0,w=4,l=2[1],e=1,w=7,l=2[1]),"
                 "w=ab,a=10(e=0,w=4,l=2[0],e=1,w=7,l=2[0]),"
                 "f=6,"
                 "w=12,a=10(e=0,w=4,l=1[0]),"
                 "w=13,a=10(e=1,w=7,l=1[0]),"
                 "f=7,"
                 "w=EnDhOsT,a=10(e=0,w=4,l=5[4],e=1,w=7,l=5[4]),"
                 "w=StArThOsT,a=10(e=0,w=4,l=5[0],e=1,w=7,l=5[0]),"
                 "w=com,a=10(e=0,w=4,l=5[3],e=1,w=7,l=5[3]),"
                 "w=flickr,a=10(e=1,w=7,l=5[2]),"
                 "w=www,a=10(e=0,w=4,l=5[1],e=1,w=7,l=5[1]),"
                 "w=yahoo,a=10(e=0,w=4,l=5[2])",
                 f._inserter.toStr());
}


// A document without the url field must yield an empty dump for every
// collection type, with and without annotations.
TEST_F("requireThatEmptySingleFieldWorks", Fixture(Schema::SINGLE))
{
    f.invertDocument(10, *makeDoc10Empty(f._b));
    f.pushDocuments();
    EXPECT_EQUAL("",
                 f._inserter.toStr());
}

TEST_F("requireThatEmptyArrayFieldWorks", Fixture(Schema::ARRAY))
{
    f.invertDocument(10, *makeDoc10Empty(f._b));
    f.pushDocuments();
    EXPECT_EQUAL("",
                 f._inserter.toStr());
}

TEST_F("requireThatEmptyWeightedSetFieldWorks", Fixture(Schema::WEIGHTEDSET))
{
    f.invertDocument(10, *makeDoc10Empty(f._b));
    f.pushDocuments();
    EXPECT_EQUAL("",
                 f._inserter.toStr());
}

TEST_F("requireThatAnnotatedEmptySingleFieldWorks", Fixture(Schema::SINGLE))
{
    f.enableAnnotations();
    f.invertDocument(10, *makeDoc10Empty(f._b));
    f.pushDocuments();
    EXPECT_EQUAL("",
                 f._inserter.toStr());
}

TEST_F("requireThatAnnotatedEmptyArrayFieldWorks", Fixture(Schema::ARRAY))
{
    f.enableAnnotations();
    f.invertDocument(10, *makeDoc10Empty(f._b));
    f.pushDocuments();
    EXPECT_EQUAL("",
                 f._inserter.toStr());
}

TEST_F("requireThatAnnotatedEmptyWeightedSetFieldWorks",
       Fixture(Schema::WEIGHTEDSET))
{
    f.enableAnnotations();
    f.invertDocument(10, *makeDoc10Empty(f._b));
    f.pushDocuments();
    EXPECT_EQUAL("",
                 f._inserter.toStr());
}

} // namespace memoryindex
} // namespace search

TEST_MAIN() { TEST_RUN_ALL(); }