diff options
author | Tor Egge <Tor.Egge@yahoo-inc.com> | 2017-06-23 14:00:59 +0000 |
---|---|---|
committer | Tor Egge <Tor.Egge@yahoo-inc.com> | 2017-06-23 14:00:59 +0000 |
commit | d0d380f57e0cb0ad0b4ceba6ba3f2c5b68107a1c (patch) | |
tree | 347f6bc47ff91bb054da17cdd897a597de826660 /searchlib | |
parent | 37e072c994b3788d1fb3df92b98064508b548e20 (diff) |
Try to redistribute elements with left or right sibling leaf node if current
leaf node is full. This eliminates som leaf node split operations and
increases the number of entries used in each leaf node when keys are inserted
in sorted order (i.e. fewer leaf nodes needed for the same number of entries).
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/tests/btree/btree_test.cpp | 85 | ||||
-rw-r--r-- | searchlib/src/tests/btree/btreeaggregation_test.cpp | 88 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/btree/btreeinserter.h | 3 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/btree/btreeinserter.hpp | 68 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/btree/btreeiterator.h | 4 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/btree/btreeiterator.hpp | 49 |
6 files changed, 265 insertions, 32 deletions
diff --git a/searchlib/src/tests/btree/btree_test.cpp b/searchlib/src/tests/btree/btree_test.cpp index 622f380e8e3..fd93daab6c6 100644 --- a/searchlib/src/tests/btree/btree_test.cpp +++ b/searchlib/src/tests/btree/btree_test.cpp @@ -147,12 +147,22 @@ assertTree(const std::string &exp, const Tree &t) template <typename Tree> void +populateTree(Tree &t, uint32_t count, uint32_t delta) +{ + uint32_t key = 1; + int32_t value = 101; + for (uint32_t i = 0; i < count; ++i) { + t.insert(key, value); + key += delta; + value += delta; + } +} + +template <typename Tree> +void populateLeafNode(Tree &t) { - t.insert(1, 101); - t.insert(3, 103); - t.insert(5, 105); - t.insert(7, 107); + populateTree(t, 4, 2); } @@ -319,24 +329,57 @@ Test::requireThatTreeInsertWorks() } { // multi level node split Tree t; - for (uint32_t i = 1; i < 27; i += 2) { - t.insert(i, i + 100); - } - EXPECT_TRUE(assertTree("{{5,11,17,25}} -> " - "{{1:101,3:103,5:105}," - "{7:107,9:109,11:111}," - "{13:113,15:115,17:117}," - "{19:119,21:121,23:123,25:125}}", t)); - t.insert(27, 127); - EXPECT_TRUE(assertTree("{{17,27}} -> " - "{{5,11,17},{23,27}} -> " - "{{1:101,3:103,5:105}," - "{7:107,9:109,11:111}," - "{13:113,15:115,17:117}," - "{19:119,21:121,23:123}," - "{25:125,27:127}}", t)); + populateTree(t, 16, 2); + EXPECT_TRUE(assertTree("{{7,15,23,31}} -> " + "{{1:101,3:103,5:105,7:107}," + "{9:109,11:111,13:113,15:115}," + "{17:117,19:119,21:121,23:123}," + "{25:125,27:127,29:129,31:131}}", t)); + t.insert(33, 133); + EXPECT_TRUE(assertTree("{{23,33}} -> " + "{{7,15,23},{29,33}} -> " + "{{1:101,3:103,5:105,7:107}," + "{9:109,11:111,13:113,15:115}," + "{17:117,19:119,21:121,23:123}," + "{25:125,27:127,29:129}," + "{31:131,33:133}}", t)); + } + { // give to left node to avoid split + Tree t; + populateTree(t, 8, 2); + t.remove(5); + EXPECT_TRUE(assertTree("{{7,15}} -> " + "{{1:101,3:103,7:107}," + "{9:109,11:111,13:113,15:115}}", t)); + t.insert(10, 110); + EXPECT_TRUE(assertTree("{{9,15}} -> " + "{{1:101,3:103,7:107,9:109}," + "{10:110,11:111,13:113,15:115}}", t)); + } + { // not give to left node to avoid split, but insert at end at left node + Tree t; + populateTree(t, 8, 2); + t.remove(5); + EXPECT_TRUE(assertTree("{{7,15}} -> " + "{{1:101,3:103,7:107}," + "{9:109,11:111,13:113,15:115}}", t)); + t.insert(8, 108); + EXPECT_TRUE(assertTree("{{8,15}} -> " + "{{1:101,3:103,7:107,8:108}," + "{9:109,11:111,13:113,15:115}}", t)); + } + { // give to right node to avoid split + Tree t; + populateTree(t, 8, 2); + t.remove(13); + EXPECT_TRUE(assertTree("{{7,15}} -> " + "{{1:101,3:103,5:105,7:107}," + "{9:109,11:111,15:115}}", t)); + t.insert(4, 104); + EXPECT_TRUE(assertTree("{{5,15}} -> " + "{{1:101,3:103,4:104,5:105}," + "{7:107,9:109,11:111,15:115}}", t)); } - } MyLeafNode::RefPair diff --git a/searchlib/src/tests/btree/btreeaggregation_test.cpp b/searchlib/src/tests/btree/btreeaggregation_test.cpp index e53fc04d38a..35608a11db2 100644 --- a/searchlib/src/tests/btree/btreeaggregation_test.cpp +++ b/searchlib/src/tests/btree/btreeaggregation_test.cpp @@ -312,6 +312,7 @@ private: void requireThatNodeInsertWorks(); void requireThatNodeSplitInsertWorks(); + void requireThatTreeInsertWorks(); void requireThatNodeStealWorks(); void requireThatNodeRemoveWorks(); void requireThatWeCanInsertAndRemoveFromTree(); @@ -409,13 +410,21 @@ Test::requireThatNodeInsertWorks() } void -getLeafNode(MyTree &t) +populateTree(MyTree &t, uint32_t count, uint32_t delta) { - t.insert(1, 101); - t.insert(3, 103); - t.insert(5, 105); - t.insert(7, 107); -// EXPECT_TRUE(assertTree("[1:101,3:103,5:105,7:107[min=101,max=107]]", t)); + uint32_t key = 1; + int32_t value = 101; + for (uint32_t i = 0; i < count; ++i) { + t.insert(key, value); + key += delta; + value += delta; + } +} + +void +populateLeafNode(MyTree &t) +{ + populateTree(t, 4, 2); } void @@ -423,7 +432,7 @@ Test::requireThatNodeSplitInsertWorks() { { // new entry in current node MyTree t; - getLeafNode(t); + populateLeafNode(t); t.insert(4, 104); EXPECT_TRUE(assertTree("{{4,7[min=101,max=107]}} -> " "{{1:101,3:103,4:104[min=101,max=104]}," @@ -431,7 +440,7 @@ Test::requireThatNodeSplitInsertWorks() } { // new entry in split node MyTree t; - getLeafNode(t); + populateLeafNode(t); t.insert(6, 106); EXPECT_TRUE(assertTree("{{5,7[min=101,max=107]}} -> " "{{1:101,3:103,5:105[min=101,max=105]}," @@ -439,7 +448,7 @@ Test::requireThatNodeSplitInsertWorks() } { // new entry at end MyTree t; - getLeafNode(t); + populateLeafNode(t); t.insert(8, 108); EXPECT_TRUE(assertTree("{{5,8[min=101,max=108]}} -> " "{{1:101,3:103,5:105[min=101,max=105]}," @@ -447,6 +456,64 @@ Test::requireThatNodeSplitInsertWorks() } } +void +Test::requireThatTreeInsertWorks() +{ + { // multi level node split + MyTree t; + populateTree(t, 16, 2); + EXPECT_TRUE(assertTree("{{7,15,23,31[min=101,max=131]}} -> " + "{{1:101,3:103,5:105,7:107[min=101,max=107]}," + "{9:109,11:111,13:113,15:115[min=109,max=115]}," + "{17:117,19:119,21:121,23:123[min=117,max=123]}," + "{25:125,27:127,29:129,31:131[min=125,max=131]}}", t)); + t.insert(33, 133); + EXPECT_TRUE(assertTree("{{23,33[min=101,max=133]}} -> " + "{{7,15,23[min=101,max=123]},{29,33[min=125,max=133]}} -> " + "{{1:101,3:103,5:105,7:107[min=101,max=107]}," + "{9:109,11:111,13:113,15:115[min=109,max=115]}," + "{17:117,19:119,21:121,23:123[min=117,max=123]}," + "{25:125,27:127,29:129[min=125,max=129]}," + "{31:131,33:133[min=131,max=133]}}", t)); + } + { // give to left node to avoid split + MyTree t; + populateTree(t, 8, 2); + t.remove(5); + EXPECT_TRUE(assertTree("{{7,15[min=101,max=115]}} -> " + "{{1:101,3:103,7:107[min=101,max=107]}," + "{9:109,11:111,13:113,15:115[min=109,max=115]}}", t)); + t.insert(10, 110); + EXPECT_TRUE(assertTree("{{9,15[min=101,max=115]}} -> " + "{{1:101,3:103,7:107,9:109[min=101,max=109]}," + "{10:110,11:111,13:113,15:115[min=110,max=115]}}", t)); + } + { // not give to left node to avoid split, but insert at end at left node + MyTree t; + populateTree(t, 8, 2); + t.remove(5); + EXPECT_TRUE(assertTree("{{7,15[min=101,max=115]}} -> " + "{{1:101,3:103,7:107[min=101,max=107]}," + "{9:109,11:111,13:113,15:115[min=109,max=115]}}", t)); + t.insert(8, 108); + EXPECT_TRUE(assertTree("{{8,15[min=101,max=115]}} -> " + "{{1:101,3:103,7:107,8:108[min=101,max=108]}," + "{9:109,11:111,13:113,15:115[min=109,max=115]}}", t)); + } + { // give to right node to avoid split + MyTree t; + populateTree(t, 8, 2); + t.remove(13); + EXPECT_TRUE(assertTree("{{7,15[min=101,max=115]}} -> " + "{{1:101,3:103,5:105,7:107[min=101,max=107]}," + "{9:109,11:111,15:115[min=109,max=115]}}", t)); + t.insert(4, 104); + EXPECT_TRUE(assertTree("{{5,15[min=101,max=115]}} -> " + "{{1:101,3:103,4:104,5:105[min=101,max=105]}," + "{7:107,9:109,11:111,15:115[min=107,max=115]}}", t)); + } +} + struct BTreeStealTraits { static const size_t LEAF_SLOTS = 6; @@ -538,7 +605,7 @@ void Test::requireThatNodeRemoveWorks() { MyTree t; - getLeafNode(t); + populateLeafNode(t); t.remove(3); EXPECT_TRUE(assertTree("{{1:101,5:105,7:107[min=101,max=107]}}", t)); t.remove(1); @@ -1037,6 +1104,7 @@ Test::Main() requireThatNodeInsertWorks(); requireThatNodeSplitInsertWorks(); + requireThatTreeInsertWorks(); requireThatNodeStealWorks(); requireThatNodeRemoveWorks(); requireThatWeCanInsertAndRemoveFromTree(); diff --git a/searchlib/src/vespa/searchlib/btree/btreeinserter.h b/searchlib/src/vespa/searchlib/btree/btreeinserter.h index 4ea1b5a3650..620e64d3115 100644 --- a/searchlib/src/vespa/searchlib/btree/btreeinserter.h +++ b/searchlib/src/vespa/searchlib/btree/btreeinserter.h @@ -42,6 +42,9 @@ public: typedef DataT DataType; typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair; typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair; + using Inserter = BTreeInserter<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>; + + static void giveLeafEntries(LeafNodeType *sNode, Iterator &itr, AggrCalcT aggrCalc); static void insert(BTreeNode::Ref &root, diff --git a/searchlib/src/vespa/searchlib/btree/btreeinserter.hpp b/searchlib/src/vespa/searchlib/btree/btreeinserter.hpp index 97c40845e2c..5be006ea0f7 100644 --- a/searchlib/src/vespa/searchlib/btree/btreeinserter.hpp +++ b/searchlib/src/vespa/searchlib/btree/btreeinserter.hpp @@ -10,6 +10,68 @@ namespace search { namespace btree { +template <typename KeyT, typename DataT, typename AggrT, typename CompareT, + typename TraitsT, class AggrCalcT> +void +BTreeInserter<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::giveLeafEntries(LeafNodeType *sNode, Iterator &itr, AggrCalcT aggrCalc) +{ + NodeAllocatorType &allocator(itr.getAllocator()); + typename Iterator::PathElement &pe = itr.getPath(0); + InternalNodeType *pNode = pe.getWNode(); + uint32_t idx = pe.getIdx(); + BTreeNode::Ref sNodeRef = pNode->getChild(idx); + BTreeNode::Ref leftRef = BTreeNode::Ref(); + LeafNodeType * leftNode = nullptr; + BTreeNode::Ref rightRef = BTreeNode::Ref(); + LeafNodeType * rightNode = nullptr; + if (idx > 0) { + leftRef = pNode->getChild(idx - 1); + leftNode = allocator.template mapLeafRef(leftRef); + } + if (idx + 1 < pNode->validSlots()) { + rightRef = pNode->getChild(idx + 1); + rightNode = allocator.template mapLeafRef(rightRef); + } + if (leftNode != nullptr && leftNode->validSlots() < LeafNodeType::maxSlots() && + (rightNode == nullptr || leftNode->validSlots() < rightNode->validSlots())) { + if (leftNode->getFrozen()) { + LeafNodeTypeRefPair thawed = + allocator.thawNode(leftRef, leftNode); + leftRef = thawed.ref; + leftNode = thawed.data; + } + uint32_t oldLeftValid = leftNode->validSlots(); + if (itr.getLeafNodeIdx() == 0 && (oldLeftValid + 1 == LeafNodeType::maxSlots())) { + pNode->update(idx - 1, leftNode->getLastKey(), leftRef); + itr.adjustGivenNoEntriesToLeftLeafNode(); + } else { + leftNode->stealSomeFromRightNode(sNode, allocator); + uint32_t given = leftNode->validSlots() - oldLeftValid; + pNode->update(idx, sNode->getLastKey(), sNodeRef); + pNode->update(idx - 1, leftNode->getLastKey(), leftRef); + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*leftNode, allocator, aggrCalc); + Aggregator::recalc(*sNode, allocator, aggrCalc); + } + itr.adjustGivenEntriesToLeftLeafNode(given); + } + } else if (rightNode != nullptr && rightNode->validSlots() < LeafNodeType::maxSlots()) { + if (rightNode->getFrozen()) { + LeafNodeTypeRefPair thawed = + allocator.thawNode(rightRef, rightNode); + rightRef = thawed.ref; + rightNode = thawed.data; + } + rightNode->stealSomeFromLeftNode(sNode, allocator); + pNode->update(idx, sNode->getLastKey(), sNodeRef); + pNode->update(idx + 1, rightNode->getLastKey(), rightRef); + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*rightNode, allocator, aggrCalc); + Aggregator::recalc(*sNode, allocator, aggrCalc); + } + itr.adjustGivenEntriesToRightLeafNode(); + } +} template <typename KeyT, typename DataT, typename AggrT, typename CompareT, typename TraitsT, class AggrCalcT> @@ -30,8 +92,12 @@ insert(BTreeNode::Ref &root, --itr; } root = itr.thaw(root); - uint32_t idx = itr.getLeafNodeIdx() + (inRange ? 0 : 1); LeafNodeType * lnode = itr.getLeafNode(); + if (lnode->isFull() && itr.getPathSize() > 0) { + giveLeafEntries(lnode, itr, aggrCalc); + lnode = itr.getLeafNode(); + } + uint32_t idx = itr.getLeafNodeIdx() + (inRange ? 0 : 1); BTreeNode::Ref splitNodeRef; const KeyT *splitLastKey = nullptr; bool inRightSplit = false; diff --git a/searchlib/src/vespa/searchlib/btree/btreeiterator.h b/searchlib/src/vespa/searchlib/btree/btreeiterator.h index 444a26f36d4..fcf1ee030d2 100644 --- a/searchlib/src/vespa/searchlib/btree/btreeiterator.h +++ b/searchlib/src/vespa/searchlib/btree/btreeiterator.h @@ -865,6 +865,10 @@ private: _leaf.adjustSteal(stolen); } } + + void adjustGivenNoEntriesToLeftLeafNode(); + void adjustGivenEntriesToLeftLeafNode(uint32_t given); + void adjustGivenEntriesToRightLeafNode(); }; diff --git a/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp b/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp index 8d3955778ce..fca22d3e797 100644 --- a/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp +++ b/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp @@ -1325,6 +1325,55 @@ removeLast(BTreeNode::Ref rootRef) _leaf.setNode(nullptr); } +template <typename KeyT, typename DataT, typename AggrT, typename CompareT, typename TraitsT> +void +BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::adjustGivenNoEntriesToLeftLeafNode() +{ + auto &pe = _path[0]; + uint32_t pidx = pe.getIdx() - 1; + BTreeNode::Ref sRef = pe.getNode()->getChild(pidx); + const LeafNodeType *sNode = _allocator->mapLeafRef(sRef); + pe.setIdx(pidx); + _leaf.setNodeAndIdx(sNode, sNode->validSlots()); +} + +template <typename KeyT, typename DataT, typename AggrT, typename CompareT, typename TraitsT> +void +BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::adjustGivenEntriesToLeftLeafNode(uint32_t given) +{ + uint32_t idx = _leaf.getIdx(); + if (idx >= given) { + _leaf.setIdx(idx - given); + } else { + auto &pe = _path[0]; + uint32_t pidx = pe.getIdx() - 1; + BTreeNode::Ref sRef = pe.getNode()->getChild(pidx); + const LeafNodeType *sNode = _allocator->mapLeafRef(sRef); + idx += sNode->validSlots(); + assert(given <= idx); + pe.setIdx(pidx); + _leaf.setNodeAndIdx(sNode, idx - given); + } +} + +template <typename KeyT, typename DataT, typename AggrT, typename CompareT, typename TraitsT> +void +BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::adjustGivenEntriesToRightLeafNode() +{ + uint32_t idx = _leaf.getIdx(); + const LeafNodeType *sNode = _leaf.getNode(); + if (idx > sNode->validSlots()) { + auto &pe = _path[0]; + const InternalNodeType *pNode = pe.getNode(); + uint32_t pidx = pe.getIdx() + 1; + idx -= sNode->validSlots(); + BTreeNode::Ref sRef = pNode->getChild(pidx); + sNode = _allocator->mapLeafRef(sRef); + assert(idx <= sNode->validSlots()); + pe.setIdx(pidx); + _leaf.setNodeAndIdx(sNode, idx); + } +} } // namespace search::btree } // namespace search |