From d0d380f57e0cb0ad0b4ceba6ba3f2c5b68107a1c Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Fri, 23 Jun 2017 14:00:59 +0000 Subject: Try to redistribute elements with left or right sibling leaf node if current leaf node is full. This eliminates som leaf node split operations and increases the number of entries used in each leaf node when keys are inserted in sorted order (i.e. fewer leaf nodes needed for the same number of entries). --- searchlib/src/tests/btree/btree_test.cpp | 85 +++++++++++++++------ .../src/tests/btree/btreeaggregation_test.cpp | 88 +++++++++++++++++++--- .../src/vespa/searchlib/btree/btreeinserter.h | 3 + .../src/vespa/searchlib/btree/btreeinserter.hpp | 68 ++++++++++++++++- .../src/vespa/searchlib/btree/btreeiterator.h | 4 + .../src/vespa/searchlib/btree/btreeiterator.hpp | 49 ++++++++++++ 6 files changed, 265 insertions(+), 32 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/tests/btree/btree_test.cpp b/searchlib/src/tests/btree/btree_test.cpp index 622f380e8e3..fd93daab6c6 100644 --- a/searchlib/src/tests/btree/btree_test.cpp +++ b/searchlib/src/tests/btree/btree_test.cpp @@ -145,14 +145,24 @@ assertTree(const std::string &exp, const Tree &t) return true; } +template +void +populateTree(Tree &t, uint32_t count, uint32_t delta) +{ + uint32_t key = 1; + int32_t value = 101; + for (uint32_t i = 0; i < count; ++i) { + t.insert(key, value); + key += delta; + value += delta; + } +} + template void populateLeafNode(Tree &t) { - t.insert(1, 101); - t.insert(3, 103); - t.insert(5, 105); - t.insert(7, 107); + populateTree(t, 4, 2); } @@ -319,24 +329,57 @@ Test::requireThatTreeInsertWorks() } { // multi level node split Tree t; - for (uint32_t i = 1; i < 27; i += 2) { - t.insert(i, i + 100); - } - EXPECT_TRUE(assertTree("{{5,11,17,25}} -> " - "{{1:101,3:103,5:105}," - "{7:107,9:109,11:111}," - "{13:113,15:115,17:117}," - "{19:119,21:121,23:123,25:125}}", t)); - t.insert(27, 127); - EXPECT_TRUE(assertTree("{{17,27}} -> " - "{{5,11,17},{23,27}} -> " - "{{1:101,3:103,5:105}," - "{7:107,9:109,11:111}," - "{13:113,15:115,17:117}," - "{19:119,21:121,23:123}," - "{25:125,27:127}}", t)); + populateTree(t, 16, 2); + EXPECT_TRUE(assertTree("{{7,15,23,31}} -> " + "{{1:101,3:103,5:105,7:107}," + "{9:109,11:111,13:113,15:115}," + "{17:117,19:119,21:121,23:123}," + "{25:125,27:127,29:129,31:131}}", t)); + t.insert(33, 133); + EXPECT_TRUE(assertTree("{{23,33}} -> " + "{{7,15,23},{29,33}} -> " + "{{1:101,3:103,5:105,7:107}," + "{9:109,11:111,13:113,15:115}," + "{17:117,19:119,21:121,23:123}," + "{25:125,27:127,29:129}," + "{31:131,33:133}}", t)); + } + { // give to left node to avoid split + Tree t; + populateTree(t, 8, 2); + t.remove(5); + EXPECT_TRUE(assertTree("{{7,15}} -> " + "{{1:101,3:103,7:107}," + "{9:109,11:111,13:113,15:115}}", t)); + t.insert(10, 110); + EXPECT_TRUE(assertTree("{{9,15}} -> " + "{{1:101,3:103,7:107,9:109}," + "{10:110,11:111,13:113,15:115}}", t)); + } + { // not give to left node to avoid split, but insert at end at left node + Tree t; + populateTree(t, 8, 2); + t.remove(5); + EXPECT_TRUE(assertTree("{{7,15}} -> " + "{{1:101,3:103,7:107}," + "{9:109,11:111,13:113,15:115}}", t)); + t.insert(8, 108); + EXPECT_TRUE(assertTree("{{8,15}} -> " + "{{1:101,3:103,7:107,8:108}," + "{9:109,11:111,13:113,15:115}}", t)); + } + { // give to right node to avoid split + Tree t; + populateTree(t, 8, 2); + t.remove(13); + EXPECT_TRUE(assertTree("{{7,15}} -> " + "{{1:101,3:103,5:105,7:107}," + "{9:109,11:111,15:115}}", t)); + t.insert(4, 104); + EXPECT_TRUE(assertTree("{{5,15}} -> " + "{{1:101,3:103,4:104,5:105}," + "{7:107,9:109,11:111,15:115}}", t)); } - } MyLeafNode::RefPair diff --git a/searchlib/src/tests/btree/btreeaggregation_test.cpp b/searchlib/src/tests/btree/btreeaggregation_test.cpp index e53fc04d38a..35608a11db2 100644 --- a/searchlib/src/tests/btree/btreeaggregation_test.cpp +++ b/searchlib/src/tests/btree/btreeaggregation_test.cpp @@ -312,6 +312,7 @@ private: void requireThatNodeInsertWorks(); void requireThatNodeSplitInsertWorks(); + void requireThatTreeInsertWorks(); void requireThatNodeStealWorks(); void requireThatNodeRemoveWorks(); void requireThatWeCanInsertAndRemoveFromTree(); @@ -409,13 +410,21 @@ Test::requireThatNodeInsertWorks() } void -getLeafNode(MyTree &t) +populateTree(MyTree &t, uint32_t count, uint32_t delta) { - t.insert(1, 101); - t.insert(3, 103); - t.insert(5, 105); - t.insert(7, 107); -// EXPECT_TRUE(assertTree("[1:101,3:103,5:105,7:107[min=101,max=107]]", t)); + uint32_t key = 1; + int32_t value = 101; + for (uint32_t i = 0; i < count; ++i) { + t.insert(key, value); + key += delta; + value += delta; + } +} + +void +populateLeafNode(MyTree &t) +{ + populateTree(t, 4, 2); } void @@ -423,7 +432,7 @@ Test::requireThatNodeSplitInsertWorks() { { // new entry in current node MyTree t; - getLeafNode(t); + populateLeafNode(t); t.insert(4, 104); EXPECT_TRUE(assertTree("{{4,7[min=101,max=107]}} -> " "{{1:101,3:103,4:104[min=101,max=104]}," @@ -431,7 +440,7 @@ Test::requireThatNodeSplitInsertWorks() } { // new entry in split node MyTree t; - getLeafNode(t); + populateLeafNode(t); t.insert(6, 106); EXPECT_TRUE(assertTree("{{5,7[min=101,max=107]}} -> " "{{1:101,3:103,5:105[min=101,max=105]}," @@ -439,7 +448,7 @@ Test::requireThatNodeSplitInsertWorks() } { // new entry at end MyTree t; - getLeafNode(t); + populateLeafNode(t); t.insert(8, 108); EXPECT_TRUE(assertTree("{{5,8[min=101,max=108]}} -> " "{{1:101,3:103,5:105[min=101,max=105]}," @@ -447,6 +456,64 @@ Test::requireThatNodeSplitInsertWorks() } } +void +Test::requireThatTreeInsertWorks() +{ + { // multi level node split + MyTree t; + populateTree(t, 16, 2); + EXPECT_TRUE(assertTree("{{7,15,23,31[min=101,max=131]}} -> " + "{{1:101,3:103,5:105,7:107[min=101,max=107]}," + "{9:109,11:111,13:113,15:115[min=109,max=115]}," + "{17:117,19:119,21:121,23:123[min=117,max=123]}," + "{25:125,27:127,29:129,31:131[min=125,max=131]}}", t)); + t.insert(33, 133); + EXPECT_TRUE(assertTree("{{23,33[min=101,max=133]}} -> " + "{{7,15,23[min=101,max=123]},{29,33[min=125,max=133]}} -> " + "{{1:101,3:103,5:105,7:107[min=101,max=107]}," + "{9:109,11:111,13:113,15:115[min=109,max=115]}," + "{17:117,19:119,21:121,23:123[min=117,max=123]}," + "{25:125,27:127,29:129[min=125,max=129]}," + "{31:131,33:133[min=131,max=133]}}", t)); + } + { // give to left node to avoid split + MyTree t; + populateTree(t, 8, 2); + t.remove(5); + EXPECT_TRUE(assertTree("{{7,15[min=101,max=115]}} -> " + "{{1:101,3:103,7:107[min=101,max=107]}," + "{9:109,11:111,13:113,15:115[min=109,max=115]}}", t)); + t.insert(10, 110); + EXPECT_TRUE(assertTree("{{9,15[min=101,max=115]}} -> " + "{{1:101,3:103,7:107,9:109[min=101,max=109]}," + "{10:110,11:111,13:113,15:115[min=110,max=115]}}", t)); + } + { // not give to left node to avoid split, but insert at end at left node + MyTree t; + populateTree(t, 8, 2); + t.remove(5); + EXPECT_TRUE(assertTree("{{7,15[min=101,max=115]}} -> " + "{{1:101,3:103,7:107[min=101,max=107]}," + "{9:109,11:111,13:113,15:115[min=109,max=115]}}", t)); + t.insert(8, 108); + EXPECT_TRUE(assertTree("{{8,15[min=101,max=115]}} -> " + "{{1:101,3:103,7:107,8:108[min=101,max=108]}," + "{9:109,11:111,13:113,15:115[min=109,max=115]}}", t)); + } + { // give to right node to avoid split + MyTree t; + populateTree(t, 8, 2); + t.remove(13); + EXPECT_TRUE(assertTree("{{7,15[min=101,max=115]}} -> " + "{{1:101,3:103,5:105,7:107[min=101,max=107]}," + "{9:109,11:111,15:115[min=109,max=115]}}", t)); + t.insert(4, 104); + EXPECT_TRUE(assertTree("{{5,15[min=101,max=115]}} -> " + "{{1:101,3:103,4:104,5:105[min=101,max=105]}," + "{7:107,9:109,11:111,15:115[min=107,max=115]}}", t)); + } +} + struct BTreeStealTraits { static const size_t LEAF_SLOTS = 6; @@ -538,7 +605,7 @@ void Test::requireThatNodeRemoveWorks() { MyTree t; - getLeafNode(t); + populateLeafNode(t); t.remove(3); EXPECT_TRUE(assertTree("{{1:101,5:105,7:107[min=101,max=107]}}", t)); t.remove(1); @@ -1037,6 +1104,7 @@ Test::Main() requireThatNodeInsertWorks(); requireThatNodeSplitInsertWorks(); + requireThatTreeInsertWorks(); requireThatNodeStealWorks(); requireThatNodeRemoveWorks(); requireThatWeCanInsertAndRemoveFromTree(); diff --git a/searchlib/src/vespa/searchlib/btree/btreeinserter.h b/searchlib/src/vespa/searchlib/btree/btreeinserter.h index 4ea1b5a3650..620e64d3115 100644 --- a/searchlib/src/vespa/searchlib/btree/btreeinserter.h +++ b/searchlib/src/vespa/searchlib/btree/btreeinserter.h @@ -42,6 +42,9 @@ public: typedef DataT DataType; typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair; typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair; + using Inserter = BTreeInserter; + + static void giveLeafEntries(LeafNodeType *sNode, Iterator &itr, AggrCalcT aggrCalc); static void insert(BTreeNode::Ref &root, diff --git a/searchlib/src/vespa/searchlib/btree/btreeinserter.hpp b/searchlib/src/vespa/searchlib/btree/btreeinserter.hpp index 97c40845e2c..5be006ea0f7 100644 --- a/searchlib/src/vespa/searchlib/btree/btreeinserter.hpp +++ b/searchlib/src/vespa/searchlib/btree/btreeinserter.hpp @@ -10,6 +10,68 @@ namespace search { namespace btree { +template +void +BTreeInserter::giveLeafEntries(LeafNodeType *sNode, Iterator &itr, AggrCalcT aggrCalc) +{ + NodeAllocatorType &allocator(itr.getAllocator()); + typename Iterator::PathElement &pe = itr.getPath(0); + InternalNodeType *pNode = pe.getWNode(); + uint32_t idx = pe.getIdx(); + BTreeNode::Ref sNodeRef = pNode->getChild(idx); + BTreeNode::Ref leftRef = BTreeNode::Ref(); + LeafNodeType * leftNode = nullptr; + BTreeNode::Ref rightRef = BTreeNode::Ref(); + LeafNodeType * rightNode = nullptr; + if (idx > 0) { + leftRef = pNode->getChild(idx - 1); + leftNode = allocator.template mapLeafRef(leftRef); + } + if (idx + 1 < pNode->validSlots()) { + rightRef = pNode->getChild(idx + 1); + rightNode = allocator.template mapLeafRef(rightRef); + } + if (leftNode != nullptr && leftNode->validSlots() < LeafNodeType::maxSlots() && + (rightNode == nullptr || leftNode->validSlots() < rightNode->validSlots())) { + if (leftNode->getFrozen()) { + LeafNodeTypeRefPair thawed = + allocator.thawNode(leftRef, leftNode); + leftRef = thawed.ref; + leftNode = thawed.data; + } + uint32_t oldLeftValid = leftNode->validSlots(); + if (itr.getLeafNodeIdx() == 0 && (oldLeftValid + 1 == LeafNodeType::maxSlots())) { + pNode->update(idx - 1, leftNode->getLastKey(), leftRef); + itr.adjustGivenNoEntriesToLeftLeafNode(); + } else { + leftNode->stealSomeFromRightNode(sNode, allocator); + uint32_t given = leftNode->validSlots() - oldLeftValid; + pNode->update(idx, sNode->getLastKey(), sNodeRef); + pNode->update(idx - 1, leftNode->getLastKey(), leftRef); + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*leftNode, allocator, aggrCalc); + Aggregator::recalc(*sNode, allocator, aggrCalc); + } + itr.adjustGivenEntriesToLeftLeafNode(given); + } + } else if (rightNode != nullptr && rightNode->validSlots() < LeafNodeType::maxSlots()) { + if (rightNode->getFrozen()) { + LeafNodeTypeRefPair thawed = + allocator.thawNode(rightRef, rightNode); + rightRef = thawed.ref; + rightNode = thawed.data; + } + rightNode->stealSomeFromLeftNode(sNode, allocator); + pNode->update(idx, sNode->getLastKey(), sNodeRef); + pNode->update(idx + 1, rightNode->getLastKey(), rightRef); + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*rightNode, allocator, aggrCalc); + Aggregator::recalc(*sNode, allocator, aggrCalc); + } + itr.adjustGivenEntriesToRightLeafNode(); + } +} template @@ -30,8 +92,12 @@ insert(BTreeNode::Ref &root, --itr; } root = itr.thaw(root); - uint32_t idx = itr.getLeafNodeIdx() + (inRange ? 0 : 1); LeafNodeType * lnode = itr.getLeafNode(); + if (lnode->isFull() && itr.getPathSize() > 0) { + giveLeafEntries(lnode, itr, aggrCalc); + lnode = itr.getLeafNode(); + } + uint32_t idx = itr.getLeafNodeIdx() + (inRange ? 0 : 1); BTreeNode::Ref splitNodeRef; const KeyT *splitLastKey = nullptr; bool inRightSplit = false; diff --git a/searchlib/src/vespa/searchlib/btree/btreeiterator.h b/searchlib/src/vespa/searchlib/btree/btreeiterator.h index 444a26f36d4..fcf1ee030d2 100644 --- a/searchlib/src/vespa/searchlib/btree/btreeiterator.h +++ b/searchlib/src/vespa/searchlib/btree/btreeiterator.h @@ -865,6 +865,10 @@ private: _leaf.adjustSteal(stolen); } } + + void adjustGivenNoEntriesToLeftLeafNode(); + void adjustGivenEntriesToLeftLeafNode(uint32_t given); + void adjustGivenEntriesToRightLeafNode(); }; diff --git a/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp b/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp index 8d3955778ce..fca22d3e797 100644 --- a/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp +++ b/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp @@ -1325,6 +1325,55 @@ removeLast(BTreeNode::Ref rootRef) _leaf.setNode(nullptr); } +template +void +BTreeIterator::adjustGivenNoEntriesToLeftLeafNode() +{ + auto &pe = _path[0]; + uint32_t pidx = pe.getIdx() - 1; + BTreeNode::Ref sRef = pe.getNode()->getChild(pidx); + const LeafNodeType *sNode = _allocator->mapLeafRef(sRef); + pe.setIdx(pidx); + _leaf.setNodeAndIdx(sNode, sNode->validSlots()); +} + +template +void +BTreeIterator::adjustGivenEntriesToLeftLeafNode(uint32_t given) +{ + uint32_t idx = _leaf.getIdx(); + if (idx >= given) { + _leaf.setIdx(idx - given); + } else { + auto &pe = _path[0]; + uint32_t pidx = pe.getIdx() - 1; + BTreeNode::Ref sRef = pe.getNode()->getChild(pidx); + const LeafNodeType *sNode = _allocator->mapLeafRef(sRef); + idx += sNode->validSlots(); + assert(given <= idx); + pe.setIdx(pidx); + _leaf.setNodeAndIdx(sNode, idx - given); + } +} + +template +void +BTreeIterator::adjustGivenEntriesToRightLeafNode() +{ + uint32_t idx = _leaf.getIdx(); + const LeafNodeType *sNode = _leaf.getNode(); + if (idx > sNode->validSlots()) { + auto &pe = _path[0]; + const InternalNodeType *pNode = pe.getNode(); + uint32_t pidx = pe.getIdx() + 1; + idx -= sNode->validSlots(); + BTreeNode::Ref sRef = pNode->getChild(pidx); + sNode = _allocator->mapLeafRef(sRef); + assert(idx <= sNode->validSlots()); + pe.setIdx(pidx); + _leaf.setNodeAndIdx(sNode, idx); + } +} } // namespace search::btree } // namespace search -- cgit v1.2.3