aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@yahoo-inc.com>2017-06-23 14:00:59 +0000
committerTor Egge <Tor.Egge@yahoo-inc.com>2017-06-23 14:00:59 +0000
commitd0d380f57e0cb0ad0b4ceba6ba3f2c5b68107a1c (patch)
tree347f6bc47ff91bb054da17cdd897a597de826660 /searchlib
parent37e072c994b3788d1fb3df92b98064508b548e20 (diff)
Try to redistribute elements with left or right sibling leaf node if current
leaf node is full. This eliminates som leaf node split operations and increases the number of entries used in each leaf node when keys are inserted in sorted order (i.e. fewer leaf nodes needed for the same number of entries).
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/btree/btree_test.cpp85
-rw-r--r--searchlib/src/tests/btree/btreeaggregation_test.cpp88
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeinserter.h3
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeinserter.hpp68
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeiterator.h4
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeiterator.hpp49
6 files changed, 265 insertions, 32 deletions
diff --git a/searchlib/src/tests/btree/btree_test.cpp b/searchlib/src/tests/btree/btree_test.cpp
index 622f380e8e3..fd93daab6c6 100644
--- a/searchlib/src/tests/btree/btree_test.cpp
+++ b/searchlib/src/tests/btree/btree_test.cpp
@@ -147,12 +147,22 @@ assertTree(const std::string &exp, const Tree &t)
template <typename Tree>
void
+populateTree(Tree &t, uint32_t count, uint32_t delta)
+{
+ uint32_t key = 1;
+ int32_t value = 101;
+ for (uint32_t i = 0; i < count; ++i) {
+ t.insert(key, value);
+ key += delta;
+ value += delta;
+ }
+}
+
+template <typename Tree>
+void
populateLeafNode(Tree &t)
{
- t.insert(1, 101);
- t.insert(3, 103);
- t.insert(5, 105);
- t.insert(7, 107);
+ populateTree(t, 4, 2);
}
@@ -319,24 +329,57 @@ Test::requireThatTreeInsertWorks()
}
{ // multi level node split
Tree t;
- for (uint32_t i = 1; i < 27; i += 2) {
- t.insert(i, i + 100);
- }
- EXPECT_TRUE(assertTree("{{5,11,17,25}} -> "
- "{{1:101,3:103,5:105},"
- "{7:107,9:109,11:111},"
- "{13:113,15:115,17:117},"
- "{19:119,21:121,23:123,25:125}}", t));
- t.insert(27, 127);
- EXPECT_TRUE(assertTree("{{17,27}} -> "
- "{{5,11,17},{23,27}} -> "
- "{{1:101,3:103,5:105},"
- "{7:107,9:109,11:111},"
- "{13:113,15:115,17:117},"
- "{19:119,21:121,23:123},"
- "{25:125,27:127}}", t));
+ populateTree(t, 16, 2);
+ EXPECT_TRUE(assertTree("{{7,15,23,31}} -> "
+ "{{1:101,3:103,5:105,7:107},"
+ "{9:109,11:111,13:113,15:115},"
+ "{17:117,19:119,21:121,23:123},"
+ "{25:125,27:127,29:129,31:131}}", t));
+ t.insert(33, 133);
+ EXPECT_TRUE(assertTree("{{23,33}} -> "
+ "{{7,15,23},{29,33}} -> "
+ "{{1:101,3:103,5:105,7:107},"
+ "{9:109,11:111,13:113,15:115},"
+ "{17:117,19:119,21:121,23:123},"
+ "{25:125,27:127,29:129},"
+ "{31:131,33:133}}", t));
+ }
+ { // give to left node to avoid split
+ Tree t;
+ populateTree(t, 8, 2);
+ t.remove(5);
+ EXPECT_TRUE(assertTree("{{7,15}} -> "
+ "{{1:101,3:103,7:107},"
+ "{9:109,11:111,13:113,15:115}}", t));
+ t.insert(10, 110);
+ EXPECT_TRUE(assertTree("{{9,15}} -> "
+ "{{1:101,3:103,7:107,9:109},"
+ "{10:110,11:111,13:113,15:115}}", t));
+ }
+ { // not give to left node to avoid split, but insert at end at left node
+ Tree t;
+ populateTree(t, 8, 2);
+ t.remove(5);
+ EXPECT_TRUE(assertTree("{{7,15}} -> "
+ "{{1:101,3:103,7:107},"
+ "{9:109,11:111,13:113,15:115}}", t));
+ t.insert(8, 108);
+ EXPECT_TRUE(assertTree("{{8,15}} -> "
+ "{{1:101,3:103,7:107,8:108},"
+ "{9:109,11:111,13:113,15:115}}", t));
+ }
+ { // give to right node to avoid split
+ Tree t;
+ populateTree(t, 8, 2);
+ t.remove(13);
+ EXPECT_TRUE(assertTree("{{7,15}} -> "
+ "{{1:101,3:103,5:105,7:107},"
+ "{9:109,11:111,15:115}}", t));
+ t.insert(4, 104);
+ EXPECT_TRUE(assertTree("{{5,15}} -> "
+ "{{1:101,3:103,4:104,5:105},"
+ "{7:107,9:109,11:111,15:115}}", t));
}
-
}
MyLeafNode::RefPair
diff --git a/searchlib/src/tests/btree/btreeaggregation_test.cpp b/searchlib/src/tests/btree/btreeaggregation_test.cpp
index e53fc04d38a..35608a11db2 100644
--- a/searchlib/src/tests/btree/btreeaggregation_test.cpp
+++ b/searchlib/src/tests/btree/btreeaggregation_test.cpp
@@ -312,6 +312,7 @@ private:
void requireThatNodeInsertWorks();
void requireThatNodeSplitInsertWorks();
+ void requireThatTreeInsertWorks();
void requireThatNodeStealWorks();
void requireThatNodeRemoveWorks();
void requireThatWeCanInsertAndRemoveFromTree();
@@ -409,13 +410,21 @@ Test::requireThatNodeInsertWorks()
}
void
-getLeafNode(MyTree &t)
+populateTree(MyTree &t, uint32_t count, uint32_t delta)
{
- t.insert(1, 101);
- t.insert(3, 103);
- t.insert(5, 105);
- t.insert(7, 107);
-// EXPECT_TRUE(assertTree("[1:101,3:103,5:105,7:107[min=101,max=107]]", t));
+ uint32_t key = 1;
+ int32_t value = 101;
+ for (uint32_t i = 0; i < count; ++i) {
+ t.insert(key, value);
+ key += delta;
+ value += delta;
+ }
+}
+
+void
+populateLeafNode(MyTree &t)
+{
+ populateTree(t, 4, 2);
}
void
@@ -423,7 +432,7 @@ Test::requireThatNodeSplitInsertWorks()
{
{ // new entry in current node
MyTree t;
- getLeafNode(t);
+ populateLeafNode(t);
t.insert(4, 104);
EXPECT_TRUE(assertTree("{{4,7[min=101,max=107]}} -> "
"{{1:101,3:103,4:104[min=101,max=104]},"
@@ -431,7 +440,7 @@ Test::requireThatNodeSplitInsertWorks()
}
{ // new entry in split node
MyTree t;
- getLeafNode(t);
+ populateLeafNode(t);
t.insert(6, 106);
EXPECT_TRUE(assertTree("{{5,7[min=101,max=107]}} -> "
"{{1:101,3:103,5:105[min=101,max=105]},"
@@ -439,7 +448,7 @@ Test::requireThatNodeSplitInsertWorks()
}
{ // new entry at end
MyTree t;
- getLeafNode(t);
+ populateLeafNode(t);
t.insert(8, 108);
EXPECT_TRUE(assertTree("{{5,8[min=101,max=108]}} -> "
"{{1:101,3:103,5:105[min=101,max=105]},"
@@ -447,6 +456,64 @@ Test::requireThatNodeSplitInsertWorks()
}
}
+void
+Test::requireThatTreeInsertWorks()
+{
+ { // multi level node split
+ MyTree t;
+ populateTree(t, 16, 2);
+ EXPECT_TRUE(assertTree("{{7,15,23,31[min=101,max=131]}} -> "
+ "{{1:101,3:103,5:105,7:107[min=101,max=107]},"
+ "{9:109,11:111,13:113,15:115[min=109,max=115]},"
+ "{17:117,19:119,21:121,23:123[min=117,max=123]},"
+ "{25:125,27:127,29:129,31:131[min=125,max=131]}}", t));
+ t.insert(33, 133);
+ EXPECT_TRUE(assertTree("{{23,33[min=101,max=133]}} -> "
+ "{{7,15,23[min=101,max=123]},{29,33[min=125,max=133]}} -> "
+ "{{1:101,3:103,5:105,7:107[min=101,max=107]},"
+ "{9:109,11:111,13:113,15:115[min=109,max=115]},"
+ "{17:117,19:119,21:121,23:123[min=117,max=123]},"
+ "{25:125,27:127,29:129[min=125,max=129]},"
+ "{31:131,33:133[min=131,max=133]}}", t));
+ }
+ { // give to left node to avoid split
+ MyTree t;
+ populateTree(t, 8, 2);
+ t.remove(5);
+ EXPECT_TRUE(assertTree("{{7,15[min=101,max=115]}} -> "
+ "{{1:101,3:103,7:107[min=101,max=107]},"
+ "{9:109,11:111,13:113,15:115[min=109,max=115]}}", t));
+ t.insert(10, 110);
+ EXPECT_TRUE(assertTree("{{9,15[min=101,max=115]}} -> "
+ "{{1:101,3:103,7:107,9:109[min=101,max=109]},"
+ "{10:110,11:111,13:113,15:115[min=110,max=115]}}", t));
+ }
+ { // not give to left node to avoid split, but insert at end at left node
+ MyTree t;
+ populateTree(t, 8, 2);
+ t.remove(5);
+ EXPECT_TRUE(assertTree("{{7,15[min=101,max=115]}} -> "
+ "{{1:101,3:103,7:107[min=101,max=107]},"
+ "{9:109,11:111,13:113,15:115[min=109,max=115]}}", t));
+ t.insert(8, 108);
+ EXPECT_TRUE(assertTree("{{8,15[min=101,max=115]}} -> "
+ "{{1:101,3:103,7:107,8:108[min=101,max=108]},"
+ "{9:109,11:111,13:113,15:115[min=109,max=115]}}", t));
+ }
+ { // give to right node to avoid split
+ MyTree t;
+ populateTree(t, 8, 2);
+ t.remove(13);
+ EXPECT_TRUE(assertTree("{{7,15[min=101,max=115]}} -> "
+ "{{1:101,3:103,5:105,7:107[min=101,max=107]},"
+ "{9:109,11:111,15:115[min=109,max=115]}}", t));
+ t.insert(4, 104);
+ EXPECT_TRUE(assertTree("{{5,15[min=101,max=115]}} -> "
+ "{{1:101,3:103,4:104,5:105[min=101,max=105]},"
+ "{7:107,9:109,11:111,15:115[min=107,max=115]}}", t));
+ }
+}
+
struct BTreeStealTraits
{
static const size_t LEAF_SLOTS = 6;
@@ -538,7 +605,7 @@ void
Test::requireThatNodeRemoveWorks()
{
MyTree t;
- getLeafNode(t);
+ populateLeafNode(t);
t.remove(3);
EXPECT_TRUE(assertTree("{{1:101,5:105,7:107[min=101,max=107]}}", t));
t.remove(1);
@@ -1037,6 +1104,7 @@ Test::Main()
requireThatNodeInsertWorks();
requireThatNodeSplitInsertWorks();
+ requireThatTreeInsertWorks();
requireThatNodeStealWorks();
requireThatNodeRemoveWorks();
requireThatWeCanInsertAndRemoveFromTree();
diff --git a/searchlib/src/vespa/searchlib/btree/btreeinserter.h b/searchlib/src/vespa/searchlib/btree/btreeinserter.h
index 4ea1b5a3650..620e64d3115 100644
--- a/searchlib/src/vespa/searchlib/btree/btreeinserter.h
+++ b/searchlib/src/vespa/searchlib/btree/btreeinserter.h
@@ -42,6 +42,9 @@ public:
typedef DataT DataType;
typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair;
typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair;
+ using Inserter = BTreeInserter<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>;
+
+ static void giveLeafEntries(LeafNodeType *sNode, Iterator &itr, AggrCalcT aggrCalc);
static void
insert(BTreeNode::Ref &root,
diff --git a/searchlib/src/vespa/searchlib/btree/btreeinserter.hpp b/searchlib/src/vespa/searchlib/btree/btreeinserter.hpp
index 97c40845e2c..5be006ea0f7 100644
--- a/searchlib/src/vespa/searchlib/btree/btreeinserter.hpp
+++ b/searchlib/src/vespa/searchlib/btree/btreeinserter.hpp
@@ -10,6 +10,68 @@
namespace search {
namespace btree {
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, class AggrCalcT>
+void
+BTreeInserter<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::giveLeafEntries(LeafNodeType *sNode, Iterator &itr, AggrCalcT aggrCalc)
+{
+ NodeAllocatorType &allocator(itr.getAllocator());
+ typename Iterator::PathElement &pe = itr.getPath(0);
+ InternalNodeType *pNode = pe.getWNode();
+ uint32_t idx = pe.getIdx();
+ BTreeNode::Ref sNodeRef = pNode->getChild(idx);
+ BTreeNode::Ref leftRef = BTreeNode::Ref();
+ LeafNodeType * leftNode = nullptr;
+ BTreeNode::Ref rightRef = BTreeNode::Ref();
+ LeafNodeType * rightNode = nullptr;
+ if (idx > 0) {
+ leftRef = pNode->getChild(idx - 1);
+ leftNode = allocator.template mapLeafRef(leftRef);
+ }
+ if (idx + 1 < pNode->validSlots()) {
+ rightRef = pNode->getChild(idx + 1);
+ rightNode = allocator.template mapLeafRef(rightRef);
+ }
+ if (leftNode != nullptr && leftNode->validSlots() < LeafNodeType::maxSlots() &&
+ (rightNode == nullptr || leftNode->validSlots() < rightNode->validSlots())) {
+ if (leftNode->getFrozen()) {
+ LeafNodeTypeRefPair thawed =
+ allocator.thawNode(leftRef, leftNode);
+ leftRef = thawed.ref;
+ leftNode = thawed.data;
+ }
+ uint32_t oldLeftValid = leftNode->validSlots();
+ if (itr.getLeafNodeIdx() == 0 && (oldLeftValid + 1 == LeafNodeType::maxSlots())) {
+ pNode->update(idx - 1, leftNode->getLastKey(), leftRef);
+ itr.adjustGivenNoEntriesToLeftLeafNode();
+ } else {
+ leftNode->stealSomeFromRightNode(sNode, allocator);
+ uint32_t given = leftNode->validSlots() - oldLeftValid;
+ pNode->update(idx, sNode->getLastKey(), sNodeRef);
+ pNode->update(idx - 1, leftNode->getLastKey(), leftRef);
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*leftNode, allocator, aggrCalc);
+ Aggregator::recalc(*sNode, allocator, aggrCalc);
+ }
+ itr.adjustGivenEntriesToLeftLeafNode(given);
+ }
+ } else if (rightNode != nullptr && rightNode->validSlots() < LeafNodeType::maxSlots()) {
+ if (rightNode->getFrozen()) {
+ LeafNodeTypeRefPair thawed =
+ allocator.thawNode(rightRef, rightNode);
+ rightRef = thawed.ref;
+ rightNode = thawed.data;
+ }
+ rightNode->stealSomeFromLeftNode(sNode, allocator);
+ pNode->update(idx, sNode->getLastKey(), sNodeRef);
+ pNode->update(idx + 1, rightNode->getLastKey(), rightRef);
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*rightNode, allocator, aggrCalc);
+ Aggregator::recalc(*sNode, allocator, aggrCalc);
+ }
+ itr.adjustGivenEntriesToRightLeafNode();
+ }
+}
template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
typename TraitsT, class AggrCalcT>
@@ -30,8 +92,12 @@ insert(BTreeNode::Ref &root,
--itr;
}
root = itr.thaw(root);
- uint32_t idx = itr.getLeafNodeIdx() + (inRange ? 0 : 1);
LeafNodeType * lnode = itr.getLeafNode();
+ if (lnode->isFull() && itr.getPathSize() > 0) {
+ giveLeafEntries(lnode, itr, aggrCalc);
+ lnode = itr.getLeafNode();
+ }
+ uint32_t idx = itr.getLeafNodeIdx() + (inRange ? 0 : 1);
BTreeNode::Ref splitNodeRef;
const KeyT *splitLastKey = nullptr;
bool inRightSplit = false;
diff --git a/searchlib/src/vespa/searchlib/btree/btreeiterator.h b/searchlib/src/vespa/searchlib/btree/btreeiterator.h
index 444a26f36d4..fcf1ee030d2 100644
--- a/searchlib/src/vespa/searchlib/btree/btreeiterator.h
+++ b/searchlib/src/vespa/searchlib/btree/btreeiterator.h
@@ -865,6 +865,10 @@ private:
_leaf.adjustSteal(stolen);
}
}
+
+ void adjustGivenNoEntriesToLeftLeafNode();
+ void adjustGivenEntriesToLeftLeafNode(uint32_t given);
+ void adjustGivenEntriesToRightLeafNode();
};
diff --git a/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp b/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp
index 8d3955778ce..fca22d3e797 100644
--- a/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp
+++ b/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp
@@ -1325,6 +1325,55 @@ removeLast(BTreeNode::Ref rootRef)
_leaf.setNode(nullptr);
}
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT, typename TraitsT>
+void
+BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::adjustGivenNoEntriesToLeftLeafNode()
+{
+ auto &pe = _path[0];
+ uint32_t pidx = pe.getIdx() - 1;
+ BTreeNode::Ref sRef = pe.getNode()->getChild(pidx);
+ const LeafNodeType *sNode = _allocator->mapLeafRef(sRef);
+ pe.setIdx(pidx);
+ _leaf.setNodeAndIdx(sNode, sNode->validSlots());
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT, typename TraitsT>
+void
+BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::adjustGivenEntriesToLeftLeafNode(uint32_t given)
+{
+ uint32_t idx = _leaf.getIdx();
+ if (idx >= given) {
+ _leaf.setIdx(idx - given);
+ } else {
+ auto &pe = _path[0];
+ uint32_t pidx = pe.getIdx() - 1;
+ BTreeNode::Ref sRef = pe.getNode()->getChild(pidx);
+ const LeafNodeType *sNode = _allocator->mapLeafRef(sRef);
+ idx += sNode->validSlots();
+ assert(given <= idx);
+ pe.setIdx(pidx);
+ _leaf.setNodeAndIdx(sNode, idx - given);
+ }
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT, typename TraitsT>
+void
+BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::adjustGivenEntriesToRightLeafNode()
+{
+ uint32_t idx = _leaf.getIdx();
+ const LeafNodeType *sNode = _leaf.getNode();
+ if (idx > sNode->validSlots()) {
+ auto &pe = _path[0];
+ const InternalNodeType *pNode = pe.getNode();
+ uint32_t pidx = pe.getIdx() + 1;
+ idx -= sNode->validSlots();
+ BTreeNode::Ref sRef = pNode->getChild(pidx);
+ sNode = _allocator->mapLeafRef(sRef);
+ assert(idx <= sNode->validSlots());
+ pe.setIdx(pidx);
+ _leaf.setNodeAndIdx(sNode, idx);
+ }
+}
} // namespace search::btree
} // namespace search