aboutsummaryrefslogtreecommitdiffstats
path: root/vespalib
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@verizonmedia.com>2020-04-23 12:35:58 +0000
committerTor Brede Vekterli <vekterli@verizonmedia.com>2020-04-23 12:35:58 +0000
commitd389b0e2519a139dc9103a22702be175698262d8 (patch)
treeddfcebc115042310a4fb33b205494d550128fb55 /vespalib
parent037816b9203d7e0ba271dfc7ceace1f7af7633b6 (diff)
Add support for aggregating over B-tree keys
This complements the existing support for aggregating over values. Let aggregate calculator specify whether it expects to be invoked with keys or values. In the current implementation there is some code duplication that could have been removed by using e.g. tag dispatch, but this is a pragmatic choice intended to guarantee these changes do not introduce any performance regressions for existing code using value aggregation. Can be refactored later once we have a functional baseline with this for both keys and values.
Diffstat (limited to 'vespalib')
-rw-r--r--vespalib/src/tests/btree/btreeaggregation_test.cpp82
-rw-r--r--vespalib/src/vespa/vespalib/btree/btreeaggregator.hpp6
-rw-r--r--vespalib/src/vespa/vespalib/btree/btreeinserter.hpp6
-rw-r--r--vespalib/src/vespa/vespalib/btree/btreeiterator.hpp16
-rw-r--r--vespalib/src/vespa/vespalib/btree/btreeremover.hpp14
-rw-r--r--vespalib/src/vespa/vespalib/btree/btreestore.hpp6
-rw-r--r--vespalib/src/vespa/vespalib/btree/minmaxaggrcalc.h1
-rw-r--r--vespalib/src/vespa/vespalib/btree/noaggrcalc.h2
8 files changed, 109 insertions, 24 deletions
diff --git a/vespalib/src/tests/btree/btreeaggregation_test.cpp b/vespalib/src/tests/btree/btreeaggregation_test.cpp
index 0ce3d2c7d04..92d8955ab93 100644
--- a/vespalib/src/tests/btree/btreeaggregation_test.cpp
+++ b/vespalib/src/tests/btree/btreeaggregation_test.cpp
@@ -1,7 +1,5 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/log/log.h>
-LOG_SETUP("btreeaggregation_test");
-#include <vespa/vespalib/testkit/testapp.h>
+
#include <vespa/vespalib/btree/btreeroot.h>
#include <vespa/vespalib/btree/btreebuilder.h>
#include <vespa/vespalib/btree/btreenodeallocator.h>
@@ -17,6 +15,7 @@ LOG_SETUP("btreeaggregation_test");
#include <vespa/vespalib/btree/btreestore.hpp>
#include <vespa/vespalib/btree/btreeaggregator.hpp>
#include <vespa/vespalib/test/btree/btree_printer.h>
+#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/vespalib/util/rand48.h>
#include <iostream>
@@ -24,11 +23,13 @@ LOG_SETUP("btreeaggregation_test");
#include <set>
#include <string>
+#include <vespa/log/log.h>
+LOG_SETUP("btreeaggregation_test");
+
using vespalib::GenerationHandler;
using search::datastore::EntryRef;
-namespace search {
-namespace btree {
+namespace search::btree {
namespace {
@@ -99,6 +100,11 @@ typedef std::less<int> MyComp;
#define UNWRAP(key) (key)
#endif
+struct KeyMinMaxAggrCalc : MinMaxAggrCalc {
+ constexpr static bool aggregate_over_values() { return false; }
+ constexpr static int32_t getVal(const MyKey& key) { return key._val; }
+};
+
typedef BTree<MyKey, int32_t,
btree::MinMaxAggregated,
MyComp, MyTraits,
@@ -130,6 +136,7 @@ struct LeafPairLess {
}
};
+using MyKeyAggrTree = BTree<MyKey, int32_t, btree::MinMaxAggregated, MyComp, MyTraits, KeyMinMaxAggrCalc>;
class MockTree
{
@@ -311,10 +318,13 @@ private:
size_t numEntries);
void requireThatNodeInsertWorks();
+ void keys_are_aggregated_correctly_on_node_insertions();
void requireThatNodeSplitInsertWorks();
+ void keys_are_aggregated_correctly_when_node_split_on_insert();
void requireThatTreeInsertWorks();
void requireThatNodeStealWorks();
void requireThatNodeRemoveWorks();
+ void keys_are_aggregated_correctly_on_node_removal();
void requireThatWeCanInsertAndRemoveFromTree();
void requireThatSortedTreeInsertWorks();
void requireThatCornerCaseTreeFindWorks();
@@ -409,6 +419,17 @@ Test::requireThatNodeInsertWorks()
EXPECT_TRUE(assertTree("{{10:101,20:102,30:103,40:104[min=101,max=104]}}", t));
}
+void Test::keys_are_aggregated_correctly_on_node_insertions() {
+ MyKeyAggrTree t;
+ t.insert(20, 102);
+ EXPECT_TRUE(assertTree("{{20:102[min=20,max=20]}}", t));
+ t.insert(10, 101);
+ EXPECT_TRUE(assertTree("{{10:101,20:102[min=10,max=20]}}", t));
+ t.insert(30, 103);
+ t.insert(40, 104);
+ EXPECT_TRUE(assertTree("{{10:101,20:102,30:103,40:104[min=10,max=40]}}", t));
+}
+
template <typename Tree>
void
populateTree(Tree &t, uint32_t count, uint32_t delta)
@@ -422,8 +443,9 @@ populateTree(Tree &t, uint32_t count, uint32_t delta)
}
}
+template <typename Tree>
void
-populateLeafNode(MyTree &t)
+populateLeafNode(Tree &t)
{
populateTree(t, 4, 2);
}
@@ -457,6 +479,33 @@ Test::requireThatNodeSplitInsertWorks()
}
}
+void Test::keys_are_aggregated_correctly_when_node_split_on_insert() {
+ { // new entry in current node
+ MyKeyAggrTree t;
+ populateLeafNode(t);
+ t.insert(4, 104);
+ EXPECT_TRUE(assertTree("{{4,7[min=1,max=7]}} -> "
+ "{{1:101,3:103,4:104[min=1,max=4]},"
+ "{5:105,7:107[min=5,max=7]}}", t));
+ }
+ { // new entry in split node
+ MyKeyAggrTree t;
+ populateLeafNode(t);
+ t.insert(6, 106);
+ EXPECT_TRUE(assertTree("{{5,7[min=1,max=7]}} -> "
+ "{{1:101,3:103,5:105[min=1,max=5]},"
+ "{6:106,7:107[min=6,max=7]}}", t));
+ }
+ { // new entry at end
+ MyKeyAggrTree t;
+ populateLeafNode(t);
+ t.insert(8, 108);
+ EXPECT_TRUE(assertTree("{{5,8[min=1,max=8]}} -> "
+ "{{1:101,3:103,5:105[min=1,max=5]},"
+ "{7:107,8:108[min=7,max=8]}}", t));
+ }
+}
+
void
Test::requireThatTreeInsertWorks()
{
@@ -645,6 +694,17 @@ Test::requireThatNodeRemoveWorks()
EXPECT_TRUE(assertTree("{{5:105[min=105,max=105]}}", t));
}
+void Test::keys_are_aggregated_correctly_on_node_removal() {
+ MyKeyAggrTree t;
+ populateLeafNode(t);
+ t.remove(3);
+ EXPECT_TRUE(assertTree("{{1:101,5:105,7:107[min=1,max=7]}}", t));
+ t.remove(1);
+ EXPECT_TRUE(assertTree("{{5:105,7:107[min=5,max=7]}}", t));
+ t.remove(7);
+ EXPECT_TRUE(assertTree("{{5:105[min=5,max=5]}}", t));
+}
+
void
generateData(std::vector<LeafPair> & data, size_t numEntries)
{
@@ -960,10 +1020,8 @@ struct UpdKeyComp {
void
Test::requireThatUpdateOfKeyWorks()
{
- typedef BTree<int, BTreeNoLeafData,
- btree::NoAggregated,
- UpdKeyComp &> UpdKeyTree;
- typedef UpdKeyTree::Iterator UpdKeyTreeIterator;
+ using UpdKeyTree = BTree<int, BTreeNoLeafData, btree::NoAggregated, UpdKeyComp &>;
+ using UpdKeyTreeIterator = UpdKeyTree::Iterator;
GenerationHandler g;
UpdKeyTree t;
UpdKeyComp cmp1(0);
@@ -1134,10 +1192,13 @@ Test::Main()
TEST_INIT("btreeaggregation_test");
requireThatNodeInsertWorks();
+ keys_are_aggregated_correctly_on_node_insertions();
requireThatNodeSplitInsertWorks();
+ keys_are_aggregated_correctly_when_node_split_on_insert();
requireThatTreeInsertWorks();
requireThatNodeStealWorks();
requireThatNodeRemoveWorks();
+ keys_are_aggregated_correctly_on_node_removal();
requireThatWeCanInsertAndRemoveFromTree();
requireThatSortedTreeInsertWorks();
requireThatCornerCaseTreeFindWorks();
@@ -1152,6 +1213,5 @@ Test::Main()
}
}
-}
TEST_APPHOOK(search::btree::Test);
diff --git a/vespalib/src/vespa/vespalib/btree/btreeaggregator.hpp b/vespalib/src/vespa/vespalib/btree/btreeaggregator.hpp
index e1318ab5a66..34dbfd3e899 100644
--- a/vespalib/src/vespa/vespalib/btree/btreeaggregator.hpp
+++ b/vespalib/src/vespa/vespalib/btree/btreeaggregator.hpp
@@ -13,7 +13,11 @@ BTreeAggregator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT>::aggr
{
AggrT a;
for (uint32_t i = 0, ie = node.validSlots(); i < ie; ++i) {
- aggrCalc.add(a, aggrCalc.getVal(node.getData(i)));
+ if constexpr (AggrCalcT::aggregate_over_values()) {
+ aggrCalc.add(a, aggrCalc.getVal(node.getData(i)));
+ } else {
+ aggrCalc.add(a, aggrCalc.getVal(node.getKey(i)));
+ }
}
return a;
}
diff --git a/vespalib/src/vespa/vespalib/btree/btreeinserter.hpp b/vespalib/src/vespa/vespalib/btree/btreeinserter.hpp
index e1518da5639..5c6b02a5a46 100644
--- a/vespalib/src/vespa/vespalib/btree/btreeinserter.hpp
+++ b/vespalib/src/vespa/vespalib/btree/btreeinserter.hpp
@@ -121,7 +121,11 @@ insert(BTreeNode::Ref &root,
lnode->insert(idx, key, data);
itr.setLeafNodeIdx(idx);
if constexpr (AggrCalcT::hasAggregated()) {
- aggrCalc.add(lnode->getAggregated(), aggrCalc.getVal(data));
+ if constexpr (AggrCalcT::aggregate_over_values()) {
+ aggrCalc.add(lnode->getAggregated(), aggrCalc.getVal(data));
+ } else {
+ aggrCalc.add(lnode->getAggregated(), aggrCalc.getVal(key));
+ }
ca = lnode->getAggregated();
}
}
diff --git a/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp b/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp
index 27a50e04f1b..98a14943d36 100644
--- a/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp
+++ b/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp
@@ -1089,12 +1089,12 @@ BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
updateData(const DataType & data, [[maybe_unused]] const AggrCalcT &aggrCalc)
{
LeafNodeType * lnode = getLeafNode();
- if constexpr (AggrCalcT::hasAggregated()) {
+ if constexpr (AggrCalcT::hasAggregated() && AggrCalcT::aggregate_over_values()) {
AggrT oldca(lnode->getAggregated());
- typedef BTreeAggregator<KeyT, DataT, AggrT,
- TraitsT::INTERNAL_SLOTS,
- TraitsT::LEAF_SLOTS,
- AggrCalcT> Aggregator;
+ using Aggregator = BTreeAggregator<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS,
+ TraitsT::LEAF_SLOTS,
+ AggrCalcT>;
if (aggrCalc.update(lnode->getAggregated(),
aggrCalc.getVal(lnode->getData(_leaf.getIdx())),
aggrCalc.getVal(data))) {
@@ -1193,7 +1193,11 @@ insertFirst(const KeyType &key, const DataType &data,
lnode.data->insert(0, key, data);
if constexpr (AggrCalcT::hasAggregated()) {
AggrT a;
- aggrCalc.add(a, aggrCalc.getVal(data));
+ if constexpr (AggrCalcT::aggregate_over_values()) {
+ aggrCalc.add(a, aggrCalc.getVal(data));
+ } else {
+ aggrCalc.add(a, aggrCalc.getVal(key));
+ }
lnode.data->getAggregated() = a;
}
_leafRoot = lnode.data;
diff --git a/vespalib/src/vespa/vespalib/btree/btreeremover.hpp b/vespalib/src/vespa/vespalib/btree/btreeremover.hpp
index 6a9a7ebcc7e..468b24f2a81 100644
--- a/vespalib/src/vespa/vespalib/btree/btreeremover.hpp
+++ b/vespalib/src/vespa/vespalib/btree/btreeremover.hpp
@@ -110,11 +110,17 @@ remove(BTreeNode::Ref &root,
NodeAllocatorType &allocator(itr.getAllocator());
AggrT oldca(AggrCalcT::hasAggregated() ? lnode->getAggregated() : AggrT());
AggrT ca;
- if (AggrCalcT::hasAggregated() &&
- aggrCalc.remove(lnode->getAggregated(),
- aggrCalc.getVal(lnode->getData(idx)))) {
+ if constexpr (AggrCalcT::hasAggregated()) {
+ bool removed;
+ if constexpr (AggrCalcT::aggregate_over_values()) {
+ removed = aggrCalc.remove(lnode->getAggregated(), aggrCalc.getVal(lnode->getData(idx)));
+ } else {
+ removed = aggrCalc.remove(lnode->getAggregated(), aggrCalc.getVal(lnode->getKey(idx)));
+ }
lnode->remove(idx);
- Aggregator::recalc(*lnode, aggrCalc);
+ if (removed) {
+ Aggregator::recalc(*lnode, aggrCalc);
+ }
} else {
lnode->remove(idx);
}
diff --git a/vespalib/src/vespa/vespalib/btree/btreestore.hpp b/vespalib/src/vespa/vespalib/btree/btreestore.hpp
index ec3be588de4..e6c96600b70 100644
--- a/vespalib/src/vespa/vespalib/btree/btreestore.hpp
+++ b/vespalib/src/vespa/vespalib/btree/btreestore.hpp
@@ -959,7 +959,11 @@ getAggregated(const EntryRef ref) const
const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize);
AggregatedType a;
for (uint32_t i = 0; i < clusterSize; ++i) {
- _aggrCalc.add(a, _aggrCalc.getVal(shortArray[i].getData()));
+ if constexpr (AggrCalcT::aggregate_over_values()) {
+ _aggrCalc.add(a, _aggrCalc.getVal(shortArray[i].getData()));
+ } else {
+ _aggrCalc.add(a, _aggrCalc.getVal(shortArray[i].getKey()));
+ }
}
return a;
}
diff --git a/vespalib/src/vespa/vespalib/btree/minmaxaggrcalc.h b/vespalib/src/vespa/vespalib/btree/minmaxaggrcalc.h
index 66e23127c5a..6ef809861b7 100644
--- a/vespalib/src/vespa/vespalib/btree/minmaxaggrcalc.h
+++ b/vespalib/src/vespa/vespalib/btree/minmaxaggrcalc.h
@@ -11,6 +11,7 @@ class MinMaxAggrCalc
public:
constexpr MinMaxAggrCalc() = default;
constexpr static bool hasAggregated() { return true; }
+ constexpr static bool aggregate_over_values() { return true; }
static int32_t getVal(int32_t val) { return val; }
static void add(MinMaxAggregated &a, int32_t val) { a.add(val); }
static void add(MinMaxAggregated &a, const MinMaxAggregated &ca) { a.add(ca); }
diff --git a/vespalib/src/vespa/vespalib/btree/noaggrcalc.h b/vespalib/src/vespa/vespalib/btree/noaggrcalc.h
index 079abc57224..51958229430 100644
--- a/vespalib/src/vespa/vespalib/btree/noaggrcalc.h
+++ b/vespalib/src/vespa/vespalib/btree/noaggrcalc.h
@@ -17,6 +17,8 @@ public:
return false;
}
+ constexpr static bool aggregate_over_values() { return true; }
+
template <typename DataT>
static inline int32_t
getVal(const DataT &val)