summaryrefslogtreecommitdiffstats
path: root/vespalib
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@verizonmedia.com>2020-04-24 11:08:30 +0200
committerGitHub <noreply@github.com>2020-04-24 11:08:30 +0200
commit525ab5bc9bd3ab0e1a60c6332df80b03d181b3cd (patch)
treeecde049a33482b45f5452da1219df1a181141c95 /vespalib
parentf99e796c5c90197d8f4fea55b4c00c8d41a7b97d (diff)
parentc3ee3ebc4e85e408a4be486e52745743bac85d0f (diff)
Merge pull request #13034 from vespa-engine/vekterli/support-aggregating-over-btree-keys
Add support for aggregating over B-tree keys
Diffstat (limited to 'vespalib')
-rw-r--r--vespalib/src/tests/btree/btreeaggregation_test.cpp82
-rw-r--r--vespalib/src/vespa/vespalib/btree/btreeaggregator.hpp6
-rw-r--r--vespalib/src/vespa/vespalib/btree/btreeinserter.hpp6
-rw-r--r--vespalib/src/vespa/vespalib/btree/btreeiterator.hpp16
-rw-r--r--vespalib/src/vespa/vespalib/btree/btreeremover.hpp14
-rw-r--r--vespalib/src/vespa/vespalib/btree/btreestore.hpp6
-rw-r--r--vespalib/src/vespa/vespalib/btree/minmaxaggrcalc.h1
-rw-r--r--vespalib/src/vespa/vespalib/btree/noaggrcalc.h2
8 files changed, 109 insertions, 24 deletions
diff --git a/vespalib/src/tests/btree/btreeaggregation_test.cpp b/vespalib/src/tests/btree/btreeaggregation_test.cpp
index 0ce3d2c7d04..92d8955ab93 100644
--- a/vespalib/src/tests/btree/btreeaggregation_test.cpp
+++ b/vespalib/src/tests/btree/btreeaggregation_test.cpp
@@ -1,7 +1,5 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/log/log.h>
-LOG_SETUP("btreeaggregation_test");
-#include <vespa/vespalib/testkit/testapp.h>
+
#include <vespa/vespalib/btree/btreeroot.h>
#include <vespa/vespalib/btree/btreebuilder.h>
#include <vespa/vespalib/btree/btreenodeallocator.h>
@@ -17,6 +15,7 @@ LOG_SETUP("btreeaggregation_test");
#include <vespa/vespalib/btree/btreestore.hpp>
#include <vespa/vespalib/btree/btreeaggregator.hpp>
#include <vespa/vespalib/test/btree/btree_printer.h>
+#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/vespalib/util/rand48.h>
#include <iostream>
@@ -24,11 +23,13 @@ LOG_SETUP("btreeaggregation_test");
#include <set>
#include <string>
+#include <vespa/log/log.h>
+LOG_SETUP("btreeaggregation_test");
+
using vespalib::GenerationHandler;
using search::datastore::EntryRef;
-namespace search {
-namespace btree {
+namespace search::btree {
namespace {
@@ -99,6 +100,11 @@ typedef std::less<int> MyComp;
#define UNWRAP(key) (key)
#endif
+struct KeyMinMaxAggrCalc : MinMaxAggrCalc {
+ constexpr static bool aggregate_over_values() { return false; }
+ constexpr static int32_t getVal(const MyKey& key) { return key._val; }
+};
+
typedef BTree<MyKey, int32_t,
btree::MinMaxAggregated,
MyComp, MyTraits,
@@ -130,6 +136,7 @@ struct LeafPairLess {
}
};
+using MyKeyAggrTree = BTree<MyKey, int32_t, btree::MinMaxAggregated, MyComp, MyTraits, KeyMinMaxAggrCalc>;
class MockTree
{
@@ -311,10 +318,13 @@ private:
size_t numEntries);
void requireThatNodeInsertWorks();
+ void keys_are_aggregated_correctly_on_node_insertions();
void requireThatNodeSplitInsertWorks();
+ void keys_are_aggregated_correctly_when_node_split_on_insert();
void requireThatTreeInsertWorks();
void requireThatNodeStealWorks();
void requireThatNodeRemoveWorks();
+ void keys_are_aggregated_correctly_on_node_removal();
void requireThatWeCanInsertAndRemoveFromTree();
void requireThatSortedTreeInsertWorks();
void requireThatCornerCaseTreeFindWorks();
@@ -409,6 +419,17 @@ Test::requireThatNodeInsertWorks()
EXPECT_TRUE(assertTree("{{10:101,20:102,30:103,40:104[min=101,max=104]}}", t));
}
+void Test::keys_are_aggregated_correctly_on_node_insertions() {
+ MyKeyAggrTree t;
+ t.insert(20, 102);
+ EXPECT_TRUE(assertTree("{{20:102[min=20,max=20]}}", t));
+ t.insert(10, 101);
+ EXPECT_TRUE(assertTree("{{10:101,20:102[min=10,max=20]}}", t));
+ t.insert(30, 103);
+ t.insert(40, 104);
+ EXPECT_TRUE(assertTree("{{10:101,20:102,30:103,40:104[min=10,max=40]}}", t));
+}
+
template <typename Tree>
void
populateTree(Tree &t, uint32_t count, uint32_t delta)
@@ -422,8 +443,9 @@ populateTree(Tree &t, uint32_t count, uint32_t delta)
}
}
+template <typename Tree>
void
-populateLeafNode(MyTree &t)
+populateLeafNode(Tree &t)
{
populateTree(t, 4, 2);
}
@@ -457,6 +479,33 @@ Test::requireThatNodeSplitInsertWorks()
}
}
+void Test::keys_are_aggregated_correctly_when_node_split_on_insert() {
+ { // new entry in current node
+ MyKeyAggrTree t;
+ populateLeafNode(t);
+ t.insert(4, 104);
+ EXPECT_TRUE(assertTree("{{4,7[min=1,max=7]}} -> "
+ "{{1:101,3:103,4:104[min=1,max=4]},"
+ "{5:105,7:107[min=5,max=7]}}", t));
+ }
+ { // new entry in split node
+ MyKeyAggrTree t;
+ populateLeafNode(t);
+ t.insert(6, 106);
+ EXPECT_TRUE(assertTree("{{5,7[min=1,max=7]}} -> "
+ "{{1:101,3:103,5:105[min=1,max=5]},"
+ "{6:106,7:107[min=6,max=7]}}", t));
+ }
+ { // new entry at end
+ MyKeyAggrTree t;
+ populateLeafNode(t);
+ t.insert(8, 108);
+ EXPECT_TRUE(assertTree("{{5,8[min=1,max=8]}} -> "
+ "{{1:101,3:103,5:105[min=1,max=5]},"
+ "{7:107,8:108[min=7,max=8]}}", t));
+ }
+}
+
void
Test::requireThatTreeInsertWorks()
{
@@ -645,6 +694,17 @@ Test::requireThatNodeRemoveWorks()
EXPECT_TRUE(assertTree("{{5:105[min=105,max=105]}}", t));
}
+void Test::keys_are_aggregated_correctly_on_node_removal() {
+ MyKeyAggrTree t;
+ populateLeafNode(t);
+ t.remove(3);
+ EXPECT_TRUE(assertTree("{{1:101,5:105,7:107[min=1,max=7]}}", t));
+ t.remove(1);
+ EXPECT_TRUE(assertTree("{{5:105,7:107[min=5,max=7]}}", t));
+ t.remove(7);
+ EXPECT_TRUE(assertTree("{{5:105[min=5,max=5]}}", t));
+}
+
void
generateData(std::vector<LeafPair> & data, size_t numEntries)
{
@@ -960,10 +1020,8 @@ struct UpdKeyComp {
void
Test::requireThatUpdateOfKeyWorks()
{
- typedef BTree<int, BTreeNoLeafData,
- btree::NoAggregated,
- UpdKeyComp &> UpdKeyTree;
- typedef UpdKeyTree::Iterator UpdKeyTreeIterator;
+ using UpdKeyTree = BTree<int, BTreeNoLeafData, btree::NoAggregated, UpdKeyComp &>;
+ using UpdKeyTreeIterator = UpdKeyTree::Iterator;
GenerationHandler g;
UpdKeyTree t;
UpdKeyComp cmp1(0);
@@ -1134,10 +1192,13 @@ Test::Main()
TEST_INIT("btreeaggregation_test");
requireThatNodeInsertWorks();
+ keys_are_aggregated_correctly_on_node_insertions();
requireThatNodeSplitInsertWorks();
+ keys_are_aggregated_correctly_when_node_split_on_insert();
requireThatTreeInsertWorks();
requireThatNodeStealWorks();
requireThatNodeRemoveWorks();
+ keys_are_aggregated_correctly_on_node_removal();
requireThatWeCanInsertAndRemoveFromTree();
requireThatSortedTreeInsertWorks();
requireThatCornerCaseTreeFindWorks();
@@ -1152,6 +1213,5 @@ Test::Main()
}
}
-}
TEST_APPHOOK(search::btree::Test);
diff --git a/vespalib/src/vespa/vespalib/btree/btreeaggregator.hpp b/vespalib/src/vespa/vespalib/btree/btreeaggregator.hpp
index e1318ab5a66..34dbfd3e899 100644
--- a/vespalib/src/vespa/vespalib/btree/btreeaggregator.hpp
+++ b/vespalib/src/vespa/vespalib/btree/btreeaggregator.hpp
@@ -13,7 +13,11 @@ BTreeAggregator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT>::aggr
{
AggrT a;
for (uint32_t i = 0, ie = node.validSlots(); i < ie; ++i) {
- aggrCalc.add(a, aggrCalc.getVal(node.getData(i)));
+ if constexpr (AggrCalcT::aggregate_over_values()) {
+ aggrCalc.add(a, aggrCalc.getVal(node.getData(i)));
+ } else {
+ aggrCalc.add(a, aggrCalc.getVal(node.getKey(i)));
+ }
}
return a;
}
diff --git a/vespalib/src/vespa/vespalib/btree/btreeinserter.hpp b/vespalib/src/vespa/vespalib/btree/btreeinserter.hpp
index e1518da5639..5c6b02a5a46 100644
--- a/vespalib/src/vespa/vespalib/btree/btreeinserter.hpp
+++ b/vespalib/src/vespa/vespalib/btree/btreeinserter.hpp
@@ -121,7 +121,11 @@ insert(BTreeNode::Ref &root,
lnode->insert(idx, key, data);
itr.setLeafNodeIdx(idx);
if constexpr (AggrCalcT::hasAggregated()) {
- aggrCalc.add(lnode->getAggregated(), aggrCalc.getVal(data));
+ if constexpr (AggrCalcT::aggregate_over_values()) {
+ aggrCalc.add(lnode->getAggregated(), aggrCalc.getVal(data));
+ } else {
+ aggrCalc.add(lnode->getAggregated(), aggrCalc.getVal(key));
+ }
ca = lnode->getAggregated();
}
}
diff --git a/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp b/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp
index 27a50e04f1b..98a14943d36 100644
--- a/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp
+++ b/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp
@@ -1089,12 +1089,12 @@ BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
updateData(const DataType & data, [[maybe_unused]] const AggrCalcT &aggrCalc)
{
LeafNodeType * lnode = getLeafNode();
- if constexpr (AggrCalcT::hasAggregated()) {
+ if constexpr (AggrCalcT::hasAggregated() && AggrCalcT::aggregate_over_values()) {
AggrT oldca(lnode->getAggregated());
- typedef BTreeAggregator<KeyT, DataT, AggrT,
- TraitsT::INTERNAL_SLOTS,
- TraitsT::LEAF_SLOTS,
- AggrCalcT> Aggregator;
+ using Aggregator = BTreeAggregator<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS,
+ TraitsT::LEAF_SLOTS,
+ AggrCalcT>;
if (aggrCalc.update(lnode->getAggregated(),
aggrCalc.getVal(lnode->getData(_leaf.getIdx())),
aggrCalc.getVal(data))) {
@@ -1193,7 +1193,11 @@ insertFirst(const KeyType &key, const DataType &data,
lnode.data->insert(0, key, data);
if constexpr (AggrCalcT::hasAggregated()) {
AggrT a;
- aggrCalc.add(a, aggrCalc.getVal(data));
+ if constexpr (AggrCalcT::aggregate_over_values()) {
+ aggrCalc.add(a, aggrCalc.getVal(data));
+ } else {
+ aggrCalc.add(a, aggrCalc.getVal(key));
+ }
lnode.data->getAggregated() = a;
}
_leafRoot = lnode.data;
diff --git a/vespalib/src/vespa/vespalib/btree/btreeremover.hpp b/vespalib/src/vespa/vespalib/btree/btreeremover.hpp
index 6a9a7ebcc7e..acb51b18b0c 100644
--- a/vespalib/src/vespa/vespalib/btree/btreeremover.hpp
+++ b/vespalib/src/vespa/vespalib/btree/btreeremover.hpp
@@ -110,11 +110,17 @@ remove(BTreeNode::Ref &root,
NodeAllocatorType &allocator(itr.getAllocator());
AggrT oldca(AggrCalcT::hasAggregated() ? lnode->getAggregated() : AggrT());
AggrT ca;
- if (AggrCalcT::hasAggregated() &&
- aggrCalc.remove(lnode->getAggregated(),
- aggrCalc.getVal(lnode->getData(idx)))) {
+ if constexpr (AggrCalcT::hasAggregated()) {
+ bool need_aggregation_recalc;
+ if constexpr (AggrCalcT::aggregate_over_values()) {
+ need_aggregation_recalc = aggrCalc.remove(lnode->getAggregated(), aggrCalc.getVal(lnode->getData(idx)));
+ } else {
+ need_aggregation_recalc = aggrCalc.remove(lnode->getAggregated(), aggrCalc.getVal(lnode->getKey(idx)));
+ }
lnode->remove(idx);
- Aggregator::recalc(*lnode, aggrCalc);
+ if (need_aggregation_recalc) {
+ Aggregator::recalc(*lnode, aggrCalc);
+ }
} else {
lnode->remove(idx);
}
diff --git a/vespalib/src/vespa/vespalib/btree/btreestore.hpp b/vespalib/src/vespa/vespalib/btree/btreestore.hpp
index ec3be588de4..e6c96600b70 100644
--- a/vespalib/src/vespa/vespalib/btree/btreestore.hpp
+++ b/vespalib/src/vespa/vespalib/btree/btreestore.hpp
@@ -959,7 +959,11 @@ getAggregated(const EntryRef ref) const
const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize);
AggregatedType a;
for (uint32_t i = 0; i < clusterSize; ++i) {
- _aggrCalc.add(a, _aggrCalc.getVal(shortArray[i].getData()));
+ if constexpr (AggrCalcT::aggregate_over_values()) {
+ _aggrCalc.add(a, _aggrCalc.getVal(shortArray[i].getData()));
+ } else {
+ _aggrCalc.add(a, _aggrCalc.getVal(shortArray[i].getKey()));
+ }
}
return a;
}
diff --git a/vespalib/src/vespa/vespalib/btree/minmaxaggrcalc.h b/vespalib/src/vespa/vespalib/btree/minmaxaggrcalc.h
index 66e23127c5a..6ef809861b7 100644
--- a/vespalib/src/vespa/vespalib/btree/minmaxaggrcalc.h
+++ b/vespalib/src/vespa/vespalib/btree/minmaxaggrcalc.h
@@ -11,6 +11,7 @@ class MinMaxAggrCalc
public:
constexpr MinMaxAggrCalc() = default;
constexpr static bool hasAggregated() { return true; }
+ constexpr static bool aggregate_over_values() { return true; }
static int32_t getVal(int32_t val) { return val; }
static void add(MinMaxAggregated &a, int32_t val) { a.add(val); }
static void add(MinMaxAggregated &a, const MinMaxAggregated &ca) { a.add(ca); }
diff --git a/vespalib/src/vespa/vespalib/btree/noaggrcalc.h b/vespalib/src/vespa/vespalib/btree/noaggrcalc.h
index 079abc57224..51958229430 100644
--- a/vespalib/src/vespa/vespalib/btree/noaggrcalc.h
+++ b/vespalib/src/vespa/vespalib/btree/noaggrcalc.h
@@ -17,6 +17,8 @@ public:
return false;
}
+ constexpr static bool aggregate_over_values() { return true; }
+
template <typename DataT>
static inline int32_t
getVal(const DataT &val)