diff options
author | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2020-04-24 11:08:30 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-04-24 11:08:30 +0200 |
commit | 525ab5bc9bd3ab0e1a60c6332df80b03d181b3cd (patch) | |
tree | ecde049a33482b45f5452da1219df1a181141c95 /vespalib | |
parent | f99e796c5c90197d8f4fea55b4c00c8d41a7b97d (diff) | |
parent | c3ee3ebc4e85e408a4be486e52745743bac85d0f (diff) |
Merge pull request #13034 from vespa-engine/vekterli/support-aggregating-over-btree-keys
Add support for aggregating over B-tree keys
Diffstat (limited to 'vespalib')
8 files changed, 109 insertions, 24 deletions
diff --git a/vespalib/src/tests/btree/btreeaggregation_test.cpp b/vespalib/src/tests/btree/btreeaggregation_test.cpp index 0ce3d2c7d04..92d8955ab93 100644 --- a/vespalib/src/tests/btree/btreeaggregation_test.cpp +++ b/vespalib/src/tests/btree/btreeaggregation_test.cpp @@ -1,7 +1,5 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/log/log.h> -LOG_SETUP("btreeaggregation_test"); -#include <vespa/vespalib/testkit/testapp.h> + #include <vespa/vespalib/btree/btreeroot.h> #include <vespa/vespalib/btree/btreebuilder.h> #include <vespa/vespalib/btree/btreenodeallocator.h> @@ -17,6 +15,7 @@ LOG_SETUP("btreeaggregation_test"); #include <vespa/vespalib/btree/btreestore.hpp> #include <vespa/vespalib/btree/btreeaggregator.hpp> #include <vespa/vespalib/test/btree/btree_printer.h> +#include <vespa/vespalib/testkit/testapp.h> #include <vespa/vespalib/util/rand48.h> #include <iostream> @@ -24,11 +23,13 @@ LOG_SETUP("btreeaggregation_test"); #include <set> #include <string> +#include <vespa/log/log.h> +LOG_SETUP("btreeaggregation_test"); + using vespalib::GenerationHandler; using search::datastore::EntryRef; -namespace search { -namespace btree { +namespace search::btree { namespace { @@ -99,6 +100,11 @@ typedef std::less<int> MyComp; #define UNWRAP(key) (key) #endif +struct KeyMinMaxAggrCalc : MinMaxAggrCalc { + constexpr static bool aggregate_over_values() { return false; } + constexpr static int32_t getVal(const MyKey& key) { return key._val; } +}; + typedef BTree<MyKey, int32_t, btree::MinMaxAggregated, MyComp, MyTraits, @@ -130,6 +136,7 @@ struct LeafPairLess { } }; +using MyKeyAggrTree = BTree<MyKey, int32_t, btree::MinMaxAggregated, MyComp, MyTraits, KeyMinMaxAggrCalc>; class MockTree { @@ -311,10 +318,13 @@ private: size_t numEntries); void requireThatNodeInsertWorks(); + void keys_are_aggregated_correctly_on_node_insertions(); void requireThatNodeSplitInsertWorks(); + void keys_are_aggregated_correctly_when_node_split_on_insert(); void requireThatTreeInsertWorks(); void requireThatNodeStealWorks(); void requireThatNodeRemoveWorks(); + void keys_are_aggregated_correctly_on_node_removal(); void requireThatWeCanInsertAndRemoveFromTree(); void requireThatSortedTreeInsertWorks(); void requireThatCornerCaseTreeFindWorks(); @@ -409,6 +419,17 @@ Test::requireThatNodeInsertWorks() EXPECT_TRUE(assertTree("{{10:101,20:102,30:103,40:104[min=101,max=104]}}", t)); } +void Test::keys_are_aggregated_correctly_on_node_insertions() { + MyKeyAggrTree t; + t.insert(20, 102); + EXPECT_TRUE(assertTree("{{20:102[min=20,max=20]}}", t)); + t.insert(10, 101); + EXPECT_TRUE(assertTree("{{10:101,20:102[min=10,max=20]}}", t)); + t.insert(30, 103); + t.insert(40, 104); + EXPECT_TRUE(assertTree("{{10:101,20:102,30:103,40:104[min=10,max=40]}}", t)); +} + template <typename Tree> void populateTree(Tree &t, uint32_t count, uint32_t delta) @@ -422,8 +443,9 @@ populateTree(Tree &t, uint32_t count, uint32_t delta) } } +template <typename Tree> void -populateLeafNode(MyTree &t) +populateLeafNode(Tree &t) { populateTree(t, 4, 2); } @@ -457,6 +479,33 @@ Test::requireThatNodeSplitInsertWorks() } } +void Test::keys_are_aggregated_correctly_when_node_split_on_insert() { + { // new entry in current node + MyKeyAggrTree t; + populateLeafNode(t); + t.insert(4, 104); + EXPECT_TRUE(assertTree("{{4,7[min=1,max=7]}} -> " + "{{1:101,3:103,4:104[min=1,max=4]}," + "{5:105,7:107[min=5,max=7]}}", t)); + } + { // new entry in split node + MyKeyAggrTree t; + populateLeafNode(t); + t.insert(6, 106); + EXPECT_TRUE(assertTree("{{5,7[min=1,max=7]}} -> " + "{{1:101,3:103,5:105[min=1,max=5]}," + "{6:106,7:107[min=6,max=7]}}", t)); + } + { // new entry at end + MyKeyAggrTree t; + populateLeafNode(t); + t.insert(8, 108); + EXPECT_TRUE(assertTree("{{5,8[min=1,max=8]}} -> " + "{{1:101,3:103,5:105[min=1,max=5]}," + "{7:107,8:108[min=7,max=8]}}", t)); + } +} + void Test::requireThatTreeInsertWorks() { @@ -645,6 +694,17 @@ Test::requireThatNodeRemoveWorks() EXPECT_TRUE(assertTree("{{5:105[min=105,max=105]}}", t)); } +void Test::keys_are_aggregated_correctly_on_node_removal() { + MyKeyAggrTree t; + populateLeafNode(t); + t.remove(3); + EXPECT_TRUE(assertTree("{{1:101,5:105,7:107[min=1,max=7]}}", t)); + t.remove(1); + EXPECT_TRUE(assertTree("{{5:105,7:107[min=5,max=7]}}", t)); + t.remove(7); + EXPECT_TRUE(assertTree("{{5:105[min=5,max=5]}}", t)); +} + void generateData(std::vector<LeafPair> & data, size_t numEntries) { @@ -960,10 +1020,8 @@ struct UpdKeyComp { void Test::requireThatUpdateOfKeyWorks() { - typedef BTree<int, BTreeNoLeafData, - btree::NoAggregated, - UpdKeyComp &> UpdKeyTree; - typedef UpdKeyTree::Iterator UpdKeyTreeIterator; + using UpdKeyTree = BTree<int, BTreeNoLeafData, btree::NoAggregated, UpdKeyComp &>; + using UpdKeyTreeIterator = UpdKeyTree::Iterator; GenerationHandler g; UpdKeyTree t; UpdKeyComp cmp1(0); @@ -1134,10 +1192,13 @@ Test::Main() TEST_INIT("btreeaggregation_test"); requireThatNodeInsertWorks(); + keys_are_aggregated_correctly_on_node_insertions(); requireThatNodeSplitInsertWorks(); + keys_are_aggregated_correctly_when_node_split_on_insert(); requireThatTreeInsertWorks(); requireThatNodeStealWorks(); requireThatNodeRemoveWorks(); + keys_are_aggregated_correctly_on_node_removal(); requireThatWeCanInsertAndRemoveFromTree(); requireThatSortedTreeInsertWorks(); requireThatCornerCaseTreeFindWorks(); @@ -1152,6 +1213,5 @@ Test::Main() } } -} TEST_APPHOOK(search::btree::Test); diff --git a/vespalib/src/vespa/vespalib/btree/btreeaggregator.hpp b/vespalib/src/vespa/vespalib/btree/btreeaggregator.hpp index e1318ab5a66..34dbfd3e899 100644 --- a/vespalib/src/vespa/vespalib/btree/btreeaggregator.hpp +++ b/vespalib/src/vespa/vespalib/btree/btreeaggregator.hpp @@ -13,7 +13,11 @@ BTreeAggregator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT>::aggr { AggrT a; for (uint32_t i = 0, ie = node.validSlots(); i < ie; ++i) { - aggrCalc.add(a, aggrCalc.getVal(node.getData(i))); + if constexpr (AggrCalcT::aggregate_over_values()) { + aggrCalc.add(a, aggrCalc.getVal(node.getData(i))); + } else { + aggrCalc.add(a, aggrCalc.getVal(node.getKey(i))); + } } return a; } diff --git a/vespalib/src/vespa/vespalib/btree/btreeinserter.hpp b/vespalib/src/vespa/vespalib/btree/btreeinserter.hpp index e1518da5639..5c6b02a5a46 100644 --- a/vespalib/src/vespa/vespalib/btree/btreeinserter.hpp +++ b/vespalib/src/vespa/vespalib/btree/btreeinserter.hpp @@ -121,7 +121,11 @@ insert(BTreeNode::Ref &root, lnode->insert(idx, key, data); itr.setLeafNodeIdx(idx); if constexpr (AggrCalcT::hasAggregated()) { - aggrCalc.add(lnode->getAggregated(), aggrCalc.getVal(data)); + if constexpr (AggrCalcT::aggregate_over_values()) { + aggrCalc.add(lnode->getAggregated(), aggrCalc.getVal(data)); + } else { + aggrCalc.add(lnode->getAggregated(), aggrCalc.getVal(key)); + } ca = lnode->getAggregated(); } } diff --git a/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp b/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp index 27a50e04f1b..98a14943d36 100644 --- a/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp +++ b/vespalib/src/vespa/vespalib/btree/btreeiterator.hpp @@ -1089,12 +1089,12 @@ BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>:: updateData(const DataType & data, [[maybe_unused]] const AggrCalcT &aggrCalc) { LeafNodeType * lnode = getLeafNode(); - if constexpr (AggrCalcT::hasAggregated()) { + if constexpr (AggrCalcT::hasAggregated() && AggrCalcT::aggregate_over_values()) { AggrT oldca(lnode->getAggregated()); - typedef BTreeAggregator<KeyT, DataT, AggrT, - TraitsT::INTERNAL_SLOTS, - TraitsT::LEAF_SLOTS, - AggrCalcT> Aggregator; + using Aggregator = BTreeAggregator<KeyT, DataT, AggrT, + TraitsT::INTERNAL_SLOTS, + TraitsT::LEAF_SLOTS, + AggrCalcT>; if (aggrCalc.update(lnode->getAggregated(), aggrCalc.getVal(lnode->getData(_leaf.getIdx())), aggrCalc.getVal(data))) { @@ -1193,7 +1193,11 @@ insertFirst(const KeyType &key, const DataType &data, lnode.data->insert(0, key, data); if constexpr (AggrCalcT::hasAggregated()) { AggrT a; - aggrCalc.add(a, aggrCalc.getVal(data)); + if constexpr (AggrCalcT::aggregate_over_values()) { + aggrCalc.add(a, aggrCalc.getVal(data)); + } else { + aggrCalc.add(a, aggrCalc.getVal(key)); + } lnode.data->getAggregated() = a; } _leafRoot = lnode.data; diff --git a/vespalib/src/vespa/vespalib/btree/btreeremover.hpp b/vespalib/src/vespa/vespalib/btree/btreeremover.hpp index 6a9a7ebcc7e..acb51b18b0c 100644 --- a/vespalib/src/vespa/vespalib/btree/btreeremover.hpp +++ b/vespalib/src/vespa/vespalib/btree/btreeremover.hpp @@ -110,11 +110,17 @@ remove(BTreeNode::Ref &root, NodeAllocatorType &allocator(itr.getAllocator()); AggrT oldca(AggrCalcT::hasAggregated() ? lnode->getAggregated() : AggrT()); AggrT ca; - if (AggrCalcT::hasAggregated() && - aggrCalc.remove(lnode->getAggregated(), - aggrCalc.getVal(lnode->getData(idx)))) { + if constexpr (AggrCalcT::hasAggregated()) { + bool need_aggregation_recalc; + if constexpr (AggrCalcT::aggregate_over_values()) { + need_aggregation_recalc = aggrCalc.remove(lnode->getAggregated(), aggrCalc.getVal(lnode->getData(idx))); + } else { + need_aggregation_recalc = aggrCalc.remove(lnode->getAggregated(), aggrCalc.getVal(lnode->getKey(idx))); + } lnode->remove(idx); - Aggregator::recalc(*lnode, aggrCalc); + if (need_aggregation_recalc) { + Aggregator::recalc(*lnode, aggrCalc); + } } else { lnode->remove(idx); } diff --git a/vespalib/src/vespa/vespalib/btree/btreestore.hpp b/vespalib/src/vespa/vespalib/btree/btreestore.hpp index ec3be588de4..e6c96600b70 100644 --- a/vespalib/src/vespa/vespalib/btree/btreestore.hpp +++ b/vespalib/src/vespa/vespalib/btree/btreestore.hpp @@ -959,7 +959,11 @@ getAggregated(const EntryRef ref) const const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize); AggregatedType a; for (uint32_t i = 0; i < clusterSize; ++i) { - _aggrCalc.add(a, _aggrCalc.getVal(shortArray[i].getData())); + if constexpr (AggrCalcT::aggregate_over_values()) { + _aggrCalc.add(a, _aggrCalc.getVal(shortArray[i].getData())); + } else { + _aggrCalc.add(a, _aggrCalc.getVal(shortArray[i].getKey())); + } } return a; } diff --git a/vespalib/src/vespa/vespalib/btree/minmaxaggrcalc.h b/vespalib/src/vespa/vespalib/btree/minmaxaggrcalc.h index 66e23127c5a..6ef809861b7 100644 --- a/vespalib/src/vespa/vespalib/btree/minmaxaggrcalc.h +++ b/vespalib/src/vespa/vespalib/btree/minmaxaggrcalc.h @@ -11,6 +11,7 @@ class MinMaxAggrCalc public: constexpr MinMaxAggrCalc() = default; constexpr static bool hasAggregated() { return true; } + constexpr static bool aggregate_over_values() { return true; } static int32_t getVal(int32_t val) { return val; } static void add(MinMaxAggregated &a, int32_t val) { a.add(val); } static void add(MinMaxAggregated &a, const MinMaxAggregated &ca) { a.add(ca); } diff --git a/vespalib/src/vespa/vespalib/btree/noaggrcalc.h b/vespalib/src/vespa/vespalib/btree/noaggrcalc.h index 079abc57224..51958229430 100644 --- a/vespalib/src/vespa/vespalib/btree/noaggrcalc.h +++ b/vespalib/src/vespa/vespalib/btree/noaggrcalc.h @@ -17,6 +17,8 @@ public: return false; } + constexpr static bool aggregate_over_values() { return true; } + template <typename DataT> static inline int32_t getVal(const DataT &val) |