diff options
-rw-r--r-- | searchlib/CMakeLists.txt | 1 | ||||
-rw-r--r-- | searchlib/src/tests/btree/.gitignore | 1 | ||||
-rw-r--r-- | searchlib/src/tests/btree/CMakeLists.txt | 8 | ||||
-rw-r--r-- | searchlib/src/tests/btree/scanspeed.cpp | 181 | ||||
-rw-r--r-- | vespalib/src/tests/btree/btree_test.cpp | 77 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/btree/btreeiterator.h | 103 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/btree/btreenode.h | 33 |
7 files changed, 404 insertions, 0 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index e9e2087e9d1..c5dd468e4fd 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -100,6 +100,7 @@ vespa_define_module(
     src/tests/attribute/tensorattribute
     src/tests/bitcompression/expgolomb
     src/tests/bitvector
+    src/tests/btree
     src/tests/bytecomplens
     src/tests/common/bitvector
     src/tests/common/location
diff --git a/searchlib/src/tests/btree/.gitignore b/searchlib/src/tests/btree/.gitignore
new file mode 100644
index 00000000000..ec4090e3658
--- /dev/null
+++ b/searchlib/src/tests/btree/.gitignore
@@ -0,0 +1 @@
+searchlib_scanspeed_app
diff --git a/searchlib/src/tests/btree/CMakeLists.txt b/searchlib/src/tests/btree/CMakeLists.txt
new file mode 100644
index 00000000000..ff396144c52
--- /dev/null
+++ b/searchlib/src/tests/btree/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_scanspeed_app
+    SOURCES
+    scanspeed.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_scanspeed_app COMMAND searchlib_scanspeed_app BENCHMARK)
diff --git a/searchlib/src/tests/btree/scanspeed.cpp b/searchlib/src/tests/btree/scanspeed.cpp
new file mode 100644
index 00000000000..1474edd6b0b
--- /dev/null
+++ b/searchlib/src/tests/btree/scanspeed.cpp
@@ -0,0 +1,181 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/btree/btreeroot.h>
+#include <vespa/vespalib/btree/btreebuilder.h>
+#include <vespa/vespalib/btree/btreenodeallocator.h>
+#include <vespa/vespalib/btree/btree.h>
+#include <vespa/vespalib/btree/btreestore.h>
+#include <vespa/vespalib/btree/btreenodeallocator.hpp>
+#include <vespa/vespalib/btree/btreenode.hpp>
+#include <vespa/vespalib/btree/btreenodestore.hpp>
+#include <vespa/vespalib/btree/btreeiterator.hpp>
+#include <vespa/vespalib/btree/btreeroot.hpp>
+#include <vespa/vespalib/btree/btreebuilder.hpp>
+#include <vespa/vespalib/btree/btree.hpp>
+#include <vespa/vespalib/btree/btreestore.hpp>
+#include <vespa/vespalib/util/time.h>
+#include <vespa/searchlib/common/bitvector.h>
+
+#include <vespa/fastos/app.h>
+
+using vespalib::btree::BTree;
+using vespalib::btree::BTreeNode;
+using vespalib::btree::BTreeTraits;
+
+enum class ScanMethod
+{
+    ITERATOR,
+    FUNCTOR
+};
+
+class ScanSpeed : public FastOS_Application
+{
+    template <typename Traits>
+    void work_loop(ScanMethod scan_method);
+    int Main() override;
+};
+
+
+namespace {
+
+const char *scan_method_name(ScanMethod scan_method)
+{
+    switch (scan_method) {
+    case ScanMethod::ITERATOR:
+        return "iterator";
+    default:
+        return "functor";
+    }
+}
+
+class ScanOnce { // Interface for one scan pass; the implementations below set one bit in bv per visited key.
+public:
+    virtual ~ScanOnce() = default;
+    virtual void operator()(search::BitVector &bv) = 0;
+};
+
+template <typename Tree>
+class ScanTree : public ScanOnce {
+protected:
+    const Tree &_tree;
+    int         _startval; // first key value of the scanned range (passed to lower_bound)
+    int         _endval;   // scan stops before the first key that is >= _endval
+public:
+    ScanTree(const Tree &tree, int startval, int endval)
+        : _tree(tree),
+          _startval(startval),
+          _endval(endval)
+    {
+    }
+    ~ScanTree() override { }
+};
+
+template <typename Tree>
+class ScanWithIterator : public ScanTree<Tree> {
+public:
+    ScanWithIterator(const Tree &tree, int startval, int endval)
+        : ScanTree<Tree>(tree, startval, endval)
+    {
+    }
+    ~ScanWithIterator() override = default;
+    void operator()(search::BitVector &bv) override;
+};
+
+template <typename Tree>
+void
+ScanWithIterator<Tree>::operator()(search::BitVector &bv)
+{
+    using ConstIterator = typename Tree::ConstIterator;
+    ConstIterator itr(BTreeNode::Ref(), this->_tree.getAllocator());
+    itr.lower_bound(this->_tree.getRoot(), this->_startval);
+    while (itr.valid() && itr.getKey() < this->_endval) {
+        bv.setBit(itr.getKey());
+        ++itr;
+    }
+}
+
+template <typename Tree>
+class ScanWithFunctor : public ScanTree<Tree> {
+
+public:
+    ScanWithFunctor(const Tree &tree, int startval, int endval)
+        : ScanTree<Tree>(tree, startval, endval)
+    {
+    }
+    ~ScanWithFunctor() override = default;
+    void operator()(search::BitVector &bv) override;
+};
+
+template <typename Tree>
+void
+ScanWithFunctor<Tree>::operator()(search::BitVector &bv)
+{
+    using ConstIterator = typename Tree::ConstIterator;
+    ConstIterator start(BTreeNode::Ref(), this->_tree.getAllocator());
+    ConstIterator end(BTreeNode::Ref(), this->_tree.getAllocator());
+    start.lower_bound(this->_tree.getRoot(), this->_startval);
+    end.lower_bound(this->_tree.getRoot(), this->_endval);
+    start.foreach_key_range(end, [&](int key) { bv.setBit(key); } );
+}
+
+}
+
+template <typename Traits>
+void
+ScanSpeed::work_loop(ScanMethod scan_method)
+{
+    vespalib::GenerationHandler g;
+    using Tree = BTree<int, int, vespalib::btree::NoAggregated, std::less<int>, Traits>;
+    using Builder = typename Tree::Builder;
+    Tree tree;
+    Builder builder(tree.getAllocator());
+    size_t numEntries = 1000000;
+    size_t numInnerLoops = 1000; // number of times the scan is repeated inside the timed loop
+    for (size_t i = 0; i < numEntries; ++i) {
+        builder.insert(i, 0);
+    }
+    tree.assign(builder);
+    assert(numEntries == tree.size());
+    assert(tree.isValid());
+    std::unique_ptr<ScanOnce> scan_once;
+    if (scan_method == ScanMethod::ITERATOR) {
+        scan_once = std::make_unique<ScanWithIterator<Tree>>(tree, 4, numEntries - 4);
+    } else {
+        scan_once = std::make_unique<ScanWithFunctor<Tree>>(tree, 4, numEntries - 4);
+    }
+    auto bv = search::BitVector::create(numEntries);
+    vespalib::Timer timer;
+    for (size_t innerl = 0; innerl < numInnerLoops; ++innerl) {
+        (*scan_once)(*bv);
+    }
+    double used = vespalib::to_s(timer.elapsed());
+    printf("Elapsed time for scanning %zu entries is %8.5f, "
+           "scanmethod=%s, fanout=%u,%u\n",
+           numEntries * numInnerLoops,
+           used,
+           scan_method_name(scan_method),
+           static_cast<unsigned int>(Traits::LEAF_SLOTS),
+           static_cast<unsigned int>(Traits::INTERNAL_SLOTS));
+    fflush(stdout);
+}
+
+
+int
+ScanSpeed::Main()
+{
+    using SmallTraits = BTreeTraits<4, 4, 31, false>;
+    using DefTraits = vespalib::btree::BTreeDefaultTraits;
+    using LargeTraits = BTreeTraits<32, 16, 10, true>;
+    using HugeTraits = BTreeTraits<64, 16, 10, true>;
+    work_loop<SmallTraits>(ScanMethod::ITERATOR);
+    work_loop<DefTraits>(ScanMethod::ITERATOR);
+    work_loop<LargeTraits>(ScanMethod::ITERATOR);
+    work_loop<HugeTraits>(ScanMethod::ITERATOR);
+    work_loop<SmallTraits>(ScanMethod::FUNCTOR);
+    work_loop<DefTraits>(ScanMethod::FUNCTOR);
+    work_loop<LargeTraits>(ScanMethod::FUNCTOR);
+    work_loop<HugeTraits>(ScanMethod::FUNCTOR);
+    return 0;
+}
+
+FASTOS_MAIN(ScanSpeed);
diff --git a/vespalib/src/tests/btree/btree_test.cpp b/vespalib/src/tests/btree/btree_test.cpp
index 848c8a37125..63afd8b770f 100644
--- a/vespalib/src/tests/btree/btree_test.cpp
+++ b/vespalib/src/tests/btree/btree_test.cpp
@@ -36,6 +36,54 @@ toStr(const T & v)
     return ss.str();
 }
 
+class SequenceValidator
+{
+    int  _wanted_count;
+    int  _prev_key;
+    int  _count;
+    bool _failed;
+
+public:
+    SequenceValidator(int start, int wanted_count)
+        : _wanted_count(wanted_count),
+          _prev_key(start - 1),
+          _count(0),
+          _failed(false)
+    {
+    }
+
+    bool failed() const {
+        return _failed || _wanted_count != _count;
+    }
+
+    void operator()(int key) {
+        if (key != _prev_key + 1) {
+            _failed = true;
+        }
+        _prev_key = key;
+        ++_count;
+    }
+};
+
+class ForeachKeyValidator
+{
+    SequenceValidator & _validator;
+public:
+    ForeachKeyValidator(SequenceValidator &validator)
+        : _validator(validator)
+    {
+    }
+    void operator()(int key) {
+        _validator(key);
+    }
+};
+
+template <typename Iterator>
+void validate_subrange(Iterator &start, Iterator &end, SequenceValidator &validator) {
+    start.foreach_key_range(end, ForeachKeyValidator(validator));
+    EXPECT_FALSE(validator.failed());
+}
+
 }
 
 typedef BTreeTraits<4, 4, 31, false> MyTraits;
@@ -210,6 +258,8 @@ private:
 
     void
     requireThatIteratorDistanceWorks();
+
+    void requireThatForeachKeyWorks();
 public:
     int Main() override;
 };
@@ -1489,6 +1539,32 @@ Test::requireThatIteratorDistanceWorks()
     requireThatIteratorDistanceWorks(400);
 }
 
+void
+Test::requireThatForeachKeyWorks()
+{
+    using Tree = BTree<int, int, btree::NoAggregated, MyComp, MyTraits>;
+    using Iterator = typename Tree::ConstIterator;
+    Tree t;
+    populateTree(t, 256, 1);
+
+    {
+        // Whole range
+        SequenceValidator validator(1, 256);
+        t.foreach_key(ForeachKeyValidator(validator));
+        EXPECT_FALSE(validator.failed());
+    }
+    {
+        // Subranges
+        for (int startval = 1; startval < 259; ++startval) {
+            for (int endval = 1; endval < 259; ++endval) {
+                SequenceValidator validator(startval, std::max(0, std::min(endval,257) - std::min(startval, 257)));
+                Iterator start = t.lowerBound(startval);
+                Iterator end = t.lowerBound(endval);
+                validate_subrange(start, end, validator);
+            }
+        }
+    }
+}
 
 int
 Test::Main()
@@ -1515,6 +1591,7 @@ Test::Main()
     requireThatSmallNodesWorks();
     requireThatApplyWorks();
     requireThatIteratorDistanceWorks();
+    requireThatForeachKeyWorks();
 
     TEST_DONE();
 }
diff --git a/vespalib/src/vespa/vespalib/btree/btreeiterator.h b/vespalib/src/vespa/vespalib/btree/btreeiterator.h
index 55ab37759ad..6933fc1c2d0 100644
--- a/vespalib/src/vespa/vespalib/btree/btreeiterator.h
+++ b/vespalib/src/vespa/vespalib/btree/btreeiterator.h
@@ -303,6 +303,47 @@ protected:
      * @param pathSize     New tree height (number of levels of internal nodes)
      */
     VESPA_DLL_LOCAL void clearPath(uint32_t pathSize);
+
+    /**
+     * Call func with leaf entry key value as argument for all leaf entries in subtree
+     * from this iterator position to end of subtree.
+     */
+    template <typename FunctionType>
+    void
+    foreach_key_range_start(uint32_t level, FunctionType func) const
+    {
+        if (level > 0u) {
+            --level;
+            foreach_key_range_start(level, func);
+            auto &store = _allocator->getNodeStore();
+            auto node = _path[level].getNode();
+            uint32_t idx = _path[level].getIdx();
+            node->foreach_key_range(store, idx + 1, node->validSlots(), func);
+        } else {
+            _leaf.getNode()->foreach_key_range(_leaf.getIdx(), _leaf.getNode()->validSlots(), func);
+        }
+    }
+
+    /**
+     * Call func with leaf entry key value as argument for all leaf entries in subtree
+     * from start of subtree until this iterator position is reached (i.e. entries in
+     * subtree before this iterator position).
+     */
+    template <typename FunctionType>
+    void
+    foreach_key_range_end(uint32_t level, FunctionType func) const
+    {
+        if (level > 0u) {
+            --level;
+            auto &store = _allocator->getNodeStore();
+            auto node = _path[level].getNode();
+            uint32_t eidx = _path[level].getIdx();
+            node->foreach_key_range(store, 0, eidx, func);
+            foreach_key_range_end(level, func);
+        } else {
+            _leaf.getNode()->foreach_key_range(0, _leaf.getIdx(), func);
+        }
+    }
 public:
 
     bool
@@ -451,6 +492,68 @@ public:
             _leafRoot->foreach_key(func);
         }
     }
+
+    /**
+     * Call func with leaf entry key value as argument for all leaf entries in tree from
+     * this iterator position until end_itr position is reached (i.e. entries in
+     * range [this iterator, end_itr)).
+     */
+    template <typename FunctionType>
+    void
+    foreach_key_range(const BTreeIteratorBase &end_itr, FunctionType func) const
+    {
+        if (!valid()) {
+            return;
+        }
+        if (!end_itr.valid()) {
+            foreach_key_range_start(_pathSize, func);
+            return;
+        }
+        assert(_pathSize == end_itr._pathSize);
+        assert(_allocator == end_itr._allocator);
+        uint32_t level = _pathSize;
+        if (level > 0u) {
+            /**
+             * Tree has intermediate nodes. Detect lowest shared tree node for this
+             * iterator and end_itr.
+             */
+            uint32_t idx;
+            uint32_t eidx;
+            do {
+                --level;
+                assert(_path[level].getNode() == end_itr._path[level].getNode());
+                idx = _path[level].getIdx();
+                eidx = end_itr._path[level].getIdx();
+                if (idx > eidx) {
+                    return;
+                }
+                if (idx != eidx) {
+                    ++level;
+                    break;
+                }
+            } while (level != 0);
+            if (level > 0u) {
+                // Lowest shared node is an intermediate node.
+                // Left subtree for child [idx], from this iterator position to end of subtree.
+                foreach_key_range_start(level - 1, func);
+                auto &store = _allocator->getNodeStore();
+                auto node = _path[level - 1].getNode();
+                // Any intermediate subtrees for children [idx + 1, eidx).
+                node->foreach_key_range(store, idx + 1, eidx, func);
+                // Right subtree for child [eidx], from start of subtree to end_itr position.
+                end_itr.foreach_key_range_end(level - 1, func);
+                return;
+            } else {
+                // Lowest shared node is a leaf node.
+                assert(_leaf.getNode() == end_itr._leaf.getNode());
+            }
+        }
+        uint32_t idx = _leaf.getIdx();
+        uint32_t eidx = end_itr._leaf.getIdx();
+        if (idx < eidx) {
+            _leaf.getNode()->foreach_key_range(idx, eidx, func);
+        }
+    }
 };
 
 
diff --git a/vespalib/src/vespa/vespalib/btree/btreenode.h b/vespalib/src/vespa/vespalib/btree/btreenode.h
index b34be33ccf5..0c70e70bc6a 100644
--- a/vespalib/src/vespa/vespalib/btree/btreenode.h
+++ b/vespalib/src/vespa/vespalib/btree/btreenode.h
@@ -370,6 +370,26 @@ public:
         }
     }
 
+    /**
+     * Call func with leaf entry key value as argument for all leaf entries in subtrees
+     * for children [start_idx, end_idx).
+     */
+    template <typename NodeStoreType, typename FunctionType>
+    void foreach_key_range(NodeStoreType &store, uint32_t start_idx, uint32_t end_idx, FunctionType func) const {
+        const BTreeNode::Ref *it = this->_data;
+        const BTreeNode::Ref *ite = it + end_idx;
+        it += start_idx;
+        if (this->getLevel() > 1u) {
+            for (; it != ite; ++it) {
+                store.mapInternalRef(*it)->foreach_key(store, func);
+            }
+        } else {
+            for (; it != ite; ++it) {
+                store.mapLeafRef(*it)->foreach_key(func);
+            }
+        }
+    }
+
     template <typename NodeStoreType, typename FunctionType>
     void foreach(NodeStoreType &store, FunctionType func) const {
         const BTreeNode::Ref *it = this->_data;
@@ -459,6 +479,19 @@ public:
         }
     }
 
+    /**
+     * Call func with leaf entry key value as argument for leaf entries [start_idx, end_idx).
+     */
+    template <typename FunctionType>
+    void foreach_key_range(uint32_t start_idx, uint32_t end_idx, FunctionType func) const {
+        const KeyT *it = _keys;
+        const KeyT *ite = it + end_idx;
+        it += start_idx;
+        for (; it != ite; ++it) {
+            func(*it);
+        }
+    }
+
     template <typename FunctionType>
     void foreach(FunctionType func) const {
         const KeyT *it = _keys;