summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--searchlib/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/btree/.gitignore1
-rw-r--r--searchlib/src/tests/btree/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/btree/scanspeed.cpp181
-rw-r--r--vespalib/src/tests/btree/btree_test.cpp77
-rw-r--r--vespalib/src/vespa/vespalib/btree/btreeiterator.h103
-rw-r--r--vespalib/src/vespa/vespalib/btree/btreenode.h33
7 files changed, 404 insertions, 0 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index e9e2087e9d1..c5dd468e4fd 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -100,6 +100,7 @@ vespa_define_module(
src/tests/attribute/tensorattribute
src/tests/bitcompression/expgolomb
src/tests/bitvector
+ src/tests/btree
src/tests/bytecomplens
src/tests/common/bitvector
src/tests/common/location
diff --git a/searchlib/src/tests/btree/.gitignore b/searchlib/src/tests/btree/.gitignore
new file mode 100644
index 00000000000..ec4090e3658
--- /dev/null
+++ b/searchlib/src/tests/btree/.gitignore
@@ -0,0 +1 @@
+searchlib_scanspeed_app
diff --git a/searchlib/src/tests/btree/CMakeLists.txt b/searchlib/src/tests/btree/CMakeLists.txt
new file mode 100644
index 00000000000..ff396144c52
--- /dev/null
+++ b/searchlib/src/tests/btree/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_scanspeed_app
+    SOURCES
+    scanspeed.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_scanspeed_app COMMAND searchlib_scanspeed_app BENCHMARK) # COMMAND must name the executable defined above
diff --git a/searchlib/src/tests/btree/scanspeed.cpp b/searchlib/src/tests/btree/scanspeed.cpp
new file mode 100644
index 00000000000..1474edd6b0b
--- /dev/null
+++ b/searchlib/src/tests/btree/scanspeed.cpp
@@ -0,0 +1,181 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/btree/btreeroot.h>
+#include <vespa/vespalib/btree/btreebuilder.h>
+#include <vespa/vespalib/btree/btreenodeallocator.h>
+#include <vespa/vespalib/btree/btree.h>
+#include <vespa/vespalib/btree/btreestore.h>
+#include <vespa/vespalib/btree/btreenodeallocator.hpp>
+#include <vespa/vespalib/btree/btreenode.hpp>
+#include <vespa/vespalib/btree/btreenodestore.hpp>
+#include <vespa/vespalib/btree/btreeiterator.hpp>
+#include <vespa/vespalib/btree/btreeroot.hpp>
+#include <vespa/vespalib/btree/btreebuilder.hpp>
+#include <vespa/vespalib/btree/btree.hpp>
+#include <vespa/vespalib/btree/btreestore.hpp>
+#include <vespa/vespalib/util/time.h>
+#include <vespa/searchlib/common/bitvector.h>
+
+#include <vespa/fastos/app.h>
+
+using vespalib::btree::BTree;
+using vespalib::btree::BTreeNode;
+using vespalib::btree::BTreeTraits;
+
+enum class ScanMethod // Selects which scan implementation work_loop() instantiates.
+{
+ ITERATOR, // scan by stepping a const iterator one entry at a time
+ FUNCTOR // scan by passing a callback to foreach_key_range()
+};
+
+class ScanSpeed : public FastOS_Application // Benchmark application timing B-tree range scans.
+{
+ template <typename Traits>
+ void work_loop(ScanMethod scan_method); // builds a tree using Traits, times repeated scans and prints the result
+ int Main() override; // runs work_loop() for each traits / scan method combination
+};
+
+
+namespace {
+
+// Returns the label printed for a scan method: "iterator" for
+// ScanMethod::ITERATOR and "functor" for every other value.
+const char *scan_method_name(ScanMethod scan_method)
+{
+    const bool uses_iterator = (scan_method == ScanMethod::ITERATOR);
+    return uses_iterator ? "iterator" : "functor";
+}
+
+class ScanOnce { // Interface for performing one scan pass into a bit vector.
+public:
+ virtual ~ScanOnce() = default;
+ virtual void operator()(search::BitVector &bv) = 0; // implementations in this file set one bit in bv per scanned key
+};
+
+/**
+ * Shared state for the scan implementations: the tree to scan plus the
+ * start and end values given to the constructor.
+ */
+template <typename Tree>
+class ScanTree : public ScanOnce {
+protected:
+    const Tree &_tree;
+    int         _startval;
+    int         _endval;
+
+public:
+    ScanTree(const Tree &tree, int startval, int endval)
+        : _tree(tree), _startval(startval), _endval(endval) {}
+    ~ScanTree() override {}
+};
+
+/** Scan implementation that steps through the tree with a const iterator. */
+template <typename Tree>
+class ScanWithIterator : public ScanTree<Tree> {
+    using Base = ScanTree<Tree>;
+
+public:
+    ScanWithIterator(const Tree &tree, int startval, int endval) : Base(tree, startval, endval) {}
+    ~ScanWithIterator() override = default;
+    void operator()(search::BitVector &bv) override;
+};
+
+/**
+ * Position an iterator at the lower bound of the start value and set the
+ * bit for every key met until the iterator runs out or a key is no longer
+ * below the end value.
+ */
+template <typename Tree>
+void
+ScanWithIterator<Tree>::operator()(search::BitVector &bv)
+{
+    using ConstIterator = typename Tree::ConstIterator;
+    const Tree &tree = this->_tree;
+    const int end_value = this->_endval;
+    ConstIterator cursor(BTreeNode::Ref(), tree.getAllocator());
+    for (cursor.lower_bound(tree.getRoot(), this->_startval);
+         cursor.valid() && cursor.getKey() < end_value;
+         ++cursor)
+    {
+        bv.setBit(cursor.getKey());
+    }
+}
+
+/** Scan implementation that hands a callback to foreach_key_range(). */
+template <typename Tree>
+class ScanWithFunctor : public ScanTree<Tree> {
+    using Base = ScanTree<Tree>;
+
+public:
+    ScanWithFunctor(const Tree &tree, int startval, int endval) : Base(tree, startval, endval) {}
+    ~ScanWithFunctor() override = default;
+    void operator()(search::BitVector &bv) override;
+};
+
+/**
+ * Position one iterator at the lower bound of the start value and one at
+ * the lower bound of the end value, then set the bit for every key that
+ * foreach_key_range() reports between them.
+ */
+template <typename Tree>
+void
+ScanWithFunctor<Tree>::operator()(search::BitVector &bv)
+{
+    using ConstIterator = typename Tree::ConstIterator;
+    const Tree &tree = this->_tree;
+    ConstIterator range_begin(BTreeNode::Ref(), tree.getAllocator());
+    ConstIterator range_end(BTreeNode::Ref(), tree.getAllocator());
+    range_begin.lower_bound(tree.getRoot(), this->_startval);
+    range_end.lower_bound(tree.getRoot(), this->_endval);
+    auto mark_key = [&](int key) { bv.setBit(key); };
+    range_begin.foreach_key_range(range_end, mark_key);
+}
+
+}
+
+/**
+ * Benchmark one tree configuration with one scan method.
+ *
+ * Builds a tree holding the keys [0, 1000000) using Traits, scans the key
+ * range [4, numEntries - 4) into a bit vector 1000 times and prints the
+ * elapsed time together with the scan method name and the leaf/internal
+ * slot counts of Traits.
+ *
+ * @param scan_method ScanMethod::ITERATOR selects ScanWithIterator, any
+ *                    other value selects ScanWithFunctor.
+ */
+template <typename Traits>
+void
+ScanSpeed::work_loop(ScanMethod scan_method)
+{
+    // NOTE(review): g is not referenced below; kept as is - confirm whether it is needed.
+    vespalib::GenerationHandler g;
+    using Tree = BTree<int, int, vespalib::btree::NoAggregated, std::less<int>, Traits>;
+    using Builder = typename Tree::Builder;
+    Tree tree;
+    Builder builder(tree.getAllocator());
+    size_t numEntries = 1000000;
+    size_t numInnerLoops = 1000;
+    for (size_t i = 0; i < numEntries; ++i) {
+        builder.insert(i, 0);
+    }
+    tree.assign(builder);
+    assert(numEntries == tree.size());
+    assert(tree.isValid());
+    std::unique_ptr<ScanOnce> scan_once;
+    if (scan_method == ScanMethod::ITERATOR) {
+        scan_once = std::make_unique<ScanWithIterator<Tree>>(tree, 4, numEntries - 4);
+    } else {
+        scan_once = std::make_unique<ScanWithFunctor<Tree>>(tree, 4, numEntries - 4);
+    }
+    auto bv = search::BitVector::create(numEntries);
+    vespalib::Timer timer;
+    for (size_t innerl = 0; innerl < numInnerLoops; ++innerl) {
+        (*scan_once)(*bv);
+    }
+    double used = vespalib::to_s(timer.elapsed());
+    // %zu matches the size_t product and %d matches the int casts below;
+    // the previous %ld / %u specifiers did not match their argument types.
+    printf("Elapsed time for scanning %zu entries is %8.5f, "
+           "scanmethod=%s, fanout=%d,%d\n",
+           numEntries * numInnerLoops,
+           used,
+           scan_method_name(scan_method),
+           static_cast<int>(Traits::LEAF_SLOTS),
+           static_cast<int>(Traits::INTERNAL_SLOTS));
+    fflush(stdout);
+}
+
+
+/**
+ * Run the benchmark for the four traits configurations, first with the
+ * iterator scan method and then with the functor scan method.
+ */
+int
+ScanSpeed::Main()
+{
+    using SmallTraits = BTreeTraits<4, 4, 31, false>;
+    using DefTraits = vespalib::btree::BTreeDefaultTraits;
+    using LargeTraits = BTreeTraits<32, 16, 10, true>;
+    using HugeTraits = BTreeTraits<64, 16, 10, true>;
+    const ScanMethod scan_methods[] = { ScanMethod::ITERATOR, ScanMethod::FUNCTOR };
+    for (ScanMethod scan_method : scan_methods) {
+        work_loop<SmallTraits>(scan_method);
+        work_loop<DefTraits>(scan_method);
+        work_loop<LargeTraits>(scan_method);
+        work_loop<HugeTraits>(scan_method);
+    }
+    return 0;
+}
+
+FASTOS_MAIN(ScanSpeed);
diff --git a/vespalib/src/tests/btree/btree_test.cpp b/vespalib/src/tests/btree/btree_test.cpp
index 848c8a37125..63afd8b770f 100644
--- a/vespalib/src/tests/btree/btree_test.cpp
+++ b/vespalib/src/tests/btree/btree_test.cpp
@@ -36,6 +36,54 @@ toStr(const T & v)
return ss.str();
}
+/**
+ * Checks that the keys passed to operator() form the run start, start + 1,
+ * start + 2, ... and that exactly wanted_count keys were seen.
+ */
+class SequenceValidator
+{
+    int  _wanted_count;
+    int  _prev_key;
+    int  _count;
+    bool _failed;
+
+public:
+    SequenceValidator(int start, int wanted_count)
+        : _wanted_count(wanted_count), _prev_key(start - 1), _count(0), _failed(false)
+    {}
+
+    // True when a key was out of sequence or the number of keys seen
+    // differs from the wanted count.
+    bool failed() const {
+        if (_failed) {
+            return true;
+        }
+        return _count != _wanted_count;
+    }
+
+    // Record one key; it must be exactly one larger than the previous key.
+    void operator()(int key) {
+        const int expected_key = _prev_key + 1;
+        _failed = _failed || (key != expected_key);
+        _prev_key = key;
+        ++_count;
+    }
+};
+
+/**
+ * Copyable callback that forwards every key to a SequenceValidator held by
+ * reference, so the validator keeps its state when the callback is passed
+ * by value.
+ */
+class ForeachKeyValidator
+{
+    SequenceValidator & _validator;
+public:
+    // explicit: a SequenceValidator must not convert silently into a callback.
+    explicit ForeachKeyValidator(SequenceValidator &validator)
+        : _validator(validator)
+    {
+    }
+    void operator()(int key) {
+        _validator(key);
+    }
+};
+
+/**
+ * Feed the keys that start.foreach_key_range(end, ...) reports to validator
+ * and expect the validator not to have failed afterwards.
+ */
+template <typename Iterator>
+void validate_subrange(Iterator &start, Iterator &end, SequenceValidator &validator) {
+    ForeachKeyValidator forward_to_validator(validator);
+    start.foreach_key_range(end, forward_to_validator);
+    EXPECT_FALSE(validator.failed());
+}
+
}
typedef BTreeTraits<4, 4, 31, false> MyTraits;
@@ -210,6 +258,8 @@ private:
void
requireThatIteratorDistanceWorks();
+
+ void requireThatForeachKeyWorks();
public:
int Main() override;
};
@@ -1489,6 +1539,32 @@ Test::requireThatIteratorDistanceWorks()
requireThatIteratorDistanceWorks(400);
}
+/**
+ * Verify foreach_key() over the whole tree and foreach_key_range() for
+ * every combination of start and end lower-bound positions in [1, 259),
+ * including combinations where the start value is above the end value.
+ */
+void
+Test::requireThatForeachKeyWorks()
+{
+    using Tree = BTree<int, int, btree::NoAggregated, MyComp, MyTraits>;
+    using Iterator = typename Tree::ConstIterator;
+    Tree t;
+    populateTree(t, 256, 1);
+
+    {
+        // Whole range
+        SequenceValidator validator(1, 256);
+        t.foreach_key(ForeachKeyValidator(validator));
+        EXPECT_FALSE(validator.failed());
+    }
+    {
+        // Subranges
+        for (int startval = 1; startval < 259; ++startval) {
+            for (int endval = 1; endval < 259; ++endval) {
+                // Both bounds are clamped to 257 (presumably one past the last
+                // key inserted by populateTree - TODO confirm); a start at or
+                // after the end expects zero keys.
+                SequenceValidator validator(startval, std::max(0, std::min(endval,257) - std::min(startval, 257)));
+                Iterator start = t.lowerBound(startval);
+                Iterator end = t.lowerBound(endval);
+                validate_subrange(start, end, validator);
+            }
+        }
+    }
+}
int
Test::Main()
@@ -1515,6 +1591,7 @@ Test::Main()
requireThatSmallNodesWorks();
requireThatApplyWorks();
requireThatIteratorDistanceWorks();
+ requireThatForeachKeyWorks();
TEST_DONE();
}
diff --git a/vespalib/src/vespa/vespalib/btree/btreeiterator.h b/vespalib/src/vespa/vespalib/btree/btreeiterator.h
index 55ab37759ad..6933fc1c2d0 100644
--- a/vespalib/src/vespa/vespalib/btree/btreeiterator.h
+++ b/vespalib/src/vespa/vespalib/btree/btreeiterator.h
@@ -303,6 +303,47 @@ protected:
* @param pathSize New tree height (number of levels of internal nodes)
*/
VESPA_DLL_LOCAL void clearPath(uint32_t pathSize);
+
+ /**
+ * Call func with leaf entry key value as argument for all leaf entries in subtree
+ * from this iterator position to end of subtree.
+ *
+ * @param level number of _path levels above the leaf that are covered;
+ *              0 covers only the current leaf node from the current index on.
+ * @param func  callable invoked with each key.
+ */
+ template <typename FunctionType>
+ void
+ foreach_key_range_start(uint32_t level, FunctionType func) const
+ {
+ if (level > 0u) {
+ --level;
+ foreach_key_range_start(level, func); // handle the levels below first
+ auto &store = _allocator->getNodeStore();
+ auto node = _path[level].getNode();
+ uint32_t idx = _path[level].getIdx();
+ node->foreach_key_range(store, idx + 1, node->validSlots(), func); // then every child after the current one at this level
+ } else {
+ _leaf.getNode()->foreach_key_range(_leaf.getIdx(), _leaf.getNode()->validSlots(), func); // current leaf entry and the rest of its node
+ }
+ }
+
+ /**
+ * Call func with leaf entry key value as argument for all leaf entries in subtree
+ * from start of subtree until this iterator position is reached (i.e. entries in
+ * subtree before this iterator position).
+ *
+ * @param level number of _path levels above the leaf that are covered;
+ *              0 covers only the current leaf node up to the current index.
+ * @param func  callable invoked with each key.
+ */
+ template <typename FunctionType>
+ void
+ foreach_key_range_end(uint32_t level, FunctionType func) const
+ {
+ if (level > 0u) {
+ --level;
+ auto &store = _allocator->getNodeStore();
+ auto node = _path[level].getNode();
+ uint32_t eidx = _path[level].getIdx();
+ node->foreach_key_range(store, 0, eidx, func); // every child before the current one at this level
+ foreach_key_range_end(level, func); // then descend into the levels below
+ } else {
+ _leaf.getNode()->foreach_key_range(0, _leaf.getIdx(), func); // leaf entries before the current index (current entry excluded)
+ }
+ }
public:
bool
@@ -451,6 +492,68 @@ public:
_leafRoot->foreach_key(func);
}
}
+
+ /**
+ * Call func with leaf entry key value as argument for all leaf entries in tree from
+ * this iterator position until end_itr position is reached (i.e. entries in
+ * range [this iterator, end_itr)).
+ *
+ * Nothing is called when this iterator is not valid. When end_itr is not valid,
+ * every entry from this iterator position to the end of the tree is visited.
+ * Otherwise both iterators are asserted to have the same path size and allocator,
+ * and nothing is called when this iterator is positioned after end_itr.
+ */
+ template <typename FunctionType>
+ void
+ foreach_key_range(const BTreeIteratorBase &end_itr, FunctionType func) const
+ {
+ if (!valid()) {
+ return;
+ }
+ if (!end_itr.valid()) {
+ foreach_key_range_start(_pathSize, func);
+ return;
+ }
+ assert(_pathSize == end_itr._pathSize);
+ assert(_allocator == end_itr._allocator);
+ uint32_t level = _pathSize;
+ if (level > 0u) {
+ /**
+ * Tree has intermediate nodes. Detect lowest shared tree node for this
+ * iterator and end_itr.
+ */
+ uint32_t idx;
+ uint32_t eidx;
+ do {
+ --level;
+ assert(_path[level].getNode() == end_itr._path[level].getNode());
+ idx = _path[level].getIdx();
+ eidx = end_itr._path[level].getIdx();
+ if (idx > eidx) {
+ return; // this iterator is positioned after end_itr: empty range
+ }
+ if (idx != eidx) {
+ ++level; // undo the decrement so that level - 1 indexes the lowest shared node
+ break;
+ }
+ } while (level != 0);
+ if (level > 0u) {
+ // Lowest shared node is an intermediate node.
+ // Left subtree for child [idx], from this iterator position to end of subtree.
+ foreach_key_range_start(level - 1, func);
+ auto &store = _allocator->getNodeStore();
+ auto node = _path[level - 1].getNode();
+ // Any intermediate subtrees for children [idx + 1, eidx).
+ node->foreach_key_range(store, idx + 1, eidx, func);
+ // Right subtree for child [eidx], from start of subtree to end_itr position.
+ end_itr.foreach_key_range_end(level - 1, func);
+ return;
+ } else {
+ // Lowest shared node is a leaf node.
+ assert(_leaf.getNode() == end_itr._leaf.getNode());
+ }
+ }
+ uint32_t idx = _leaf.getIdx();
+ uint32_t eidx = end_itr._leaf.getIdx();
+ if (idx < eidx) { // idx >= eidx inside the shared leaf means an empty range
+ _leaf.getNode()->foreach_key_range(idx, eidx, func);
+ }
+ }
};
diff --git a/vespalib/src/vespa/vespalib/btree/btreenode.h b/vespalib/src/vespa/vespalib/btree/btreenode.h
index b34be33ccf5..0c70e70bc6a 100644
--- a/vespalib/src/vespa/vespalib/btree/btreenode.h
+++ b/vespalib/src/vespa/vespalib/btree/btreenode.h
@@ -370,6 +370,26 @@ public:
}
}
+    /**
+     * Invoke func with the key of every leaf entry found in the subtrees of
+     * the children in [start_idx, end_idx).
+     *
+     * Children are resolved through store as internal nodes when this node's
+     * level is above 1, otherwise as leaf nodes.
+     */
+    template <typename NodeStoreType, typename FunctionType>
+    void foreach_key_range(NodeStoreType &store, uint32_t start_idx, uint32_t end_idx, FunctionType func) const {
+        const BTreeNode::Ref *children = this->_data;
+        const bool children_are_internal = (this->getLevel() > 1u);
+        if (children_are_internal) {
+            for (uint32_t child = start_idx; child != end_idx; ++child) {
+                store.mapInternalRef(children[child])->foreach_key(store, func);
+            }
+        } else {
+            for (uint32_t child = start_idx; child != end_idx; ++child) {
+                store.mapLeafRef(children[child])->foreach_key(func);
+            }
+        }
+    }
+
template <typename NodeStoreType, typename FunctionType>
void foreach(NodeStoreType &store, FunctionType func) const {
const BTreeNode::Ref *it = this->_data;
@@ -459,6 +479,19 @@ public:
}
}
+    /**
+     * Invoke func with the key of each leaf entry in [start_idx, end_idx).
+     */
+    template <typename FunctionType>
+    void foreach_key_range(uint32_t start_idx, uint32_t end_idx, FunctionType func) const {
+        const KeyT *keys = _keys;
+        for (uint32_t slot = start_idx; slot != end_idx; ++slot) {
+            func(keys[slot]);
+        }
+    }
+
template <typename FunctionType>
void foreach(FunctionType func) const {
const KeyT *it = _keys;