diff options
author | Håvard Pettersen <3535158+havardpe@users.noreply.github.com> | 2023-01-18 11:35:44 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-18 11:35:44 +0100 |
commit | f075307ccf2e46a7562a53022bebd4e05ba23d31 (patch) | |
tree | 3819f61ecdaa7e350d7738f376de0d0bfe52b56e /searchlib | |
parent | d63b17abfab10cb4eb2077e179c395a80bc1e3d0 (diff) | |
parent | c33f429a97593009e407782df0a5054251d15eba (diff) |
Merge pull request #25609 from vespa-engine/havardpe/profile-source-blender-children
profile source blender children
Diffstat (limited to 'searchlib')
6 files changed, 211 insertions, 33 deletions
diff --git a/searchlib/src/tests/queryeval/profiled_iterator/profiled_iterator_test.cpp b/searchlib/src/tests/queryeval/profiled_iterator/profiled_iterator_test.cpp index b89dc8f9e17..010e72428e2 100644 --- a/searchlib/src/tests/queryeval/profiled_iterator/profiled_iterator_test.cpp +++ b/searchlib/src/tests/queryeval/profiled_iterator/profiled_iterator_test.cpp @@ -2,30 +2,171 @@ #include <vespa/vespalib/gtest/gtest.h> #include <vespa/vespalib/util/execution_profiler.h> +#include <vespa/vespalib/util/trinary.h> +#include <vespa/vespalib/util/require.h> #include <vespa/vespalib/data/slime/slime.h> #include <vespa/searchlib/queryeval/profiled_iterator.h> #include <vespa/searchlib/queryeval/simplesearch.h> +#include <vespa/searchlib/queryeval/sourceblendersearch.h> #include <vespa/searchlib/queryeval/andsearch.h> #include <vespa/searchlib/queryeval/orsearch.h> +#include <vespa/searchlib/attribute/fixedsourceselector.h> #include <memory> +#include <map> using namespace search::queryeval; +using search::FixedSourceSelector; using vespalib::ExecutionProfiler; using vespalib::Slime; +using vespalib::Trinary; -SearchIterator::UP create_term(const vespalib::string &name, std::vector<uint32_t> hits) { - auto search = std::make_unique<SimpleSearch>(SimpleResult(hits)); - search->tag(name); +size_t num_docs = 100; + +bool is_true(Trinary a) { + REQUIRE(a != Trinary::Undefined); + return (a == Trinary::True); +} + +bool is_true(Trinary a, Trinary b) { + return is_true(a) && is_true(b); +} + +struct MySources { + FixedSourceSelector selector; + MySources(const std::vector<std::pair<uint32_t,uint32_t>> &entries) + : selector(123, "<file>", num_docs) + { + for (const auto &entry: entries) { + selector.setSource(entry.first, entry.second); + } + } +}; +MySources my_sources({{1,3},{3,3},{5,3}, + {2,5},{4,5},{6,5}}); + +SearchIterator::UP t(std::vector<uint32_t> hits) { + auto search = std::make_unique<SimpleSearch>(SimpleResult(hits), false); + search->tag("t"); + return search; +} 
+ +SearchIterator::UP T(std::vector<uint32_t> hits) { + auto search = std::make_unique<SimpleSearch>(SimpleResult(hits), true); + search->tag("T"); return search; } +SearchIterator::UP OR(SearchIterator::UP s1, SearchIterator::UP s2) { + bool strict = is_true(s1->is_strict(), s2->is_strict()); + return OrSearch::create({std::move(s1), std::move(s2)}, strict); +} + +SearchIterator::UP AND(SearchIterator::UP s1, SearchIterator::UP s2) { + bool strict = is_true(s1->is_strict()); + return AndSearch::create({std::move(s1), std::move(s2)}, strict); +} + +SearchIterator::UP blend(SearchIterator::UP s1, uint32_t id1, + SearchIterator::UP s2, uint32_t id2) +{ + bool strict = is_true(s1->is_strict(), s2->is_strict()); + SourceBlenderSearch::Children list; + list.emplace_back(s1.release(), id1); + list.emplace_back(s2.release(), id2); + return SourceBlenderSearch::create(my_sources.selector.createIterator(), list, strict); +} + SearchIterator::UP create_iterator_tree() { - return AndSearch::create({OrSearch::create({create_term("A", {1,3,5}), - create_term("B", {2,4,6})}, true), - OrSearch::create({create_term("C", {4,6,8}), - create_term("D", {5,7,9})}, false)}, - true); + return AND(OR(T({4,6,8}), + T({5,7,9})), + blend(t({1,3,5,7,9}), 3, + t({2,4,6,8}), 5)); +} + +void collect(std::map<vespalib::string,size_t> &counts, const auto &node) { + if (!node.valid()) { + return; + } + collect(counts, node["roots"]); + collect(counts, node["children"]); + for (size_t i = 0; i < node.entries(); ++i) { + collect(counts, node[i]); + } + const auto &name = node["name"]; + if (name.valid()) { + auto name_str = name.asString().make_string(); + counts[name_str] += node["count"].asLong(); + } +}; + +std::map<vespalib::string,size_t> collect_counts(const auto &root) { + std::map<vespalib::string,size_t> counts; + collect(counts, root); + return counts; +} + +void print_counts(const std::map<vespalib::string,size_t> &counts) { + for (const auto &[name, count]: counts) { + fprintf(stderr, 
"%s: %zu\n", name.c_str(), count); + } +} + +void verify_result(SearchIterator &search, const std::vector<uint32_t> &hits) { + SimpleResult actual; + SimpleResult expect(hits); + actual.searchStrict(search, num_docs); + EXPECT_EQ(actual, expect); +} + +void verify_termwise_result(SearchIterator &search, const std::vector<uint32_t> &hits) { + search.initRange(1, num_docs); + auto result = search.get_hits(1); + ASSERT_EQ(result->size(), num_docs); + uint32_t pos = 1; + for (uint32_t hit: hits) { + while (pos < hit) { + EXPECT_FALSE(result->testBit(pos++)); + } + EXPECT_TRUE(result->testBit(pos++)); + } +} + +void verify_operation(ExecutionProfiler &profiler, std::map<vespalib::string,size_t> &seen, const vespalib::string &expect) { + Slime slime; + profiler.report(slime.setObject()); + auto counts = collect_counts(slime.get()); + for (const auto &[name, count]: counts) { + if (name == expect) { + EXPECT_EQ(count, ++seen[name]); + } else { + EXPECT_EQ(count, seen[name]); + } + } +} + +TEST(ProfiledIteratorTest, init_seek_unpack_termwise_is_profiled) { + ExecutionProfiler profiler(64); + std::map<vespalib::string,size_t> seen; + auto root = ProfiledIterator::profile(profiler, T({1,2,3})); + root->initRange(1,4); + verify_operation(profiler, seen, "/SimpleSearch/init"); + root->seek(2); + verify_operation(profiler, seen, "/SimpleSearch/seek"); + root->unpack(2); + verify_operation(profiler, seen, "/SimpleSearch/unpack"); + root->initRange(1,4); + verify_operation(profiler, seen, "/SimpleSearch/init"); + auto bits = root->get_hits(1); + verify_operation(profiler, seen, "/SimpleSearch/termwise"); + root->initRange(1,4); + verify_operation(profiler, seen, "/SimpleSearch/init"); + root->or_hits_into(*bits, 1); + verify_operation(profiler, seen, "/SimpleSearch/termwise"); + root->initRange(1,4); + verify_operation(profiler, seen, "/SimpleSearch/init"); + root->and_hits_into(*bits, 1); + verify_operation(profiler, seen, "/SimpleSearch/termwise"); } TEST(ProfiledIteratorTest, 
iterator_tree_can_be_profiled) { @@ -33,13 +174,20 @@ TEST(ProfiledIteratorTest, iterator_tree_can_be_profiled) { auto root = create_iterator_tree(); root = ProfiledIterator::profile(profiler, std::move(root)); fprintf(stderr, "%s", root->asString().c_str()); - SimpleResult expect({4,5,6}); - SimpleResult actual; - actual.searchStrict(*root, 100); - EXPECT_EQ(actual, expect); + verify_termwise_result(*root, {4,5,6}); + verify_result(*root, {4,5,6}); Slime slime; profiler.report(slime.setObject()); fprintf(stderr, "%s", slime.toString().c_str()); + auto counts = collect_counts(slime.get()); + print_counts(counts); + EXPECT_EQ(counts["/AndSearchStrict/init"], 2); + EXPECT_EQ(counts["/0/OrLikeSearch/init"], 2); + EXPECT_EQ(counts["/0/0/SimpleSearch/init"], 2); + EXPECT_EQ(counts["/0/1/SimpleSearch/init"], 2); + EXPECT_EQ(counts["/1/SourceBlenderSearchNonStrict/init"], 2); + EXPECT_EQ(counts["/1/0/SimpleSearch/init"], 2); + EXPECT_EQ(counts["/1/1/SimpleSearch/init"], 2); } GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp index 8880c7ef24e..c50c6ec49f5 100644 --- a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp @@ -59,9 +59,9 @@ AlwaysTrueBlueprint::AlwaysTrueBlueprint() : SimpleLeafBlueprint(FieldSpecBaseLi //----------------------------------------------------------------------------- SearchIterator::UP -SimpleBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &, bool) const +SimpleBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &, bool strict) const { - SimpleSearch *ss = new SimpleSearch(_result); + SimpleSearch *ss = new SimpleSearch(_result, strict); SearchIterator::UP search(ss); ss->tag(_tag); return search; @@ -70,7 +70,7 @@ SimpleBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &, SearchIterator::UP 
SimpleBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const { - SimpleSearch *ss = new SimpleSearch(_result); + SimpleSearch *ss = new SimpleSearch(_result, strict); SearchIterator::UP search(ss); ss->tag(_tag + (strict ? "<strict," : "<nostrict,") + diff --git a/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.cpp index 8ae54606146..e30fcd6f9f5 100644 --- a/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.cpp @@ -1,11 +1,14 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "profiled_iterator.h" +#include "sourceblendersearch.h" #include <vespa/searchlib/common/bitvector.h> #include <vespa/vespalib/objects/visit.hpp> #include <vespa/vespalib/util/classname.h> #include <vespa/vespalib/util/stringfmt.h> +#include <typeindex> + using vespalib::make_string_short::fmt; namespace search::queryeval { @@ -43,6 +46,17 @@ std::unique_ptr<SearchIterator> create(Profiler &profiler, ctor_token); } +void handle_leaf_node(Profiler &profiler, SearchIterator &leaf, const vespalib::string &path) { + if (leaf.isSourceBlender()) { + auto &source_blender = static_cast<SourceBlenderSearch&>(leaf); + for (size_t i = 0; i < source_blender.getNumChildren(); ++i) { + auto child = source_blender.steal(i); + child = ProfiledIterator::profile(profiler, std::move(child), fmt("%s%zu/", path.c_str(), i)); + source_blender.setChild(i, std::move(child)); + } + } +} + } void @@ -97,18 +111,22 @@ ProfiledIterator::visitMembers(vespalib::ObjectVisitor &visitor) const } std::unique_ptr<SearchIterator> -ProfiledIterator::profile(Profiler &profiler, std::unique_ptr<SearchIterator> root) +ProfiledIterator::profile(Profiler &profiler, std::unique_ptr<SearchIterator> root, const vespalib::string &root_path) { std::vector<UP*> links({&root}); - 
std::vector<vespalib::string> paths({"/"}); + std::vector<vespalib::string> paths({root_path}); for (size_t offset = 0; offset < links.size(); ++offset) { UP &link = *(links[offset]); vespalib::string path = paths[offset]; size_t first_child = links.size(); link->disclose_children(links); size_t num_children = links.size() - first_child; - for (size_t i = 0; i < num_children; ++i) { - paths.push_back(fmt("%s%zu/", path.c_str(), i)); + if (num_children == 0) { + handle_leaf_node(profiler, *link, path); + } else { + for (size_t i = 0; i < num_children; ++i) { + paths.push_back(fmt("%s%zu/", path.c_str(), i)); + } } link = create(profiler, path, std::move(link), ctor_tag{}); } diff --git a/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.h b/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.h index e8c15501267..f20ad1752d3 100644 --- a/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.h +++ b/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.h @@ -52,7 +52,8 @@ public: Trinary matches_any() const override { return _search->matches_any(); } const PostingInfo *getPostingInfo() const override { return _search->getPostingInfo(); } static std::unique_ptr<SearchIterator> profile(Profiler &profiler, - std::unique_ptr<SearchIterator> root); + std::unique_ptr<SearchIterator> root, + const vespalib::string &root_path = "/"); }; } // namespace diff --git a/searchlib/src/vespa/searchlib/queryeval/simplesearch.cpp b/searchlib/src/vespa/searchlib/queryeval/simplesearch.cpp index c1445e63fa9..e5518e22c1e 100644 --- a/searchlib/src/vespa/searchlib/queryeval/simplesearch.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/simplesearch.cpp @@ -11,11 +11,11 @@ SimpleSearch::doSeek(uint32_t docid) while (_index < _result.getHitCount() && _result.getHit(_index) < docid) { ++_index; } - if (_index == _result.getHitCount()) { - setAtEnd(); - return; + auto candidate = (_index < _result.getHitCount()) + ? 
_result.getHit(_index) : search::endDocId; + if ((candidate == docid) || _strict) { + setDocId(candidate); } - setDocId(_result.getHit(_index)); } void @@ -24,14 +24,22 @@ SimpleSearch::doUnpack(uint32_t docid) (void) docid; } -SimpleSearch::SimpleSearch(const SimpleResult &result) - : _tag("<null>"), - _result(result), - _index(0) +SimpleSearch::SimpleSearch(const SimpleResult &result, bool strict) + : _tag("<null>"), + _result(result), + _index(0), + _strict(strict) { } void +SimpleSearch::initRange(uint32_t begin_id, uint32_t end_id) +{ + SearchIterator::initRange(begin_id, end_id); + _index = 0; +} + +void SimpleSearch::visitMembers(vespalib::ObjectVisitor &visitor) const { visit(visitor, "tag", _tag); diff --git a/searchlib/src/vespa/searchlib/queryeval/simplesearch.h b/searchlib/src/vespa/searchlib/queryeval/simplesearch.h index 69d569f9e49..b09d628a87f 100644 --- a/searchlib/src/vespa/searchlib/queryeval/simplesearch.h +++ b/searchlib/src/vespa/searchlib/queryeval/simplesearch.h @@ -14,10 +14,11 @@ namespace search::queryeval { class SimpleSearch : public SearchIterator { private: - vespalib::string _tag; - SimpleResult _result; - uint32_t _index; - + vespalib::string _tag; + SimpleResult _result; + uint32_t _index; + bool _strict; + SimpleSearch(const SimpleSearch &); SimpleSearch &operator=(const SimpleSearch &); @@ -26,11 +27,13 @@ protected: void doUnpack(uint32_t docid) override; public: - SimpleSearch(const SimpleResult &result); + SimpleSearch(const SimpleResult &result, bool strict = true); SimpleSearch &tag(const vespalib::string &t) { _tag = t; return *this; } + Trinary is_strict() const override { return _strict ? Trinary::True : Trinary::False; } + void initRange(uint32_t begin_id, uint32_t end_id) override; void visitMembers(vespalib::ObjectVisitor &visitor) const override; ~SimpleSearch(); }; |