summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Håvard Pettersen <havardpe@yahooinc.com> 2023-01-17 13:11:40 +0000
committer Håvard Pettersen <havardpe@yahooinc.com> 2023-01-17 15:20:51 +0000
commit c33f429a97593009e407782df0a5054251d15eba (patch)
tree 13ac06144a3f6c5c873f0f4f8ed6c2b50d0aafe8 /searchlib
parent 60153b60949828b7ae7a29d2f638396b9e2b7494 (diff)
profile source blender children
more testing (termwise) extend simplesearch with strictness and initRange
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/queryeval/profiled_iterator/profiled_iterator_test.cpp 172
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/profiled_iterator.cpp 26
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/profiled_iterator.h 3
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/simplesearch.cpp 24
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/simplesearch.h 13
6 files changed, 211 insertions, 33 deletions
diff --git a/searchlib/src/tests/queryeval/profiled_iterator/profiled_iterator_test.cpp b/searchlib/src/tests/queryeval/profiled_iterator/profiled_iterator_test.cpp
index b89dc8f9e17..010e72428e2 100644
--- a/searchlib/src/tests/queryeval/profiled_iterator/profiled_iterator_test.cpp
+++ b/searchlib/src/tests/queryeval/profiled_iterator/profiled_iterator_test.cpp
@@ -2,30 +2,171 @@
#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/vespalib/util/execution_profiler.h>
+#include <vespa/vespalib/util/trinary.h>
+#include <vespa/vespalib/util/require.h>
#include <vespa/vespalib/data/slime/slime.h>
#include <vespa/searchlib/queryeval/profiled_iterator.h>
#include <vespa/searchlib/queryeval/simplesearch.h>
+#include <vespa/searchlib/queryeval/sourceblendersearch.h>
#include <vespa/searchlib/queryeval/andsearch.h>
#include <vespa/searchlib/queryeval/orsearch.h>
+#include <vespa/searchlib/attribute/fixedsourceselector.h>
#include <memory>
+#include <map>
using namespace search::queryeval;
+using search::FixedSourceSelector;
using vespalib::ExecutionProfiler;
using vespalib::Slime;
+using vespalib::Trinary;
-SearchIterator::UP create_term(const vespalib::string &name, std::vector<uint32_t> hits) {
- auto search = std::make_unique<SimpleSearch>(SimpleResult(hits));
- search->tag(name);
+// Document id limit shared by the helpers in this test (selector size, search limit,
+// end of the initRange interval and expected bit vector size). It is never modified.
+constexpr size_t num_docs = 100;
+
+bool is_true(Trinary a) {
+ REQUIRE(a != Trinary::Undefined);
+ return (a == Trinary::True);
+}
+
+// True only when both values are Trinary::True. 'b' is not inspected
+// (and therefore not validated) when 'a' is already false.
+bool is_true(Trinary a, Trinary b) {
+    if (!is_true(a)) {
+        return false;
+    }
+    return is_true(b);
+}
+
+// Owns a FixedSourceSelector and fills it by calling setSource once per entry.
+// NOTE(review): each pair is presumably (docid, source id) -- this matches the
+// hits and source ids used when the blender is built; confirm against setSource.
+struct MySources {
+    FixedSourceSelector selector;
+    MySources(const std::vector<std::pair<uint32_t,uint32_t>> &entries)
+      : selector(123, "<file>", num_docs)
+    {
+        for (const auto &[first, second]: entries) {
+            selector.setSource(first, second);
+        }
+    }
+};
+// Shared selector used by blend(): 1,3,5 are mapped to 3 and 2,4,6 are mapped to 5.
+MySources my_sources({{1,3},{3,3},{5,3},
+                      {2,5},{4,5},{6,5}});
+
+// Leaf helper: SimpleSearch over 'hits' created with strict == false, tagged "t".
+SearchIterator::UP t(std::vector<uint32_t> hits) {
+    constexpr bool strict = false;
+    auto leaf = std::make_unique<SimpleSearch>(SimpleResult(hits), strict);
+    leaf->tag("t");
+    return leaf;
+}
+
+// Leaf helper: SimpleSearch over 'hits' created with strict == true, tagged "T".
+SearchIterator::UP T(std::vector<uint32_t> hits) {
+    constexpr bool strict = true;
+    auto search = std::make_unique<SimpleSearch>(SimpleResult(hits), strict);
+    search->tag("T");
return search;
}
+// Combine two iterators with OrSearch. The result is created as strict only
+// when both children report themselves as strict.
+SearchIterator::UP OR(SearchIterator::UP s1, SearchIterator::UP s2) {
+    const Trinary first = s1->is_strict();
+    const Trinary second = s2->is_strict();
+    const bool strict = is_true(first, second);
+    return OrSearch::create({std::move(s1), std::move(s2)}, strict);
+}
+
+// Combine two iterators with AndSearch. Strictness is taken from the first
+// child only; the second child is not consulted.
+SearchIterator::UP AND(SearchIterator::UP s1, SearchIterator::UP s2) {
+    const Trinary first = s1->is_strict();
+    const bool strict = is_true(first);
+    return AndSearch::create({std::move(s1), std::move(s2)}, strict);
+}
+
+// Build a SourceBlenderSearch over two children, registered under id1 and id2,
+// using an iterator created from the shared my_sources selector. The blender is
+// created as strict only when both children report themselves as strict.
+SearchIterator::UP blend(SearchIterator::UP s1, uint32_t id1,
+                         SearchIterator::UP s2, uint32_t id2)
+{
+    const Trinary first = s1->is_strict();
+    const Trinary second = s2->is_strict();
+    const bool strict = is_true(first, second);
+    SourceBlenderSearch::Children children;
+    // the child list takes raw pointers, so ownership is released into it
+    children.emplace_back(s1.release(), id1);
+    children.emplace_back(s2.release(), id2);
+    return SourceBlenderSearch::create(my_sources.selector.createIterator(), children, strict);
+}
+
SearchIterator::UP create_iterator_tree() {
- return AndSearch::create({OrSearch::create({create_term("A", {1,3,5}),
- create_term("B", {2,4,6})}, true),
- OrSearch::create({create_term("C", {4,6,8}),
- create_term("D", {5,7,9})}, false)},
- true);
+ return AND(OR(T({4,6,8}),
+ T({5,7,9})),
+ blend(t({1,3,5,7,9}), 3,
+ t({2,4,6,8}), 5));
+}
+
+// Recursively walk a report node and add each "count" field to counts[name]
+// for every node that carries a valid "name" field.
+//
+// For every valid node the walk descends into its "roots" field, its
+// "children" field and all of its indexed entries before looking at the
+// node's own "name"; invalid nodes end the recursion.
+void collect(std::map<vespalib::string,size_t> &counts, const auto &node) {
+    if (!node.valid()) {
+        return;
+    }
+    collect(counts, node["roots"]);
+    collect(counts, node["children"]);
+    for (size_t i = 0; i < node.entries(); ++i) {
+        collect(counts, node[i]);
+    }
+    const auto &name = node["name"];
+    if (name.valid()) {
+        auto name_str = name.asString().make_string();
+        counts[name_str] += node["count"].asLong();
+    }
+}
+
+// Convenience wrapper around collect(): returns a fresh name -> count map
+// gathered from the tree rooted at 'root'.
+std::map<vespalib::string,size_t> collect_counts(const auto &root) {
+    std::map<vespalib::string,size_t> result;
+    collect(result, root);
+    return result;
+}
+
+// Dump every "name: count" pair to stderr, one per line, in map order.
+void print_counts(const std::map<vespalib::string,size_t> &counts) {
+    for (const auto &entry: counts) {
+        fprintf(stderr, "%s: %zu\n", entry.first.c_str(), entry.second);
+    }
+}
+
+// Run 'search' through SimpleResult::searchStrict with num_docs as limit and
+// expect the collected result to equal a SimpleResult built from 'hits'.
+void verify_result(SearchIterator &search, const std::vector<uint32_t> &hits) {
+    const SimpleResult expect(hits);
+    SimpleResult actual;
+    actual.searchStrict(search, num_docs);
+    EXPECT_EQ(actual, expect);
+}
+
+// Evaluate 'search' termwise over [1, num_docs) and check the returned bits
+// against 'hits' (expected to be sorted ascending, since the bit position only
+// moves forward): bits for listed hits must be set, every other bit in the
+// range must be clear.
+void verify_termwise_result(SearchIterator &search, const std::vector<uint32_t> &hits) {
+    search.initRange(1, num_docs);
+    auto result = search.get_hits(1);
+    ASSERT_EQ(result->size(), num_docs);
+    uint32_t pos = 1;
+    for (uint32_t hit: hits) {
+        // everything between the previous hit and this one must be a miss
+        while (pos < hit) {
+            EXPECT_FALSE(result->testBit(pos++));
+        }
+        EXPECT_TRUE(result->testBit(pos++));
+    }
+    // also check the tail: unexpected hits after the last expected one
+    // would otherwise go unnoticed
+    while (pos < num_docs) {
+        EXPECT_FALSE(result->testBit(pos++));
+    }
+}
+
+// Check that exactly one more sample of task 'expect' has been recorded since
+// the previous call, and that no other task count has changed.
+//
+// profiler: profiler whose current report is inspected
+// seen:     running per-task counts observed so far; updated for 'expect'
+// expect:   name of the task that the most recent operation must have hit
+void verify_operation(ExecutionProfiler &profiler, std::map<vespalib::string,size_t> &seen, const vespalib::string &expect) {
+    Slime slime;
+    profiler.report(slime.setObject());
+    auto counts = collect_counts(slime.get());
+    ++seen[expect];
+    // a task that is missing from the report would never be visited by the
+    // loop below, so its absence has to be detected explicitly
+    EXPECT_TRUE(counts.find(expect) != counts.end()) << "no profile entry for " << expect.c_str();
+    for (const auto &[name, count]: counts) {
+        EXPECT_EQ(count, seen[name]);
+    }
+}
+
+// Each iterator operation on a profiled strict leaf must be accounted to the
+// matching profiler task: init, seek, unpack and (for get_hits, or_hits_into
+// and and_hits_into) termwise.
+TEST(ProfiledIteratorTest, init_seek_unpack_termwise_is_profiled) {
+    ExecutionProfiler profiler(64);
+    std::map<vespalib::string,size_t> seen;
+    auto root = ProfiledIterator::profile(profiler, T({1,2,3}));
+    auto expect_task = [&](const char *task) {
+        verify_operation(profiler, seen, task);
+    };
+    // re-initialize the range and check that the init itself was profiled
+    auto reinit = [&]() {
+        root->initRange(1,4);
+        expect_task("/SimpleSearch/init");
+    };
+    reinit();
+    root->seek(2);
+    expect_task("/SimpleSearch/seek");
+    root->unpack(2);
+    expect_task("/SimpleSearch/unpack");
+    reinit();
+    auto bits = root->get_hits(1);
+    expect_task("/SimpleSearch/termwise");
+    reinit();
+    root->or_hits_into(*bits, 1);
+    expect_task("/SimpleSearch/termwise");
+    reinit();
+    root->and_hits_into(*bits, 1);
+    expect_task("/SimpleSearch/termwise");
}
TEST(ProfiledIteratorTest, iterator_tree_can_be_profiled) {
@@ -33,13 +174,20 @@ TEST(ProfiledIteratorTest, iterator_tree_can_be_profiled) {
auto root = create_iterator_tree();
root = ProfiledIterator::profile(profiler, std::move(root));
fprintf(stderr, "%s", root->asString().c_str());
- SimpleResult expect({4,5,6});
- SimpleResult actual;
- actual.searchStrict(*root, 100);
- EXPECT_EQ(actual, expect);
+ verify_termwise_result(*root, {4,5,6});
+ verify_result(*root, {4,5,6});
Slime slime;
profiler.report(slime.setObject());
fprintf(stderr, "%s", slime.toString().c_str());
+ auto counts = collect_counts(slime.get());
+ print_counts(counts);
+ EXPECT_EQ(counts["/AndSearchStrict/init"], 2);
+ EXPECT_EQ(counts["/0/OrLikeSearch/init"], 2);
+ EXPECT_EQ(counts["/0/0/SimpleSearch/init"], 2);
+ EXPECT_EQ(counts["/0/1/SimpleSearch/init"], 2);
+ EXPECT_EQ(counts["/1/SourceBlenderSearchNonStrict/init"], 2);
+ EXPECT_EQ(counts["/1/0/SimpleSearch/init"], 2);
+ EXPECT_EQ(counts["/1/1/SimpleSearch/init"], 2);
}
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
index 8880c7ef24e..c50c6ec49f5 100644
--- a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
@@ -59,9 +59,9 @@ AlwaysTrueBlueprint::AlwaysTrueBlueprint() : SimpleLeafBlueprint(FieldSpecBaseLi
//-----------------------------------------------------------------------------
SearchIterator::UP
-SimpleBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &, bool) const
+SimpleBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &, bool strict) const
{
- SimpleSearch *ss = new SimpleSearch(_result);
+ SimpleSearch *ss = new SimpleSearch(_result, strict);
SearchIterator::UP search(ss);
ss->tag(_tag);
return search;
@@ -70,7 +70,7 @@ SimpleBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &,
SearchIterator::UP
SimpleBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const
{
- SimpleSearch *ss = new SimpleSearch(_result);
+ SimpleSearch *ss = new SimpleSearch(_result, strict);
SearchIterator::UP search(ss);
ss->tag(_tag +
(strict ? "<strict," : "<nostrict,") +
diff --git a/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.cpp
index 8ae54606146..e30fcd6f9f5 100644
--- a/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.cpp
@@ -1,11 +1,14 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "profiled_iterator.h"
+#include "sourceblendersearch.h"
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/vespalib/objects/visit.hpp>
#include <vespa/vespalib/util/classname.h>
#include <vespa/vespalib/util/stringfmt.h>
+#include <typeindex>
+
using vespalib::make_string_short::fmt;
namespace search::queryeval {
@@ -43,6 +46,17 @@ std::unique_ptr<SearchIterator> create(Profiler &profiler,
ctor_token);
}
+// Extra handling for nodes that disclosed no children: when such a node is a
+// source blender, each of its children is taken out, wrapped by
+// ProfiledIterator::profile under the path "<path><index>/" and put back in
+// the same slot. Any other node is left untouched.
+void handle_leaf_node(Profiler &profiler, SearchIterator &leaf, const vespalib::string &path) {
+    if (!leaf.isSourceBlender()) {
+        return;
+    }
+    auto &blender = static_cast<SourceBlenderSearch&>(leaf);
+    for (size_t i = 0; i < blender.getNumChildren(); ++i) {
+        const auto child_path = fmt("%s%zu/", path.c_str(), i);
+        auto wrapped = ProfiledIterator::profile(profiler, blender.steal(i), child_path);
+        blender.setChild(i, std::move(wrapped));
+    }
+}
+
}
void
@@ -97,18 +111,22 @@ ProfiledIterator::visitMembers(vespalib::ObjectVisitor &visitor) const
}
std::unique_ptr<SearchIterator>
-ProfiledIterator::profile(Profiler &profiler, std::unique_ptr<SearchIterator> root)
+ProfiledIterator::profile(Profiler &profiler, std::unique_ptr<SearchIterator> root, const vespalib::string &root_path)
{
std::vector<UP*> links({&root});
- std::vector<vespalib::string> paths({"/"});
+ std::vector<vespalib::string> paths({root_path});
for (size_t offset = 0; offset < links.size(); ++offset) {
UP &link = *(links[offset]);
vespalib::string path = paths[offset];
size_t first_child = links.size();
link->disclose_children(links);
size_t num_children = links.size() - first_child;
- for (size_t i = 0; i < num_children; ++i) {
- paths.push_back(fmt("%s%zu/", path.c_str(), i));
+ if (num_children == 0) {
+ handle_leaf_node(profiler, *link, path);
+ } else {
+ for (size_t i = 0; i < num_children; ++i) {
+ paths.push_back(fmt("%s%zu/", path.c_str(), i));
+ }
}
link = create(profiler, path, std::move(link), ctor_tag{});
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.h b/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.h
index e8c15501267..f20ad1752d3 100644
--- a/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.h
+++ b/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.h
@@ -52,7 +52,8 @@ public:
Trinary matches_any() const override { return _search->matches_any(); }
const PostingInfo *getPostingInfo() const override { return _search->getPostingInfo(); }
static std::unique_ptr<SearchIterator> profile(Profiler &profiler,
- std::unique_ptr<SearchIterator> root);
+ std::unique_ptr<SearchIterator> root,
+ const vespalib::string &root_path = "/");
};
} // namespace
diff --git a/searchlib/src/vespa/searchlib/queryeval/simplesearch.cpp b/searchlib/src/vespa/searchlib/queryeval/simplesearch.cpp
index c1445e63fa9..e5518e22c1e 100644
--- a/searchlib/src/vespa/searchlib/queryeval/simplesearch.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/simplesearch.cpp
@@ -11,11 +11,11 @@ SimpleSearch::doSeek(uint32_t docid)
while (_index < _result.getHitCount() && _result.getHit(_index) < docid) {
++_index;
}
- if (_index == _result.getHitCount()) {
- setAtEnd();
- return;
+ auto candidate = (_index < _result.getHitCount())
+ ? _result.getHit(_index) : search::endDocId;
+ if ((candidate == docid) || _strict) {
+ setDocId(candidate);
}
- setDocId(_result.getHit(_index));
}
void
@@ -24,14 +24,22 @@ SimpleSearch::doUnpack(uint32_t docid)
(void) docid;
}
-SimpleSearch::SimpleSearch(const SimpleResult &result)
- : _tag("<null>"),
- _result(result),
- _index(0)
+SimpleSearch::SimpleSearch(const SimpleResult &result, bool strict)
+ : _tag("<null>"),
+ _result(result),
+ _index(0),
+ _strict(strict)
{
}
void
+SimpleSearch::initRange(uint32_t begin_id, uint32_t end_id) // prepare the iterator for a (new) docid range
+{
+ SearchIterator::initRange(begin_id, end_id); // base class part of the range setup
+ _index = 0; // rewind so that doSeek scans _result from its first hit again
+}
+}
+
+void
SimpleSearch::visitMembers(vespalib::ObjectVisitor &visitor) const
{
visit(visitor, "tag", _tag);
diff --git a/searchlib/src/vespa/searchlib/queryeval/simplesearch.h b/searchlib/src/vespa/searchlib/queryeval/simplesearch.h
index 69d569f9e49..b09d628a87f 100644
--- a/searchlib/src/vespa/searchlib/queryeval/simplesearch.h
+++ b/searchlib/src/vespa/searchlib/queryeval/simplesearch.h
@@ -14,10 +14,11 @@ namespace search::queryeval {
class SimpleSearch : public SearchIterator
{
private:
- vespalib::string _tag;
- SimpleResult _result;
- uint32_t _index;
-
+ vespalib::string _tag;
+ SimpleResult _result;
+ uint32_t _index;
+ bool _strict;
+
SimpleSearch(const SimpleSearch &);
SimpleSearch &operator=(const SimpleSearch &);
@@ -26,11 +27,13 @@ protected:
void doUnpack(uint32_t docid) override;
public:
- SimpleSearch(const SimpleResult &result);
+ SimpleSearch(const SimpleResult &result, bool strict = true);
SimpleSearch &tag(const vespalib::string &t) {
_tag = t;
return *this;
}
+ Trinary is_strict() const override { return _strict ? Trinary::True : Trinary::False; }
+ void initRange(uint32_t begin_id, uint32_t end_id) override;
void visitMembers(vespalib::ObjectVisitor &visitor) const override;
~SimpleSearch();
};