diff options
author | Håvard Pettersen <havardpe@oath.com> | 2019-09-12 07:54:40 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2019-09-12 08:16:57 +0000 |
commit | 8ad655f7a07641bd27d7537d0781343278a61f6c (patch) | |
tree | bf1b02ef29e933a58151c0f95710b9ce6411ca43 /searchcore | |
parent | 333c328766f65a209aa83bbbfcff28d15805e75e (diff) |
implement and test unpacking iterators optimizer
Diffstat (limited to 'searchcore')
4 files changed, 485 insertions, 5 deletions
diff --git a/searchcore/CMakeLists.txt b/searchcore/CMakeLists.txt index ac6a9f305dd..45113e95df4 100644 --- a/searchcore/CMakeLists.txt +++ b/searchcore/CMakeLists.txt @@ -120,6 +120,7 @@ vespa_define_module( src/tests/proton/matching/match_phase_limiter src/tests/proton/matching/partial_result src/tests/proton/matching/same_element_builder + src/tests/proton/matching/unpacking_iterators_optimizer src/tests/proton/metrics/documentdb_job_trackers src/tests/proton/metrics/job_load_sampler src/tests/proton/metrics/job_tracked_flush diff --git a/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/CMakeLists.txt b/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/CMakeLists.txt new file mode 100644 index 00000000000..6fce7151664 --- /dev/null +++ b/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchcore_unpacking_iterators_optimizer_test_app TEST + SOURCES + unpacking_iterators_optimizer_test.cpp + DEPENDS + searchcore_matching + gtest +) +vespa_add_test(NAME searchcore_unpacking_iterators_optimizer_test_app COMMAND searchcore_unpacking_iterators_optimizer_test_app) diff --git a/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp b/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp new file mode 100644 index 00000000000..135ee7d39f5 --- /dev/null +++ b/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp @@ -0,0 +1,361 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/gtest/gtest.h> +#include <vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h> +#include <vespa/searchcore/proton/matching/querynodes.h> +#include <vespa/searchlib/query/tree/querybuilder.h> +#include <vespa/vespalib/data/smart_buffer.h> +#include <vespa/vespalib/data/output_writer.h> +#include <string> + +using namespace proton::matching; +using namespace search::query; + +using vespalib::SmartBuffer; +using vespalib::OutputWriter; + +struct DumpQuery : QueryVisitor { + OutputWriter &out; + int indent; + DumpQuery(OutputWriter &out_in, int indent_in) + : out(out_in), indent(indent_in) {} + void visit_children(Intermediate &self) { + DumpQuery sub_dump(out, indent + 2); + for (Node *node: self.getChildren()) { + node->accept(sub_dump); + } + } + void visit(And &n) override { + out.printf("%*s%s %zu\n", indent, "", "And", n.getChildren().size()); + visit_children(n); + } + void visit(AndNot &) override {} + void visit(Equiv &) override {} + void visit(NumberTerm &) override {} + void visit(LocationTerm &) override {} + void visit(Near &) override {} + void visit(ONear &) override {} + void visit(Or &n) override { + out.printf("%*s%s %zu\n", indent, "", "Or", n.getChildren().size()); + visit_children(n); + } + void visit(Phrase &n) override { + out.printf("%*s%s %zu%s\n", indent, "", "Phrase", n.getChildren().size(), + n.is_expensive() ? " expensive" : ""); + visit_children(n); + } + void visit(SameElement &n) override { + out.printf("%*s%s %zu%s\n", indent, "", "SameElement", n.getChildren().size(), + n.is_expensive() ? " expensive" : ""); + visit_children(n); + } + void visit(PrefixTerm &) override {} + void visit(RangeTerm &) override {} + void visit(Rank &) override {} + void visit(StringTerm &n) override { + out.printf("%*s%s %s%s%s\n", indent, "", "Term", n.getTerm().c_str(), + (!n.isRanked() && !n.usePositionData()) ? " cheap" : "", + (n.isRanked() != n.usePositionData()) ? " BAD" : ""); + } + void visit(SubstringTerm &) override {} + void visit(SuffixTerm &) override {} + void visit(WeakAnd &) override {} + void visit(WeightedSetTerm &) override {} + void visit(DotProduct &) override {} + void visit(WandTerm &) override {} + void visit(PredicateQuery &) override {} + void visit(RegExpTerm &) override {} +}; + +std::string dump_query(Node &root) { + SmartBuffer buffer(4096); + { + OutputWriter writer(buffer, 1024); + DumpQuery dumper(writer, 0); + root.accept(dumper); + } + auto mem = buffer.obtain(); + return std::string(mem.data, mem.size); +} + +std::string view("view"); +uint32_t id(5); +Weight weight(7); + +void add_phrase(QueryBuilder<ProtonNodeTypes> &builder) { + builder.addPhrase(3, view, id, weight); + { + builder.addStringTerm("a", view, id, weight); + builder.addStringTerm("b", view, id, weight); + builder.addStringTerm("c", view, id, weight); + } +} + +void add_same_element(QueryBuilder<ProtonNodeTypes> &builder) { + builder.addSameElement(2, view); + { + builder.addStringTerm("x", view, id, weight); + builder.addStringTerm("y", view, id, weight); + } +} + +Node::UP make_phrase() { + QueryBuilder<ProtonNodeTypes> builder; + add_phrase(builder); + return builder.build(); +} + +Node::UP make_same_element() { + QueryBuilder<ProtonNodeTypes> builder; + add_same_element(builder); + return builder.build(); +} + +Node::UP make_query_tree() { + QueryBuilder<ProtonNodeTypes> builder; + builder.addAnd(4); + builder.addOr(3); + builder.addStringTerm("t2", view, id, weight); + add_phrase(builder); + add_same_element(builder); + add_same_element(builder); + add_phrase(builder); + builder.addStringTerm("t1", view, id, weight); + return builder.build(); +} + +//----------------------------------------------------------------------------- + +std::string plain_phrase_dump = + "Phrase 3\n" + " Term a\n" + " Term b\n" + " Term c\n"; + +std::string delayed_phrase_dump = + "Phrase 3 expensive\n" + " Term a\n" + " Term b\n" + " Term c\n"; + +std::string split_phrase_dump = + "And 4\n" + " Phrase 3 expensive\n" + " Term a\n" + " Term b\n" + " Term c\n" + " Term a cheap\n" + " Term b cheap\n" + " Term c cheap\n"; + +//----------------------------------------------------------------------------- + +std::string plain_same_element_dump = + "SameElement 2\n" + " Term x\n" + " Term y\n"; + +std::string delayed_same_element_dump = + "SameElement 2 expensive\n" + " Term x\n" + " Term y\n"; + +std::string split_same_element_dump = + "And 3\n" + " SameElement 2 expensive\n" + " Term x\n" + " Term y\n" + " Term x cheap\n" + " Term y cheap\n"; + +//----------------------------------------------------------------------------- + +std::string plain_query_tree_dump = + "And 4\n" + " Or 3\n" + " Term t2\n" + " Phrase 3\n" + " Term a\n" + " Term b\n" + " Term c\n" + " SameElement 2\n" + " Term x\n" + " Term y\n" + " SameElement 2\n" + " Term x\n" + " Term y\n" + " Phrase 3\n" + " Term a\n" + " Term b\n" + " Term c\n" + " Term t1\n"; + +std::string delayed_query_tree_dump = + "And 4\n" + " Or 3\n" + " Term t2\n" + " Phrase 3 expensive\n" + " Term a\n" + " Term b\n" + " Term c\n" + " SameElement 2 expensive\n" + " Term x\n" + " Term y\n" + " SameElement 2 expensive\n" + " Term x\n" + " Term y\n" + " Phrase 3 expensive\n" + " Term a\n" + " Term b\n" + " Term c\n" + " Term t1\n"; + +std::string split_query_tree_dump = + "And 9\n" + " Or 3\n" + " Term t2\n" + " Phrase 3\n" + " Term a\n" + " Term b\n" + " Term c\n" + " SameElement 2\n" + " Term x\n" + " Term y\n" + " SameElement 2 expensive\n" + " Term x\n" + " Term y\n" + " Phrase 3 expensive\n" + " Term a\n" + " Term b\n" + " Term c\n" + " Term t1\n" + " Term x cheap\n" + " Term y cheap\n" + " Term a cheap\n" + " Term b cheap\n" + " Term c cheap\n"; + +std::string delayed_split_query_tree_dump = + "And 9\n" + " Or 3\n" + " Term t2\n" + " Phrase 3 expensive\n" + " Term a\n" + " Term b\n" + " Term c\n" + " SameElement 2 expensive\n" + " Term x\n" + " Term y\n" + " SameElement 2 expensive\n" + " Term x\n" + " Term y\n" + " Phrase 3 expensive\n" + " Term a\n" + " Term b\n" + " Term c\n" + " Term t1\n" + " Term x cheap\n" + " Term y cheap\n" + " Term a cheap\n" + " Term b cheap\n" + " Term c cheap\n"; + +//----------------------------------------------------------------------------- + +Node::UP optimize(Node::UP root, bool white_list, bool split, bool delay) { + return UnpackingIteratorsOptimizer::optimize(std::move(root), white_list, split, delay); +} + +TEST(UnpackingIteratorsOptimizerTest, require_that_root_phrase_node_can_be_left_alone) { + std::string actual1 = dump_query(*optimize(make_phrase(), false, false, false)); + std::string actual2 = dump_query(*optimize(make_phrase(), false, true, false)); + std::string actual3 = dump_query(*optimize(make_phrase(), true, false, false)); + std::string expect = plain_phrase_dump; + EXPECT_EQ(actual1, expect); + EXPECT_EQ(actual2, expect); + EXPECT_EQ(actual3, expect); +} + +TEST(UnpackingIteratorsOptimizerTest, require_that_root_phrase_node_can_be_delayed) { + std::string actual1 = dump_query(*optimize(make_phrase(), false, false, true)); + std::string actual2 = dump_query(*optimize(make_phrase(), false, true, true)); + std::string actual3 = dump_query(*optimize(make_phrase(), true, false, true)); + std::string expect = delayed_phrase_dump; + EXPECT_EQ(actual1, expect); + EXPECT_EQ(actual2, expect); + EXPECT_EQ(actual3, expect); +} + +TEST(UnpackingIteratorsOptimizerTest, require_that_root_phrase_node_can_be_split) { + std::string actual1 = dump_query(*optimize(make_phrase(), true, true, true)); + std::string actual2 = dump_query(*optimize(make_phrase(), true, true, false)); + std::string expect = split_phrase_dump; + EXPECT_EQ(actual1, expect); + EXPECT_EQ(actual2, expect); +} + +//----------------------------------------------------------------------------- + +TEST(UnpackingIteratorsOptimizerTest, require_that_root_same_element_node_can_be_left_alone) { + std::string actual1 = dump_query(*optimize(make_same_element(), false, false, false)); + std::string actual2 = dump_query(*optimize(make_same_element(), false, true, false)); + std::string actual3 = dump_query(*optimize(make_same_element(), true, false, false)); + std::string expect = plain_same_element_dump; + EXPECT_EQ(actual1, expect); + EXPECT_EQ(actual2, expect); + EXPECT_EQ(actual3, expect); +} + +TEST(UnpackingIteratorsOptimizerTest, require_that_root_same_element_node_can_be_delayed) { + std::string actual1 = dump_query(*optimize(make_same_element(), false, false, true)); + std::string actual2 = dump_query(*optimize(make_same_element(), false, true, true)); + std::string actual3 = dump_query(*optimize(make_same_element(), true, false, true)); + std::string expect = delayed_same_element_dump; + EXPECT_EQ(actual1, expect); + EXPECT_EQ(actual2, expect); + EXPECT_EQ(actual3, expect); +} + +TEST(UnpackingIteratorsOptimizerTest, require_that_root_same_element_node_can_be_split) { + std::string actual1 = dump_query(*optimize(make_same_element(), true, true, true)); + std::string actual2 = dump_query(*optimize(make_same_element(), true, true, false)); + std::string expect = split_same_element_dump; + EXPECT_EQ(actual1, expect); + EXPECT_EQ(actual2, expect); +} + +//----------------------------------------------------------------------------- + +TEST(UnpackingIteratorsOptimizerTest, require_that_query_tree_can_be_left_alone) { + std::string actual1 = dump_query(*optimize(make_query_tree(), false, false, false)); + std::string actual2 = dump_query(*optimize(make_query_tree(), true, false, false)); + std::string expect = plain_query_tree_dump; + EXPECT_EQ(actual1, expect); + EXPECT_EQ(actual2, expect); +} + +TEST(UnpackingIteratorsOptimizerTest, require_that_query_tree_can_be_delayed) { + std::string actual1 = dump_query(*optimize(make_query_tree(), false, false, true)); + std::string actual2 = dump_query(*optimize(make_query_tree(), true, false, true)); + std::string expect = delayed_query_tree_dump; + EXPECT_EQ(actual1, expect); + EXPECT_EQ(actual2, expect); +} + +TEST(UnpackingIteratorsOptimizerTest, require_that_query_tree_can_be_split) { + std::string actual1 = dump_query(*optimize(make_query_tree(), false, true, false)); + std::string actual2 = dump_query(*optimize(make_query_tree(), true, true, false)); + std::string expect = split_query_tree_dump; + EXPECT_EQ(actual1, expect); + EXPECT_EQ(actual2, expect); +} + +TEST(UnpackingIteratorsOptimizerTest, require_that_query_tree_can_be_delayed_and_split) { + std::string actual1 = dump_query(*optimize(make_query_tree(), false, true, true)); + std::string actual2 = dump_query(*optimize(make_query_tree(), true, true, true)); + std::string expect = delayed_split_query_tree_dump; + EXPECT_EQ(actual1, expect); + EXPECT_EQ(actual2, expect); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp index 7b053bf8930..fc510d3c206 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp @@ -2,18 +2,127 @@ #include "unpacking_iterators_optimizer.h" +#include <vespa/log/log.h> +LOG_SETUP(".matching.unpacking_iterators_optimizer"); + +#include "querynodes.h" +#include <vespa/vespalib/util/classname.h> +#include <vespa/searchlib/query/tree/queryvisitor.h> +#include <vespa/searchlib/query/tree/templatetermvisitor.h> +#include <vespa/searchlib/query/tree/querytreecreator.h> + +using namespace search::query; + namespace proton::matching { +namespace { + +struct TermExpander : QueryVisitor { + std::vector<Node::UP> terms; + + template <typename T> + void expand(T &n) { + n.set_expensive(true); + for (Node *child: n.getChildren()) { + Node::UP node = QueryTreeCreator<ProtonNodeTypes>::replicate(*child); + if (Term *term = dynamic_cast<Term *>(node.get())) { + term->setRanked(false); + term->setPositionData(false); + terms.push_back(std::move(node)); + } else { + LOG(error, "Required a search::query::TermNode. Got %s", vespalib::getClassName(*node).c_str()); + } + } + } + void visit(And &) override {} + void visit(AndNot &) override {} + void visit(Equiv &) override {} + void visit(NumberTerm &) override {} + void visit(LocationTerm &) override {} + void visit(Near &) override {} + void visit(ONear &) override {} + void visit(Or &) override {} + void visit(Phrase &n) override { expand(n); } + void visit(SameElement &n) override { expand(n); } + void visit(PrefixTerm &) override {} + void visit(RangeTerm &) override {} + void visit(Rank &) override {} + void visit(StringTerm &) override {} + void visit(SubstringTerm &) override {} + void visit(SuffixTerm &) override {} + void visit(WeakAnd &) override {} + void visit(WeightedSetTerm &) override {} + void visit(DotProduct &) override {} + void visit(WandTerm &) override {} + void visit(PredicateQuery &) override {} + void visit(RegExpTerm &) override {} + void flush(Intermediate &parent) { + for (Node::UP &term: terms) { + parent.append(std::move(term)); + } + terms.clear(); + } +}; + +struct NodeTraverser : TemplateTermVisitor<NodeTraverser, ProtonNodeTypes> +{ + bool split_unpacking_iterators; + bool delay_unpacking_iterators; + + NodeTraverser(bool split_unpacking_iterators_in, + bool delay_unpacking_iterators_in) + : split_unpacking_iterators(split_unpacking_iterators_in), + delay_unpacking_iterators(delay_unpacking_iterators_in) {} + + template <class TermNode> void visitTerm(TermNode &) {} + void visit(ProtonNodeTypes::And &n) override { + for (Node *child: n.getChildren()) { + child->accept(*this); + } + if (split_unpacking_iterators) { + TermExpander expander; + for (Node *child: n.getChildren()) { + child->accept(expander); + } + expander.flush(n); + } + } + void visit(ProtonNodeTypes::Phrase &n) override { + if (delay_unpacking_iterators) { + n.set_expensive(true); + } + } + void visit(ProtonNodeTypes::SameElement &n) override { + if (delay_unpacking_iterators) { + n.set_expensive(true); + } + } +}; + +} // namespace proton::matching::<unnamed> + search::query::Node::UP UnpackingIteratorsOptimizer::optimize(search::query::Node::UP root, bool has_white_list, bool split_unpacking_iterators, bool delay_unpacking_iterators) { - (void) has_white_list; - (void) split_unpacking_iterators; - (void) delay_unpacking_iterators; - return std::move(root); // nop for now + if (split_unpacking_iterators || delay_unpacking_iterators) { + NodeTraverser traverser(split_unpacking_iterators, + delay_unpacking_iterators); + root->accept(traverser); + } + if (has_white_list && split_unpacking_iterators) { + TermExpander expander; + root->accept(expander); + if (!expander.terms.empty()) { + Intermediate::UP and_node = std::make_unique<ProtonNodeTypes::And>(); + and_node->append(std::move(root)); + expander.flush(*and_node); + root = std::move(and_node); + } + } + return std::move(root); } -} +} // namespace proton::matching |