From 59a51afdf4ddf9ac7778f395b9943f479b8fbc74 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Sun, 19 Nov 2023 18:59:48 +0000 Subject: Add flag for marking phrase always expensive. --- .../unpacking_iterators_optimizer_test.cpp | 34 +++++++++++++++++++++- .../searchcore/proton/matching/match_tools.cpp | 3 +- .../src/vespa/searchcore/proton/matching/query.cpp | 5 ++-- .../src/vespa/searchcore/proton/matching/query.h | 10 ++++++- .../matching/unpacking_iterators_optimizer.cpp | 11 +++++-- .../matching/unpacking_iterators_optimizer.h | 2 +- 6 files changed, 57 insertions(+), 8 deletions(-) (limited to 'searchcore') diff --git a/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp b/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp index eb222518710..2e1a28402a1 100644 --- a/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp +++ b/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp @@ -254,12 +254,34 @@ std::string split_query_tree_dump = " Term a cheap\n" " Term b cheap\n" " Term c cheap\n"; +std::string split_query_tree_dump_always_expensive = + "And 7\n" + " Or 3\n" + " Term t2\n" + " Phrase 3 expensive\n" + " Term a\n" + " Term b\n" + " Term c\n" + " Term x1\n" + " Term x2\n" + " Phrase 3 expensive\n" + " Term a\n" + " Term b\n" + " Term c\n" + " Term t1\n" + " Term a cheap\n" + " Term b cheap\n" + " Term c cheap\n"; #endif //----------------------------------------------------------------------------- Node::UP optimize(Node::UP root, bool white_list) { - return UnpackingIteratorsOptimizer::optimize(std::move(root), white_list); + return UnpackingIteratorsOptimizer::optimize(std::move(root), white_list, false); +} + +Node::UP optimize(Node::UP root, bool white_list, bool always_mark_phrase_expensive) { + return UnpackingIteratorsOptimizer::optimize(std::move(root), white_list, always_mark_phrase_expensive); } TEST(UnpackingIteratorsOptimizerTest, require_that_root_phrase_node_can_be_left_alone) { @@ -301,4 +323,14 @@ TEST(UnpackingIteratorsOptimizerTest, require_that_query_tree_can_be_split) { EXPECT_EQ(actual2, expect); } +TEST(UnpackingIteratorsOptimizerTest, require_that_query_tree_can_be_split_always) { + std::string actual1 = dump_query(*optimize(make_query_tree(), false, false)); + std::string actual2 = dump_query(*optimize(make_query_tree(), true, false)); + std::string actual3 = dump_query(*optimize(make_query_tree(), true, true)); + std::string expect = split_query_tree_dump; + EXPECT_EQ(actual1, expect); + EXPECT_EQ(actual2, expect); + EXPECT_EQ(actual3, split_query_tree_dump_always_expensive); +} + GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index 7e7532a3182..7beecaca613 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -193,7 +193,8 @@ MatchToolsFactory(QueryLimiter & queryLimiter, trace.addEvent(4, "Start query setup"); _query.setWhiteListBlueprint(metaStore.createWhiteListBlueprint()); trace.addEvent(5, "Deserialize and build query tree"); - _valid = _query.buildTree(queryStack, location, viewResolver, indexEnv); + _valid = _query.buildTree(queryStack, location, viewResolver, indexEnv, + AlwaysMarkPhraseExpensive::check(_queryEnv.getProperties(), rankSetup.always_mark_phrase_expensive())); if (_valid) { _query.extractTerms(_queryEnv.terms()); _query.extractLocations(_queryEnv.locations()); diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp index d4f4ae8015d..071e914b405 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp @@ -165,7 +165,8 @@ Query::~Query() = default; bool Query::buildTree(vespalib::stringref stack, const string &location, - const ViewResolver &resolver, const IIndexEnvironment &indexEnv) + const ViewResolver &resolver, const IIndexEnvironment &indexEnv, + bool always_mark_phrase_expensive) { SimpleQueryStackDumpIterator stack_dump_iterator(stack); _query_tree = QueryTreeCreator::create(stack_dump_iterator); @@ -173,7 +174,7 @@ Query::buildTree(vespalib::stringref stack, const string &location, SameElementModifier prefixSameElementSubIndexes; _query_tree->accept(prefixSameElementSubIndexes); exchange_location_nodes(location, _query_tree, _locations); - _query_tree = UnpackingIteratorsOptimizer::optimize(std::move(_query_tree), bool(_whiteListBlueprint)); + _query_tree = UnpackingIteratorsOptimizer::optimize(std::move(_query_tree), bool(_whiteListBlueprint), always_mark_phrase_expensive); ResolveViewVisitor resolve_visitor(resolver, indexEnv); _query_tree->accept(resolve_visitor); return true; diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h index b67672ec3ef..6ea326834a5 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.h +++ b/searchcore/src/vespa/searchcore/proton/matching/query.h @@ -55,7 +55,15 @@ public: bool buildTree(vespalib::stringref stack, const vespalib::string &location, const ViewResolver &resolver, - const search::fef::IIndexEnvironment &idxEnv); + const search::fef::IIndexEnvironment &idxEnv) + { + return buildTree(stack, location, resolver, idxEnv, false); + } + bool buildTree(vespalib::stringref stack, + const vespalib::string &location, + const ViewResolver &resolver, + const search::fef::IIndexEnvironment &idxEnv, + bool always_mark_phrase_expensive); /** * Extract query terms from the query tree; to be used to build diff --git a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp index e8dc8ab85ba..c9cfbbfd40e 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp @@ -73,6 +73,8 @@ struct TermExpander : QueryVisitor { struct NodeTraverser : TemplateTermVisitor { + bool _always_mark_phrase_expensive; + NodeTraverser(bool always_mark_phrase_expensive) : _always_mark_phrase_expensive(always_mark_phrase_expensive) {} template void visitTerm(TermNode &) {} void visit(ProtonNodeTypes::And &n) override { for (Node *child: n.getChildren()) { @@ -84,14 +86,19 @@ struct NodeTraverser : TemplateTermVisitor } expander.flush(n); } + void visit(Phrase &n) override { + if (_always_mark_phrase_expensive) { + n.set_expensive(true); + } + } }; } // namespace proton::matching:: search::query::Node::UP -UnpackingIteratorsOptimizer::optimize(search::query::Node::UP root, bool has_white_list) +UnpackingIteratorsOptimizer::optimize(search::query::Node::UP root, bool has_white_list, bool always_mark_phrase_expensive) { - NodeTraverser traverser; + NodeTraverser traverser(always_mark_phrase_expensive); root->accept(traverser); if (has_white_list) { TermExpander expander; diff --git a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h index f698b79dd0c..fc08ae3cfdd 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h +++ b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h @@ -12,7 +12,7 @@ namespace proton::matching { * expensive. **/ struct UnpackingIteratorsOptimizer { - static search::query::Node::UP optimize(search::query::Node::UP root, bool has_white_list); + static search::query::Node::UP optimize(search::query::Node::UP root, bool has_white_list, bool always_mark_phrase_expensive); }; } -- cgit v1.2.3