diff options
10 files changed, 79 insertions, 9 deletions
diff --git a/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp b/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp index eb222518710..2e1a28402a1 100644 --- a/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp +++ b/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp @@ -254,12 +254,34 @@ std::string split_query_tree_dump = " Term a cheap\n" " Term b cheap\n" " Term c cheap\n"; +std::string split_query_tree_dump_always_expensive = + "And 7\n" + " Or 3\n" + " Term t2\n" + " Phrase 3 expensive\n" + " Term a\n" + " Term b\n" + " Term c\n" + " Term x1\n" + " Term x2\n" + " Phrase 3 expensive\n" + " Term a\n" + " Term b\n" + " Term c\n" + " Term t1\n" + " Term a cheap\n" + " Term b cheap\n" + " Term c cheap\n"; #endif //----------------------------------------------------------------------------- Node::UP optimize(Node::UP root, bool white_list) { - return UnpackingIteratorsOptimizer::optimize(std::move(root), white_list); + return UnpackingIteratorsOptimizer::optimize(std::move(root), white_list, false); +} + +Node::UP optimize(Node::UP root, bool white_list, bool always_mark_phrase_expensive) { + return UnpackingIteratorsOptimizer::optimize(std::move(root), white_list, always_mark_phrase_expensive); } TEST(UnpackingIteratorsOptimizerTest, require_that_root_phrase_node_can_be_left_alone) { @@ -301,4 +323,14 @@ TEST(UnpackingIteratorsOptimizerTest, require_that_query_tree_can_be_split) { EXPECT_EQ(actual2, expect); } +TEST(UnpackingIteratorsOptimizerTest, require_that_query_tree_can_be_split_always) { + std::string actual1 = dump_query(*optimize(make_query_tree(), false, false)); + std::string actual2 = dump_query(*optimize(make_query_tree(), true, false)); + std::string actual3 = dump_query(*optimize(make_query_tree(), true, true)); + std::string expect = split_query_tree_dump; + EXPECT_EQ(actual1, expect); + EXPECT_EQ(actual2, expect); + EXPECT_EQ(actual3, split_query_tree_dump_always_expensive); +} + GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index 7e7532a3182..7beecaca613 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -193,7 +193,8 @@ MatchToolsFactory(QueryLimiter & queryLimiter, trace.addEvent(4, "Start query setup"); _query.setWhiteListBlueprint(metaStore.createWhiteListBlueprint()); trace.addEvent(5, "Deserialize and build query tree"); - _valid = _query.buildTree(queryStack, location, viewResolver, indexEnv); + _valid = _query.buildTree(queryStack, location, viewResolver, indexEnv, + AlwaysMarkPhraseExpensive::check(_queryEnv.getProperties(), rankSetup.always_mark_phrase_expensive())); if (_valid) { _query.extractTerms(_queryEnv.terms()); _query.extractLocations(_queryEnv.locations()); diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp index d4f4ae8015d..071e914b405 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp @@ -165,7 +165,8 @@ Query::~Query() = default; bool Query::buildTree(vespalib::stringref stack, const string &location, - const ViewResolver &resolver, const IIndexEnvironment &indexEnv) + const ViewResolver &resolver, const IIndexEnvironment &indexEnv, + bool always_mark_phrase_expensive) { SimpleQueryStackDumpIterator stack_dump_iterator(stack); _query_tree = QueryTreeCreator<ProtonNodeTypes>::create(stack_dump_iterator); @@ -173,7 +174,7 @@ Query::buildTree(vespalib::stringref stack, const string &location, SameElementModifier prefixSameElementSubIndexes; _query_tree->accept(prefixSameElementSubIndexes); exchange_location_nodes(location, _query_tree, _locations); - _query_tree = UnpackingIteratorsOptimizer::optimize(std::move(_query_tree), bool(_whiteListBlueprint)); + _query_tree = UnpackingIteratorsOptimizer::optimize(std::move(_query_tree), bool(_whiteListBlueprint), always_mark_phrase_expensive); ResolveViewVisitor resolve_visitor(resolver, indexEnv); _query_tree->accept(resolve_visitor); return true; diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h index b67672ec3ef..6ea326834a5 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.h +++ b/searchcore/src/vespa/searchcore/proton/matching/query.h @@ -55,7 +55,15 @@ public: bool buildTree(vespalib::stringref stack, const vespalib::string &location, const ViewResolver &resolver, - const search::fef::IIndexEnvironment &idxEnv); + const search::fef::IIndexEnvironment &idxEnv) + { + return buildTree(stack, location, resolver, idxEnv, false); + } + bool buildTree(vespalib::stringref stack, + const vespalib::string &location, + const ViewResolver &resolver, + const search::fef::IIndexEnvironment &idxEnv, + bool always_mark_phrase_expensive); /** * Extract query terms from the query tree; to be used to build diff --git a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp index e8dc8ab85ba..c9cfbbfd40e 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp @@ -73,6 +73,8 @@ struct TermExpander : QueryVisitor { struct NodeTraverser : TemplateTermVisitor<NodeTraverser, ProtonNodeTypes> { + bool _always_mark_phrase_expensive; + NodeTraverser(bool always_mark_phrase_expensive) : _always_mark_phrase_expensive(always_mark_phrase_expensive) {} template <class TermNode> void visitTerm(TermNode &) {} void visit(ProtonNodeTypes::And &n) override { for (Node *child: n.getChildren()) { @@ -84,14 +86,19 @@ struct NodeTraverser : TemplateTermVisitor<NodeTraverser, ProtonNodeTypes> } expander.flush(n); } + void visit(Phrase &n) override { + if (_always_mark_phrase_expensive) { + n.set_expensive(true); + } + } }; } // namespace proton::matching::<unnamed> search::query::Node::UP -UnpackingIteratorsOptimizer::optimize(search::query::Node::UP root, bool has_white_list) +UnpackingIteratorsOptimizer::optimize(search::query::Node::UP root, bool has_white_list, bool always_mark_phrase_expensive) { - NodeTraverser traverser; + NodeTraverser traverser(always_mark_phrase_expensive); root->accept(traverser); if (has_white_list) { TermExpander expander; diff --git a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h index f698b79dd0c..fc08ae3cfdd 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h +++ b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h @@ -12,7 +12,7 @@ namespace proton::matching { * expensive. **/ struct UnpackingIteratorsOptimizer { - static search::query::Node::UP optimize(search::query::Node::UP root, bool has_white_list); + static search::query::Node::UP optimize(search::query::Node::UP root, bool has_white_list, bool always_mark_phrase_expensive); }; } diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp index 9b111c4bd5d..f063bad66e1 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp @@ -454,6 +454,12 @@ FuzzyAlgorithm::lookup(const Properties& props, vespalib::FuzzyMatchingAlgorithm return vespalib::fuzzy_matching_algorithm_from_string(value, default_value); } +const vespalib::string AlwaysMarkPhraseExpensive::NAME("vespa.matching.always_mark_phrase_expensive"); +const bool AlwaysMarkPhraseExpensive::DEFAULT_VALUE(false); +bool AlwaysMarkPhraseExpensive::check(const Properties &props, bool fallback) { + return lookupBool(props, NAME, fallback); +} + } // namespace matching namespace softtimeout { diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h index c528c4366d6..348ce3ab5e2 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.h +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h @@ -339,6 +339,17 @@ namespace matching { static vespalib::FuzzyMatchingAlgorithm lookup(const Properties& props); static vespalib::FuzzyMatchingAlgorithm lookup(const Properties& props, vespalib::FuzzyMatchingAlgorithm default_value); }; + + /** + * When enabled, the unpacking part of the phrase iterator will be tagged as expensive + * under all intermediate iterators, not only AND. + **/ + struct AlwaysMarkPhraseExpensive { + static const vespalib::string NAME; + static const bool DEFAULT_VALUE; + static bool check(const Properties &props) { return check(props, DEFAULT_VALUE); } + static bool check(const Properties &props, bool fallback); + }; } namespace softtimeout { diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp index 33e7dbda04b..806be9af47c 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp @@ -60,6 +60,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i _compiled(false), _compileError(false), _degradationAscendingOrder(false), + _always_mark_phrase_expensive(false), _diversityAttribute(), _diversityMinGroups(1), _diversityCutoffFactor(10.0), @@ -135,6 +136,7 @@ RankSetup::configure() _mutateOnSummary._operation = mutate::on_summary::Operation::lookup(_indexEnv.getProperties()); _mutateAllowQueryOverride = mutate::AllowQueryOverride::check(_indexEnv.getProperties()); _enableNestedMultivalueGrouping = temporary::EnableNestedMultivalueGrouping::check(_indexEnv.getProperties()); + _always_mark_phrase_expensive = matching::AlwaysMarkPhraseExpensive::check(_indexEnv.getProperties()); } void diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h index 6f4651939ad..0e98c3f1c5d 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.h +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h @@ -32,7 +32,7 @@ public: : _attribute(attribute), _operation(operation) {} - bool enabled() const { return !_attribute.empty() && !_operation.empty(); } + bool enabled() const noexcept { return !_attribute.empty() && !_operation.empty(); } vespalib::string _attribute; vespalib::string _operation; }; @@ -69,6 +69,7 @@ private: bool _compiled; bool _compileError; bool _degradationAscendingOrder; + bool _always_mark_phrase_expensive; vespalib::string _diversityAttribute; uint32_t _diversityMinGroups; double _diversityCutoffFactor; @@ -221,6 +222,7 @@ public: bool isDegradationOrderAscending() const { return _degradationAscendingOrder; } + bool always_mark_phrase_expensive() const noexcept { return _always_mark_phrase_expensive; } /** get number of hits to collect during graceful degradation in match phase */ uint32_t getDegradationMaxHits() const { return _degradationMaxHits; |