summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2023-11-19 18:59:48 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2023-11-19 18:59:48 +0000
commit59a51afdf4ddf9ac7778f395b9943f479b8fbc74 (patch)
tree4bcd32f5ca3466380761472506670d2a743fde1b
parentf08f64eefbe294f7997d87d23bffed3ba60ba3f9 (diff)
Add flag for marking phrase always expensive.
-rw-r--r--searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp34
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp3
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/query.cpp5
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/query.h10
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp11
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h2
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.cpp6
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.h11
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.h4
10 files changed, 79 insertions, 9 deletions
diff --git a/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp b/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp
index eb222518710..2e1a28402a1 100644
--- a/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp
+++ b/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp
@@ -254,12 +254,34 @@ std::string split_query_tree_dump =
" Term a cheap\n"
" Term b cheap\n"
" Term c cheap\n";
+std::string split_query_tree_dump_always_expensive =
+ "And 7\n"
+ " Or 3\n"
+ " Term t2\n"
+ " Phrase 3 expensive\n"
+ " Term a\n"
+ " Term b\n"
+ " Term c\n"
+ " Term x1\n"
+ " Term x2\n"
+ " Phrase 3 expensive\n"
+ " Term a\n"
+ " Term b\n"
+ " Term c\n"
+ " Term t1\n"
+ " Term a cheap\n"
+ " Term b cheap\n"
+ " Term c cheap\n";
#endif
//-----------------------------------------------------------------------------
Node::UP optimize(Node::UP root, bool white_list) {
- return UnpackingIteratorsOptimizer::optimize(std::move(root), white_list);
+ return UnpackingIteratorsOptimizer::optimize(std::move(root), white_list, false);
+}
+
+Node::UP optimize(Node::UP root, bool white_list, bool always_mark_phrase_expensive) {
+ return UnpackingIteratorsOptimizer::optimize(std::move(root), white_list, always_mark_phrase_expensive);
}
TEST(UnpackingIteratorsOptimizerTest, require_that_root_phrase_node_can_be_left_alone) {
@@ -301,4 +323,14 @@ TEST(UnpackingIteratorsOptimizerTest, require_that_query_tree_can_be_split) {
EXPECT_EQ(actual2, expect);
}
+TEST(UnpackingIteratorsOptimizerTest, require_that_query_tree_can_be_split_always) {
+ std::string actual1 = dump_query(*optimize(make_query_tree(), false, false));
+ std::string actual2 = dump_query(*optimize(make_query_tree(), true, false));
+ std::string actual3 = dump_query(*optimize(make_query_tree(), true, true));
+ std::string expect = split_query_tree_dump;
+ EXPECT_EQ(actual1, expect);
+ EXPECT_EQ(actual2, expect);
+ EXPECT_EQ(actual3, split_query_tree_dump_always_expensive);
+}
+
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
index 7e7532a3182..7beecaca613 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
@@ -193,7 +193,8 @@ MatchToolsFactory(QueryLimiter & queryLimiter,
trace.addEvent(4, "Start query setup");
_query.setWhiteListBlueprint(metaStore.createWhiteListBlueprint());
trace.addEvent(5, "Deserialize and build query tree");
- _valid = _query.buildTree(queryStack, location, viewResolver, indexEnv);
+ _valid = _query.buildTree(queryStack, location, viewResolver, indexEnv,
+ AlwaysMarkPhraseExpensive::check(_queryEnv.getProperties(), rankSetup.always_mark_phrase_expensive()));
if (_valid) {
_query.extractTerms(_queryEnv.terms());
_query.extractLocations(_queryEnv.locations());
diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp
index d4f4ae8015d..071e914b405 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp
@@ -165,7 +165,8 @@ Query::~Query() = default;
bool
Query::buildTree(vespalib::stringref stack, const string &location,
- const ViewResolver &resolver, const IIndexEnvironment &indexEnv)
+ const ViewResolver &resolver, const IIndexEnvironment &indexEnv,
+ bool always_mark_phrase_expensive)
{
SimpleQueryStackDumpIterator stack_dump_iterator(stack);
_query_tree = QueryTreeCreator<ProtonNodeTypes>::create(stack_dump_iterator);
@@ -173,7 +174,7 @@ Query::buildTree(vespalib::stringref stack, const string &location,
SameElementModifier prefixSameElementSubIndexes;
_query_tree->accept(prefixSameElementSubIndexes);
exchange_location_nodes(location, _query_tree, _locations);
- _query_tree = UnpackingIteratorsOptimizer::optimize(std::move(_query_tree), bool(_whiteListBlueprint));
+ _query_tree = UnpackingIteratorsOptimizer::optimize(std::move(_query_tree), bool(_whiteListBlueprint), always_mark_phrase_expensive);
ResolveViewVisitor resolve_visitor(resolver, indexEnv);
_query_tree->accept(resolve_visitor);
return true;
diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h
index b67672ec3ef..6ea326834a5 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/query.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/query.h
@@ -55,7 +55,15 @@ public:
bool buildTree(vespalib::stringref stack,
const vespalib::string &location,
const ViewResolver &resolver,
- const search::fef::IIndexEnvironment &idxEnv);
+ const search::fef::IIndexEnvironment &idxEnv)
+ {
+ return buildTree(stack, location, resolver, idxEnv, false);
+ }
+ bool buildTree(vespalib::stringref stack,
+ const vespalib::string &location,
+ const ViewResolver &resolver,
+ const search::fef::IIndexEnvironment &idxEnv,
+ bool always_mark_phrase_expensive);
/**
* Extract query terms from the query tree; to be used to build
diff --git a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp
index e8dc8ab85ba..c9cfbbfd40e 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp
@@ -73,6 +73,8 @@ struct TermExpander : QueryVisitor {
struct NodeTraverser : TemplateTermVisitor<NodeTraverser, ProtonNodeTypes>
{
+ bool _always_mark_phrase_expensive;
+ NodeTraverser(bool always_mark_phrase_expensive) : _always_mark_phrase_expensive(always_mark_phrase_expensive) {}
template <class TermNode> void visitTerm(TermNode &) {}
void visit(ProtonNodeTypes::And &n) override {
for (Node *child: n.getChildren()) {
@@ -84,14 +86,19 @@ struct NodeTraverser : TemplateTermVisitor<NodeTraverser, ProtonNodeTypes>
}
expander.flush(n);
}
+ void visit(Phrase &n) override {
+ if (_always_mark_phrase_expensive) {
+ n.set_expensive(true);
+ }
+ }
};
} // namespace proton::matching::<unnamed>
search::query::Node::UP
-UnpackingIteratorsOptimizer::optimize(search::query::Node::UP root, bool has_white_list)
+UnpackingIteratorsOptimizer::optimize(search::query::Node::UP root, bool has_white_list, bool always_mark_phrase_expensive)
{
- NodeTraverser traverser;
+ NodeTraverser traverser(always_mark_phrase_expensive);
root->accept(traverser);
if (has_white_list) {
TermExpander expander;
diff --git a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h
index f698b79dd0c..fc08ae3cfdd 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h
@@ -12,7 +12,7 @@ namespace proton::matching {
* expensive.
**/
struct UnpackingIteratorsOptimizer {
- static search::query::Node::UP optimize(search::query::Node::UP root, bool has_white_list);
+ static search::query::Node::UP optimize(search::query::Node::UP root, bool has_white_list, bool always_mark_phrase_expensive);
};
}
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
index 9b111c4bd5d..f063bad66e1 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
@@ -454,6 +454,12 @@ FuzzyAlgorithm::lookup(const Properties& props, vespalib::FuzzyMatchingAlgorithm
return vespalib::fuzzy_matching_algorithm_from_string(value, default_value);
}
+const vespalib::string AlwaysMarkPhraseExpensive::NAME("vespa.matching.always_mark_phrase_expensive");
+const bool AlwaysMarkPhraseExpensive::DEFAULT_VALUE(false);
+bool AlwaysMarkPhraseExpensive::check(const Properties &props, bool fallback) {
+ return lookupBool(props, NAME, fallback);
+}
+
} // namespace matching
namespace softtimeout {
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h
index c528c4366d6..348ce3ab5e2 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.h
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h
@@ -339,6 +339,17 @@ namespace matching {
static vespalib::FuzzyMatchingAlgorithm lookup(const Properties& props);
static vespalib::FuzzyMatchingAlgorithm lookup(const Properties& props, vespalib::FuzzyMatchingAlgorithm default_value);
};
+
+ /**
+ * When enabled, the unpacking part of the phrase iterator will be tagged as expensive
+ * under all intermediate iterators, not only AND.
+ **/
+ struct AlwaysMarkPhraseExpensive {
+ static const vespalib::string NAME;
+ static const bool DEFAULT_VALUE;
+ static bool check(const Properties &props) { return check(props, DEFAULT_VALUE); }
+ static bool check(const Properties &props, bool fallback);
+ };
}
namespace softtimeout {
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
index 33e7dbda04b..806be9af47c 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
@@ -60,6 +60,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i
_compiled(false),
_compileError(false),
_degradationAscendingOrder(false),
+ _always_mark_phrase_expensive(false),
_diversityAttribute(),
_diversityMinGroups(1),
_diversityCutoffFactor(10.0),
@@ -135,6 +136,7 @@ RankSetup::configure()
_mutateOnSummary._operation = mutate::on_summary::Operation::lookup(_indexEnv.getProperties());
_mutateAllowQueryOverride = mutate::AllowQueryOverride::check(_indexEnv.getProperties());
_enableNestedMultivalueGrouping = temporary::EnableNestedMultivalueGrouping::check(_indexEnv.getProperties());
+ _always_mark_phrase_expensive = matching::AlwaysMarkPhraseExpensive::check(_indexEnv.getProperties());
}
void
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h
index 6f4651939ad..0e98c3f1c5d 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.h
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h
@@ -32,7 +32,7 @@ public:
: _attribute(attribute),
_operation(operation)
{}
- bool enabled() const { return !_attribute.empty() && !_operation.empty(); }
+ bool enabled() const noexcept { return !_attribute.empty() && !_operation.empty(); }
vespalib::string _attribute;
vespalib::string _operation;
};
@@ -69,6 +69,7 @@ private:
bool _compiled;
bool _compileError;
bool _degradationAscendingOrder;
+ bool _always_mark_phrase_expensive;
vespalib::string _diversityAttribute;
uint32_t _diversityMinGroups;
double _diversityCutoffFactor;
@@ -221,6 +222,7 @@ public:
bool isDegradationOrderAscending() const {
return _degradationAscendingOrder;
}
+ bool always_mark_phrase_expensive() const noexcept { return _always_mark_phrase_expensive; }
/** get number of hits to collect during graceful degradation in match phase */
uint32_t getDegradationMaxHits() const {
return _degradationMaxHits;