summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2023-11-20 11:08:43 +0100
committerGitHub <noreply@github.com>2023-11-20 11:08:43 +0100
commit195f000f27001b74721649905a9a1e48fb9f665c (patch)
treefa66f6154d9d8f07b5dab5c311a6a360d80df8a6
parent58596788da2e3fc0afac6ea8f3de0b3af6ce1c32 (diff)
parentf5d72158d305cf027f7ad34956303ac669802046 (diff)
Merge pull request #29379 from vespa-engine/balder/add-flag-for-always-expensive-phrase
Add flag for marking phrase always expensive.
-rw-r--r--config-model-api/abi-spec.json3
-rw-r--r--config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java1
-rw-r--r--config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java5
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java3
-rw-r--r--flags/src/main/java/com/yahoo/vespa/flags/Flags.java7
-rw-r--r--searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp34
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp3
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/query.cpp5
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/query.h10
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp11
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h2
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.cpp6
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.h11
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.h4
15 files changed, 97 insertions, 10 deletions
diff --git a/config-model-api/abi-spec.json b/config-model-api/abi-spec.json
index 1b505cdbfae..bf2f2cfac44 100644
--- a/config-model-api/abi-spec.json
+++ b/config-model-api/abi-spec.json
@@ -1288,7 +1288,8 @@
"public java.lang.String unknownConfigDefinition()",
"public int searchHandlerThreadpool()",
"public long mergingMaxMemoryUsagePerNode()",
- "public boolean usePerDocumentThrottledDeleteBucket()"
+ "public boolean usePerDocumentThrottledDeleteBucket()",
+ "public boolean alwaysMarkPhraseExpensive()"
],
"fields" : [ ]
},
diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
index 188c4a32978..d9f3a76d9dc 100644
--- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
+++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
@@ -119,6 +119,7 @@ public interface ModelContext {
@ModelFeatureFlag(owners = {"hmusum"}) default int searchHandlerThreadpool() { return 2; }
@ModelFeatureFlag(owners = {"vekterli"}) default long mergingMaxMemoryUsagePerNode() { return -1; }
@ModelFeatureFlag(owners = {"vekterli"}) default boolean usePerDocumentThrottledDeleteBucket() { return false; }
+ @ModelFeatureFlag(owners = {"baldersheim"}) default boolean alwaysMarkPhraseExpensive() { return false; }
}
/** Warning: As elsewhere in this package, do not make backwards incompatible changes that will break old config models! */
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java
index 8606599f530..d87a38e31ae 100644
--- a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java
+++ b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java
@@ -172,6 +172,7 @@ public class RawRankProfile implements RankProfilesConfig.Producer {
private final OptionalDouble targetHitsMaxAdjustmentFactor;
private final double rankScoreDropLimit;
private final boolean enableNestedMultivalueGrouping;
+ private final boolean alwaysMarkPhraseExpensive;
/**
* The rank type definitions used to derive settings for the native rank features
@@ -214,6 +215,7 @@ public class RawRankProfile implements RankProfilesConfig.Producer {
numSearchPartitions = compiled.getNumSearchPartitions();
termwiseLimit = compiled.getTermwiseLimit().orElse(deployProperties.featureFlags().defaultTermwiseLimit());
enableNestedMultivalueGrouping = deployProperties.featureFlags().enableNestedMultivalueGrouping();
+ alwaysMarkPhraseExpensive = deployProperties.featureFlags().alwaysMarkPhraseExpensive();
postFilterThreshold = compiled.getPostFilterThreshold();
approximateThreshold = compiled.getApproximateThreshold();
targetHitsMaxAdjustmentFactor = compiled.getTargetHitsMaxAdjustmentFactor();
@@ -467,6 +469,9 @@ public class RawRankProfile implements RankProfilesConfig.Producer {
if (enableNestedMultivalueGrouping) {
properties.add(new Pair<>("vespa.temporary.enable_nested_multivalue_grouping", String.valueOf(enableNestedMultivalueGrouping)));
}
+ if (alwaysMarkPhraseExpensive) {
+ properties.add(new Pair<>("vespa.matching.always_mark_phrase_expensive", String.valueOf(alwaysMarkPhraseExpensive)));
+ }
if (postFilterThreshold.isPresent()) {
properties.add(new Pair<>("vespa.matching.global_filter.upper_limit", String.valueOf(postFilterThreshold.getAsDouble())));
}
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
index 28d7c1eaef6..866431c218e 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
@@ -205,6 +205,7 @@ public class ModelContextImpl implements ModelContext {
private final boolean enableGlobalPhase;
private final String summaryDecodePolicy;
private final boolean enableNestedMultivalueGrouping;
+ private final boolean alwaysMarkPhraseExpensive;
private final int contentLayerMetadataFeatureLevel;
private final boolean dynamicHeapSize;
private final String unknownConfigDefinition;
@@ -257,6 +258,7 @@ public class ModelContextImpl implements ModelContext {
this.searchHandlerThreadpool = flagValue(source, appId, version, Flags.SEARCH_HANDLER_THREADPOOL);
this.mergingMaxMemoryUsagePerNode = flagValue(source, appId, version, Flags.MERGING_MAX_MEMORY_USAGE_PER_NODE);
this.usePerDocumentThrottledDeleteBucket = flagValue(source, appId, version, Flags.USE_PER_DOCUMENT_THROTTLED_DELETE_BUCKET);
+ this.alwaysMarkPhraseExpensive = flagValue(source, appId, version, Flags.ALWAYS_MARK_PHRASE_EXPENSIVE);
}
@Override public int heapSizePercentage() { return heapPercentage; }
@@ -305,6 +307,7 @@ public class ModelContextImpl implements ModelContext {
}
@Override public boolean enableGlobalPhase() { return enableGlobalPhase; }
@Override public boolean enableNestedMultivalueGrouping() { return enableNestedMultivalueGrouping; }
+ @Override public boolean alwaysMarkPhraseExpensive() { return alwaysMarkPhraseExpensive; }
@Override public int contentLayerMetadataFeatureLevel() { return contentLayerMetadataFeatureLevel; }
@Override public boolean dynamicHeapSize() { return dynamicHeapSize; }
@Override public String unknownConfigDefinition() { return unknownConfigDefinition; }
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
index 7a7087fb434..b08dec1c267 100644
--- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
+++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
@@ -320,6 +320,13 @@ public class Flags {
"Takes effect at redeployment",
INSTANCE_ID);
+ public static final UnboundBooleanFlag ALWAYS_MARK_PHRASE_EXPENSIVE = defineFeatureFlag(
+ "always-mark-phrase-expensive", false,
+ List.of("baldersheim"), "2023-11-20", "2023-12-31",
+ "If true all phrases will be marked expensive, independent of parents",
+ "Takes effect at redeployment",
+ INSTANCE_ID);
+
public static final UnboundBooleanFlag WRITE_CONFIG_SERVER_SESSION_DATA_AS_ONE_BLOB = defineFeatureFlag(
"write-config-server-session-data-as-blob", false,
List.of("hmusum"), "2023-07-19", "2024-01-01",
diff --git a/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp b/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp
index eb222518710..2e1a28402a1 100644
--- a/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp
+++ b/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp
@@ -254,12 +254,34 @@ std::string split_query_tree_dump =
" Term a cheap\n"
" Term b cheap\n"
" Term c cheap\n";
+std::string split_query_tree_dump_always_expensive =
+ "And 7\n"
+ " Or 3\n"
+ " Term t2\n"
+ " Phrase 3 expensive\n"
+ " Term a\n"
+ " Term b\n"
+ " Term c\n"
+ " Term x1\n"
+ " Term x2\n"
+ " Phrase 3 expensive\n"
+ " Term a\n"
+ " Term b\n"
+ " Term c\n"
+ " Term t1\n"
+ " Term a cheap\n"
+ " Term b cheap\n"
+ " Term c cheap\n";
#endif
//-----------------------------------------------------------------------------
Node::UP optimize(Node::UP root, bool white_list) {
- return UnpackingIteratorsOptimizer::optimize(std::move(root), white_list);
+ return UnpackingIteratorsOptimizer::optimize(std::move(root), white_list, false);
+}
+
+Node::UP optimize(Node::UP root, bool white_list, bool always_mark_phrase_expensive) {
+ return UnpackingIteratorsOptimizer::optimize(std::move(root), white_list, always_mark_phrase_expensive);
}
TEST(UnpackingIteratorsOptimizerTest, require_that_root_phrase_node_can_be_left_alone) {
@@ -301,4 +323,14 @@ TEST(UnpackingIteratorsOptimizerTest, require_that_query_tree_can_be_split) {
EXPECT_EQ(actual2, expect);
}
+TEST(UnpackingIteratorsOptimizerTest, require_that_query_tree_can_be_split_always) {
+ std::string actual1 = dump_query(*optimize(make_query_tree(), false, false));
+ std::string actual2 = dump_query(*optimize(make_query_tree(), true, false));
+ std::string actual3 = dump_query(*optimize(make_query_tree(), true, true));
+ std::string expect = split_query_tree_dump;
+ EXPECT_EQ(actual1, expect);
+ EXPECT_EQ(actual2, expect);
+ EXPECT_EQ(actual3, split_query_tree_dump_always_expensive);
+}
+
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
index 7e7532a3182..7beecaca613 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
@@ -193,7 +193,8 @@ MatchToolsFactory(QueryLimiter & queryLimiter,
trace.addEvent(4, "Start query setup");
_query.setWhiteListBlueprint(metaStore.createWhiteListBlueprint());
trace.addEvent(5, "Deserialize and build query tree");
- _valid = _query.buildTree(queryStack, location, viewResolver, indexEnv);
+ _valid = _query.buildTree(queryStack, location, viewResolver, indexEnv,
+ AlwaysMarkPhraseExpensive::check(_queryEnv.getProperties(), rankSetup.always_mark_phrase_expensive()));
if (_valid) {
_query.extractTerms(_queryEnv.terms());
_query.extractLocations(_queryEnv.locations());
diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp
index d4f4ae8015d..071e914b405 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp
@@ -165,7 +165,8 @@ Query::~Query() = default;
bool
Query::buildTree(vespalib::stringref stack, const string &location,
- const ViewResolver &resolver, const IIndexEnvironment &indexEnv)
+ const ViewResolver &resolver, const IIndexEnvironment &indexEnv,
+ bool always_mark_phrase_expensive)
{
SimpleQueryStackDumpIterator stack_dump_iterator(stack);
_query_tree = QueryTreeCreator<ProtonNodeTypes>::create(stack_dump_iterator);
@@ -173,7 +174,7 @@ Query::buildTree(vespalib::stringref stack, const string &location,
SameElementModifier prefixSameElementSubIndexes;
_query_tree->accept(prefixSameElementSubIndexes);
exchange_location_nodes(location, _query_tree, _locations);
- _query_tree = UnpackingIteratorsOptimizer::optimize(std::move(_query_tree), bool(_whiteListBlueprint));
+ _query_tree = UnpackingIteratorsOptimizer::optimize(std::move(_query_tree), bool(_whiteListBlueprint), always_mark_phrase_expensive);
ResolveViewVisitor resolve_visitor(resolver, indexEnv);
_query_tree->accept(resolve_visitor);
return true;
diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h
index b67672ec3ef..6ea326834a5 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/query.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/query.h
@@ -55,7 +55,15 @@ public:
bool buildTree(vespalib::stringref stack,
const vespalib::string &location,
const ViewResolver &resolver,
- const search::fef::IIndexEnvironment &idxEnv);
+ const search::fef::IIndexEnvironment &idxEnv)
+ {
+ return buildTree(stack, location, resolver, idxEnv, false);
+ }
+ bool buildTree(vespalib::stringref stack,
+ const vespalib::string &location,
+ const ViewResolver &resolver,
+ const search::fef::IIndexEnvironment &idxEnv,
+ bool always_mark_phrase_expensive);
/**
* Extract query terms from the query tree; to be used to build
diff --git a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp
index e8dc8ab85ba..c9cfbbfd40e 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp
@@ -73,6 +73,8 @@ struct TermExpander : QueryVisitor {
struct NodeTraverser : TemplateTermVisitor<NodeTraverser, ProtonNodeTypes>
{
+ bool _always_mark_phrase_expensive;
+ NodeTraverser(bool always_mark_phrase_expensive) : _always_mark_phrase_expensive(always_mark_phrase_expensive) {}
template <class TermNode> void visitTerm(TermNode &) {}
void visit(ProtonNodeTypes::And &n) override {
for (Node *child: n.getChildren()) {
@@ -84,14 +86,19 @@ struct NodeTraverser : TemplateTermVisitor<NodeTraverser, ProtonNodeTypes>
}
expander.flush(n);
}
+ void visit(Phrase &n) override {
+ if (_always_mark_phrase_expensive) {
+ n.set_expensive(true);
+ }
+ }
};
} // namespace proton::matching::<unnamed>
search::query::Node::UP
-UnpackingIteratorsOptimizer::optimize(search::query::Node::UP root, bool has_white_list)
+UnpackingIteratorsOptimizer::optimize(search::query::Node::UP root, bool has_white_list, bool always_mark_phrase_expensive)
{
- NodeTraverser traverser;
+ NodeTraverser traverser(always_mark_phrase_expensive);
root->accept(traverser);
if (has_white_list) {
TermExpander expander;
diff --git a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h
index f698b79dd0c..fc08ae3cfdd 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.h
@@ -12,7 +12,7 @@ namespace proton::matching {
* expensive.
**/
struct UnpackingIteratorsOptimizer {
- static search::query::Node::UP optimize(search::query::Node::UP root, bool has_white_list);
+ static search::query::Node::UP optimize(search::query::Node::UP root, bool has_white_list, bool always_mark_phrase_expensive);
};
}
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
index 9b111c4bd5d..f063bad66e1 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
@@ -454,6 +454,12 @@ FuzzyAlgorithm::lookup(const Properties& props, vespalib::FuzzyMatchingAlgorithm
return vespalib::fuzzy_matching_algorithm_from_string(value, default_value);
}
+const vespalib::string AlwaysMarkPhraseExpensive::NAME("vespa.matching.always_mark_phrase_expensive");
+const bool AlwaysMarkPhraseExpensive::DEFAULT_VALUE(false);
+bool AlwaysMarkPhraseExpensive::check(const Properties &props, bool fallback) {
+ return lookupBool(props, NAME, fallback);
+}
+
} // namespace matching
namespace softtimeout {
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h
index c528c4366d6..348ce3ab5e2 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.h
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h
@@ -339,6 +339,17 @@ namespace matching {
static vespalib::FuzzyMatchingAlgorithm lookup(const Properties& props);
static vespalib::FuzzyMatchingAlgorithm lookup(const Properties& props, vespalib::FuzzyMatchingAlgorithm default_value);
};
+
+ /**
+ * When enabled, the unpacking part of the phrase iterator will be tagged as expensive
+ * under all intermediate iterators, not only AND.
+ **/
+ struct AlwaysMarkPhraseExpensive {
+ static const vespalib::string NAME;
+ static const bool DEFAULT_VALUE;
+ static bool check(const Properties &props) { return check(props, DEFAULT_VALUE); }
+ static bool check(const Properties &props, bool fallback);
+ };
}
namespace softtimeout {
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
index 33e7dbda04b..806be9af47c 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
@@ -60,6 +60,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i
_compiled(false),
_compileError(false),
_degradationAscendingOrder(false),
+ _always_mark_phrase_expensive(false),
_diversityAttribute(),
_diversityMinGroups(1),
_diversityCutoffFactor(10.0),
@@ -135,6 +136,7 @@ RankSetup::configure()
_mutateOnSummary._operation = mutate::on_summary::Operation::lookup(_indexEnv.getProperties());
_mutateAllowQueryOverride = mutate::AllowQueryOverride::check(_indexEnv.getProperties());
_enableNestedMultivalueGrouping = temporary::EnableNestedMultivalueGrouping::check(_indexEnv.getProperties());
+ _always_mark_phrase_expensive = matching::AlwaysMarkPhraseExpensive::check(_indexEnv.getProperties());
}
void
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h
index 6f4651939ad..0e98c3f1c5d 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.h
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h
@@ -32,7 +32,7 @@ public:
: _attribute(attribute),
_operation(operation)
{}
- bool enabled() const { return !_attribute.empty() && !_operation.empty(); }
+ bool enabled() const noexcept { return !_attribute.empty() && !_operation.empty(); }
vespalib::string _attribute;
vespalib::string _operation;
};
@@ -69,6 +69,7 @@ private:
bool _compiled;
bool _compileError;
bool _degradationAscendingOrder;
+ bool _always_mark_phrase_expensive;
vespalib::string _diversityAttribute;
uint32_t _diversityMinGroups;
double _diversityCutoffFactor;
@@ -221,6 +222,7 @@ public:
bool isDegradationOrderAscending() const {
return _degradationAscendingOrder;
}
+ bool always_mark_phrase_expensive() const noexcept { return _always_mark_phrase_expensive; }
/** get number of hits to collect during graceful degradation in match phase */
uint32_t getDegradationMaxHits() const {
return _degradationMaxHits;