diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-11-20 13:30:26 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-20 13:30:26 +0100 |
commit | 92e4c788dbc2f8b26e8a2336744e94d482552fd4 (patch) | |
tree | b31ab7970cd4388ea2dff345de2851fa333b8819 | |
parent | 1a193c56caf6140319705e8e6d24e845faf1c242 (diff) | |
parent | ebc03875ab6fc5c1a3629be25da6d6b56765c4ec (diff) |
Merge pull request #29384 from vespa-engine/balder/always-nesting-multivalue-grouping-for-indexed-search-now
Balder/always nesting multivalue grouping for indexed search now
14 files changed, 37 insertions, 73 deletions
diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java index d9f3a76d9dc..5016e70aea1 100644 --- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java @@ -111,7 +111,7 @@ public interface ModelContext { @ModelFeatureFlag(owners = {"arnej"}) default String logFileCompressionAlgorithm(String defVal) { return defVal; } @ModelFeatureFlag(owners = {"arnej, bjorncs"}) default boolean enableGlobalPhase() { return true; } @ModelFeatureFlag(owners = {"baldersheim"}, comment = "Select summary decode type") default String summaryDecodePolicy() { return "eager"; } - @ModelFeatureFlag(owners = {"baldersheim"}) default boolean enableNestedMultivalueGrouping() { return false; } + @ModelFeatureFlag(owners = {"baldersheim"}, removeAfter = "8.261") default boolean enableNestedMultivalueGrouping() { return true; } @ModelFeatureFlag(owners = {"jonmv"}, removeAfter = "8.250") default boolean useReconfigurableDispatcher() { return true; } @ModelFeatureFlag(owners = {"vekterli"}) default int contentLayerMetadataFeatureLevel() { return 0; } @ModelFeatureFlag(owners = {"bjorncs"}) default boolean dynamicHeapSize() { return false; } diff --git a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java index d87a38e31ae..388d2627224 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java @@ -171,7 +171,6 @@ public class RawRankProfile implements RankProfilesConfig.Producer { private final OptionalDouble approximateThreshold; private final OptionalDouble targetHitsMaxAdjustmentFactor; private final double rankScoreDropLimit; - private final boolean enableNestedMultivalueGrouping; private final boolean alwaysMarkPhraseExpensive; /** @@ -214,7 +213,6 @@ public class RawRankProfile implements RankProfilesConfig.Producer { minHitsPerThread = compiled.getMinHitsPerThread(); numSearchPartitions = compiled.getNumSearchPartitions(); termwiseLimit = compiled.getTermwiseLimit().orElse(deployProperties.featureFlags().defaultTermwiseLimit()); - enableNestedMultivalueGrouping = deployProperties.featureFlags().enableNestedMultivalueGrouping(); alwaysMarkPhraseExpensive = deployProperties.featureFlags().alwaysMarkPhraseExpensive(); postFilterThreshold = compiled.getPostFilterThreshold(); approximateThreshold = compiled.getApproximateThreshold(); @@ -466,9 +464,6 @@ public class RawRankProfile implements RankProfilesConfig.Producer { if (termwiseLimit < 1.0) { properties.add(new Pair<>("vespa.matching.termwise_limit", termwiseLimit + "")); } - if (enableNestedMultivalueGrouping) { - properties.add(new Pair<>("vespa.temporary.enable_nested_multivalue_grouping", String.valueOf(enableNestedMultivalueGrouping))); - } if (alwaysMarkPhraseExpensive) { properties.add(new Pair<>("vespa.matching.always_mark_phrase_expensive", String.valueOf(alwaysMarkPhraseExpensive))); } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java index 866431c218e..7985860273f 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java @@ -204,7 +204,6 @@ public class ModelContextImpl implements ModelContext { private final int heapPercentage; private final boolean enableGlobalPhase; private final String summaryDecodePolicy; - private final boolean enableNestedMultivalueGrouping; private final boolean alwaysMarkPhraseExpensive; private final int contentLayerMetadataFeatureLevel; private final boolean dynamicHeapSize; @@ -251,7 +250,6 @@ public class ModelContextImpl implements ModelContext { this.heapPercentage = flagValue(source, appId, version, PermanentFlags.HEAP_SIZE_PERCENTAGE); this.enableGlobalPhase = flagValue(source, appId, version, Flags.ENABLE_GLOBAL_PHASE); this.summaryDecodePolicy = flagValue(source, appId, version, Flags.SUMMARY_DECODE_POLICY); - this.enableNestedMultivalueGrouping = flagValue(source, appId, version, Flags.ENABLE_NESTED_MULTIVALUE_GROUPING); this.contentLayerMetadataFeatureLevel = flagValue(source, appId, version, Flags.CONTENT_LAYER_METADATA_FEATURE_LEVEL); this.dynamicHeapSize = flagValue(source, appId, version, Flags.DYNAMIC_HEAP_SIZE); this.unknownConfigDefinition = flagValue(source, appId, version, Flags.UNKNOWN_CONFIG_DEFINITION); @@ -306,7 +304,6 @@ public class ModelContextImpl implements ModelContext { return defVal; } @Override public boolean enableGlobalPhase() { return enableGlobalPhase; } - @Override public boolean enableNestedMultivalueGrouping() { return enableNestedMultivalueGrouping; } @Override public boolean alwaysMarkPhraseExpensive() { return alwaysMarkPhraseExpensive; } @Override public int contentLayerMetadataFeatureLevel() { return contentLayerMetadataFeatureLevel; } @Override public boolean dynamicHeapSize() { return dynamicHeapSize; } diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index b08dec1c267..64218dbc800 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -313,13 +313,6 @@ public class Flags { "Where specified, CNAME records are used instead of the default ALIAS records, which have a default 60s TTL.", "Takes effect at redeployment from controller"); - public static final UnboundBooleanFlag ENABLE_NESTED_MULTIVALUE_GROUPING = defineFeatureFlag( - "enable-nested-multivalue-grouping", true, - List.of("baldersheim"), "2023-06-29", "2023-12-31", - "Should we enable proper nested multivalue grouping", - "Takes effect at redeployment", - INSTANCE_ID); - public static final UnboundBooleanFlag ALWAYS_MARK_PHRASE_EXPENSIVE = defineFeatureFlag( "always-mark-phrase-expensive", false, List.of("baldersheim"), "2023-11-20", "2023-12-31", diff --git a/searchcore/src/tests/grouping/grouping_test.cpp b/searchcore/src/tests/grouping/grouping_test.cpp index a2f646cba3a..fb46fb65f3b 100644 --- a/searchcore/src/tests/grouping/grouping_test.cpp +++ b/searchcore/src/tests/grouping/grouping_test.cpp @@ -45,7 +45,7 @@ struct MyWorld { bv.setInterval(0, NUM_DOCS); // attribute context { - SingleInt32ExtAttribute *attr = new SingleInt32ExtAttribute("attr0"); + auto *attr = new SingleInt32ExtAttribute("attr0"); AttributeVector::DocId docid; for (uint32_t i = 0; i < NUM_DOCS; ++i) { attr->addDoc(docid); @@ -55,7 +55,7 @@ struct MyWorld { attributeContext.add(attr); } { - SingleInt32ExtAttribute *attr = new SingleInt32ExtAttribute("attr1"); + auto *attr = new SingleInt32ExtAttribute("attr1"); AttributeVector::DocId docid; for (uint32_t i = 0; i < NUM_DOCS; ++i) { attr->addDoc(docid); @@ -65,7 +65,7 @@ struct MyWorld { attributeContext.add(attr); } { - SingleInt32ExtAttribute *attr = new SingleInt32ExtAttribute("attr2"); + auto *attr = new SingleInt32ExtAttribute("attr2"); AttributeVector::DocId docid; for (uint32_t i = 0; i < NUM_DOCS; ++i) { attr->addDoc(docid); @@ -75,7 +75,7 @@ struct MyWorld { attributeContext.add(attr); } { - SingleInt32ExtAttribute *attr = new SingleInt32ExtAttribute("attr3"); + auto *attr = new SingleInt32ExtAttribute("attr3"); AttributeVector::DocId docid; for (uint32_t i = 0; i < NUM_DOCS; ++i) { attr->addDoc(docid); @@ -94,16 +94,17 @@ using GroupingList = GroupingContext::GroupingList; SessionId createSessionId(const std::string & s) { std::vector<char> vec; - for (size_t i = 0; i < s.size(); i++) { - vec.push_back(s[i]); + for (char c : s) { + vec.push_back(c); } - return SessionId(&vec[0], vec.size()); + return {vec.data(), vec.size()}; } class CheckAttributeReferences : public vespalib::ObjectOperation, public vespalib::ObjectPredicate { public: - CheckAttributeReferences(bool log=false) : _log(log), _numrefs(0) { } + CheckAttributeReferences() : CheckAttributeReferences(false) {} + explicit CheckAttributeReferences(bool log) : _log(log), _numrefs(0) { } bool _log; uint32_t _numrefs; private: @@ -177,7 +178,7 @@ TEST_F("testGroupingContextInitialization", DoomFixture()) { baseRequest.serialize(nos); AllocatedBitVector bv(1); - GroupingContext context(bv, f1.clock.clock(), f1.timeOfDoom, os.data(), os.size(), true); + GroupingContext context(bv, f1.clock.clock(), f1.timeOfDoom, os.data(), os.size()); ASSERT_TRUE(!context.empty()); GroupingContext::GroupingList list = context.getGroupingList(); ASSERT_TRUE(list.size() == 1); @@ -303,8 +304,8 @@ TEST_F("testGroupingSession", DoomFixture()) { manager.groupInRelevanceOrder(&hit, 1); CheckAttributeReferences attrCheck_after; GroupingList &gl3(initContext.getGroupingList()); - for (unsigned int i = 0; i < gl3.size(); i++) { - gl3[i]->select(attrCheck_after, attrCheck_after); + for (auto & grouping : gl3) { + grouping->select(attrCheck_after, attrCheck_after); } EXPECT_EQUAL(attrCheck_after._numrefs, 0u); { @@ -423,9 +424,9 @@ void doGrouping(GroupingContext &ctx, { GroupingManager man(ctx); std::vector<RankedHit> hits; - hits.push_back(RankedHit(doc1, rank1)); - hits.push_back(RankedHit(doc2, rank2)); - hits.push_back(RankedHit(doc3, rank3)); + hits.emplace_back(doc1, rank1); + hits.emplace_back(doc2, rank2); + hits.emplace_back(doc3, rank3); man.groupInRelevanceOrder(&hits[0], 3); } diff --git a/searchcore/src/vespa/searchcore/grouping/groupingcontext.cpp b/searchcore/src/vespa/searchcore/grouping/groupingcontext.cpp index 63127d01450..882a4d1509d 100644 --- a/searchcore/src/vespa/searchcore/grouping/groupingcontext.cpp +++ b/searchcore/src/vespa/searchcore/grouping/groupingcontext.cpp @@ -2,7 +2,6 @@ #include "groupingcontext.h" #include <vespa/searchlib/aggregation/predicates.h> -#include <vespa/searchlib/aggregation/modifiers.h> #include <vespa/searchlib/aggregation/hitsaggregationresult.h> #include <vespa/searchlib/common/bitvector.h> @@ -53,13 +52,12 @@ GroupingContext::setDistributionKey(uint32_t distributionKey) } GroupingContext::GroupingContext(const BitVector & validLids, const vespalib::Clock & clock, vespalib::steady_time timeOfDoom, - const char *groupSpec, uint32_t groupSpecLen, bool enableNested) + const char *groupSpec, uint32_t groupSpecLen) : _validLids(validLids), _clock(clock), _timeOfDoom(timeOfDoom), _os(), - _groupingList(), - _enableNestedMultivalueGrouping(enableNested) + _groupingList() { deserialize(groupSpec, groupSpecLen); } @@ -69,8 +67,7 @@ GroupingContext::GroupingContext(const BitVector & validLids, const vespalib::Cl _clock(clock), _timeOfDoom(timeOfDoom), _os(), - _groupingList(), - _enableNestedMultivalueGrouping(true) + _groupingList() { } GroupingContext::GroupingContext(const GroupingContext & rhs) @@ -78,8 +75,7 @@ GroupingContext::GroupingContext(const GroupingContext & rhs) _clock(rhs._clock), _timeOfDoom(rhs._timeOfDoom), _os(), - _groupingList(), - _enableNestedMultivalueGrouping(rhs._enableNestedMultivalueGrouping) + _groupingList() { } void @@ -100,7 +96,7 @@ GroupingContext::serialize() } bool -GroupingContext::needRanking() const +GroupingContext::needRanking() const noexcept { if (_groupingList.empty()) { return false; diff --git a/searchcore/src/vespa/searchcore/grouping/groupingcontext.h b/searchcore/src/vespa/searchcore/grouping/groupingcontext.h index c29b5122f74..e1b296df99b 100644 --- a/searchcore/src/vespa/searchcore/grouping/groupingcontext.h +++ b/searchcore/src/vespa/searchcore/grouping/groupingcontext.h @@ -34,7 +34,7 @@ public: * @param groupSpecLen The length of the grouping specification, in bytes. **/ GroupingContext(const BitVector & validLids, const vespalib::Clock & clock, vespalib::steady_time timeOfDoom, - const char *groupSpec, uint32_t groupSpecLen, bool enableNestedMultivalueGrouping); + const char *groupSpec, uint32_t groupSpecLen); /** * Create a new grouping context from a byte buffer. @@ -105,9 +105,7 @@ public: * Figure out if ranking is necessary for any of the grouping requests here. * @return true if ranking is required. */ - bool needRanking() const; - bool enableNestedMultivalueGrouping() const noexcept { return _enableNestedMultivalueGrouping; } - const search::BitVector & getValidLids() const { return _validLids; } + bool needRanking() const noexcept; void groupUnordered(const RankedHit *searchResults, uint32_t binSize, const search::BitVector * overflow); void groupInRelevanceOrder(const RankedHit *searchResults, uint32_t binSize); @@ -123,7 +121,6 @@ private: vespalib::steady_time _timeOfDoom; vespalib::nbostream _os; GroupingList _groupingList; - bool _enableNestedMultivalueGrouping; }; } diff --git a/searchcore/src/vespa/searchcore/grouping/groupingmanager.cpp b/searchcore/src/vespa/searchcore/grouping/groupingmanager.cpp index f5bcc935cb1..799d2663589 100644 --- a/searchcore/src/vespa/searchcore/grouping/groupingmanager.cpp +++ b/searchcore/src/vespa/searchcore/grouping/groupingmanager.cpp @@ -1,7 +1,6 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "groupingmanager.h" -#include "groupingsession.h" #include "groupingcontext.h" #include <vespa/searchlib/aggregation/fs4hit.h> #include <vespa/searchlib/expression/attributenode.h> @@ -52,11 +51,11 @@ GroupingManager::init(const IAttributeContext &attrCtx) ExpressionNode & en = *level.getExpression().getRoot(); if (en.inherits(AttributeNode::classId)) { - AttributeNode & an = static_cast<AttributeNode &>(en); + auto & an = static_cast<AttributeNode &>(en); an.enableEnumOptimization(true); } } - ConfigureStaticParams stuff(&attrCtx, nullptr, _groupingContext.enableNestedMultivalueGrouping()); + ConfigureStaticParams stuff(&attrCtx, nullptr); grouping.configureStaticStuff(stuff); list.push_back(groupingList[i]); } catch (const std::exception & e) { @@ -96,10 +95,9 @@ void GroupingManager::prune() { GroupingContext::GroupingList &groupingList(_groupingContext.getGroupingList()); - for (size_t i = 0; i < groupingList.size(); ++i) { - Grouping &g = *groupingList[i]; - g.postMerge(); - g.sortById(); + for (const auto & g : groupingList) { + g->postMerge(); + g->sortById(); } } @@ -107,10 +105,9 @@ void GroupingManager::convertToGlobalId(const search::IDocumentMetaStore &metaStore) { GroupingContext::GroupingList & groupingList = _groupingContext.getGroupingList(); - for (size_t i = 0; i < groupingList.size(); ++i) { - Grouping & g = *groupingList[i]; - g.convertToGlobalId(metaStore); - LOG(debug, "convertToGlobalId: %s", g.asString().c_str()); + for (const auto & g : groupingList) { + g->convertToGlobalId(metaStore); + LOG(debug, "convertToGlobalId: %s", g->asString().c_str()); } } diff --git a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp index 3c8c90a229d..3e8909aa593 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp @@ -249,7 +249,7 @@ Matcher::match(const SearchRequest &request, vespalib::ThreadBundle &threadBundl { // we want to measure full set-up and tear-down time as part of // collateral time GroupingContext groupingContext(metaStore.getValidLids(), _clock, request.getTimeOfDoom(), - request.groupSpec.data(), request.groupSpec.size(), _rankSetup->enableNestedMultivalueGrouping()); + request.groupSpec.data(), request.groupSpec.size()); SessionId sessionId(request.sessionId.data(), request.sessionId.size()); bool shouldCacheSearchSession = false; bool shouldCacheGroupingSession = false; diff --git a/searchlib/src/vespa/searchlib/expression/expressiontree.h b/searchlib/src/vespa/searchlib/expression/expressiontree.h index 52c075f3e29..127de66b6dc 100644 --- a/searchlib/src/vespa/searchlib/expression/expressiontree.h +++ b/searchlib/src/vespa/searchlib/expression/expressiontree.h @@ -50,11 +50,11 @@ public: }; ExpressionTree() noexcept; - ExpressionTree(const ExpressionNode & root); - ExpressionTree(ExpressionNode::UP root); + explicit ExpressionTree(const ExpressionNode & root); + explicit ExpressionTree(ExpressionNode::UP root); ExpressionTree(const ExpressionTree & rhs); ExpressionTree(ExpressionTree &&) noexcept = default; - ~ExpressionTree(); + ~ExpressionTree() override; ExpressionTree & operator = (ExpressionNode::UP rhs); ExpressionTree & operator = (const ExpressionTree & rhs); ExpressionTree & operator = (ExpressionTree &&) noexcept = default; diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp index f063bad66e1..5d3dbe69e77 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp @@ -173,11 +173,6 @@ namespace onsummary { namespace temporary { -const vespalib::string EnableNestedMultivalueGrouping::NAME("vespa.temporary.enable_nested_multivalue_grouping"); -bool EnableNestedMultivalueGrouping::check(const Properties &props) { - return lookupBool(props, NAME, false); -} - } namespace mutate { diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h index 348ce3ab5e2..1921f52276f 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.h +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h @@ -176,11 +176,8 @@ namespace mutate { }; } +// Add temporary flags used for safe rollout of new features here namespace temporary { - struct EnableNestedMultivalueGrouping { - static const vespalib::string NAME; - static bool check(const Properties &props); - }; } namespace mutate::on_match { diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp index 806be9af47c..d6b0b900516 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp @@ -75,8 +75,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i _mutateOnFirstPhase(), _mutateOnSecondPhase(), _mutateOnSummary(), - _mutateAllowQueryOverride(false), - _enableNestedMultivalueGrouping(false) + _mutateAllowQueryOverride(false) { } RankSetup::~RankSetup() = default; @@ -135,7 +134,6 @@ RankSetup::configure() _mutateOnSummary._attribute = mutate::on_summary::Attribute::lookup(_indexEnv.getProperties()); _mutateOnSummary._operation = mutate::on_summary::Operation::lookup(_indexEnv.getProperties()); _mutateAllowQueryOverride = mutate::AllowQueryOverride::check(_indexEnv.getProperties()); - _enableNestedMultivalueGrouping = temporary::EnableNestedMultivalueGrouping::check(_indexEnv.getProperties()); _always_mark_phrase_expensive = matching::AlwaysMarkPhraseExpensive::check(_indexEnv.getProperties()); } diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h index 0e98c3f1c5d..d744b38cc6e 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.h +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h @@ -85,7 +85,6 @@ private: MutateOperation _mutateOnSecondPhase; MutateOperation _mutateOnSummary; bool _mutateAllowQueryOverride; - bool _enableNestedMultivalueGrouping; void compileAndCheckForErrors(BlueprintResolver &bp); public: @@ -460,7 +459,6 @@ public: const MutateOperation & getMutateOnSummary() const { return _mutateOnSummary; } bool allowMutateQueryOverride() const { return _mutateAllowQueryOverride; } - bool enableNestedMultivalueGrouping() const { return _enableNestedMultivalueGrouping; } }; } |