diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-07-04 09:00:22 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-04 09:00:22 +0200 |
commit | c21b731f8734fc9b5a5f167086d4ec3e969af9e7 (patch) | |
tree | 1f93662ab9e1bc282278840db81b0c1568e10ce1 | |
parent | 86bd836d889e6648f459701fe8fec7e5a364afd6 (diff) | |
parent | 62bd002c38c0d70233c1de3c35ffb17bb687ad8f (diff) |
Merge pull request #27619 from vespa-engine/balder/double-check-if-still-active
Move code only used from searchcore to searchcore to reduce wiring an…
10 files changed, 105 insertions, 144 deletions
diff --git a/searchcore/src/vespa/searchcore/grouping/groupingcontext.cpp b/searchcore/src/vespa/searchcore/grouping/groupingcontext.cpp index 30a1f6074f1..08bbf422674 100644 --- a/searchcore/src/vespa/searchcore/grouping/groupingcontext.cpp +++ b/searchcore/src/vespa/searchcore/grouping/groupingcontext.cpp @@ -3,6 +3,8 @@ #include "groupingcontext.h" #include <vespa/searchlib/aggregation/predicates.h> #include <vespa/searchlib/aggregation/modifiers.h> +#include <vespa/searchlib/aggregation/hitsaggregationresult.h> +#include <vespa/searchlib/common/bitvector.h> namespace search::grouping { @@ -17,12 +19,11 @@ GroupingContext::deserialize(const char *groupSpec, uint32_t groupSpecLen) vespalib::NBOSerializer nis(is); uint32_t numGroupings = 0; nis >> numGroupings; + _groupingList.reserve(numGroupings); for (size_t i = 0; i < numGroupings; i++) { auto grouping = std::make_shared<search::aggregation::Grouping>(); grouping->deserialize(nis); - grouping->setClock(&_clock); - grouping->setTimeOfDoom(_timeOfDoom); - _groupingList.push_back(grouping); + _groupingList.push_back(std::move(grouping)); } } } @@ -65,8 +66,7 @@ GroupingContext::GroupingContext(const vespalib::Clock & clock, vespalib::steady _os(), _groupingList(), _enableNestedMultivalueGrouping(true) -{ -} +{ } GroupingContext::GroupingContext(const GroupingContext & rhs) : _clock(rhs._clock), @@ -74,8 +74,7 @@ GroupingContext::GroupingContext(const GroupingContext & rhs) : _os(), _groupingList(), _enableNestedMultivalueGrouping(rhs._enableNestedMultivalueGrouping) -{ -} +{ } void GroupingContext::addGrouping(const GroupingPtr & g) @@ -104,4 +103,43 @@ GroupingContext::needRanking() const return true; } +using DocId = uint32_t; + +void +GroupingContext::aggregate(Grouping & grouping, const RankedHit * rankedHit, unsigned int len, const BitVector * bVec) const +{ + grouping.preAggregate(false); + + for(unsigned int i(0), m(grouping.getMaxN(len)); (i < m) && !hasExpired(); i++) { + grouping.aggregate(rankedHit[i].getDocId(), rankedHit[i].getRank()); + } + if (bVec != nullptr) { + unsigned int sz(bVec->size()); + int64_t topN = grouping.getTopN(); + if (topN > 0) { + for(DocId d(bVec->getFirstTrueBit()), i(0), m(grouping.getMaxN(sz)); (d < sz) && (i < m) && !hasExpired(); d = bVec->getNextTrueBit(d+1), i++) { + grouping.aggregate(d, 0.0); + } + } else { + for(DocId d(bVec->getFirstTrueBit()); (d < sz) && !hasExpired(); d = bVec->getNextTrueBit(d+1)) { + grouping.aggregate(d, 0.0); + } + } + } + grouping.postProcess(); +} + +void +GroupingContext::aggregate(Grouping & grouping, const RankedHit * rankedHit, unsigned int len) const +{ + bool isOrdered(! grouping.needResort()); + grouping.preAggregate(isOrdered); + search::aggregation::HitsAggregationResult::SetOrdered pred; + grouping.select(pred, pred); + for(unsigned int i(0), m(grouping.getMaxN(len)); (i < m) && !hasExpired(); i++) { + grouping.aggregate(rankedHit[i].getDocId(), rankedHit[i].getRank()); + } + grouping.postProcess(); +} + } diff --git a/searchcore/src/vespa/searchcore/grouping/groupingcontext.h b/searchcore/src/vespa/searchcore/grouping/groupingcontext.h index af71cf30017..cbbe78a1019 100644 --- a/searchcore/src/vespa/searchcore/grouping/groupingcontext.h +++ b/searchcore/src/vespa/searchcore/grouping/groupingcontext.h @@ -4,6 +4,7 @@ #include <vespa/searchlib/aggregation/grouping.h> #include <vespa/vespalib/objects/nbostream.h> #include <vespa/vespalib/util/time.h> +#include <vespa/vespalib/util/clock.h> #include <vector> #include <memory> @@ -18,17 +19,10 @@ class GroupingContext { public: using UP = std::unique_ptr<GroupingContext>; - using GroupingPtr = std::shared_ptr<search::aggregation::Grouping>; + using Grouping = search::aggregation::Grouping; + using GroupingPtr = std::shared_ptr<Grouping>; using GroupingList = std::vector<GroupingPtr>; -private: - const vespalib::Clock & _clock; - vespalib::steady_time _timeOfDoom; - vespalib::nbostream _os; - GroupingList _groupingList; - bool _enableNestedMultivalueGrouping; -public: - /** * Deserialize a grouping spec into this context. * @@ -74,7 +68,7 @@ public: * Return the internal list of grouping expressions in this context. * @return a list of groupings. **/ - GroupingList &getGroupingList() { return _groupingList; } + GroupingList &getGroupingList() noexcept { return _groupingList; } /** * Serialize the grouping expressions in this context. @@ -84,7 +78,7 @@ public: /** * Check whether this context contains any groupings. **/ - bool empty() const { return _groupingList.empty(); } + bool empty() const noexcept { return _groupingList.empty(); } /** * Obtain the grouping result. @@ -108,13 +102,23 @@ public: /** * Obtain the time of doom. */ - vespalib::steady_time getTimeOfDoom() const { return _timeOfDoom; } + vespalib::steady_time getTimeOfDoom() const noexcept { return _timeOfDoom; } + bool hasExpired() const noexcept { return _clock.getTimeNS() > _timeOfDoom; } /** * Figure out if ranking is necessary for any of the grouping requests here. * @return true if ranking is required. */ bool needRanking() const; - bool enableNestedMultivalueGrouping() const { return _enableNestedMultivalueGrouping; } + bool enableNestedMultivalueGrouping() const noexcept { return _enableNestedMultivalueGrouping; } + + void aggregate(Grouping & grouping, const RankedHit * rankedHit, unsigned int len, const BitVector * bVec) const; + void aggregate(Grouping & grouping, const RankedHit * rankedHit, unsigned int len) const; +private: + const vespalib::Clock & _clock; + vespalib::steady_time _timeOfDoom; + vespalib::nbostream _os; + GroupingList _groupingList; + bool _enableNestedMultivalueGrouping; }; } diff --git a/searchcore/src/vespa/searchcore/grouping/groupingmanager.cpp b/searchcore/src/vespa/searchcore/grouping/groupingmanager.cpp index e907f4a7c76..3eb410ad2c2 100644 --- a/searchcore/src/vespa/searchcore/grouping/groupingmanager.cpp +++ b/searchcore/src/vespa/searchcore/grouping/groupingmanager.cpp @@ -69,14 +69,12 @@ GroupingManager::init(const IAttributeContext &attrCtx) void GroupingManager::groupInRelevanceOrder(const RankedHit *searchResults, uint32_t binSize) { - GroupingContext::GroupingList &groupingList(_groupingContext.getGroupingList()); - for (size_t i = 0; i < groupingList.size(); ++i) { - Grouping & g = *groupingList[i]; - if ( ! g.needResort() ) { - g.aggregate(searchResults, binSize); - LOG(debug, "groupInRelevanceOrder: %s", g.asString().c_str()); - g.cleanTemporary(); - g.cleanupAttributeReferences(); + for (const auto & g : _groupingContext.getGroupingList()) { + if ( ! g->needResort() ) { + _groupingContext.aggregate(*g, searchResults, binSize); + LOG(debug, "groupInRelevanceOrder: %s", g->asString().c_str()); + g->cleanTemporary(); + g->cleanupAttributeReferences(); } } } @@ -84,14 +82,12 @@ GroupingManager::groupInRelevanceOrder(const RankedHit *searchResults, uint32_t void GroupingManager::groupUnordered(const RankedHit *searchResults, uint32_t binSize, const search::BitVector * overflow) { - GroupingContext::GroupingList &groupingList(_groupingContext.getGroupingList()); - for (size_t i = 0; i < groupingList.size(); ++i) { - Grouping & g = *groupingList[i]; - if ( g.needResort() ) { - g.aggregate(searchResults, binSize, overflow); - LOG(debug, "groupUnordered: %s", g.asString().c_str()); - g.cleanTemporary(); - g.cleanupAttributeReferences(); + for (const auto & g : _groupingContext.getGroupingList()) { + if ( g->needResort() ) { + _groupingContext.aggregate(*g, searchResults, binSize, overflow); + LOG(debug, "groupUnordered: %s", g->asString().c_str()); + g->cleanTemporary(); + g->cleanupAttributeReferences(); } } } diff --git a/searchcore/src/vespa/searchcore/proton/matching/result_processor.cpp b/searchcore/src/vespa/searchcore/proton/matching/result_processor.cpp index 9b85ddbdde8..1608c633124 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/result_processor.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/result_processor.cpp @@ -119,7 +119,6 @@ ResultProcessor::Result::UP ResultProcessor::makeReply(PartialResultUP full_result) { auto reply = std::make_unique<search::engine::SearchReply>(); - const search::IDocumentMetaStore &metaStore = _metaStore; search::engine::SearchReply &r = *reply; PartialResult &result = *full_result; size_t numFs4Hits(0); @@ -127,7 +126,7 @@ ResultProcessor::makeReply(PartialResultUP full_result) if (_wasMerged) { _groupingSession->getGroupingManager().prune(); } - _groupingSession->getGroupingManager().convertToGlobalId(metaStore); + _groupingSession->getGroupingManager().convertToGlobalId(_metaStore); _groupingSession->continueExecution(_groupingContext); numFs4Hits = _groupingContext.countFS4Hits(); _groupingContext.getResult().swap(r.groupResult); @@ -144,7 +143,7 @@ ResultProcessor::makeReply(PartialResultUP full_result) search::engine::SearchReply::Hit &dst = r.hits[i]; const search::RankedHit &src = result.hit(hitOffset + i); uint32_t docId = src.getDocId(); - if (metaStore.getGidEvenIfMoved(docId, gid)) { + if (_metaStore.getGidEvenIfMoved(docId, gid)) { dst.gid = gid; } dst.metric = src.getRank(); diff --git a/searchlib/src/tests/groupingengine/groupingengine_benchmark.cpp b/searchlib/src/tests/groupingengine/groupingengine_benchmark.cpp index cd2f4a58552..711e15dd186 100644 --- a/searchlib/src/tests/groupingengine/groupingengine_benchmark.cpp +++ b/searchlib/src/tests/groupingengine/groupingengine_benchmark.cpp @@ -5,7 +5,6 @@ #include <vespa/searchlib/aggregation/aggregation.h> #include <vespa/searchlib/attribute/extendableattributes.h> #include <vespa/searchlib/attribute/attributemanager.h> -#include <vespa/searchlib/aggregation/hitsaggregationresult.h> #include <vespa/searchlib/aggregation/fs4hit.h> #include <vespa/searchlib/expression/fixedwidthbucketfunctionnode.h> #include <vespa/searchlib/grouping/groupingengine.h> @@ -147,8 +146,8 @@ private: CheckAttributeReferences() : _numrefs(0) { } int _numrefs; private: - virtual void execute(vespalib::Identifiable &obj) override { - if (static_cast<AttributeNode &>(obj).getAttribute() != NULL) { + void execute(vespalib::Identifiable &obj) override { + if (static_cast<AttributeNode &>(obj).getAttribute() != nullptr) { _numrefs++; } } @@ -261,13 +260,13 @@ Test::Main() aggrType = _argv[3]; } if (_argc > 4) { - numDocs = strtol(_argv[4], NULL, 0); + numDocs = strtol(_argv[4], nullptr, 0); } if (_argc > 5) { - numQueries = strtol(_argv[5], NULL, 0); + numQueries = strtol(_argv[5], nullptr, 0); } if (_argc > 6) { - maxGroups = strtol(_argv[6], NULL, 0); + maxGroups = strtol(_argv[6], nullptr, 0); } TEST_INIT("grouping_benchmark"); LOG(info, "sizeof(Group) = %ld", sizeof(Group)); diff --git a/searchlib/src/vespa/searchlib/aggregation/group.cpp b/searchlib/src/vespa/searchlib/aggregation/group.cpp index 5d6e62f30f0..5a16756f0d7 100644 --- a/searchlib/src/vespa/searchlib/aggregation/group.cpp +++ b/searchlib/src/vespa/searchlib/aggregation/group.cpp @@ -1,16 +1,12 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "group.h" -#include "maxaggregationresult.h" -#include "groupinglevel.h" #include "grouping.h" +#include <vespa/searchlib/expression/aggregationrefnode.h> -#include <vespa/vespalib/objects/objectdumper.h> #include <vespa/vespalib/objects/visit.hpp> #include <vespa/vespalib/stllike/hash_set.hpp> -#include <cmath> #include <cassert> -#include <algorithm> namespace search::aggregation { @@ -766,10 +762,22 @@ Group::Value::partialCopy(const Value & rhs) { memcpy(_orderBy, rhs._orderBy, sizeof(_orderBy)); } +namespace { + class AggregationRefNodeConfigure : public vespalib::ObjectOperation, public vespalib::ObjectPredicate + { + public: + AggregationRefNodeConfigure(expression::ExpressionNode::CP * & exprVec) : _exprVec(exprVec) { } + private: + void execute(vespalib::Identifiable &obj) override { static_cast<AggregationRefNode&>(obj).locateExpression(_exprVec); } + bool check(const vespalib::Identifiable &obj) const override { return obj.inherits(AggregationRefNode::classId); } + expression::ExpressionNode::CP * & _exprVec; + }; +} + void Group::Value::setupAggregationReferences() { - AggregationRefNode::Configure exprRefSetup(_aggregationResults); + AggregationRefNodeConfigure exprRefSetup(_aggregationResults); select(exprRefSetup, exprRefSetup); } diff --git a/searchlib/src/vespa/searchlib/aggregation/grouping.cpp b/searchlib/src/vespa/searchlib/aggregation/grouping.cpp index ea35310c727..e9df10d3a61 100644 --- a/searchlib/src/vespa/searchlib/aggregation/grouping.cpp +++ b/searchlib/src/vespa/searchlib/aggregation/grouping.cpp @@ -12,7 +12,6 @@ #include <vespa/vespalib/objects/serializer.hpp> #include <vespa/vespalib/objects/deserializer.hpp> #include <vespa/searchlib/common/idocumentmetastore.h> -#include <vespa/searchlib/common/bitvector.h> #include <vespa/log/log.h> LOG_SETUP(".searchlib.aggregation.grouping"); @@ -129,11 +128,8 @@ Grouping::Grouping() noexcept _firstLevel(0), _lastLevel(0), _levels(), - _root(), - _clock(nullptr), - _timeOfDoom(vespalib::duration::zero()) -{ -} + _root() +{ } Grouping::Grouping(const Grouping &) = default; Grouping & Grouping::operator = (const Grouping &) = default; @@ -221,66 +217,14 @@ Grouping::postProcess() } void -Grouping::aggregateWithoutClock(const RankedHit * rankedHit, unsigned int len) { - for(unsigned int i(0); i < len; i++) { - aggregate(rankedHit[i].getDocId(), rankedHit[i].getRank()); - } -} - -void -Grouping::aggregateWithClock(const RankedHit * rankedHit, unsigned int len) { - for(unsigned int i(0); (i < len) && !hasExpired(); i++) { - aggregate(rankedHit[i].getDocId(), rankedHit[i].getRank()); - } -} - -void Grouping::aggregate(const RankedHit * rankedHit, unsigned int len) { bool isOrdered(! needResort()); preAggregate(isOrdered); HitsAggregationResult::SetOrdered pred; select(pred, pred); - if (_clock == nullptr) { - aggregateWithoutClock(rankedHit, getMaxN(len)); - } else { - aggregateWithClock(rankedHit, getMaxN(len)); - } - postProcess(); -} - -void -Grouping::aggregate(const RankedHit * rankedHit, unsigned int len, const BitVector * bVec) -{ - preAggregate(false); - if (_clock == nullptr) { - aggregateWithoutClock(rankedHit, getMaxN(len)); - } else { - aggregateWithClock(rankedHit, getMaxN(len)); - } - if (bVec != nullptr) { - unsigned int sz(bVec->size()); - if (_clock == nullptr) { - if (getTopN() > 0) { - for(DocId d(bVec->getFirstTrueBit()), i(0), m(getMaxN(sz)); (d < sz) && (i < m); d = bVec->getNextTrueBit(d+1), i++) { - aggregate(d, 0.0); - } - } else { - for(DocId d(bVec->getFirstTrueBit()); d < sz; d = bVec->getNextTrueBit(d+1)) { - aggregate(d, 0.0); - } - } - } else { - if (getTopN() > 0) { - for(DocId d(bVec->getFirstTrueBit()), i(0), m(getMaxN(sz)); (d < sz) && (i < m) && !hasExpired(); d = bVec->getNextTrueBit(d+1), i++) { - aggregate(d, 0.0); - } - } else { - for(DocId d(bVec->getFirstTrueBit()); (d < sz) && !hasExpired(); d = bVec->getNextTrueBit(d+1)) { - aggregate(d, 0.0); - } - } - } + for(unsigned int i(0), m(getMaxN(len)); i < m; i++) { + aggregate(rankedHit[i].getDocId(), rankedHit[i].getRank()); } postProcess(); } diff --git a/searchlib/src/vespa/searchlib/aggregation/grouping.h b/searchlib/src/vespa/searchlib/aggregation/grouping.h index 2136075443e..49a56143607 100644 --- a/searchlib/src/vespa/searchlib/aggregation/grouping.h +++ b/searchlib/src/vespa/searchlib/aggregation/grouping.h @@ -3,7 +3,6 @@ #include "groupinglevel.h" #include <vespa/searchlib/common/rankedhit.h> -#include <vespa/vespalib/util/clock.h> namespace search { class BitVector; @@ -30,13 +29,6 @@ private: uint32_t _lastLevel; // last processing level this iteration GroupingLevelList _levels; // grouping parameters per level Group _root; // the grouping tree - const vespalib::Clock *_clock; // An optional clock to be used for timeout handling. - vespalib::steady_time _timeOfDoom; // Used if clock is specified. This is time when request expires. - - bool hasExpired() const { return _clock->getTimeNS() > _timeOfDoom; } - void aggregateWithoutClock(const RankedHit * rankedHit, unsigned int len); - void aggregateWithClock(const RankedHit * rankedHit, unsigned int len); - void postProcess(); public: DECLARE_IDENTIFIABLE_NS2(search, aggregation, Grouping); DECLARE_NBO_SERIALIZE; @@ -58,8 +50,6 @@ public: Grouping &setLastLevel(unsigned int level) { _lastLevel = level; return *this; } Grouping &addLevel(GroupingLevel && level) { _levels.push_back(std::move(level)); return *this; } Grouping &setRoot(const Group &root_) { _root = root_; return *this; } - Grouping &setClock(const vespalib::Clock * clock) { _clock = clock; return *this; } - Grouping &setTimeOfDoom(vespalib::steady_time timeOfDoom) { _timeOfDoom = timeOfDoom; return *this; } unsigned int getId() const noexcept { return _id; } bool valid() const noexcept { return _valid; } @@ -84,12 +74,12 @@ public: void preAggregate(bool isOrdered); void prune(const Grouping & b); void aggregate(DocId from, DocId to); - void aggregate(const RankedHit * rankedHit, unsigned int len); - void aggregate(const RankedHit * rankedHit, unsigned int len, const BitVector * bVec); void aggregate(DocId docId, HitRank rank = 0); void aggregate(const document::Document & doc, HitRank rank = 0); + void aggregate(const RankedHit * rankedHit, unsigned int len); void convertToGlobalId(const IDocumentMetaStore &metaStore); void postAggregate(); + void postProcess(); void sortById(); void cleanTemporary(); void configureStaticStuff(const expression::ConfigureStaticParams & params); diff --git a/searchlib/src/vespa/searchlib/aggregation/groupinglevel.h b/searchlib/src/vespa/searchlib/aggregation/groupinglevel.h index 2cffe498e96..dc8f021e343 100644 --- a/searchlib/src/vespa/searchlib/aggregation/groupinglevel.h +++ b/searchlib/src/vespa/searchlib/aggregation/groupinglevel.h @@ -2,7 +2,6 @@ #pragma once #include "group.h" -#include <vespa/searchlib/expression/aggregationrefnode.h> #include <vespa/searchlib/expression/currentindex.h> namespace search::expression { class CurrentIndexSetup; } diff --git a/searchlib/src/vespa/searchlib/expression/aggregationrefnode.h b/searchlib/src/vespa/searchlib/expression/aggregationrefnode.h index cae438c8351..f321014dcc7 100644 --- a/searchlib/src/vespa/searchlib/expression/aggregationrefnode.h +++ b/searchlib/src/vespa/searchlib/expression/aggregationrefnode.h @@ -2,31 +2,18 @@ #pragma once #include "expressionnode.h" -#include "serializer.h" -#include <vespa/vespalib/objects/objectoperation.h> -#include <vespa/vespalib/objects/objectpredicate.h> -namespace search { -namespace expression { +namespace search::expression { class AggregationRefNode : public ExpressionNode { public: DECLARE_NBO_SERIALIZE; - class Configure : public vespalib::ObjectOperation, public vespalib::ObjectPredicate - { - public: - Configure(ExpressionNodeArray & exprVec) : _exprVec(exprVec) { } - private: - void execute(vespalib::Identifiable &obj) override { static_cast<AggregationRefNode&>(obj).locateExpression(_exprVec); } - bool check(const vespalib::Identifiable &obj) const override { return obj.inherits(AggregationRefNode::classId); } - ExpressionNodeArray & _exprVec; - }; void visitMembers(vespalib::ObjectVisitor &visitor) const override; DECLARE_EXPRESSIONNODE(AggregationRefNode); - AggregationRefNode() : _index(0), _expressionNode(NULL) { } - AggregationRefNode(uint32_t index) : _index(index), _expressionNode(NULL) { } + AggregationRefNode() : _index(0), _expressionNode(nullptr) { } + AggregationRefNode(uint32_t index) : _index(index), _expressionNode(nullptr) { } AggregationRefNode(const AggregationRefNode & rhs); AggregationRefNode & operator = (const AggregationRefNode & exprref); @@ -35,13 +22,10 @@ public: void onPrepare(bool preserveAccurateTypes) override { _expressionNode->prepare(preserveAccurateTypes); } bool onExecute() const override; -private: void locateExpression(ExpressionNodeArray & exprVec) const; - +private: uint32_t _index; mutable ExpressionNode *_expressionNode; }; } -} - |