diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2017-03-29 18:20:39 +0200 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2017-03-29 18:20:39 +0200 |
commit | fde30342ba5128071c93d7f6f7c4f385a24761a9 (patch) | |
tree | c1d38cbfb87e9e75d7d5791a47ca36762483d90f /searchlib | |
parent | bef0c9ddf8b6fb2370913bb3070f5e969ac4856e (diff) |
Move the tricky part to a value object to avoid challenges with moving polymorphic objects.
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/tests/grouping/grouping_test.cpp | 9 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/aggregation/group.cpp | 526 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/aggregation/group.h | 205 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/expression/aggregationrefnode.h | 10 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/grouping/collect.h | 2 |
5 files changed, 448 insertions, 304 deletions
diff --git a/searchlib/src/tests/grouping/grouping_test.cpp b/searchlib/src/tests/grouping/grouping_test.cpp index a8c879550dd..fec91a62d17 100644 --- a/searchlib/src/tests/grouping/grouping_test.cpp +++ b/searchlib/src/tests/grouping/grouping_test.cpp @@ -281,6 +281,7 @@ Test::testMerge(const Grouping &a, const Grouping &b, const Grouping &c, void Test::testAggregationSimple() { + EXPECT_EQUAL(64u, sizeof(Group)); AggregationContext ctx; ctx.result().add(0).add(1).add(2); ctx.add(IntAttrBuilder("int").add(3).add(7).add(15).sp()); @@ -1889,11 +1890,11 @@ struct RunDiff { ~RunDiff() { system("diff -u lhs.out rhs.out > diff.txt"); }}; int Test::Main() { - RunDiff runDiff; - (void) runDiff; - TEST_DEBUG("lhs.out", "rhs.out"); + //RunDiff runDiff; + //(void) runDiff; + //TEST_DEBUG("lhs.out", "rhs.out"); TEST_INIT("grouping_test"); - testAggregationSimple(); + TEST_DO(testAggregationSimple()); testAggregationLevels(); testAggregationMaxGroups(); testAggregationGroupOrder(); diff --git a/searchlib/src/vespa/searchlib/aggregation/group.cpp b/searchlib/src/vespa/searchlib/aggregation/group.cpp index 449b28a9338..2ef2d034d4d 100644 --- a/searchlib/src/vespa/searchlib/aggregation/group.cpp +++ b/searchlib/src/vespa/searchlib/aggregation/group.cpp @@ -37,7 +37,8 @@ struct SortByGroupRank { IMPLEMENT_IDENTIFIABLE_NS2(search, aggregation, Group, vespalib::Identifiable); -void Group::destruct(GroupList & l, size_t m) +void +Group::destruct(GroupList & l, size_t m) { for (size_t i(0); i < m; i++) { destruct(l[i]); @@ -46,13 +47,10 @@ void Group::destruct(GroupList & l, size_t m) l = NULL; } -int Group::cmpRank(const Group &rhs) const +int +Group::cmpRank(const Group &rhs) const { - int diff(0); - for(size_t i(0), m(getOrderBySize()); (diff == 0) && (i < m); i++) { - uint32_t index = std::abs(getOrderBy(i)) - 1; - diff = expr(index).getResult().cmp(rhs.expr(index).getResult())*getOrderBy(i); - } + int diff(_aggr.cmp(rhs._aggr)); return diff ? 
diff : ((_rank > rhs._rank) @@ -60,111 +58,26 @@ int Group::cmpRank(const Group &rhs) const : ((_rank < rhs._rank) ? 1 : 0)); } -Group & Group::addOrderBy(ExpressionNode::UP orderBy, bool ascending) -{ - assert(getOrderBySize() < sizeof(_orderBy)*2-1); - assert(getExprSize() < 15); - addExpressionResult(std::move(orderBy)); - setOrderBy(getOrderBySize(), (ascending ? getExprSize() : -getExprSize())); - setOrderBySize(getOrderBySize() + 1); - setupAggregationReferences(); - return *this; -} - -Group & Group::addAggregationResult(ExpressionNode::UP aggr) -{ - assert(getAggrSize() < 15); - size_t newSize = getAggrSize() + 1 + getExprSize(); - ExpressionVector n = new ExpressionNode::CP[newSize]; - for (size_t i(0), m(getAggrSize()); i < m; i++) { - n[i] = std::move(_aggregationResults[i]); - } - n[getAggrSize()].reset(aggr.release()); - // Copy expressions after aggregationresults - for (size_t i(getAggrSize()); i < newSize - 1; i++) { - n[i + 1] = std::move(_aggregationResults[i]); - } - delete [] _aggregationResults; - _aggregationResults = n; - setAggrSize(getAggrSize() + 1); - return *this; -} - -Group & Group::addExpressionResult(ExpressionNode::UP expressionNode) -{ - uint32_t newSize = getAggrSize() + getExprSize() + 1; - ExpressionVector n = new ExpressionNode::CP[newSize]; - for (uint32_t i(0); i < (newSize - 1); i++) { - n[i] = std::move(_aggregationResults[i]); - } - n[newSize - 1].reset(expressionNode.release()); - delete [] _aggregationResults; - _aggregationResults = n; - setExprSize(getExprSize()+1); - return *this; -} - -void Group::setupAggregationReferences() -{ - AggregationRefNode::Configure exprRefSetup(_aggregationResults); - select(exprRefSetup, exprRefSetup); -} - Group & Group::addResult(ExpressionNode::UP aggr) { - assert(getExprSize() < 15); + assert(_aggr.getExprSize() < 15); addAggregationResult(std::move(aggr)); addExpressionResult(ExpressionNode::UP(new AggregationRefNode(getAggrSize() - 1))); - setupAggregationReferences(); + 
_aggr.setupAggregationReferences(); return *this; } -void Group::addChild(Group * child) -{ - const size_t sz(getChildrenSize()); - assert(sz < 0xffffff); - if (_children == 0) { - _children = new ChildP[4]; - } else if ((sz >=4) && vespalib::Optimized::msbIdx(sz) == vespalib::Optimized::lsbIdx(sz)) { - GroupList n = new ChildP[sz*2]; - for (size_t i(0), m(getChildrenSize()); i < m; i++) { - n[i] = _children[i]; - } - delete [] _children; - _children = n; - } - _children[sz] = child; - setChildrenSize(sz + 1); -} - void -Group::selectMembers(const vespalib::ObjectPredicate &predicate, - vespalib::ObjectOperation &operation) -{ +Group::selectMembers(const vespalib::ObjectPredicate &predicate, vespalib::ObjectOperation &operation) { if (_id.get()) { _id->select(predicate, operation); } - uint32_t totalSize = getAggrSize() + getExprSize(); - for (uint32_t i(0); i < totalSize; i++) { - _aggregationResults[i]->select(predicate, operation); - } -} - -void -Group::preAggregate() -{ - assert(_childInfo._childMap == NULL); - _childInfo._childMap = new GroupHash(getChildrenSize()*2, GroupHasher(&_children), GroupEqual(&_children)); - GroupHash & childMap = *_childInfo._childMap; - for (ChildP *it(_children), *mt(_children + getChildrenSize()); it != mt; ++it) { - (*it)->preAggregate(); - childMap.insert(it - _children); - } + _aggr.select(predicate, operation); } template <typename Doc> -void Group::collect(const Doc & doc, HitRank rank) +void Group::Value::collect(const Doc & doc, HitRank rank) { for(size_t i(0), m(getAggrSize()); i < m; i++) { getAggr(i)->aggregate(doc, rank); @@ -195,7 +108,8 @@ Group::groupNext(const GroupingLevel & level, const Doc & doc, HitRank rank) level.group(*this, selectResult, doc, rank); } -Group * Group::groupSingle(const ResultNode & selectResult, HitRank rank, const GroupingLevel & level) +Group * +Group::Value::groupSingle(const ResultNode & selectResult, HitRank rank, const GroupingLevel & level) { if (_childInfo._childMap == NULL) { 
assert(getChildrenSize() == 0); @@ -222,7 +136,195 @@ Group * Group::groupSingle(const ResultNode & selectResult, HitRank rank, const } void -Group::postAggregate() +Group::merge(const GroupingLevelList &levels, uint32_t firstLevel, uint32_t currentLevel, Group &b) { + bool frozen = (currentLevel < firstLevel); // is this level frozen ? + _rank = std::max(_rank, b._rank); + + if (!frozen) { // should we merge collectors for this level ? + _aggr.mergeCollectors(b._aggr); + } + _aggr.merge(levels, firstLevel, currentLevel, b._aggr); +} + +void +Group::prune(const Group & b, uint32_t lastLevel, uint32_t currentLevel) { + if (currentLevel >= lastLevel) { + return; + } + _aggr.prune(b._aggr, lastLevel, currentLevel); +} + +void +Group::mergePartial(const GroupingLevelList &levels, uint32_t firstLevel, uint32_t lastLevel, + uint32_t currentLevel, const Group & b) { + bool frozen = (currentLevel < firstLevel); + + if (!frozen) { + _aggr.mergeCollectors(b._aggr); + _aggr.execute(); + + // At this level, we must create a copy of the other nodes children. + if (currentLevel >= lastLevel) { + _aggr.mergeLevel(levels[currentLevel].getGroupPrototype(), b._aggr); + return; + } + } + _aggr.mergePartial(levels, firstLevel, lastLevel, currentLevel, b._aggr); +} + +Group & +Group::setRank(RawRank r) +{ + _rank = std::isnan(r) ? 
-HUGE_VAL : r; + return *this; +} + +Group & +Group::updateRank(RawRank r) +{ + return setRank(std::max(_rank, r)); +} + +Serializer & +Group::onSerialize(Serializer & os) const { + _aggr.assertIdOrder(); + os << _id << _rank; + _aggr.serialize(os); + return os; +} + +Deserializer & +Group::onDeserialize(Deserializer & is) { + is >> _id >> _rank; + _aggr.deserialize(is); + _aggr.assertIdOrder(); + return is; +} + +void +Group::visitMembers(vespalib::ObjectVisitor &visitor) const { + visit(visitor, "id", _id); + visit(visitor, "rank", _rank); + _aggr.visitMembers(visitor); +} + +Group::Group() : + _id(), + _rank(0), + _aggr() +{ } + +Group::Group(const Group & rhs) = default; +Group & Group::operator = (const Group & rhs) = default; + +Group::~Group() { } + +Group & +Group::partialCopy(const Group & rhs) { + setId(*rhs._id); + _rank = rhs._rank; + _aggr.partialCopy(rhs._aggr); + return *this; +} + +template void Group::aggregate(const Grouping & grouping, uint32_t currentLevel, const DocId & doc, HitRank rank); +template void Group::aggregate(const Grouping & grouping, uint32_t currentLevel, const document::Document & doc, HitRank rank); + +int +Group::Value::cmp(const Value & rhs) const { + int diff(0); + for (size_t i(0), m(getOrderBySize()); (diff == 0) && (i < m); i++) { + uint32_t index = std::abs(getOrderBy(i)) - 1; + diff = expr(index).getResult().cmp(rhs.expr(index).getResult()) * getOrderBy(i); + } + return diff; +} + +void +Group::Value::addExpressionResult(ExpressionNode::UP expressionNode) +{ + uint32_t newSize = getAggrSize() + getExprSize() + 1; + ExpressionVector n = new ExpressionNode::CP[newSize]; + for (uint32_t i(0); i < (newSize - 1); i++) { + n[i] = std::move(_aggregationResults[i]); + } + n[newSize - 1].reset(expressionNode.release()); + delete [] _aggregationResults; + _aggregationResults = n; + setExprSize(getExprSize()+1); + setupAggregationReferences(); +} + +void +Group::Value::addAggregationResult(ExpressionNode::UP aggr) +{ + 
assert(getAggrSize() < 15); + size_t newSize = getAggrSize() + 1 + getExprSize(); + ExpressionVector n = new ExpressionNode::CP[newSize]; + for (size_t i(0), m(getAggrSize()); i < m; i++) { + n[i] = std::move(_aggregationResults[i]); + } + n[getAggrSize()].reset(aggr.release()); + // Copy expressions after aggregationresults + for (size_t i(getAggrSize()); i < newSize - 1; i++) { + n[i + 1] = std::move(_aggregationResults[i]); + } + delete [] _aggregationResults; + _aggregationResults = n; + setAggrSize(getAggrSize() + 1); +} + +void +Group::Value::addOrderBy(ExpressionNode::UP orderBy, bool ascending) +{ + assert(getOrderBySize() < sizeof(_orderBy)*2-1); + assert(getExprSize() < 15); + addExpressionResult(std::move(orderBy)); + setOrderBy(getOrderBySize(), (ascending ? getExprSize() : -getExprSize())); + setOrderBySize(getOrderBySize() + 1); +} + +void +Group::Value::addChild(Group * child) +{ + const size_t sz(getChildrenSize()); + assert(sz < 0xffffff); + if (_children == 0) { + _children = new ChildP[4]; + } else if ((sz >=4) && vespalib::Optimized::msbIdx(sz) == vespalib::Optimized::lsbIdx(sz)) { + GroupList n = new ChildP[sz*2]; + for (size_t i(0), m(getChildrenSize()); i < m; i++) { + n[i] = _children[i]; + } + delete [] _children; + _children = n; + } + _children[sz] = child; + setChildrenSize(sz + 1); +} + +void +Group::Value::select(const vespalib::ObjectPredicate &predicate, vespalib::ObjectOperation &operation) { + uint32_t totalSize = getAggrSize() + getExprSize(); + for (uint32_t i(0); i < totalSize; i++) { + _aggregationResults[i]->select(predicate, operation); + } +} + +void +Group::Value::preAggregate() +{ + assert(_childInfo._childMap == NULL); + _childInfo._childMap = new GroupHash(getChildrenSize()*2, GroupHasher(&_children), GroupEqual(&_children)); + GroupHash & childMap = *_childInfo._childMap; + for (ChildP *it(_children), *mt(_children + getChildrenSize()); it != mt; ++it) { + (*it)->preAggregate(); + childMap.insert(it - _children); + } +} 
+ +void +Group::Value::postAggregate() { delete _childInfo._childMap; _childInfo._childMap = NULL; @@ -232,7 +334,7 @@ Group::postAggregate() } void -Group::executeOrderBy() +Group::Value::executeOrderBy() { for (size_t i(0), m(getExprSize()); i < m; i++) { ExpressionNode & e(expr(i)); @@ -241,7 +343,8 @@ Group::executeOrderBy() } } -void Group::sortById() +void +Group::Value::sortById() { std::sort(_children, _children + getChildrenSize(), SortByGroupId()); for (ChildP *it(_children), *mt(_children + getChildrenSize()); it != mt; ++it) { @@ -250,17 +353,32 @@ void Group::sortById() } void -Group::merge(const std::vector<GroupingLevel> &levels, - uint32_t firstLevel, uint32_t currentLevel, Group &b) -{ - bool frozen = (currentLevel < firstLevel); // is this level frozen ? - _rank = std::max(_rank, b._rank); +Group::Value::mergeCollectors(const Value &rhs) { + for(size_t i(0), m(getAggrSize()); i < m; i++) { + getAggr(i)->merge(rhs.getAggr(i)); + } +} - if (!frozen) { // should we merge collectors for this level ? 
- for(size_t i(0), m(getAggrSize()); i < m; i++) { - getAggr(i)->merge(*b.getAggr(i)); - } +void +Group::Value::execute() { + for (size_t i(0), m(getExprSize()); i < m; i++) { + expr(i).execute(); } +} + +void +Group::Value::mergeLevel(const Group & protoType, const Value & b) { + for (ChildP *it(b._children), *mt(b._children + b.getChildrenSize()); it != mt; ++it) { + ChildP g(new Group(protoType)); + g->partialCopy(**it); + addChild(g); + } +} + +void +Group::Value::merge(const std::vector<GroupingLevel> &levels, + uint32_t firstLevel, uint32_t currentLevel, const Value &b) +{ GroupList z = new ChildP[getChildrenSize() + b.getChildrenSize()]; size_t kept(0); ChildP * px = _children; @@ -300,12 +418,7 @@ Group::merge(const std::vector<GroupingLevel> &levels, } void -Group::prune(const Group & b, uint32_t lastLevel, uint32_t currentLevel) -{ - if (currentLevel >= lastLevel) { - return; - } - +Group::Value::prune(const Value & b, uint32_t lastLevel, uint32_t currentLevel) { GroupList keep = new ChildP[b.getChildrenSize()]; size_t kept(0); ChildP * px = _children; @@ -333,34 +446,9 @@ Group::prune(const Group & b, uint32_t lastLevel, uint32_t currentLevel) } void -Group::mergePartial(const std::vector<GroupingLevel> &levels, - uint32_t firstLevel, - uint32_t lastLevel, - uint32_t currentLevel, - const Group & b) +Group::Value::mergePartial(const GroupingLevelList &levels, uint32_t firstLevel, uint32_t lastLevel, + uint32_t currentLevel, const Value & b) { - bool frozen = (currentLevel < firstLevel); - - if (!frozen) { - for(size_t i(0), m(getAggrSize()); i < m; i++) { - getAggr(i)->merge(b.getAggr(i)); - } - for(size_t i(0), m(getExprSize()); i < m; i++) { - expr(i).execute(); - } - - - // At this level, we must create a copy of the other nodes children. 
- if (currentLevel >= lastLevel) { - for (ChildP *it(b._children), *mt(b._children + b.getChildrenSize()); it != mt; ++it) { - ChildP g(new Group(levels[currentLevel].getGroupPrototype())); - g->partialCopy(**it); - addChild(g); - } - return; - } - } - ChildP * px = _children; ChildP * ex = _children + getChildrenSize(); const ChildP * py = b._children; @@ -380,9 +468,7 @@ Group::mergePartial(const std::vector<GroupingLevel> &levels, } void -Group::postMerge(const std::vector<GroupingLevel> &levels, - uint32_t firstLevel, - uint32_t currentLevel) +Group::Value::postMerge(const std::vector<GroupingLevel> &levels, uint32_t firstLevel, uint32_t currentLevel) { bool frozen = (currentLevel < firstLevel); // is this level frozen ? @@ -413,18 +499,8 @@ Group::postMerge(const std::vector<GroupingLevel> &levels, } } -Group & Group::setRank(RawRank r) -{ - _rank = std::isnan(r) ? -HUGE_VAL : r; - return *this; -} - -Group & Group::updateRank(RawRank r) -{ - return setRank(std::max(_rank, r)); -} - -bool Group::needResort() const +bool +Group::Value::needResort() const { bool resort(needFullRank()); for (const ChildP *it(_children), *mt(_children + getChildrenSize()); !resort && (it != mt); ++it) { @@ -433,14 +509,18 @@ bool Group::needResort() const return resort; } -Serializer & Group::onSerialize(Serializer & os) const -{ +void +Group::Value::assertIdOrder() const { if (getChildrenSize() > 1) { for (size_t i(1), m(getChildrenSize()); i < m; i++) { assert(_children[i]->cmpId(*_children[i-1]) > 0); } } - os << _id << _rank; +} + +Serializer & +Group::Value::serialize(Serializer & os) const { + os << uint32_t(getOrderBySize()); for (size_t i(0), m(getOrderBySize()); i < m; i++) { os << int32_t(getOrderBy(i)); @@ -460,10 +540,10 @@ Serializer & Group::onSerialize(Serializer & os) const return os << _tag; } -Deserializer & Group::onDeserialize(Deserializer & is) -{ +Deserializer & +Group::Value::deserialize(Deserializer & is) { uint32_t count(0); - is >> _id >> _rank >> count; 
+ is >> count; assert(count < sizeof(_orderBy)*2); setOrderBySize(count); for(uint32_t i(0); i < count; i++) { @@ -493,13 +573,13 @@ Deserializer & Group::onDeserialize(Deserializer & is) _aggregationResults[i] = tmpAggregationResults[i]; } delete [] tmpAggregationResults; + setupAggregationReferences(); assert(exprSize < 16); setExprSize(exprSize); for (uint32_t i(aggrSize); i < aggrSize + exprSize; i++) { is >> _aggregationResults[i]; } - setupAggregationReferences(); is >> count; destruct(_children, getAllChildrenSize()); _childInfo._allChildren = 0; @@ -511,19 +591,11 @@ Deserializer & Group::onDeserialize(Deserializer & is) _children[i] = group; } is >> _tag; - if (getChildrenSize() > 1) { - for (size_t i(1), m(getChildrenSize()); i < m; i++) { - assert(_children[i]->cmpId(*_children[i-1]) > 0); - } - } return is; } void -Group::visitMembers(vespalib::ObjectVisitor &visitor) const -{ - visit(visitor, "id", _id); - visit(visitor, "rank", _rank); +Group::Value::visitMembers(vespalib::ObjectVisitor &visitor) const { // visit(visitor, "orderBy", _orderBy); visitor.openStruct("orderBy", "[]"); visit(visitor, "size", getOrderBySize()); @@ -555,30 +627,25 @@ Group::visitMembers(vespalib::ObjectVisitor &visitor) const visit(visitor, "tag", _tag); } -Group::Group() : - _id(), - _rank(0), +Group::Value::Value() : _packedLength(0), _tag(-1), _aggregationResults(NULL), - _orderBy(), _children(NULL), - _childInfo() + _childInfo(), + _orderBy() { memset(_orderBy, 0, sizeof(_orderBy)); _childInfo._childMap = NULL; } -Group::Group(const Group & rhs) : - Identifiable(rhs), - _id(rhs._id), - _rank(rhs._rank), +Group::Value::Value(const Value & rhs) : _packedLength(rhs._packedLength), _tag(rhs._tag), _aggregationResults(NULL), - _orderBy(), _children(NULL), - _childInfo() + _childInfo(), + _orderBy() { _childInfo._childMap = NULL; memcpy(_orderBy, rhs._orderBy, sizeof(_orderBy)); @@ -600,25 +667,46 @@ Group::Group(const Group & rhs) : } } -Group::Group(Group && rhs) noexcept : - 
Identifiable(rhs), - _id(std::move(rhs._id)), - _rank(std::move(rhs._rank)), +Group::Value::Value(Value && rhs) noexcept : _packedLength(std::move(rhs._packedLength)), _tag(std::move(rhs._tag)), _aggregationResults(std::move(rhs._aggregationResults)), - _orderBy(), _children(std::move(rhs._children)), - _childInfo(std::move(rhs._childInfo)) + _childInfo(std::move(rhs._childInfo)), + _orderBy() { memcpy(_orderBy, rhs._orderBy, sizeof(_orderBy)); + + rhs.setChildrenSize(0); + rhs._aggregationResults = nullptr; + rhs._childInfo._allChildren = 0; + rhs._children = nullptr; +} + +Group::Value & +Group::Value::operator =(Value && rhs) noexcept { + _packedLength = std::move(rhs._packedLength); + _tag = std::move(rhs._tag); + _aggregationResults = std::move(rhs._aggregationResults); + _children = std::move(rhs._children); + _childInfo = std::move(rhs._childInfo); + memcpy(_orderBy, rhs._orderBy, sizeof(_orderBy)); + rhs.setChildrenSize(0); rhs._aggregationResults = nullptr; rhs._childInfo._allChildren = 0; rhs._children = nullptr; + return *this; } -Group::~Group() +Group::Value & +Group::Value::operator =(const Value & rhs) { + Value tmp(rhs); + tmp.swap(*this); + return *this; +} + +Group::Value::~Value() { destruct(_children, getAllChildrenSize()); setChildrenSize(0); @@ -626,27 +714,25 @@ Group::~Group() delete [] _aggregationResults; } -Group & -Group::operator = (const Group & rhs) { - if (&rhs != this) { - Group g(rhs); - swap(g); +void +Group::Value::swap(Value & rhs) +{ + std::swap(_aggregationResults, rhs._aggregationResults); + std::swap(_children, rhs._children); + std::swap(_childInfo._childMap, rhs._childInfo._childMap); + { + int8_t tmp[sizeof(_orderBy)]; + memcpy(tmp, _orderBy, sizeof(_orderBy)); + memcpy(_orderBy, rhs._orderBy, sizeof(_orderBy)); + memcpy(rhs._orderBy, tmp, sizeof(_orderBy)); } - return *this; + std::swap(_tag, rhs._tag); + std::swap(_packedLength, rhs._packedLength); } -Group & -Group::operator = (Group && rhs) noexcept { - Group 
g(std::move(rhs)); - swap(g); - return *this; -} -Group & -Group::partialCopy(const Group & rhs) -{ - setId(*rhs._id); - _rank = rhs._rank; +void +Group::Value::partialCopy(const Value & rhs) { uint32_t totalAggrSize = getAggrSize() + getExprSize(); for(size_t i(0), m(totalAggrSize); i < m; i++) { _aggregationResults[i] = rhs._aggregationResults[i]; @@ -659,29 +745,15 @@ Group::partialCopy(const Group & rhs) setExprSize(rhs.getExprSize()); setupAggregationReferences(); memcpy(_orderBy, rhs._orderBy, sizeof(_orderBy)); - return *this; } -void Group::swap(Group & rhs) +void +Group::Value::setupAggregationReferences() { - _id.swap(rhs._id); - std::swap(_rank, rhs._rank); - std::swap(_aggregationResults, rhs._aggregationResults); - std::swap(_children, rhs._children); - std::swap(_childInfo._childMap, rhs._childInfo._childMap); - { - int8_t tmp[sizeof(_orderBy)]; - memcpy(tmp, _orderBy, sizeof(_orderBy)); - memcpy(_orderBy, rhs._orderBy, sizeof(_orderBy)); - memcpy(rhs._orderBy, tmp, sizeof(_orderBy)); - } - std::swap(_tag, rhs._tag); - std::swap(_packedLength, rhs._packedLength); + AggregationRefNode::Configure exprRefSetup(_aggregationResults); + select(exprRefSetup, exprRefSetup); } -template void Group::aggregate(const Grouping & grouping, uint32_t currentLevel, const DocId & doc, HitRank rank); -template void Group::aggregate(const Grouping & grouping, uint32_t currentLevel, const document::Document & doc, HitRank rank); - } } diff --git a/searchlib/src/vespa/searchlib/aggregation/group.h b/searchlib/src/vespa/searchlib/aggregation/group.h index c52a830f89e..3d0909be962 100644 --- a/searchlib/src/vespa/searchlib/aggregation/group.h +++ b/searchlib/src/vespa/searchlib/aggregation/group.h @@ -65,54 +65,114 @@ public: size_t operator() (const ResultNode & arg) const { return arg.hash(); } }; - typedef ExpressionNode::CP * ExpressionVector; - typedef vespalib::hash_set<uint32_t, GroupHasher, GroupEqual > GroupHash; typedef std::vector<GroupingLevel> GroupingLevelList; 
private: + + class Value { + public: + Value(); + Value(const Value & rhs); + Value & operator =(const Value & rhs); + Value(Value &&) noexcept; + Value & operator = (Value &&) noexcept; + ~Value() noexcept; + void swap(Value & rhs); + + VESPA_DLL_LOCAL int cmp(const Value & rhs) const; + void addExpressionResult(ExpressionNode::UP expressionNode); + void addAggregationResult(ExpressionNode::UP aggr); + void setupAggregationReferences(); + void addOrderBy(ExpressionNode::UP orderBy, bool ascending); + void select(const vespalib::ObjectPredicate &predicate, vespalib::ObjectOperation &operation); + void preAggregate(); + void postAggregate(); + void executeOrderBy(); + void sortById(); + void mergeCollectors(const Value & rhs); + void execute(); + bool needResort() const; + void assertIdOrder() const; + void visitMembers(vespalib::ObjectVisitor &visitor) const; + vespalib::Serializer & serialize(vespalib::Serializer & os) const; + vespalib::Deserializer & deserialize(vespalib::Deserializer & is); + void mergeLevel(const Group & protoType, const Value & b); + void mergePartial(const GroupingLevelList &levels, uint32_t firstLevel, uint32_t lastLevel, + uint32_t currentLevel, const Value & b); + void merge(const GroupingLevelList & levels, uint32_t firstLevel, uint32_t currentLevel, const Value & rhs); + void prune(const Value & b, uint32_t lastLevel, uint32_t currentLevel); + void postMerge(const std::vector<GroupingLevel> &levels, uint32_t firstLevel, uint32_t currentLevel); + void partialCopy(const Value & rhs); + VESPA_DLL_LOCAL Group * groupSingle(const ResultNode & selectResult, HitRank rank, const GroupingLevel & level); + + GroupList groups() const { return _children; } + void addChild(Group * child); + uint32_t getAggrSize() const { return _packedLength & 0x0f; } + uint32_t getOrderBySize() const { return (_packedLength >> 6) & 0x03; } + uint32_t getChildrenSize() const { return (_packedLength >> 8); } + uint32_t getExpr(uint32_t i) const { return getAggrSize() 
+ i; } + int32_t getOrderBy(uint32_t i) const { + int32_t v((_orderBy[i/2] >> (4*(i%2))) & 0x0f); + return (v & 0x8) ? -(v&0x7) : v; + } + + const AggregationResult & getAggregationResult(size_t i) const { return static_cast<const AggregationResult &>(*_aggregationResults[i]); } + AggregationResult & getAggregationResult(size_t i) { return static_cast<AggregationResult &>(*_aggregationResults[i]); } + uint32_t getExprSize() const { return (_packedLength >> 4) & 0x03; } + const Group & getChild(size_t i) const { return *_children[i]; } + + template <typename Doc> + void collect(const Doc & docId, HitRank rank); + private: + + using ExpressionVector = ExpressionNode::CP *; + using GroupHash = vespalib::hash_set<uint32_t, GroupHasher, GroupEqual >; + void setAggrSize(uint32_t v) { _packedLength = (_packedLength & ~0x0f) | v; } + void setExprSize(uint32_t v) { _packedLength = (_packedLength & ~0x30) | (v << 4); } + void setOrderBySize(uint32_t v) { _packedLength = (_packedLength & ~0xc0) | (v << 6); } + void setChildrenSize(uint32_t v) { _packedLength = (_packedLength & ~0xffffff00) | (v << 8); } + AggregationResult * getAggr(size_t i) { return static_cast<AggregationResult *>(_aggregationResults[i].get()); } + const AggregationResult & getAggr(size_t i) const { return static_cast<const AggregationResult &>(*_aggregationResults[i]); } + const ExpressionNode::CP & getAggrCP(size_t i) const { return _aggregationResults[i]; } + const ExpressionNode::CP & getExprCP(size_t i) const { return _aggregationResults[getExpr(i)]; } + ExpressionNode & expr(size_t i) { return *_aggregationResults[getExpr(i)]; } + const ExpressionNode & expr(size_t i) const { return *_aggregationResults[getExpr(i)]; } + size_t getAllChildrenSize() const { return std::max(static_cast<size_t>(getChildrenSize()), _childInfo._allChildren); } + void setOrderBy(uint32_t i, int32_t v) { + if (v < 0) { + v = -v; + v = v | 0x8; + } + _orderBy[i/2] = (_orderBy[i/2] & (0xf0 >> (4*(i%2)))) | (v << (4*(i%2))); + 
} + bool needFullRank() const { return getOrderBySize() != 0; } + + uint32_t _packedLength; // Length of the 3 vectors below + uint32_t _tag; // Opaque tag used to identify the group by the client. + + // The collectors and expressions stored by this group. Currently, both aggregation results and expressions used by orderby() are stored in this + // array to save 8 bytes in the Group size. This makes it important to use the getAggr() and expr() methods for accessing elements, + // as they will correctly offset the index to the correct place in the array. + ExpressionVector _aggregationResults; + + ChildP *_children; // the sub-groups of this group. Great care must be taken to ensure proper destruct. + union ChildInfo { + GroupHash *_childMap; // child map used during aggregation + size_t _allChildren; // Keep real number of children. + } _childInfo; + uint8_t _orderBy[2]; // How this group is ranked, negative means reverse rank. + }; + ResultNode::CP _id; // the label of this group, separating it from other groups RawRank _rank; // The default rank taken from the highest hit relevance. - uint32_t _packedLength; // Length of the 3 vectors below - uint32_t _tag; // Opaque tag used to identify the group by the client. - - // The collectors and expressions stored by this group. Currently, both aggregation results and expressions used by orderby() are stored in this - // array to save 8 bytes in the Group size. This makes it important to use the getAggr() and expr() methods for accessing elements, - // as they will correctly offset the index to the correct place in the array. - ExpressionVector _aggregationResults; - - uint8_t _orderBy[2]; // How this group is ranked, negative means reverse rank. - ChildP *_children; // the sub-groups of this group. Great care must be taken to ensure proper destruct. - union ChildInfo { - GroupHash *_childMap; // child map used during aggregation - size_t _allChildren; // Keep real number of children. 
- } _childInfo; - - bool needFullRank() const { return getOrderBySize() != 0; } + Value _aggr; + Group & partialCopy(const Group & rhs); - void setOrderBy(uint32_t i, int32_t v) { - if (v < 0) { - v = -v; - v = v | 0x8; - } - _orderBy[i/2] = (_orderBy[i/2] & (0xf0 >> (4*(i%2)))) | (v << (4*(i%2))); - } - uint32_t getExprSize() const { return (_packedLength >> 4) & 0x03; } - void setAggrSize(uint32_t v) { _packedLength = (_packedLength & ~0x0f) | v; } - void setExprSize(uint32_t v) { _packedLength = (_packedLength & ~0x30) | (v << 4); } - void setOrderBySize(uint32_t v) { _packedLength = (_packedLength & ~0xc0) | (v << 6); } - void setChildrenSize(uint32_t v) { _packedLength = (_packedLength & ~0xffffff00) | (v << 8); } - AggregationResult * getAggr(size_t i) { return static_cast<AggregationResult *>(_aggregationResults[i].get()); } - const AggregationResult & getAggr(size_t i) const { return static_cast<const AggregationResult &>(*_aggregationResults[i]); } - const ExpressionNode::CP & getAggrCP(size_t i) const { return _aggregationResults[i]; } - const ExpressionNode::CP & getExprCP(size_t i) const { return _aggregationResults[getExpr(i)]; } - ExpressionNode & expr(size_t i) { return *_aggregationResults[getExpr(i)]; } - const ExpressionNode & expr(size_t i) const { return *_aggregationResults[getExpr(i)]; } + static void reset(Group * & v) { v = NULL; } static void destruct(Group * v) { if (v) { delete v; } } static void destruct(GroupList & l, size_t sz); - void addChild(Group * child); void setupAggregationReferences(); - size_t getAllChildrenSize() const { return std::max(static_cast<size_t>(getChildrenSize()), _childInfo._allChildren); } template <typename Doc> VESPA_DLL_LOCAL void groupNext(const GroupingLevel & level, const Doc & docId, HitRank rank); public: @@ -122,10 +182,9 @@ public: Group(); Group(const Group & rhs); Group & operator =(const Group & rhs); - Group(Group &&) noexcept; - Group & operator = (Group &&) noexcept; - ~Group() noexcept; - void 
swap(Group & rhs); + Group(Group &&) = default; + Group & operator = (Group &&) = default; + ~Group(); int cmpId(const Group &rhs) const { return _id->cmpFast(*rhs._id); } int cmpRank(const Group &rhs) const; @@ -133,7 +192,9 @@ public: Group & updateRank(RawRank r); RawRank getRank() const { return _rank; } - VESPA_DLL_LOCAL Group * groupSingle(const ResultNode & result, HitRank rank, const GroupingLevel & level); + Group * groupSingle(const ResultNode & result, HitRank rank, const GroupingLevel & level) { + return _aggr.groupSingle(result, rank, level); + } bool hasId() const { return (_id.get() != NULL); } const ResultNode &getId() const { return *_id; } @@ -141,16 +202,35 @@ public: Group unchain() const { return *this; } Group &setId(const ResultNode &id) { _id.reset(static_cast<ResultNode *>(id.clone())); return *this; } - Group &addAggregationResult(ExpressionNode::UP result); + Group &addAggregationResult(ExpressionNode::UP result) { + _aggr.addAggregationResult(std::move(result)); + return *this; + } Group &addResult(ExpressionNode::UP aggr); Group &addResult(const ExpressionNode & aggr) { return addResult(ExpressionNode::UP(aggr.clone())); } - Group &addExpressionResult(ExpressionNode::UP expressionNode); - Group &addOrderBy(ExpressionNode::UP orderBy, bool ascending); + Group &addExpressionResult(ExpressionNode::UP expressionNode) { + _aggr.addExpressionResult(std::move(expressionNode)); + return *this; + } + Group &addOrderBy(ExpressionNode::UP orderBy, bool ascending) { + _aggr.addOrderBy(std::move(orderBy), ascending); return *this; + } Group &addOrderBy(const ExpressionNode & orderBy, bool ascending) { return addOrderBy(ExpressionNode::UP(orderBy.clone()), ascending); } - Group &addChild(const Group &child) { addChild(new Group(child)); return *this; } - Group &addChild(Group::UP child) { addChild(child.release()); return *this; } + Group &addChild(const Group &child) { _aggr.addChild(new Group(child)); return *this; } + Group &addChild(Group::UP 
child) { _aggr.addChild(child.release()); return *this; } + + GroupList groups() const { return _aggr.groups(); } + uint32_t getAggrSize() const { return _aggr.getAggrSize(); } + uint32_t getOrderBySize() const { return _aggr.getOrderBySize(); } + uint32_t getExpr(uint32_t i) const { return _aggr.getExpr(i); } + int32_t getOrderBy(uint32_t i) const { return _aggr.getOrderBy(i); } + uint32_t getChildrenSize() const { return _aggr.getChildrenSize(); } + const Group & getChild(size_t i) const { return _aggr.getChild(i); } + + const AggregationResult & getAggregationResult(size_t i) const { return _aggr.getAggregationResult(i); } + AggregationResult & getAggregationResult(size_t i) { return _aggr.getAggregationResult(i); } /** * Prunes this tree, keeping only the nodes found in another @@ -166,19 +246,20 @@ public: * Recursively checks if any itself or any children needs a full resort. * Then all hits must be processed and should be doen before any hit sorting. */ - bool needResort() const; + bool needResort() const { return _aggr.needResort(); } void selectMembers(const vespalib::ObjectPredicate &predicate, vespalib::ObjectOperation &operation) override; - void preAggregate(); + void preAggregate() { return _aggr.preAggregate(); } template <typename Doc> VESPA_DLL_LOCAL void aggregate(const Grouping & grouping, uint32_t currentLevel, const Doc & docId, HitRank rank); template <typename Doc> - void collect(const Doc & docId, HitRank rank); - void postAggregate(); + void collect(const Doc & docId, HitRank rank) { _aggr.collect(docId, rank); } + void postAggregate() { _aggr.postAggregate(); } void merge(const std::vector<GroupingLevel> &levels, uint32_t firstLevel, uint32_t currentLevel, Group &b); - void executeOrderBy(); + void executeOrderBy() { _aggr.executeOrderBy(); } + void sortById() { _aggr.sortById(); } /** * Merge children and results of another tree within the unfrozen parts of @@ -189,20 +270,10 @@ public: * @param lastLevel The last level to merge. 
* @param currentLevel The current level on which merging should be done. **/ - void mergePartial(const std::vector<GroupingLevel> &levels, uint32_t firstLevel, uint32_t lastLevel, uint32_t currentLevel, const Group & b); - void postMerge(const std::vector<GroupingLevel> &levels, uint32_t firstLevel, uint32_t currentLevel); - void sortById(); - uint32_t getChildrenSize() const { return (_packedLength >> 8); } - const Group & getChild(size_t i) const { return *_children[i]; } - GroupList groups() const { return _children; } - const AggregationResult & getAggregationResult(size_t i) const { return static_cast<const AggregationResult &>(*_aggregationResults[i]); } - AggregationResult & getAggregationResult(size_t i) { return static_cast<AggregationResult &>(*_aggregationResults[i]); } - uint32_t getAggrSize() const { return _packedLength & 0x0f; } - uint32_t getOrderBySize() const { return (_packedLength >> 6) & 0x03; } - uint32_t getExpr(uint32_t i) const { return getAggrSize() + i; } - int32_t getOrderBy(uint32_t i) const { - int32_t v((_orderBy[i/2] >> (4*(i%2))) & 0x0f); - return (v & 0x8) ? 
-(v&0x7) : v; + void mergePartial(const std::vector<GroupingLevel> &levels, uint32_t firstLevel, uint32_t lastLevel, + uint32_t currentLevel, const Group & b); + void postMerge(const std::vector<GroupingLevel> &levels, uint32_t firstLevel, uint32_t currentLevel) { + _aggr.postMerge(levels, firstLevel, currentLevel); } }; diff --git a/searchlib/src/vespa/searchlib/expression/aggregationrefnode.h b/searchlib/src/vespa/searchlib/expression/aggregationrefnode.h index 5ee42b7d789..03ee085c647 100644 --- a/searchlib/src/vespa/searchlib/expression/aggregationrefnode.h +++ b/searchlib/src/vespa/searchlib/expression/aggregationrefnode.h @@ -18,8 +18,8 @@ public: public: Configure(ExpressionNodeArray & exprVec) : _exprVec(exprVec) { } private: - virtual void execute(vespalib::Identifiable &obj) { static_cast<AggregationRefNode&>(obj).locateExpression(_exprVec); } - virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(AggregationRefNode::classId); } + virtual void execute(vespalib::Identifiable &obj) override { static_cast<AggregationRefNode&>(obj).locateExpression(_exprVec); } + virtual bool check(const vespalib::Identifiable &obj) const override { return obj.inherits(AggregationRefNode::classId); } ExpressionNodeArray & _exprVec; }; virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; @@ -31,9 +31,9 @@ public: AggregationRefNode & operator = (const AggregationRefNode & exprref); ExpressionNode *getExpression() { return _expressionNode; } - virtual const ResultNode & getResult() const { return _expressionNode->getResult(); } - virtual void onPrepare(bool preserveAccurateTypes) { _expressionNode->prepare(preserveAccurateTypes); } - virtual bool onExecute() const; + const ResultNode & getResult() const override { return _expressionNode->getResult(); } + void onPrepare(bool preserveAccurateTypes) override { _expressionNode->prepare(preserveAccurateTypes); } + bool onExecute() const override; private: void 
locateExpression(ExpressionNodeArray & exprVec) const; diff --git a/searchlib/src/vespa/searchlib/grouping/collect.h b/searchlib/src/vespa/searchlib/grouping/collect.h index 63b0950c460..a3bee9e9630 100644 --- a/searchlib/src/vespa/searchlib/grouping/collect.h +++ b/searchlib/src/vespa/searchlib/grouping/collect.h @@ -1,7 +1,7 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once -#include <vespa/searchlib/grouping/groupref.h> +#include "groupref.h" #include <vespa/searchlib/aggregation/group.h> namespace search { |