summaryrefslogtreecommitdiffstats
path: root/eval
diff options
context:
space:
mode:
authorHåvard Pettersen <havardpe@oath.com>2020-10-30 15:09:27 +0000
committerHåvard Pettersen <havardpe@oath.com>2020-10-30 15:09:27 +0000
commit170e1a00a84d3334e353a0f05c52b079d5618a0a (patch)
tree1c4cc6b7f1e34a4136e28f9aa22a4f311a89fe9e /eval
parent6714a9d0b255933bdad7956db7928c67179cec50 (diff)
avoid unrolling reduce all loop
This is to see if it helps the number stability of the BERT performance test. Note that unrolling made it go 4 times faster, so we might want to re-introduce it later.
Diffstat (limited to 'eval')
-rw-r--r--eval/src/vespa/eval/instruction/generic_reduce.cpp20
1 files changed, 5 insertions, 15 deletions
diff --git a/eval/src/vespa/eval/instruction/generic_reduce.cpp b/eval/src/vespa/eval/instruction/generic_reduce.cpp
index e930cd9e7a0..ce68e3db36f 100644
--- a/eval/src/vespa/eval/instruction/generic_reduce.cpp
+++ b/eval/src/vespa/eval/instruction/generic_reduce.cpp
@@ -114,23 +114,13 @@ template <typename ICT, typename OCT, typename AGGR>
void my_full_reduce_op(State &state, uint64_t) {
auto cells = state.peek(0).cells().typify<ICT>();
if (cells.size() > 0) {
- AGGR aggr[4];
- size_t i = 0;
- for (; (i + 3) < cells.size(); i += 4) {
- aggr[0].sample(cells[i+0]);
- aggr[1].sample(cells[i+1]);
- aggr[2].sample(cells[i+2]);
- aggr[3].sample(cells[i+3]);
+ AGGR aggr;
+ for (ICT value: cells) {
+ aggr.sample(value);
}
- for (; i < cells.size(); ++i) {
- aggr[0].sample(cells[i]);
- }
- aggr[0].merge(aggr[1]);
- aggr[0].merge(aggr[2]);
- aggr[0].merge(aggr[3]);
- state.pop_push(state.stash.create<ScalarValue<OCT>>(aggr[0].result()));
+ state.pop_push(state.stash.create<ScalarValue<OCT>>(aggr.result()));
} else {
- state.pop_push(state.stash.create<ScalarValue<OCT>>(0.0));
+ state.pop_push(state.stash.create<ScalarValue<OCT>>(OCT{0}));
}
};