diff options
author | Håvard Pettersen <havardpe@oath.com> | 2020-10-30 15:09:27 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2020-10-30 15:09:27 +0000 |
commit | 170e1a00a84d3334e353a0f05c52b079d5618a0a (patch) | |
tree | 1c4cc6b7f1e34a4136e28f9aa22a4f311a89fe9e /eval | |
parent | 6714a9d0b255933bdad7956db7928c67179cec50 (diff) |
avoid unrolling reduce all loop
This is to see if it helps the number stability of the BERT performance
test. Note that unrolling made it go 4 times faster, so we might want
to re-introduce it later.
Diffstat (limited to 'eval')
-rw-r--r-- | eval/src/vespa/eval/instruction/generic_reduce.cpp | 20 |
1 files changed, 5 insertions, 15 deletions
diff --git a/eval/src/vespa/eval/instruction/generic_reduce.cpp b/eval/src/vespa/eval/instruction/generic_reduce.cpp index e930cd9e7a0..ce68e3db36f 100644 --- a/eval/src/vespa/eval/instruction/generic_reduce.cpp +++ b/eval/src/vespa/eval/instruction/generic_reduce.cpp @@ -114,23 +114,13 @@ template <typename ICT, typename OCT, typename AGGR> void my_full_reduce_op(State &state, uint64_t) { auto cells = state.peek(0).cells().typify<ICT>(); if (cells.size() > 0) { - AGGR aggr[4]; - size_t i = 0; - for (; (i + 3) < cells.size(); i += 4) { - aggr[0].sample(cells[i+0]); - aggr[1].sample(cells[i+1]); - aggr[2].sample(cells[i+2]); - aggr[3].sample(cells[i+3]); + AGGR aggr; + for (ICT value: cells) { + aggr.sample(value); } - for (; i < cells.size(); ++i) { - aggr[0].sample(cells[i]); - } - aggr[0].merge(aggr[1]); - aggr[0].merge(aggr[2]); - aggr[0].merge(aggr[3]); - state.pop_push(state.stash.create<ScalarValue<OCT>>(aggr[0].result())); + state.pop_push(state.stash.create<ScalarValue<OCT>>(aggr.result())); } else { - state.pop_push(state.stash.create<ScalarValue<OCT>>(0.0)); + state.pop_push(state.stash.create<ScalarValue<OCT>>(OCT{0})); } }; |