diff options
author | Håvard Pettersen <havardpe@oath.com> | 2018-02-01 11:29:59 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2018-02-01 11:29:59 +0000 |
commit | 9795276d274f123b56e4d998b620b67714ddb3b3 (patch) | |
tree | 32b83cb4dc14d84961d7f1b8116e980d645ab73a | |
parent | e5b308f018266f6d4a87d14fae8973c4da5e7bdb (diff) |
pre-alloc optimized xw product output vector
-rw-r--r-- | eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp | 29 | ||||
-rw-r--r-- | eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h | 16 |
2 files changed, 27 insertions, 18 deletions
diff --git a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp index 06afe7dd164..8b5d8f673ef 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp @@ -50,19 +50,19 @@ void transposedProduct(const DenseXWProductFunction::Self &self, assert(out == result.end()); } +template <bool commonDimensionInnermost> void my_op(eval::InterpretedFunction::State &state, uint64_t param) { DenseXWProductFunction::Self *self = (DenseXWProductFunction::Self *)(param); DenseTensorView::CellsRef vectorCells = getCellsRef(state.peek(1)); DenseTensorView::CellsRef matrixCells = getCellsRef(state.peek(0)); - ArrayRef<double> outputCells = state.stash.create_array<double>(self->_resultSize); - if (self->_commonDimensionInnermost) { - multiDotProduct(*self, vectorCells, matrixCells, outputCells); + if (commonDimensionInnermost) { + multiDotProduct(*self, vectorCells, matrixCells, self->_outputCells); } else { - transposedProduct(*self, vectorCells, matrixCells, outputCells); + transposedProduct(*self, vectorCells, matrixCells, self->_outputCells); } - state.pop_pop_push(state.stash.create<DenseTensorView>(self->_resultType, outputCells)); + state.pop_pop_push(self->_outputView); } } // namespace vespalib::tensor::<unnamed> @@ -70,11 +70,11 @@ void my_op(eval::InterpretedFunction::State &state, uint64_t param) { DenseXWProductFunction::Self::Self(const eval::ValueType &resultType, size_t vectorSize, size_t resultSize, - bool matrixHasCommonDimensionInnermost) - : _resultType(resultType), + Stash &stash) + : _outputCells(stash.create_array<double>(resultSize)), + _outputView(resultType, _outputCells), _vectorSize(vectorSize), _resultSize(resultSize), - _commonDimensionInnermost(matrixHasCommonDimensionInnermost), _hwAccelerator(hwaccelrated::IAccelrated::getAccelrator()) {} @@ -85,7 +85,9 @@ DenseXWProductFunction::DenseXWProductFunction(const eval::ValueType &resultType size_t resultSize, bool matrixHasCommonDimensionInnermost) : eval::tensor_function::Op2(resultType, vector_in, matrix_in), - _self(resultType, vectorSize, resultSize, matrixHasCommonDimensionInnermost) + _vectorSize(vectorSize), + _resultSize(resultSize), + _commonDimensionInnermost(matrixHasCommonDimensionInnermost) {} namespace { @@ -93,9 +95,14 @@ namespace { } // namespace <unnamed> eval::InterpretedFunction::Instruction -DenseXWProductFunction::compile_self(Stash &) const +DenseXWProductFunction::compile_self(Stash &stash) const { - return eval::InterpretedFunction::Instruction(my_op, (uint64_t)(&_self)); + Self &self = stash.create<Self>(result_type(), _vectorSize, _resultSize, stash); + if (_commonDimensionInnermost) { + return eval::InterpretedFunction::Instruction(my_op<true>, (uint64_t)(&self)); + } else { + return eval::InterpretedFunction::Instruction(my_op<false>, (uint64_t)(&self)); + } } } diff --git a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h index 516954d0246..39b7d9fcdc4 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h +++ b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h @@ -18,20 +18,22 @@ class DenseXWProductFunction : public eval::tensor_function::Op2 { public: struct Self { - const eval::ValueType _resultType; + ArrayRef<double> _outputCells; + DenseTensorView _outputView; const size_t _vectorSize; const size_t _resultSize; - bool _commonDimensionInnermost; hwaccelrated::IAccelrated::UP _hwAccelerator; Self(const eval::ValueType &resultType, size_t vectorSize, size_t resultSize, - bool matrixHasCommonDimensionInnermost); + Stash &stash); ~Self() {} }; private: - Self _self; + const size_t _vectorSize; + const size_t _resultSize; + bool _commonDimensionInnermost; public: DenseXWProductFunction(const eval::ValueType &resultType, @@ -43,10 +45,10 @@ public: ~DenseXWProductFunction() {} - size_t vectorSize() const { return _self._vectorSize; } - size_t resultSize() const { return _self._resultSize; } + size_t vectorSize() const { return _vectorSize; } + size_t resultSize() const { return _resultSize; } - bool matrixHasCommonDimensionInnermost() const { return _self._commonDimensionInnermost; } + bool matrixHasCommonDimensionInnermost() const { return _commonDimensionInnermost; } eval::InterpretedFunction::Instruction compile_self(Stash &stash) const override; }; |