summary | refs | log | tree | commit | diff | stats
path: root/eval
diff options
context:
space:
mode:
author Håvard Pettersen <havardpe@oath.com> 2018-02-01 11:29:59 +0000
committer Håvard Pettersen <havardpe@oath.com> 2018-02-01 11:29:59 +0000
commit 9795276d274f123b56e4d998b620b67714ddb3b3 (patch)
tree 32b83cb4dc14d84961d7f1b8116e980d645ab73a /eval
parent e5b308f018266f6d4a87d14fae8973c4da5e7bdb (diff)
pre-alloc optimized xw product output vector
Diffstat (limited to 'eval')
-rw-r--r-- eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp 29
-rw-r--r-- eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h 16
2 files changed, 27 insertions, 18 deletions
diff --git a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp
index 06afe7dd164..8b5d8f673ef 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp
@@ -50,19 +50,19 @@ void transposedProduct(const DenseXWProductFunction::Self &self,
assert(out == result.end());
}
+template <bool commonDimensionInnermost>
void my_op(eval::InterpretedFunction::State &state, uint64_t param) {
DenseXWProductFunction::Self *self = (DenseXWProductFunction::Self *)(param);
DenseTensorView::CellsRef vectorCells = getCellsRef(state.peek(1));
DenseTensorView::CellsRef matrixCells = getCellsRef(state.peek(0));
- ArrayRef<double> outputCells = state.stash.create_array<double>(self->_resultSize);
- if (self->_commonDimensionInnermost) {
- multiDotProduct(*self, vectorCells, matrixCells, outputCells);
+ if (commonDimensionInnermost) {
+ multiDotProduct(*self, vectorCells, matrixCells, self->_outputCells);
} else {
- transposedProduct(*self, vectorCells, matrixCells, outputCells);
+ transposedProduct(*self, vectorCells, matrixCells, self->_outputCells);
}
- state.pop_pop_push(state.stash.create<DenseTensorView>(self->_resultType, outputCells));
+ state.pop_pop_push(self->_outputView);
}
} // namespace vespalib::tensor::<unnamed>
@@ -70,11 +70,11 @@ void my_op(eval::InterpretedFunction::State &state, uint64_t param) {
DenseXWProductFunction::Self::Self(const eval::ValueType &resultType,
size_t vectorSize,
size_t resultSize,
- bool matrixHasCommonDimensionInnermost)
- : _resultType(resultType),
+ Stash &stash)
+ : _outputCells(stash.create_array<double>(resultSize)),
+ _outputView(resultType, _outputCells),
_vectorSize(vectorSize),
_resultSize(resultSize),
- _commonDimensionInnermost(matrixHasCommonDimensionInnermost),
_hwAccelerator(hwaccelrated::IAccelrated::getAccelrator())
{}
@@ -85,7 +85,9 @@ DenseXWProductFunction::DenseXWProductFunction(const eval::ValueType &resultType
size_t resultSize,
bool matrixHasCommonDimensionInnermost)
: eval::tensor_function::Op2(resultType, vector_in, matrix_in),
- _self(resultType, vectorSize, resultSize, matrixHasCommonDimensionInnermost)
+ _vectorSize(vectorSize),
+ _resultSize(resultSize),
+ _commonDimensionInnermost(matrixHasCommonDimensionInnermost)
{}
namespace {
@@ -93,9 +95,14 @@ namespace {
} // namespace <unnamed>
eval::InterpretedFunction::Instruction
-DenseXWProductFunction::compile_self(Stash &) const
+DenseXWProductFunction::compile_self(Stash &stash) const
{
- return eval::InterpretedFunction::Instruction(my_op, (uint64_t)(&_self));
+ Self &self = stash.create<Self>(result_type(), _vectorSize, _resultSize, stash);
+ if (_commonDimensionInnermost) {
+ return eval::InterpretedFunction::Instruction(my_op<true>, (uint64_t)(&self));
+ } else {
+ return eval::InterpretedFunction::Instruction(my_op<false>, (uint64_t)(&self));
+ }
}
}
diff --git a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h
index 516954d0246..39b7d9fcdc4 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h
+++ b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h
@@ -18,20 +18,22 @@ class DenseXWProductFunction : public eval::tensor_function::Op2
{
public:
struct Self {
- const eval::ValueType _resultType;
+ ArrayRef<double> _outputCells;
+ DenseTensorView _outputView;
const size_t _vectorSize;
const size_t _resultSize;
- bool _commonDimensionInnermost;
hwaccelrated::IAccelrated::UP _hwAccelerator;
Self(const eval::ValueType &resultType,
size_t vectorSize,
size_t resultSize,
- bool matrixHasCommonDimensionInnermost);
+ Stash &stash);
~Self() {}
};
private:
- Self _self;
+ const size_t _vectorSize;
+ const size_t _resultSize;
+ bool _commonDimensionInnermost;
public:
DenseXWProductFunction(const eval::ValueType &resultType,
@@ -43,10 +45,10 @@ public:
~DenseXWProductFunction() {}
- size_t vectorSize() const { return _self._vectorSize; }
- size_t resultSize() const { return _self._resultSize; }
+ size_t vectorSize() const { return _vectorSize; }
+ size_t resultSize() const { return _resultSize; }
- bool matrixHasCommonDimensionInnermost() const { return _self._commonDimensionInnermost; }
+ bool matrixHasCommonDimensionInnermost() const { return _commonDimensionInnermost; }
eval::InterpretedFunction::Instruction compile_self(Stash &stash) const override;
};