diff options
author | Håvard Pettersen <havardpe@oath.com> | 2020-01-29 11:56:20 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2020-01-30 14:23:12 +0000 |
commit | 74dccbc0b94164db1f47866fc1ed2ef07ee92bda (patch) | |
tree | 7d3cf1d3f87fd300fbf9a4da1aec7fa82bcdcdc6 /eval | |
parent | 7dfe0a92a97e3929e8fd4fc9e2cc83c607d8732f (diff) |
use openblas for matrix vector multiplication
Diffstat (limited to 'eval')
3 files changed, 109 insertions, 114 deletions
diff --git a/eval/src/tests/tensor/dense_xw_product_function/dense_xw_product_function_test.cpp b/eval/src/tests/tensor/dense_xw_product_function/dense_xw_product_function_test.cpp index 36609c04219..0b924451907 100644 --- a/eval/src/tests/tensor/dense_xw_product_function/dense_xw_product_function_test.cpp +++ b/eval/src/tests/tensor/dense_xw_product_function/dense_xw_product_function_test.cpp @@ -78,9 +78,9 @@ void verify_optimized(const vespalib::string &expr, size_t vec_size, size_t res_ auto info = fixture.find_all<DenseXWProductFunction>(); ASSERT_EQUAL(info.size(), 1u); EXPECT_TRUE(info[0]->result_is_mutable()); - EXPECT_EQUAL(info[0]->vectorSize(), vec_size); - EXPECT_EQUAL(info[0]->resultSize(), res_size); - EXPECT_EQUAL(info[0]->matrixHasCommonDimensionInnermost(), happy); + EXPECT_EQUAL(info[0]->vector_size(), vec_size); + EXPECT_EQUAL(info[0]->result_size(), res_size); + EXPECT_EQUAL(info[0]->common_inner(), happy); } vespalib::string make_expr(const vespalib::string &a, const vespalib::string &b, const vespalib::string &common, diff --git a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp index 2db5b4e8f92..8225178be7a 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp @@ -10,6 +10,8 @@ #include <vespa/vespalib/util/exceptions.h> #include <assert.h> +#include <openblas/cblas.h> + namespace vespalib::tensor { using eval::ValueType; @@ -21,76 +23,59 @@ using namespace eval::operation; namespace { -template <typename LCT, typename RCT> -struct HWSupport { - static double call(hwaccelrated::IAccelrated *, const LCT *lhs, const RCT *rhs, size_t len) { - double result = 0.0; - for (size_t i = 0; i < len; ++i) { - result += (lhs[i] * rhs[i]); - } - return result; +template <typename LCT, typename RCT, bool common_inner> +double my_dot_product(const LCT *lhs, const RCT *rhs, size_t vector_size, size_t result_size) { + double result = 0.0; + for (size_t i = 0; i < vector_size; ++i) { + result += ((*lhs) * (*rhs)); + ++lhs; + rhs += (common_inner ? 1 : result_size); } -}; -template <> struct HWSupport<float, float> { - static double call(hwaccelrated::IAccelrated *hw, const float *lhs, const float *rhs, size_t len) { - return hw->dotProduct(lhs, rhs, len); - } -}; -template <> struct HWSupport<double, double> { - static double call(hwaccelrated::IAccelrated *hw, const double *lhs, const double *rhs, size_t len) { - return hw->dotProduct(lhs, rhs, len); - } -}; - -template <typename LCT, typename RCT, typename OCT> -void multiDotProduct(const DenseXWProductFunction::Self &self, - const ConstArrayRef<LCT> &vectorCells, const ConstArrayRef<RCT> &matrixCells, ArrayRef<OCT> &result) -{ - OCT *out = result.begin(); - const RCT *matrixP = matrixCells.cbegin(); - const LCT * const vectorP = vectorCells.cbegin(); - for (size_t row = 0; row < self._resultSize; ++row) { - double cell = HWSupport<LCT,RCT>::call(self._hwAccelerator.get(), vectorP, matrixP, self._vectorSize); - *out++ = cell; - matrixP += self._vectorSize; - } - assert(out == result.end()); - assert(matrixP == matrixCells.cend()); -} - -template <typename LCT, typename RCT, typename OCT> -void transposedProduct(const DenseXWProductFunction::Self &self, - const ConstArrayRef<LCT> &vectorCells, const ConstArrayRef<RCT> &matrixCells, ArrayRef<OCT> &result) -{ - OCT *out = result.begin(); - const RCT * const matrixP = matrixCells.cbegin(); - const LCT * const vectorP = vectorCells.cbegin(); - for (size_t row = 0; row < self._resultSize; ++row) { - double cell = 0; - for (size_t col = 0; col < self._vectorSize; ++col) { - cell += matrixP[col*self._resultSize + row] * vectorP[col]; - } - *out++ = cell; - } - assert(out == result.end()); + return result; } -template <typename LCT, typename RCT, bool commonDimensionInnermost> +template <typename LCT, typename RCT, bool common_inner> void my_xw_product_op(eval::InterpretedFunction::State &state, uint64_t param) { - DenseXWProductFunction::Self *self = (DenseXWProductFunction::Self *)(param); - + const DenseXWProductFunction::Self &self = *((const DenseXWProductFunction::Self *)(param)); using OCT = typename eval::UnifyCellTypes<LCT,RCT>::type; - auto vectorCells = DenseTensorView::typify_cells<LCT>(state.peek(1)); - auto matrixCells = DenseTensorView::typify_cells<RCT>(state.peek(0)); - auto outputCells = state.stash.create_array<OCT>(self->_resultSize); - - if (commonDimensionInnermost) { - multiDotProduct(*self, vectorCells, matrixCells, outputCells); - } else { - transposedProduct(*self, vectorCells, matrixCells, outputCells); + auto vector_cells = DenseTensorView::typify_cells<LCT>(state.peek(1)); + auto matrix_cells = DenseTensorView::typify_cells<RCT>(state.peek(0)); + auto dst_cells = state.stash.create_array<OCT>(self.result_size); + OCT *dst = dst_cells.begin(); + const RCT *matrix = matrix_cells.cbegin(); + for (size_t i = 0; i < self.result_size; ++i) { + *dst++ = my_dot_product<LCT,RCT,common_inner>(vector_cells.cbegin(), matrix, self.vector_size, self.result_size); + matrix += (common_inner ? self.vector_size : 1); } + state.pop_pop_push(state.stash.create<DenseTensorView>(self.result_type, TypedCells(dst_cells))); +} + +template <bool common_inner> +void my_cblas_double_xw_product_op(eval::InterpretedFunction::State &state, uint64_t param) { + const DenseXWProductFunction::Self &self = *((const DenseXWProductFunction::Self *)(param)); + auto vector_cells = DenseTensorView::typify_cells<double>(state.peek(1)); + auto matrix_cells = DenseTensorView::typify_cells<double>(state.peek(0)); + auto dst_cells = state.stash.create_array<double>(self.result_size); + cblas_dgemv(CblasRowMajor, common_inner ? CblasNoTrans : CblasTrans, + common_inner ? self.result_size : self.vector_size, + common_inner ? self.vector_size : self.result_size, + 1.0, matrix_cells.cbegin(), common_inner ? self.vector_size : self.result_size, vector_cells.cbegin(), 1, + 0.0, dst_cells.begin(), 1); + state.pop_pop_push(state.stash.create<DenseTensorView>(self.result_type, TypedCells(dst_cells))); +} - state.pop_pop_push(state.stash.create<DenseTensorView>(self->_resultType, TypedCells(outputCells))); +template <bool common_inner> +void my_cblas_float_xw_product_op(eval::InterpretedFunction::State &state, uint64_t param) { + const DenseXWProductFunction::Self &self = *((const DenseXWProductFunction::Self *)(param)); + auto vector_cells = DenseTensorView::typify_cells<float>(state.peek(1)); + auto matrix_cells = DenseTensorView::typify_cells<float>(state.peek(0)); + auto dst_cells = state.stash.create_array<float>(self.result_size); + cblas_sgemv(CblasRowMajor, common_inner ? CblasNoTrans : CblasTrans, + common_inner ? self.result_size : self.vector_size, + common_inner ? self.vector_size : self.result_size, + 1.0, matrix_cells.cbegin(), common_inner ? self.vector_size : self.result_size, vector_cells.cbegin(), 1, + 0.0, dst_cells.begin(), 1); + state.pop_pop_push(state.stash.create<DenseTensorView>(self.result_type, TypedCells(dst_cells))); } template <bool common_inner> @@ -99,11 +84,24 @@ struct MyXWProductOp { static auto get_fun() { return my_xw_product_op<LCT,RCT,common_inner>; } }; -eval::InterpretedFunction::op_function my_select(CellType lct, CellType rct, bool common_innermost) { - if (common_innermost) { - return select_2<MyXWProductOp<true> >(lct, rct); +template <bool common_inner> +eval::InterpretedFunction::op_function my_select2(CellType lct, CellType rct) { + if (lct == rct) { + if (lct == ValueType::CellType::DOUBLE) { + return my_cblas_double_xw_product_op<common_inner>; + } + if (lct == ValueType::CellType::FLOAT) { + return my_cblas_float_xw_product_op<common_inner>; + } + } + return select_2<MyXWProductOp<common_inner>>(lct, rct); +} + +eval::InterpretedFunction::op_function my_select(CellType lct, CellType rct, bool common_inner) { + if (common_inner) { + return my_select2<true>(lct, rct); } else { - return select_2<MyXWProductOp<false> >(lct, rct); + return my_select2<false>(lct, rct); } } @@ -129,42 +127,43 @@ bool isDenseXWProduct(const ValueType &res, const ValueType &vec, const ValueTyp } const TensorFunction &createDenseXWProduct(const ValueType &res, const TensorFunction &vec, const TensorFunction &mat, Stash &stash) { - bool common_is_inner = (mat.result_type().dimension_index(vec.result_type().dimensions()[0].name) == 1); + bool common_inner = (mat.result_type().dimension_index(vec.result_type().dimensions()[0].name) == 1); return stash.create<DenseXWProductFunction>(res, vec, mat, vec.result_type().dimensions()[0].size, res.dimensions()[0].size, - common_is_inner); + common_inner); } } // namespace vespalib::tensor::<unnamed> -DenseXWProductFunction::Self::Self(const eval::ValueType &resultType, - size_t vectorSize, - size_t resultSize) - : _resultType(resultType), - _vectorSize(vectorSize), - _resultSize(resultSize), - _hwAccelerator(hwaccelrated::IAccelrated::getAccelrator()) -{} +DenseXWProductFunction::Self::Self(const eval::ValueType &result_type_in, + size_t vector_size_in, size_t result_size_in) + : result_type(result_type_in), + vector_size(vector_size_in), + result_size(result_size_in) +{ +} +DenseXWProductFunction::Self::~Self() = default; -DenseXWProductFunction::DenseXWProductFunction(const eval::ValueType &resultType, +DenseXWProductFunction::DenseXWProductFunction(const eval::ValueType &result_type, const eval::TensorFunction &vector_in, const eval::TensorFunction &matrix_in, - size_t vectorSize, - size_t resultSize, - bool matrixHasCommonDimensionInnermost) - : eval::tensor_function::Op2(resultType, vector_in, matrix_in), - _vectorSize(vectorSize), - _resultSize(resultSize), - _commonDimensionInnermost(matrixHasCommonDimensionInnermost) -{} + size_t vector_size, + size_t result_size, + bool common_inner) + : eval::tensor_function::Op2(result_type, vector_in, matrix_in), + _vector_size(vector_size), + _result_size(result_size), + _common_inner(common_inner) +{ +} eval::InterpretedFunction::Instruction DenseXWProductFunction::compile_self(Stash &stash) const { - Self &self = stash.create<Self>(result_type(), _vectorSize, _resultSize); + Self &self = stash.create<Self>(result_type(), _vector_size, _result_size); auto op = my_select(lhs().result_type().cell_type(), - rhs().result_type().cell_type(), _commonDimensionInnermost); + rhs().result_type().cell_type(), _common_inner); return eval::InterpretedFunction::Instruction(op, (uint64_t)(&self)); } @@ -172,9 +171,9 @@ void DenseXWProductFunction::visit_self(vespalib::ObjectVisitor &visitor) const { Super::visit_self(visitor); - visitor.visitInt("vector_size", _vectorSize); - visitor.visitInt("result_size", _resultSize); - visitor.visitBool("common_dimension_innermost", _commonDimensionInnermost); + visitor.visitInt("vector_size", _vector_size); + visitor.visitInt("result_size", _result_size); + visitor.visitBool("common_inner", _common_inner); } const TensorFunction & diff --git a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h index f2f4d67c0f0..d7c39fa45a2 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h +++ b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h @@ -4,7 +4,6 @@ #include <vespa/eval/eval/tensor_function.h> #include "dense_tensor_view.h" -#include <vespa/vespalib/hwaccelrated/iaccelrated.h> namespace vespalib::tensor { @@ -16,37 +15,34 @@ class DenseXWProductFunction : public eval::tensor_function::Op2 using Super = eval::tensor_function::Op2; public: struct Self { - const eval::ValueType _resultType; - const size_t _vectorSize; - const size_t _resultSize; - hwaccelrated::IAccelrated::UP _hwAccelerator; - Self(const eval::ValueType &resultType, - size_t vectorSize, - size_t resultSize); - ~Self() {} + eval::ValueType result_type; + size_t vector_size; + size_t result_size; + Self(const eval::ValueType &result_type_in, + size_t vector_size_in, size_t result_size_in); + ~Self(); }; private: - const size_t _vectorSize; - const size_t _resultSize; - bool _commonDimensionInnermost; + size_t _vector_size; + size_t _result_size; + bool _common_inner; public: - DenseXWProductFunction(const eval::ValueType &resultType, + DenseXWProductFunction(const eval::ValueType &result_type, const eval::TensorFunction &vector_in, const eval::TensorFunction &matrix_in, - size_t vectorSize, - size_t resultSize, - bool matrixHasCommonDimensionInnermost); + size_t vector_size, + size_t result_size, + bool common_inner); ~DenseXWProductFunction() {} bool result_is_mutable() const override { return true; } - size_t vectorSize() const { return _vectorSize; } - size_t resultSize() const { return _resultSize; } - - bool matrixHasCommonDimensionInnermost() const { return _commonDimensionInnermost; } + size_t vector_size() const { return _vector_size; } + size_t result_size() const { return _result_size; } + bool common_inner() const { return _common_inner; } eval::InterpretedFunction::Instruction compile_self(Stash &stash) const override; void visit_self(vespalib::ObjectVisitor &visitor) const override; |