summary refs log tree commit diff stats
path: root/eval
diff options
context:
space:
mode:
author Håvard Pettersen <havardpe@oath.com> 2020-01-24 13:30:31 +0000
committer Håvard Pettersen <havardpe@oath.com> 2020-01-29 12:01:46 +0000
commit 7ee101e3880de1366ea7441481c1eb37a5fa24c8 (patch)
tree eb64d69311f21314fba2143547cdec51dbf9aa94 /eval
parent ff2010ffd3c15c5518dbc9276cbc7b24cde77879 (diff)
use openblas for matrix multiplication
Diffstat (limited to 'eval')
-rw-r--r-- eval/src/vespa/eval/CMakeLists.txt 3
-rw-r--r-- eval/src/vespa/eval/tensor/dense/dense_matmul_function.cpp 80
-rw-r--r-- eval/src/vespa/eval/tensor/dense/dense_matmul_function.h 2
3 files changed, 52 insertions, 33 deletions
diff --git a/eval/src/vespa/eval/CMakeLists.txt b/eval/src/vespa/eval/CMakeLists.txt
index 90972de7c80..c28643e605e 100644
--- a/eval/src/vespa/eval/CMakeLists.txt
+++ b/eval/src/vespa/eval/CMakeLists.txt
@@ -14,3 +14,6 @@ vespa_add_library(vespaeval
DEPENDS
${VESPA_LLVM_LIB}
)
+
+set(BLA_VENDOR OpenBLAS)
+vespa_add_target_package_dependency(vespaeval BLAS)
diff --git a/eval/src/vespa/eval/tensor/dense/dense_matmul_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_matmul_function.cpp
index 7ba186b622a..ce0d4230c23 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_matmul_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_matmul_function.cpp
@@ -10,6 +10,8 @@
#include <vespa/eval/tensor/tensor.h>
#include <assert.h>
+#include <openblas/cblas.h>
+
namespace vespalib::tensor {
using eval::ValueType;
@@ -21,29 +23,8 @@ using namespace eval::operation;
namespace {
-template <typename LCT, typename RCT>
-struct HWSupport {
- static double call(hwaccelrated::IAccelrated *, const LCT *lhs, const RCT *rhs, size_t len) {
- double result = 0.0;
- for (size_t i = 0; i < len; ++i) {
- result += (lhs[i] * rhs[i]);
- }
- return result;
- }
-};
-template <> struct HWSupport<float, float> {
- static double call(hwaccelrated::IAccelrated *hw, const float *lhs, const float *rhs, size_t len) {
- return hw->dotProduct(lhs, rhs, len);
- }
-};
-template <> struct HWSupport<double, double> {
- static double call(hwaccelrated::IAccelrated *hw, const double *lhs, const double *rhs, size_t len) {
- return hw->dotProduct(lhs, rhs, len);
- }
-};
-
template <typename LCT, typename RCT, bool lhs_common_inner, bool rhs_common_inner>
-double sparse_dot_product(const LCT *lhs, const RCT *rhs, size_t lhs_size, size_t common_size, size_t rhs_size) {
+double my_dot_product(const LCT *lhs, const RCT *rhs, size_t lhs_size, size_t common_size, size_t rhs_size) {
double result = 0.0;
for (size_t i = 0; i < common_size; ++i) {
result += ((*lhs) * (*rhs));
@@ -65,11 +46,7 @@ void my_matmul_op(eval::InterpretedFunction::State &state, uint64_t param) {
for (size_t i = 0; i < self.lhs_size; ++i) {
const RCT *rhs = rhs_cells.cbegin();
for (size_t j = 0; j < self.rhs_size; ++j) {
- if (lhs_common_inner && rhs_common_inner) {
- *dst++ = HWSupport<LCT,RCT>::call(self.hw.get(), lhs, rhs, self.common_size);
- } else {
- *dst++ = sparse_dot_product<LCT,RCT,lhs_common_inner,rhs_common_inner>(lhs, rhs, self.lhs_size, self.common_size, self.rhs_size);
- }
+ *dst++ = my_dot_product<LCT,RCT,lhs_common_inner,rhs_common_inner>(lhs, rhs, self.lhs_size, self.common_size, self.rhs_size);
rhs += (rhs_common_inner ? self.common_size : 1);
}
lhs += (lhs_common_inner ? self.common_size : 1);
@@ -78,19 +55,61 @@ void my_matmul_op(eval::InterpretedFunction::State &state, uint64_t param) {
}
template <bool lhs_common_inner, bool rhs_common_inner>
+void my_cblas_double_matmul_op(eval::InterpretedFunction::State &state, uint64_t param) {
+ const DenseMatMulFunction::Self &self = *((const DenseMatMulFunction::Self *)(param));
+ auto lhs_cells = DenseTensorView::typify_cells<double>(state.peek(1));
+ auto rhs_cells = DenseTensorView::typify_cells<double>(state.peek(0));
+ auto dst_cells = state.stash.create_array<double>(self.lhs_size * self.rhs_size);
+ cblas_dgemm(CblasRowMajor, lhs_common_inner ? CblasNoTrans : CblasTrans, rhs_common_inner ? CblasTrans : CblasNoTrans,
+ self.lhs_size, self.rhs_size, self.common_size, 1.0,
+ lhs_cells.cbegin(), lhs_common_inner ? self.common_size : self.lhs_size,
+ rhs_cells.cbegin(), rhs_common_inner ? self.common_size : self.rhs_size,
+ 0.0, dst_cells.begin(), self.rhs_size);
+ state.pop_pop_push(state.stash.create<DenseTensorView>(self.result_type, TypedCells(dst_cells)));
+}
+
+template <bool lhs_common_inner, bool rhs_common_inner>
+void my_cblas_float_matmul_op(eval::InterpretedFunction::State &state, uint64_t param) {
+ const DenseMatMulFunction::Self &self = *((const DenseMatMulFunction::Self *)(param));
+ auto lhs_cells = DenseTensorView::typify_cells<float>(state.peek(1));
+ auto rhs_cells = DenseTensorView::typify_cells<float>(state.peek(0));
+ auto dst_cells = state.stash.create_array<float>(self.lhs_size * self.rhs_size);
+ cblas_sgemm(CblasRowMajor, lhs_common_inner ? CblasNoTrans : CblasTrans, rhs_common_inner ? CblasTrans : CblasNoTrans,
+ self.lhs_size, self.rhs_size, self.common_size, 1.0,
+ lhs_cells.cbegin(), lhs_common_inner ? self.common_size : self.lhs_size,
+ rhs_cells.cbegin(), rhs_common_inner ? self.common_size : self.rhs_size,
+ 0.0, dst_cells.begin(), self.rhs_size);
+ state.pop_pop_push(state.stash.create<DenseTensorView>(self.result_type, TypedCells(dst_cells)));
+}
+
+template <bool lhs_common_inner, bool rhs_common_inner>
struct MyMatMulOp {
template <typename LCT, typename RCT>
static auto get_fun() { return my_matmul_op<LCT,RCT,lhs_common_inner,rhs_common_inner>; }
};
+template <bool lhs_common_inner, bool rhs_common_inner>
+eval::InterpretedFunction::op_function my_select3(CellType lct, CellType rct)
+{
+ if (lct == rct) {
+ if (lct == ValueType::CellType::DOUBLE) {
+ return my_cblas_double_matmul_op<lhs_common_inner,rhs_common_inner>;
+ }
+ if (lct == ValueType::CellType::FLOAT) {
+ return my_cblas_float_matmul_op<lhs_common_inner,rhs_common_inner>;
+ }
+ }
+ return select_2<MyMatMulOp<lhs_common_inner,rhs_common_inner>>(lct, rct);
+}
+
template <bool lhs_common_inner>
eval::InterpretedFunction::op_function my_select2(CellType lct, CellType rct,
bool rhs_common_inner)
{
if (rhs_common_inner) {
- return select_2<MyMatMulOp<lhs_common_inner,true>>(lct, rct);
+ return my_select3<lhs_common_inner,true>(lct, rct);
} else {
- return select_2<MyMatMulOp<lhs_common_inner,false>>(lct, rct);
+ return my_select3<lhs_common_inner,false>(lct, rct);
}
}
@@ -152,8 +171,7 @@ DenseMatMulFunction::Self::Self(const eval::ValueType &result_type_in,
: result_type(result_type_in),
lhs_size(lhs_size_in),
common_size(common_size_in),
- rhs_size(rhs_size_in),
- hw(hwaccelrated::IAccelrated::getAccelrator())
+ rhs_size(rhs_size_in)
{
}
diff --git a/eval/src/vespa/eval/tensor/dense/dense_matmul_function.h b/eval/src/vespa/eval/tensor/dense/dense_matmul_function.h
index 276a455bda4..f0b6d8b6c19 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_matmul_function.h
+++ b/eval/src/vespa/eval/tensor/dense/dense_matmul_function.h
@@ -4,7 +4,6 @@
#include <vespa/eval/eval/tensor_function.h>
#include "dense_tensor_view.h"
-#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
namespace vespalib::tensor {
@@ -20,7 +19,6 @@ public:
size_t lhs_size;
size_t common_size;
size_t rhs_size;
- hwaccelrated::IAccelrated::UP hw;
Self(const eval::ValueType &result_type_in,
size_t lhs_size_in, size_t common_size_in, size_t rhs_size_in);
~Self();