diff options
author | Håvard Pettersen <havardpe@oath.com> | 2020-01-29 14:52:00 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2020-01-30 14:23:13 +0000 |
commit | 140fecac51910a4239aaff8444aeb6e6cea6e3ad (patch) | |
tree | ae6c8348b917d6b8e7d81dcdc4440036fcfffd02 /eval/src | |
parent | 74dccbc0b94164db1f47866fc1ed2ef07ee92bda (diff) | |
use openblas for dot product
Diffstat (limited to 'eval/src')
-rw-r--r-- | eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp | 67 |
-rw-r--r-- | eval/src/vespa/eval/tensor/dense/dense_dot_product_function.h | 2 |
2 files changed, 38 insertions, 31 deletions
diff --git a/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp index 8bcaddba3b4..ea7f43b610a 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp @@ -7,6 +7,8 @@ #include <vespa/eval/eval/value.h> #include <vespa/eval/tensor/tensor.h> +#include <openblas/cblas.h> + namespace vespalib::tensor { using eval::ValueType; @@ -19,32 +21,29 @@ using namespace eval::operation; namespace { template <typename LCT, typename RCT> -struct HWSupport { - static double call(hwaccelrated::IAccelrated *, const ConstArrayRef<LCT> &lhs, const ConstArrayRef<RCT> &rhs) { - double result = 0.0; - for (size_t i = 0; i < lhs.size(); ++i) { - result += (lhs[i] * rhs[i]); - } - return result; +void my_dot_product_op(eval::InterpretedFunction::State &state, uint64_t) { + auto lhs_cells = DenseTensorView::typify_cells<LCT>(state.peek(1)); + auto rhs_cells = DenseTensorView::typify_cells<RCT>(state.peek(0)); + double result = 0.0; + const LCT *lhs = lhs_cells.cbegin(); + const RCT *rhs = rhs_cells.cbegin(); + for (size_t i = 0; i < lhs_cells.size(); ++i) { + result += ((*lhs++) * (*rhs++)); } -}; -template <> struct HWSupport<float, float> { - static double call(hwaccelrated::IAccelrated *hw, const ConstArrayRef<float> &lhs, const ConstArrayRef<float> &rhs) { - return hw->dotProduct(lhs.cbegin(), rhs.cbegin(), lhs.size()); - } -}; -template <> struct HWSupport<double, double> { - static double call(hwaccelrated::IAccelrated *hw, const ConstArrayRef<double> &lhs, const ConstArrayRef<double> &rhs) { - return hw->dotProduct(lhs.cbegin(), rhs.cbegin(), lhs.size()); - } -}; + state.pop_pop_push(state.stash.create<eval::DoubleValue>(result)); +} -template <typename LCT, typename RCT> -void my_dot_product_op(eval::InterpretedFunction::State &state, uint64_t param) { - auto *hw = (hwaccelrated::IAccelrated *)(param); - auto lhs = 
DenseTensorView::typify_cells<LCT>(state.peek(1)); - auto rhs = DenseTensorView::typify_cells<RCT>(state.peek(0)); - double result = HWSupport<LCT,RCT>::call(hw, lhs, rhs); +void my_cblas_double_dot_product_op(eval::InterpretedFunction::State &state, uint64_t) { + auto lhs_cells = DenseTensorView::typify_cells<double>(state.peek(1)); + auto rhs_cells = DenseTensorView::typify_cells<double>(state.peek(0)); + double result = cblas_ddot(lhs_cells.size(), lhs_cells.cbegin(), 1, rhs_cells.cbegin(), 1); + state.pop_pop_push(state.stash.create<eval::DoubleValue>(result)); +} + +void my_cblas_float_dot_product_op(eval::InterpretedFunction::State &state, uint64_t) { + auto lhs_cells = DenseTensorView::typify_cells<float>(state.peek(1)); + auto rhs_cells = DenseTensorView::typify_cells<float>(state.peek(0)); + double result = cblas_sdot(lhs_cells.size(), lhs_cells.cbegin(), 1, rhs_cells.cbegin(), 1); state.pop_pop_push(state.stash.create<eval::DoubleValue>(result)); } @@ -53,21 +52,31 @@ struct MyDotProductOp { static auto get_fun() { return my_dot_product_op<LCT,RCT>; } }; +eval::InterpretedFunction::op_function my_select(CellType lct, CellType rct) { + if (lct == rct) { + if (lct == ValueType::CellType::DOUBLE) { + return my_cblas_double_dot_product_op; + } + if (lct == ValueType::CellType::FLOAT) { + return my_cblas_float_dot_product_op; + } + } + return select_2<MyDotProductOp>(lct, rct); +} + } // namespace vespalib::tensor::<unnamed> DenseDotProductFunction::DenseDotProductFunction(const eval::TensorFunction &lhs_in, const eval::TensorFunction &rhs_in) - : eval::tensor_function::Op2(eval::ValueType::double_type(), lhs_in, rhs_in), - _hwAccelerator(hwaccelrated::IAccelrated::getAccelrator()) + : eval::tensor_function::Op2(eval::ValueType::double_type(), lhs_in, rhs_in) { } eval::InterpretedFunction::Instruction DenseDotProductFunction::compile_self(Stash &) const { - auto op = select_2<MyDotProductOp>(lhs().result_type().cell_type(), - rhs().result_type().cell_type()); 
- return eval::InterpretedFunction::Instruction(op, (uint64_t)(_hwAccelerator.get())); + auto op = my_select(lhs().result_type().cell_type(), rhs().result_type().cell_type()); + return eval::InterpretedFunction::Instruction(op); } bool diff --git a/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.h b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.h index d6181d33887..1d8f749689b 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.h +++ b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.h @@ -3,7 +3,6 @@ #pragma once #include <vespa/eval/eval/tensor_function.h> -#include <vespa/vespalib/hwaccelrated/iaccelrated.h> namespace vespalib::tensor { @@ -13,7 +12,6 @@ namespace vespalib::tensor { class DenseDotProductFunction : public eval::tensor_function::Op2 { private: - hwaccelrated::IAccelrated::UP _hwAccelerator; using ValueType = eval::ValueType; public: DenseDotProductFunction(const eval::TensorFunction &lhs_in, |