about | summary | refs | log | tree | commit | diff | stats
path: root/eval/src
diff options
context:
space:
mode:
author: Håvard Pettersen <havardpe@oath.com> 2020-01-29 14:52:00 +0000
committer: Håvard Pettersen <havardpe@oath.com> 2020-01-30 14:23:13 +0000
commit: 140fecac51910a4239aaff8444aeb6e6cea6e3ad (patch)
tree: ae6c8348b917d6b8e7d81dcdc4440036fcfffd02 /eval/src
parent: 74dccbc0b94164db1f47866fc1ed2ef07ee92bda (diff)
use openblas for dot product
Diffstat (limited to 'eval/src')
-rw-r--r-- eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp 67
-rw-r--r-- eval/src/vespa/eval/tensor/dense/dense_dot_product_function.h 2
2 files changed, 38 insertions, 31 deletions
diff --git a/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp
index 8bcaddba3b4..ea7f43b610a 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.cpp
@@ -7,6 +7,8 @@
#include <vespa/eval/eval/value.h>
#include <vespa/eval/tensor/tensor.h>
+#include <openblas/cblas.h>
+
namespace vespalib::tensor {
using eval::ValueType;
@@ -19,32 +21,29 @@ using namespace eval::operation;
namespace {
template <typename LCT, typename RCT>
-struct HWSupport {
- static double call(hwaccelrated::IAccelrated *, const ConstArrayRef<LCT> &lhs, const ConstArrayRef<RCT> &rhs) {
- double result = 0.0;
- for (size_t i = 0; i < lhs.size(); ++i) {
- result += (lhs[i] * rhs[i]);
- }
- return result;
+void my_dot_product_op(eval::InterpretedFunction::State &state, uint64_t) {
+ auto lhs_cells = DenseTensorView::typify_cells<LCT>(state.peek(1));
+ auto rhs_cells = DenseTensorView::typify_cells<RCT>(state.peek(0));
+ double result = 0.0;
+ const LCT *lhs = lhs_cells.cbegin();
+ const RCT *rhs = rhs_cells.cbegin();
+ for (size_t i = 0; i < lhs_cells.size(); ++i) {
+ result += ((*lhs++) * (*rhs++));
}
-};
-template <> struct HWSupport<float, float> {
- static double call(hwaccelrated::IAccelrated *hw, const ConstArrayRef<float> &lhs, const ConstArrayRef<float> &rhs) {
- return hw->dotProduct(lhs.cbegin(), rhs.cbegin(), lhs.size());
- }
-};
-template <> struct HWSupport<double, double> {
- static double call(hwaccelrated::IAccelrated *hw, const ConstArrayRef<double> &lhs, const ConstArrayRef<double> &rhs) {
- return hw->dotProduct(lhs.cbegin(), rhs.cbegin(), lhs.size());
- }
-};
+ state.pop_pop_push(state.stash.create<eval::DoubleValue>(result));
+}
-template <typename LCT, typename RCT>
-void my_dot_product_op(eval::InterpretedFunction::State &state, uint64_t param) {
- auto *hw = (hwaccelrated::IAccelrated *)(param);
- auto lhs = DenseTensorView::typify_cells<LCT>(state.peek(1));
- auto rhs = DenseTensorView::typify_cells<RCT>(state.peek(0));
- double result = HWSupport<LCT,RCT>::call(hw, lhs, rhs);
+void my_cblas_double_dot_product_op(eval::InterpretedFunction::State &state, uint64_t) {
+ auto lhs_cells = DenseTensorView::typify_cells<double>(state.peek(1));
+ auto rhs_cells = DenseTensorView::typify_cells<double>(state.peek(0));
+ double result = cblas_ddot(lhs_cells.size(), lhs_cells.cbegin(), 1, rhs_cells.cbegin(), 1);
+ state.pop_pop_push(state.stash.create<eval::DoubleValue>(result));
+}
+
+void my_cblas_float_dot_product_op(eval::InterpretedFunction::State &state, uint64_t) {
+ auto lhs_cells = DenseTensorView::typify_cells<float>(state.peek(1));
+ auto rhs_cells = DenseTensorView::typify_cells<float>(state.peek(0));
+ double result = cblas_sdot(lhs_cells.size(), lhs_cells.cbegin(), 1, rhs_cells.cbegin(), 1);
state.pop_pop_push(state.stash.create<eval::DoubleValue>(result));
}
@@ -53,21 +52,31 @@ struct MyDotProductOp {
static auto get_fun() { return my_dot_product_op<LCT,RCT>; }
};
+eval::InterpretedFunction::op_function my_select(CellType lct, CellType rct) {
+ if (lct == rct) {
+ if (lct == ValueType::CellType::DOUBLE) {
+ return my_cblas_double_dot_product_op;
+ }
+ if (lct == ValueType::CellType::FLOAT) {
+ return my_cblas_float_dot_product_op;
+ }
+ }
+ return select_2<MyDotProductOp>(lct, rct);
+}
+
} // namespace vespalib::tensor::<unnamed>
DenseDotProductFunction::DenseDotProductFunction(const eval::TensorFunction &lhs_in,
const eval::TensorFunction &rhs_in)
- : eval::tensor_function::Op2(eval::ValueType::double_type(), lhs_in, rhs_in),
- _hwAccelerator(hwaccelrated::IAccelrated::getAccelrator())
+ : eval::tensor_function::Op2(eval::ValueType::double_type(), lhs_in, rhs_in)
{
}
eval::InterpretedFunction::Instruction
DenseDotProductFunction::compile_self(Stash &) const
{
- auto op = select_2<MyDotProductOp>(lhs().result_type().cell_type(),
- rhs().result_type().cell_type());
- return eval::InterpretedFunction::Instruction(op, (uint64_t)(_hwAccelerator.get()));
+ auto op = my_select(lhs().result_type().cell_type(), rhs().result_type().cell_type());
+ return eval::InterpretedFunction::Instruction(op);
}
bool
diff --git a/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.h b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.h
index d6181d33887..1d8f749689b 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.h
+++ b/eval/src/vespa/eval/tensor/dense/dense_dot_product_function.h
@@ -3,7 +3,6 @@
#pragma once
#include <vespa/eval/eval/tensor_function.h>
-#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
namespace vespalib::tensor {
@@ -13,7 +12,6 @@ namespace vespalib::tensor {
class DenseDotProductFunction : public eval::tensor_function::Op2
{
private:
- hwaccelrated::IAccelrated::UP _hwAccelerator;
using ValueType = eval::ValueType;
public:
DenseDotProductFunction(const eval::TensorFunction &lhs_in,