summary refs log tree commit diff stats
path: root/eval
diff options
context:
space:
mode:
author Håvard Pettersen <havardpe@oath.com> 2020-01-29 11:56:20 +0000
committer Håvard Pettersen <havardpe@oath.com> 2020-01-30 14:23:12 +0000
commit 74dccbc0b94164db1f47866fc1ed2ef07ee92bda (patch)
tree 7d3cf1d3f87fd300fbf9a4da1aec7fa82bcdcdc6 /eval
parent 7dfe0a92a97e3929e8fd4fc9e2cc83c607d8732f (diff)
use openblas for matrix vector multiplication
Diffstat (limited to 'eval')
-rw-r--r-- eval/src/tests/tensor/dense_xw_product_function/dense_xw_product_function_test.cpp 6
-rw-r--r-- eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp 181
-rw-r--r-- eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h 36
3 files changed, 109 insertions, 114 deletions
diff --git a/eval/src/tests/tensor/dense_xw_product_function/dense_xw_product_function_test.cpp b/eval/src/tests/tensor/dense_xw_product_function/dense_xw_product_function_test.cpp
index 36609c04219..0b924451907 100644
--- a/eval/src/tests/tensor/dense_xw_product_function/dense_xw_product_function_test.cpp
+++ b/eval/src/tests/tensor/dense_xw_product_function/dense_xw_product_function_test.cpp
@@ -78,9 +78,9 @@ void verify_optimized(const vespalib::string &expr, size_t vec_size, size_t res_
auto info = fixture.find_all<DenseXWProductFunction>();
ASSERT_EQUAL(info.size(), 1u);
EXPECT_TRUE(info[0]->result_is_mutable());
- EXPECT_EQUAL(info[0]->vectorSize(), vec_size);
- EXPECT_EQUAL(info[0]->resultSize(), res_size);
- EXPECT_EQUAL(info[0]->matrixHasCommonDimensionInnermost(), happy);
+ EXPECT_EQUAL(info[0]->vector_size(), vec_size);
+ EXPECT_EQUAL(info[0]->result_size(), res_size);
+ EXPECT_EQUAL(info[0]->common_inner(), happy);
}
vespalib::string make_expr(const vespalib::string &a, const vespalib::string &b, const vespalib::string &common,
diff --git a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp
index 2db5b4e8f92..8225178be7a 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.cpp
@@ -10,6 +10,8 @@
#include <vespa/vespalib/util/exceptions.h>
#include <assert.h>
+#include <openblas/cblas.h>
+
namespace vespalib::tensor {
using eval::ValueType;
@@ -21,76 +23,59 @@ using namespace eval::operation;
namespace {
-template <typename LCT, typename RCT>
-struct HWSupport {
- static double call(hwaccelrated::IAccelrated *, const LCT *lhs, const RCT *rhs, size_t len) {
- double result = 0.0;
- for (size_t i = 0; i < len; ++i) {
- result += (lhs[i] * rhs[i]);
- }
- return result;
+template <typename LCT, typename RCT, bool common_inner>
+double my_dot_product(const LCT *lhs, const RCT *rhs, size_t vector_size, size_t result_size) {
+ double result = 0.0;
+ for (size_t i = 0; i < vector_size; ++i) {
+ result += ((*lhs) * (*rhs));
+ ++lhs;
+ rhs += (common_inner ? 1 : result_size);
}
-};
-template <> struct HWSupport<float, float> {
- static double call(hwaccelrated::IAccelrated *hw, const float *lhs, const float *rhs, size_t len) {
- return hw->dotProduct(lhs, rhs, len);
- }
-};
-template <> struct HWSupport<double, double> {
- static double call(hwaccelrated::IAccelrated *hw, const double *lhs, const double *rhs, size_t len) {
- return hw->dotProduct(lhs, rhs, len);
- }
-};
-
-template <typename LCT, typename RCT, typename OCT>
-void multiDotProduct(const DenseXWProductFunction::Self &self,
- const ConstArrayRef<LCT> &vectorCells, const ConstArrayRef<RCT> &matrixCells, ArrayRef<OCT> &result)
-{
- OCT *out = result.begin();
- const RCT *matrixP = matrixCells.cbegin();
- const LCT * const vectorP = vectorCells.cbegin();
- for (size_t row = 0; row < self._resultSize; ++row) {
- double cell = HWSupport<LCT,RCT>::call(self._hwAccelerator.get(), vectorP, matrixP, self._vectorSize);
- *out++ = cell;
- matrixP += self._vectorSize;
- }
- assert(out == result.end());
- assert(matrixP == matrixCells.cend());
-}
-
-template <typename LCT, typename RCT, typename OCT>
-void transposedProduct(const DenseXWProductFunction::Self &self,
- const ConstArrayRef<LCT> &vectorCells, const ConstArrayRef<RCT> &matrixCells, ArrayRef<OCT> &result)
-{
- OCT *out = result.begin();
- const RCT * const matrixP = matrixCells.cbegin();
- const LCT * const vectorP = vectorCells.cbegin();
- for (size_t row = 0; row < self._resultSize; ++row) {
- double cell = 0;
- for (size_t col = 0; col < self._vectorSize; ++col) {
- cell += matrixP[col*self._resultSize + row] * vectorP[col];
- }
- *out++ = cell;
- }
- assert(out == result.end());
+ return result;
}
-template <typename LCT, typename RCT, bool commonDimensionInnermost>
+template <typename LCT, typename RCT, bool common_inner>
void my_xw_product_op(eval::InterpretedFunction::State &state, uint64_t param) {
- DenseXWProductFunction::Self *self = (DenseXWProductFunction::Self *)(param);
-
+ const DenseXWProductFunction::Self &self = *((const DenseXWProductFunction::Self *)(param));
using OCT = typename eval::UnifyCellTypes<LCT,RCT>::type;
- auto vectorCells = DenseTensorView::typify_cells<LCT>(state.peek(1));
- auto matrixCells = DenseTensorView::typify_cells<RCT>(state.peek(0));
- auto outputCells = state.stash.create_array<OCT>(self->_resultSize);
-
- if (commonDimensionInnermost) {
- multiDotProduct(*self, vectorCells, matrixCells, outputCells);
- } else {
- transposedProduct(*self, vectorCells, matrixCells, outputCells);
+ auto vector_cells = DenseTensorView::typify_cells<LCT>(state.peek(1));
+ auto matrix_cells = DenseTensorView::typify_cells<RCT>(state.peek(0));
+ auto dst_cells = state.stash.create_array<OCT>(self.result_size);
+ OCT *dst = dst_cells.begin();
+ const RCT *matrix = matrix_cells.cbegin();
+ for (size_t i = 0; i < self.result_size; ++i) {
+ *dst++ = my_dot_product<LCT,RCT,common_inner>(vector_cells.cbegin(), matrix, self.vector_size, self.result_size);
+ matrix += (common_inner ? self.vector_size : 1);
}
+ state.pop_pop_push(state.stash.create<DenseTensorView>(self.result_type, TypedCells(dst_cells)));
+}
+
+template <bool common_inner>
+void my_cblas_double_xw_product_op(eval::InterpretedFunction::State &state, uint64_t param) {
+ const DenseXWProductFunction::Self &self = *((const DenseXWProductFunction::Self *)(param));
+ auto vector_cells = DenseTensorView::typify_cells<double>(state.peek(1));
+ auto matrix_cells = DenseTensorView::typify_cells<double>(state.peek(0));
+ auto dst_cells = state.stash.create_array<double>(self.result_size);
+ cblas_dgemv(CblasRowMajor, common_inner ? CblasNoTrans : CblasTrans,
+ common_inner ? self.result_size : self.vector_size,
+ common_inner ? self.vector_size : self.result_size,
+ 1.0, matrix_cells.cbegin(), common_inner ? self.vector_size : self.result_size, vector_cells.cbegin(), 1,
+ 0.0, dst_cells.begin(), 1);
+ state.pop_pop_push(state.stash.create<DenseTensorView>(self.result_type, TypedCells(dst_cells)));
+}
- state.pop_pop_push(state.stash.create<DenseTensorView>(self->_resultType, TypedCells(outputCells)));
+template <bool common_inner>
+void my_cblas_float_xw_product_op(eval::InterpretedFunction::State &state, uint64_t param) {
+ const DenseXWProductFunction::Self &self = *((const DenseXWProductFunction::Self *)(param));
+ auto vector_cells = DenseTensorView::typify_cells<float>(state.peek(1));
+ auto matrix_cells = DenseTensorView::typify_cells<float>(state.peek(0));
+ auto dst_cells = state.stash.create_array<float>(self.result_size);
+ cblas_sgemv(CblasRowMajor, common_inner ? CblasNoTrans : CblasTrans,
+ common_inner ? self.result_size : self.vector_size,
+ common_inner ? self.vector_size : self.result_size,
+ 1.0, matrix_cells.cbegin(), common_inner ? self.vector_size : self.result_size, vector_cells.cbegin(), 1,
+ 0.0, dst_cells.begin(), 1);
+ state.pop_pop_push(state.stash.create<DenseTensorView>(self.result_type, TypedCells(dst_cells)));
}
template <bool common_inner>
@@ -99,11 +84,24 @@ struct MyXWProductOp {
static auto get_fun() { return my_xw_product_op<LCT,RCT,common_inner>; }
};
-eval::InterpretedFunction::op_function my_select(CellType lct, CellType rct, bool common_innermost) {
- if (common_innermost) {
- return select_2<MyXWProductOp<true> >(lct, rct);
+template <bool common_inner>
+eval::InterpretedFunction::op_function my_select2(CellType lct, CellType rct) {
+ if (lct == rct) {
+ if (lct == ValueType::CellType::DOUBLE) {
+ return my_cblas_double_xw_product_op<common_inner>;
+ }
+ if (lct == ValueType::CellType::FLOAT) {
+ return my_cblas_float_xw_product_op<common_inner>;
+ }
+ }
+ return select_2<MyXWProductOp<common_inner>>(lct, rct);
+}
+
+eval::InterpretedFunction::op_function my_select(CellType lct, CellType rct, bool common_inner) {
+ if (common_inner) {
+ return my_select2<true>(lct, rct);
} else {
- return select_2<MyXWProductOp<false> >(lct, rct);
+ return my_select2<false>(lct, rct);
}
}
@@ -129,42 +127,43 @@ bool isDenseXWProduct(const ValueType &res, const ValueType &vec, const ValueTyp
}
const TensorFunction &createDenseXWProduct(const ValueType &res, const TensorFunction &vec, const TensorFunction &mat, Stash &stash) {
- bool common_is_inner = (mat.result_type().dimension_index(vec.result_type().dimensions()[0].name) == 1);
+ bool common_inner = (mat.result_type().dimension_index(vec.result_type().dimensions()[0].name) == 1);
return stash.create<DenseXWProductFunction>(res, vec, mat,
vec.result_type().dimensions()[0].size,
res.dimensions()[0].size,
- common_is_inner);
+ common_inner);
}
} // namespace vespalib::tensor::<unnamed>
-DenseXWProductFunction::Self::Self(const eval::ValueType &resultType,
- size_t vectorSize,
- size_t resultSize)
- : _resultType(resultType),
- _vectorSize(vectorSize),
- _resultSize(resultSize),
- _hwAccelerator(hwaccelrated::IAccelrated::getAccelrator())
-{}
+DenseXWProductFunction::Self::Self(const eval::ValueType &result_type_in,
+ size_t vector_size_in, size_t result_size_in)
+ : result_type(result_type_in),
+ vector_size(vector_size_in),
+ result_size(result_size_in)
+{
+}
+DenseXWProductFunction::Self::~Self() = default;
-DenseXWProductFunction::DenseXWProductFunction(const eval::ValueType &resultType,
+DenseXWProductFunction::DenseXWProductFunction(const eval::ValueType &result_type,
const eval::TensorFunction &vector_in,
const eval::TensorFunction &matrix_in,
- size_t vectorSize,
- size_t resultSize,
- bool matrixHasCommonDimensionInnermost)
- : eval::tensor_function::Op2(resultType, vector_in, matrix_in),
- _vectorSize(vectorSize),
- _resultSize(resultSize),
- _commonDimensionInnermost(matrixHasCommonDimensionInnermost)
-{}
+ size_t vector_size,
+ size_t result_size,
+ bool common_inner)
+ : eval::tensor_function::Op2(result_type, vector_in, matrix_in),
+ _vector_size(vector_size),
+ _result_size(result_size),
+ _common_inner(common_inner)
+{
+}
eval::InterpretedFunction::Instruction
DenseXWProductFunction::compile_self(Stash &stash) const
{
- Self &self = stash.create<Self>(result_type(), _vectorSize, _resultSize);
+ Self &self = stash.create<Self>(result_type(), _vector_size, _result_size);
auto op = my_select(lhs().result_type().cell_type(),
- rhs().result_type().cell_type(), _commonDimensionInnermost);
+ rhs().result_type().cell_type(), _common_inner);
return eval::InterpretedFunction::Instruction(op, (uint64_t)(&self));
}
@@ -172,9 +171,9 @@ void
DenseXWProductFunction::visit_self(vespalib::ObjectVisitor &visitor) const
{
Super::visit_self(visitor);
- visitor.visitInt("vector_size", _vectorSize);
- visitor.visitInt("result_size", _resultSize);
- visitor.visitBool("common_dimension_innermost", _commonDimensionInnermost);
+ visitor.visitInt("vector_size", _vector_size);
+ visitor.visitInt("result_size", _result_size);
+ visitor.visitBool("common_inner", _common_inner);
}
const TensorFunction &
diff --git a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h
index f2f4d67c0f0..d7c39fa45a2 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h
+++ b/eval/src/vespa/eval/tensor/dense/dense_xw_product_function.h
@@ -4,7 +4,6 @@
#include <vespa/eval/eval/tensor_function.h>
#include "dense_tensor_view.h"
-#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
namespace vespalib::tensor {
@@ -16,37 +15,34 @@ class DenseXWProductFunction : public eval::tensor_function::Op2
using Super = eval::tensor_function::Op2;
public:
struct Self {
- const eval::ValueType _resultType;
- const size_t _vectorSize;
- const size_t _resultSize;
- hwaccelrated::IAccelrated::UP _hwAccelerator;
- Self(const eval::ValueType &resultType,
- size_t vectorSize,
- size_t resultSize);
- ~Self() {}
+ eval::ValueType result_type;
+ size_t vector_size;
+ size_t result_size;
+ Self(const eval::ValueType &result_type_in,
+ size_t vector_size_in, size_t result_size_in);
+ ~Self();
};
private:
- const size_t _vectorSize;
- const size_t _resultSize;
- bool _commonDimensionInnermost;
+ size_t _vector_size;
+ size_t _result_size;
+ bool _common_inner;
public:
- DenseXWProductFunction(const eval::ValueType &resultType,
+ DenseXWProductFunction(const eval::ValueType &result_type,
const eval::TensorFunction &vector_in,
const eval::TensorFunction &matrix_in,
- size_t vectorSize,
- size_t resultSize,
- bool matrixHasCommonDimensionInnermost);
+ size_t vector_size,
+ size_t result_size,
+ bool common_inner);
~DenseXWProductFunction() {}
bool result_is_mutable() const override { return true; }
- size_t vectorSize() const { return _vectorSize; }
- size_t resultSize() const { return _resultSize; }
-
- bool matrixHasCommonDimensionInnermost() const { return _commonDimensionInnermost; }
+ size_t vector_size() const { return _vector_size; }
+ size_t result_size() const { return _result_size; }
+ bool common_inner() const { return _common_inner; }
eval::InterpretedFunction::Instruction compile_self(Stash &stash) const override;
void visit_self(vespalib::ObjectVisitor &visitor) const override;