aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHåvard Pettersen <3535158+havardpe@users.noreply.github.com>2023-08-14 15:39:01 +0200
committerGitHub <noreply@github.com>2023-08-14 15:39:01 +0200
commite9840e436e7b2d1bfcf173d5ecf58cf030625b62 (patch)
tree5c006e0be5b7a432f514e5cf16accbcd55430a7a
parent871396f9aea8f7381ea231d74a48aadae975929c (diff)
parenta30dfd15685c34fb65f7c8b5c0fb7bd3fd92e541 (diff)
Merge pull request #28043 from vespa-engine/havardpe/refactor-low-level-dot-product
use common inlined code for low-level dot products
-rw-r--r--eval/src/vespa/eval/eval/inline_operation.h28
-rw-r--r--eval/src/vespa/eval/instruction/best_similarity_function.cpp5
-rw-r--r--eval/src/vespa/eval/instruction/dense_dot_product_function.cpp40
-rw-r--r--eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp15
-rw-r--r--eval/src/vespa/eval/instruction/mixed_inner_product_function.cpp31
-rw-r--r--eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp6
6 files changed, 44 insertions, 81 deletions
diff --git a/eval/src/vespa/eval/eval/inline_operation.h b/eval/src/vespa/eval/eval/inline_operation.h
index 9b862b59e37..910fa9cffaa 100644
--- a/eval/src/vespa/eval/eval/inline_operation.h
+++ b/eval/src/vespa/eval/eval/inline_operation.h
@@ -4,6 +4,7 @@
#include "operation.h"
#include <vespa/vespalib/util/typify.h>
+#include <cblas.h>
#include <cmath>
namespace vespalib::eval::operation {
@@ -148,4 +149,31 @@ void apply_op2_vec_vec(D *dst, const A *a, const B *b, size_t n, OP2 &&f) {
//-----------------------------------------------------------------------------
+template <typename LCT, typename RCT>
+struct DotProduct {
+ static double apply(const LCT * lhs, const RCT * rhs, size_t count) {
+ double result = 0.0;
+ for (size_t i = 0; i < count; ++i) {
+ result += lhs[i] * rhs[i];
+ }
+ return result;
+ }
+};
+
+template <>
+struct DotProduct<float,float> {
+ static float apply(const float * lhs, const float * rhs, size_t count) {
+ return cblas_sdot(count, lhs, 1, rhs, 1);
+ }
+};
+
+template <>
+struct DotProduct<double,double> {
+ static double apply(const double * lhs, const double * rhs, size_t count) {
+ return cblas_ddot(count, lhs, 1, rhs, 1);
+ }
+};
+
+//-----------------------------------------------------------------------------
+
}
diff --git a/eval/src/vespa/eval/instruction/best_similarity_function.cpp b/eval/src/vespa/eval/instruction/best_similarity_function.cpp
index 964f27a4564..415a08d0d93 100644
--- a/eval/src/vespa/eval/instruction/best_similarity_function.cpp
+++ b/eval/src/vespa/eval/instruction/best_similarity_function.cpp
@@ -1,10 +1,9 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "best_similarity_function.h"
-#include <vespa/eval/eval/operation.h>
+#include <vespa/eval/eval/inline_operation.h>
#include <vespa/eval/eval/value.h>
#include <vespa/vespalib/util/binary_hamming_distance.h>
-#include <cblas.h>
namespace vespalib::eval {
@@ -22,7 +21,7 @@ struct BestSimParam {
struct UseDotProduct {
static float calc(const float *pri, const float *sec, size_t size) {
- return cblas_sdot(size, pri, 1, sec, 1);
+ return DotProduct<float,float>::apply(pri, sec, size);
}
};
diff --git a/eval/src/vespa/eval/instruction/dense_dot_product_function.cpp b/eval/src/vespa/eval/instruction/dense_dot_product_function.cpp
index a2048707685..de9e029f377 100644
--- a/eval/src/vespa/eval/instruction/dense_dot_product_function.cpp
+++ b/eval/src/vespa/eval/instruction/dense_dot_product_function.cpp
@@ -1,9 +1,8 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "dense_dot_product_function.h"
-#include <vespa/eval/eval/operation.h>
+#include <vespa/eval/eval/inline_operation.h>
#include <vespa/eval/eval/value.h>
-#include <cblas.h>
namespace vespalib::eval {
@@ -16,26 +15,7 @@ template <typename LCT, typename RCT>
void my_dot_product_op(InterpretedFunction::State &state, uint64_t) {
auto lhs_cells = state.peek(1).cells().typify<LCT>();
auto rhs_cells = state.peek(0).cells().typify<RCT>();
- double result = 0.0;
- const LCT *lhs = lhs_cells.cbegin();
- const RCT *rhs = rhs_cells.cbegin();
- for (size_t i = 0; i < lhs_cells.size(); ++i) {
- result += ((*lhs++) * (*rhs++));
- }
- state.pop_pop_push(state.stash.create<DoubleValue>(result));
-}
-
-void my_cblas_double_dot_product_op(InterpretedFunction::State &state, uint64_t) {
- auto lhs_cells = state.peek(1).cells().typify<double>();
- auto rhs_cells = state.peek(0).cells().typify<double>();
- double result = cblas_ddot(lhs_cells.size(), lhs_cells.cbegin(), 1, rhs_cells.cbegin(), 1);
- state.pop_pop_push(state.stash.create<DoubleValue>(result));
-}
-
-void my_cblas_float_dot_product_op(InterpretedFunction::State &state, uint64_t) {
- auto lhs_cells = state.peek(1).cells().typify<float>();
- auto rhs_cells = state.peek(0).cells().typify<float>();
- double result = cblas_sdot(lhs_cells.size(), lhs_cells.cbegin(), 1, rhs_cells.cbegin(), 1);
+ double result = DotProduct<LCT,RCT>::apply(lhs_cells.cbegin(), rhs_cells.cbegin(), lhs_cells.size());
state.pop_pop_push(state.stash.create<DoubleValue>(result));
}
@@ -44,19 +24,6 @@ struct MyDotProductOp {
static auto invoke() { return my_dot_product_op<LCT,RCT>; }
};
-InterpretedFunction::op_function my_select(CellType lct, CellType rct) {
- if (lct == rct) {
- if (lct == CellType::DOUBLE) {
- return my_cblas_double_dot_product_op;
- }
- if (lct == CellType::FLOAT) {
- return my_cblas_float_dot_product_op;
- }
- }
- using MyTypify = TypifyCellType;
- return typify_invoke<2,MyTypify,MyDotProductOp>(lct, rct);
-}
-
} // namespace <unnamed>
DenseDotProductFunction::DenseDotProductFunction(const TensorFunction &lhs_in,
@@ -68,7 +35,8 @@ DenseDotProductFunction::DenseDotProductFunction(const TensorFunction &lhs_in,
InterpretedFunction::Instruction
DenseDotProductFunction::compile_self(const ValueBuilderFactory &, Stash &) const
{
- auto op = my_select(lhs().result_type().cell_type(), rhs().result_type().cell_type());
+ auto op = typify_invoke<2,TypifyCellType,MyDotProductOp>(lhs().result_type().cell_type(),
+ rhs().result_type().cell_type());
return InterpretedFunction::Instruction(op);
}
diff --git a/eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp b/eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp
index 8bfa4b07980..47e1dbb58ed 100644
--- a/eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp
+++ b/eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp
@@ -5,7 +5,6 @@
#include <vespa/vespalib/util/typify.h>
#include <vespa/vespalib/util/require.h>
#include <vespa/eval/eval/visit_stuff.h>
-#include <cblas.h>
#include <algorithm>
#include <optional>
@@ -17,14 +16,6 @@ using namespace instruction;
namespace {
-template <typename CT> double my_dot_product(const CT * lhs, const CT * rhs, size_t count);
-template <> double my_dot_product<double>(const double * lhs, const double * rhs, size_t count) {
- return cblas_ddot(count, lhs, 1, rhs, 1);
-}
-template <> double my_dot_product<float>(const float * lhs, const float * rhs, size_t count) {
- return cblas_sdot(count, lhs, 1, rhs, 1);
-}
-
template <typename T, size_t N>
ConstArrayRef<const T *> as_ccar(std::array<T *, N> &array) {
return {array.data(), array.size()};
@@ -54,10 +45,11 @@ double my_mixed_112_dot_product_fallback(const Value::Index &a_idx, const Value:
auto outer = a_idx.create_view({});
auto model = c_idx.create_view({&single_dim[0], 1});
outer->lookup({});
+ using dot_product = DotProduct<CT,CT>;
while (outer->next_result(as_car(c_addr_ref[0]), a_space)) {
model->lookup(as_ccar(c_addr_ref));
if (model->next_result({}, c_space)) {
- result += my_dot_product<CT>(b_cells, c_cells + (c_space * dense_size), dense_size) * a_cells[a_space];
+ result += dot_product::apply(b_cells, c_cells + (c_space * dense_size), dense_size) * a_cells[a_space];
}
}
return result;
@@ -70,11 +62,12 @@ double my_fast_mixed_112_dot_product(const FastAddrMap *a_map, const FastAddrMap
{
double result = 0.0;
const auto &a_labels = a_map->labels();
+ using dot_product = DotProduct<CT,CT>;
for (size_t a_space = 0; a_space < a_labels.size(); ++a_space) {
if (a_cells[a_space] != 0.0) { // handle pseudo-sparse input
auto c_space = c_map->lookup_singledim(a_labels[a_space]);
if (c_space != FastAddrMap::npos()) {
- result += my_dot_product<CT>(b_cells, c_cells + (c_space * dense_size), dense_size) * a_cells[a_space];
+ result += dot_product::apply(b_cells, c_cells + (c_space * dense_size), dense_size) * a_cells[a_space];
}
}
}
diff --git a/eval/src/vespa/eval/instruction/mixed_inner_product_function.cpp b/eval/src/vespa/eval/instruction/mixed_inner_product_function.cpp
index 248f909fcf5..5880a90a2cd 100644
--- a/eval/src/vespa/eval/instruction/mixed_inner_product_function.cpp
+++ b/eval/src/vespa/eval/instruction/mixed_inner_product_function.cpp
@@ -1,9 +1,8 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "mixed_inner_product_function.h"
-#include <vespa/eval/eval/operation.h>
+#include <vespa/eval/eval/inline_operation.h>
#include <vespa/eval/eval/value.h>
-#include <cblas.h>
namespace vespalib::eval {
@@ -12,31 +11,6 @@ using namespace operation;
namespace {
-template <typename LCT, typename RCT>
-struct MyDotProduct {
- static double apply(const LCT * lhs, const RCT * rhs, size_t count) {
- double result = 0.0;
- for (size_t i = 0; i < count; ++i) {
- result += lhs[i] * rhs[i];
- }
- return result;
- }
-};
-
-template <>
-struct MyDotProduct<double,double> {
- static double apply(const double * lhs, const double * rhs, size_t count) {
- return cblas_ddot(count, lhs, 1, rhs, 1);
- }
-};
-
-template <>
-struct MyDotProduct<float,float> {
- static float apply(const float * lhs, const float * rhs, size_t count) {
- return cblas_sdot(count, lhs, 1, rhs, 1);
- }
-};
-
struct MixedInnerProductParam {
ValueType res_type;
size_t vector_size;
@@ -66,8 +40,9 @@ void my_mixed_inner_product_op(InterpretedFunction::State &state, uint64_t param
ArrayRef<OCT> out_cells = state.stash.create_uninitialized_array<OCT>(num_output_cells);
const MCT *m_cp = m_cells.begin();
const VCT *v_cp = v_cells.begin();
+ using dot_product = DotProduct<MCT,VCT>;
for (OCT &out : out_cells) {
- out = MyDotProduct<MCT,VCT>::apply(m_cp, v_cp, param.vector_size);
+ out = dot_product::apply(m_cp, v_cp, param.vector_size);
m_cp += param.vector_size;
}
assert(m_cp == m_cells.end());
diff --git a/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp b/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp
index a76eaa38925..41017bc3687 100644
--- a/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp
+++ b/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp
@@ -1,9 +1,8 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "sum_max_dot_product_function.h"
-#include <vespa/eval/eval/operation.h>
+#include <vespa/eval/eval/inline_operation.h>
#include <vespa/eval/eval/value.h>
-#include <cblas.h>
namespace vespalib::eval {
@@ -16,11 +15,12 @@ void my_sum_max_dot_product_op(InterpretedFunction::State &state, uint64_t dp_si
double result = 0.0;
auto query_cells = state.peek(1).cells().typify<float>();
auto document_cells = state.peek(0).cells().typify<float>();
+ using dot_product = DotProduct<float,float>;
if ((query_cells.size() > 0) && (document_cells.size() > 0)) {
for (const float *query = query_cells.begin(); query < query_cells.end(); query += dp_size) {
float max_dp = aggr::Max<float>::null_value();
for (const float *document = document_cells.begin(); document < document_cells.end(); document += dp_size) {
- max_dp = aggr::Max<float>::combine(max_dp, cblas_sdot(dp_size, query, 1, document, 1));
+ max_dp = aggr::Max<float>::combine(max_dp, dot_product::apply(query, document, dp_size));
}
result += max_dp;
}