diff options
author | Håvard Pettersen <havardpe@yahooinc.com> | 2023-08-14 12:35:45 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@yahooinc.com> | 2023-08-14 12:35:45 +0000 |
commit | a30dfd15685c34fb65f7c8b5c0fb7bd3fd92e541 (patch) | |
tree | a97f30f53f65a085efad939841fa038ac363f5b6 /eval | |
parent | 4e19101d1019bc9c44ae077669e4526165387249 (diff) |
use common inlined code for low-level dot products
Diffstat (limited to 'eval')
6 files changed, 44 insertions, 81 deletions
diff --git a/eval/src/vespa/eval/eval/inline_operation.h b/eval/src/vespa/eval/eval/inline_operation.h index 9b862b59e37..910fa9cffaa 100644 --- a/eval/src/vespa/eval/eval/inline_operation.h +++ b/eval/src/vespa/eval/eval/inline_operation.h @@ -4,6 +4,7 @@ #include "operation.h" #include <vespa/vespalib/util/typify.h> +#include <cblas.h> #include <cmath> namespace vespalib::eval::operation { @@ -148,4 +149,31 @@ void apply_op2_vec_vec(D *dst, const A *a, const B *b, size_t n, OP2 &&f) { //----------------------------------------------------------------------------- +template <typename LCT, typename RCT> +struct DotProduct { + static double apply(const LCT * lhs, const RCT * rhs, size_t count) { + double result = 0.0; + for (size_t i = 0; i < count; ++i) { + result += lhs[i] * rhs[i]; + } + return result; + } +}; + +template <> +struct DotProduct<float,float> { + static float apply(const float * lhs, const float * rhs, size_t count) { + return cblas_sdot(count, lhs, 1, rhs, 1); + } +}; + +template <> +struct DotProduct<double,double> { + static double apply(const double * lhs, const double * rhs, size_t count) { + return cblas_ddot(count, lhs, 1, rhs, 1); + } +}; + +//----------------------------------------------------------------------------- + } diff --git a/eval/src/vespa/eval/instruction/best_similarity_function.cpp b/eval/src/vespa/eval/instruction/best_similarity_function.cpp index 964f27a4564..415a08d0d93 100644 --- a/eval/src/vespa/eval/instruction/best_similarity_function.cpp +++ b/eval/src/vespa/eval/instruction/best_similarity_function.cpp @@ -1,10 +1,9 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "best_similarity_function.h" -#include <vespa/eval/eval/operation.h> +#include <vespa/eval/eval/inline_operation.h> #include <vespa/eval/eval/value.h> #include <vespa/vespalib/util/binary_hamming_distance.h> -#include <cblas.h> namespace vespalib::eval { @@ -22,7 +21,7 @@ struct BestSimParam { struct UseDotProduct { static float calc(const float *pri, const float *sec, size_t size) { - return cblas_sdot(size, pri, 1, sec, 1); + return DotProduct<float,float>::apply(pri, sec, size); } }; diff --git a/eval/src/vespa/eval/instruction/dense_dot_product_function.cpp b/eval/src/vespa/eval/instruction/dense_dot_product_function.cpp index a2048707685..de9e029f377 100644 --- a/eval/src/vespa/eval/instruction/dense_dot_product_function.cpp +++ b/eval/src/vespa/eval/instruction/dense_dot_product_function.cpp @@ -1,9 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "dense_dot_product_function.h" -#include <vespa/eval/eval/operation.h> +#include <vespa/eval/eval/inline_operation.h> #include <vespa/eval/eval/value.h> -#include <cblas.h> namespace vespalib::eval { @@ -16,26 +15,7 @@ template <typename LCT, typename RCT> void my_dot_product_op(InterpretedFunction::State &state, uint64_t) { auto lhs_cells = state.peek(1).cells().typify<LCT>(); auto rhs_cells = state.peek(0).cells().typify<RCT>(); - double result = 0.0; - const LCT *lhs = lhs_cells.cbegin(); - const RCT *rhs = rhs_cells.cbegin(); - for (size_t i = 0; i < lhs_cells.size(); ++i) { - result += ((*lhs++) * (*rhs++)); - } - state.pop_pop_push(state.stash.create<DoubleValue>(result)); -} - -void my_cblas_double_dot_product_op(InterpretedFunction::State &state, uint64_t) { - auto lhs_cells = state.peek(1).cells().typify<double>(); - auto rhs_cells = state.peek(0).cells().typify<double>(); - double result = cblas_ddot(lhs_cells.size(), lhs_cells.cbegin(), 1, rhs_cells.cbegin(), 1); - state.pop_pop_push(state.stash.create<DoubleValue>(result)); -} - -void my_cblas_float_dot_product_op(InterpretedFunction::State &state, uint64_t) { - auto lhs_cells = state.peek(1).cells().typify<float>(); - auto rhs_cells = state.peek(0).cells().typify<float>(); - double result = cblas_sdot(lhs_cells.size(), lhs_cells.cbegin(), 1, rhs_cells.cbegin(), 1); + double result = DotProduct<LCT,RCT>::apply(lhs_cells.cbegin(), rhs_cells.cbegin(), lhs_cells.size()); state.pop_pop_push(state.stash.create<DoubleValue>(result)); } @@ -44,19 +24,6 @@ struct MyDotProductOp { static auto invoke() { return my_dot_product_op<LCT,RCT>; } }; -InterpretedFunction::op_function my_select(CellType lct, CellType rct) { - if (lct == rct) { - if (lct == CellType::DOUBLE) { - return my_cblas_double_dot_product_op; - } - if (lct == CellType::FLOAT) { - return my_cblas_float_dot_product_op; - } - } - using MyTypify = TypifyCellType; - return typify_invoke<2,MyTypify,MyDotProductOp>(lct, rct); -} - } // namespace <unnamed> DenseDotProductFunction::DenseDotProductFunction(const TensorFunction &lhs_in, @@ -68,7 +35,8 @@ DenseDotProductFunction::DenseDotProductFunction(const TensorFunction &lhs_in, InterpretedFunction::Instruction DenseDotProductFunction::compile_self(const ValueBuilderFactory &, Stash &) const { - auto op = my_select(lhs().result_type().cell_type(), rhs().result_type().cell_type()); + auto op = typify_invoke<2,TypifyCellType,MyDotProductOp>(lhs().result_type().cell_type(), + rhs().result_type().cell_type()); return InterpretedFunction::Instruction(op); } diff --git a/eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp b/eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp index 8bfa4b07980..47e1dbb58ed 100644 --- a/eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp +++ b/eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp @@ -5,7 +5,6 @@ #include <vespa/vespalib/util/typify.h> #include <vespa/vespalib/util/require.h> #include <vespa/eval/eval/visit_stuff.h> -#include <cblas.h> #include <algorithm> #include <optional> @@ -17,14 +16,6 @@ using namespace instruction; namespace { -template <typename CT> double my_dot_product(const CT * lhs, const CT * rhs, size_t count); -template <> double my_dot_product<double>(const double * lhs, const double * rhs, size_t count) { - return cblas_ddot(count, lhs, 1, rhs, 1); -} -template <> double my_dot_product<float>(const float * lhs, const float * rhs, size_t count) { - return cblas_sdot(count, lhs, 1, rhs, 1); -} - template <typename T, size_t N> ConstArrayRef<const T *> as_ccar(std::array<T *, N> &array) { return {array.data(), array.size()}; @@ -54,10 +45,11 @@ double my_mixed_112_dot_product_fallback(const Value::Index &a_idx, const Value: auto outer = a_idx.create_view({}); auto model = c_idx.create_view({&single_dim[0], 1}); outer->lookup({}); + using dot_product = DotProduct<CT,CT>; while (outer->next_result(as_car(c_addr_ref[0]), a_space)) { model->lookup(as_ccar(c_addr_ref)); if (model->next_result({}, c_space)) { - result += my_dot_product<CT>(b_cells, c_cells + (c_space * dense_size), dense_size) * a_cells[a_space]; + result += dot_product::apply(b_cells, c_cells + (c_space * dense_size), dense_size) * a_cells[a_space]; } } return result; @@ -70,11 +62,12 @@ double my_fast_mixed_112_dot_product(const FastAddrMap *a_map, const FastAddrMap { double result = 0.0; const auto &a_labels = a_map->labels(); + using dot_product = DotProduct<CT,CT>; for (size_t a_space = 0; a_space < a_labels.size(); ++a_space) { if (a_cells[a_space] != 0.0) { // handle pseudo-sparse input auto c_space = c_map->lookup_singledim(a_labels[a_space]); if (c_space != FastAddrMap::npos()) { - result += my_dot_product<CT>(b_cells, c_cells + (c_space * dense_size), dense_size) * a_cells[a_space]; + result += dot_product::apply(b_cells, c_cells + (c_space * dense_size), dense_size) * a_cells[a_space]; } } } diff --git a/eval/src/vespa/eval/instruction/mixed_inner_product_function.cpp b/eval/src/vespa/eval/instruction/mixed_inner_product_function.cpp index 248f909fcf5..5880a90a2cd 100644 --- a/eval/src/vespa/eval/instruction/mixed_inner_product_function.cpp +++ b/eval/src/vespa/eval/instruction/mixed_inner_product_function.cpp @@ -1,9 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "mixed_inner_product_function.h" -#include <vespa/eval/eval/operation.h> +#include <vespa/eval/eval/inline_operation.h> #include <vespa/eval/eval/value.h> -#include <cblas.h> namespace vespalib::eval { @@ -12,31 +11,6 @@ using namespace operation; namespace { -template <typename LCT, typename RCT> -struct MyDotProduct { - static double apply(const LCT * lhs, const RCT * rhs, size_t count) { - double result = 0.0; - for (size_t i = 0; i < count; ++i) { - result += lhs[i] * rhs[i]; - } - return result; - } -}; - -template <> -struct MyDotProduct<double,double> { - static double apply(const double * lhs, const double * rhs, size_t count) { - return cblas_ddot(count, lhs, 1, rhs, 1); - } -}; - -template <> -struct MyDotProduct<float,float> { - static float apply(const float * lhs, const float * rhs, size_t count) { - return cblas_sdot(count, lhs, 1, rhs, 1); - } -}; - struct MixedInnerProductParam { ValueType res_type; size_t vector_size; @@ -66,8 +40,9 @@ void my_mixed_inner_product_op(InterpretedFunction::State &state, uint64_t param ArrayRef<OCT> out_cells = state.stash.create_uninitialized_array<OCT>(num_output_cells); const MCT *m_cp = m_cells.begin(); const VCT *v_cp = v_cells.begin(); + using dot_product = DotProduct<MCT,VCT>; for (OCT &out : out_cells) { - out = MyDotProduct<MCT,VCT>::apply(m_cp, v_cp, param.vector_size); + out = dot_product::apply(m_cp, v_cp, param.vector_size); m_cp += param.vector_size; } assert(m_cp == m_cells.end()); diff --git a/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp b/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp index a76eaa38925..41017bc3687 100644 --- a/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp +++ b/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp @@ -1,9 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "sum_max_dot_product_function.h" -#include <vespa/eval/eval/operation.h> +#include <vespa/eval/eval/inline_operation.h> #include <vespa/eval/eval/value.h> -#include <cblas.h> namespace vespalib::eval { @@ -16,11 +15,12 @@ void my_sum_max_dot_product_op(InterpretedFunction::State &state, uint64_t dp_si double result = 0.0; auto query_cells = state.peek(1).cells().typify<float>(); auto document_cells = state.peek(0).cells().typify<float>(); + using dot_product = DotProduct<float,float>; if ((query_cells.size() > 0) && (document_cells.size() > 0)) { for (const float *query = query_cells.begin(); query < query_cells.end(); query += dp_size) { float max_dp = aggr::Max<float>::null_value(); for (const float *document = document_cells.begin(); document < document_cells.end(); document += dp_size) { - max_dp = aggr::Max<float>::combine(max_dp, cblas_sdot(dp_size, query, 1, document, 1)); + max_dp = aggr::Max<float>::combine(max_dp, dot_product::apply(query, document, dp_size)); } result += max_dp; } |