From a30dfd15685c34fb65f7c8b5c0fb7bd3fd92e541 Mon Sep 17 00:00:00 2001 From: Håvard Pettersen Date: Mon, 14 Aug 2023 12:35:45 +0000 Subject: use common inlined code for low-level dot products --- eval/src/vespa/eval/eval/inline_operation.h | 28 +++++++++++++++ .../eval/instruction/best_similarity_function.cpp | 5 ++- .../instruction/dense_dot_product_function.cpp | 40 +++------------------- .../eval/instruction/mixed_112_dot_product.cpp | 15 +++----- .../instruction/mixed_inner_product_function.cpp | 31 ++--------------- .../instruction/sum_max_dot_product_function.cpp | 6 ++-- 6 files changed, 44 insertions(+), 81 deletions(-) diff --git a/eval/src/vespa/eval/eval/inline_operation.h b/eval/src/vespa/eval/eval/inline_operation.h index 9b862b59e37..910fa9cffaa 100644 --- a/eval/src/vespa/eval/eval/inline_operation.h +++ b/eval/src/vespa/eval/eval/inline_operation.h @@ -4,6 +4,7 @@ #include "operation.h" #include +#include #include namespace vespalib::eval::operation { @@ -148,4 +149,31 @@ void apply_op2_vec_vec(D *dst, const A *a, const B *b, size_t n, OP2 &&f) { //----------------------------------------------------------------------------- +template +struct DotProduct { + static double apply(const LCT * lhs, const RCT * rhs, size_t count) { + double result = 0.0; + for (size_t i = 0; i < count; ++i) { + result += lhs[i] * rhs[i]; + } + return result; + } +}; + +template <> +struct DotProduct { + static float apply(const float * lhs, const float * rhs, size_t count) { + return cblas_sdot(count, lhs, 1, rhs, 1); + } +}; + +template <> +struct DotProduct { + static double apply(const double * lhs, const double * rhs, size_t count) { + return cblas_ddot(count, lhs, 1, rhs, 1); + } +}; + +//----------------------------------------------------------------------------- + } diff --git a/eval/src/vespa/eval/instruction/best_similarity_function.cpp b/eval/src/vespa/eval/instruction/best_similarity_function.cpp index 964f27a4564..415a08d0d93 100644 --- 
a/eval/src/vespa/eval/instruction/best_similarity_function.cpp +++ b/eval/src/vespa/eval/instruction/best_similarity_function.cpp @@ -1,10 +1,9 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "best_similarity_function.h" -#include +#include #include #include -#include namespace vespalib::eval { @@ -22,7 +21,7 @@ struct BestSimParam { struct UseDotProduct { static float calc(const float *pri, const float *sec, size_t size) { - return cblas_sdot(size, pri, 1, sec, 1); + return DotProduct::apply(pri, sec, size); } }; diff --git a/eval/src/vespa/eval/instruction/dense_dot_product_function.cpp b/eval/src/vespa/eval/instruction/dense_dot_product_function.cpp index a2048707685..de9e029f377 100644 --- a/eval/src/vespa/eval/instruction/dense_dot_product_function.cpp +++ b/eval/src/vespa/eval/instruction/dense_dot_product_function.cpp @@ -1,9 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "dense_dot_product_function.h" -#include +#include #include -#include namespace vespalib::eval { @@ -16,26 +15,7 @@ template void my_dot_product_op(InterpretedFunction::State &state, uint64_t) { auto lhs_cells = state.peek(1).cells().typify(); auto rhs_cells = state.peek(0).cells().typify(); - double result = 0.0; - const LCT *lhs = lhs_cells.cbegin(); - const RCT *rhs = rhs_cells.cbegin(); - for (size_t i = 0; i < lhs_cells.size(); ++i) { - result += ((*lhs++) * (*rhs++)); - } - state.pop_pop_push(state.stash.create(result)); -} - -void my_cblas_double_dot_product_op(InterpretedFunction::State &state, uint64_t) { - auto lhs_cells = state.peek(1).cells().typify(); - auto rhs_cells = state.peek(0).cells().typify(); - double result = cblas_ddot(lhs_cells.size(), lhs_cells.cbegin(), 1, rhs_cells.cbegin(), 1); - state.pop_pop_push(state.stash.create(result)); -} - -void my_cblas_float_dot_product_op(InterpretedFunction::State &state, uint64_t) { - auto lhs_cells = state.peek(1).cells().typify(); - auto rhs_cells = state.peek(0).cells().typify(); - double result = cblas_sdot(lhs_cells.size(), lhs_cells.cbegin(), 1, rhs_cells.cbegin(), 1); + double result = DotProduct::apply(lhs_cells.cbegin(), rhs_cells.cbegin(), lhs_cells.size()); state.pop_pop_push(state.stash.create(result)); } @@ -44,19 +24,6 @@ struct MyDotProductOp { static auto invoke() { return my_dot_product_op; } }; -InterpretedFunction::op_function my_select(CellType lct, CellType rct) { - if (lct == rct) { - if (lct == CellType::DOUBLE) { - return my_cblas_double_dot_product_op; - } - if (lct == CellType::FLOAT) { - return my_cblas_float_dot_product_op; - } - } - using MyTypify = TypifyCellType; - return typify_invoke<2,MyTypify,MyDotProductOp>(lct, rct); -} - } // namespace DenseDotProductFunction::DenseDotProductFunction(const TensorFunction &lhs_in, @@ -68,7 +35,8 @@ DenseDotProductFunction::DenseDotProductFunction(const TensorFunction &lhs_in, InterpretedFunction::Instruction 
DenseDotProductFunction::compile_self(const ValueBuilderFactory &, Stash &) const { - auto op = my_select(lhs().result_type().cell_type(), rhs().result_type().cell_type()); + auto op = typify_invoke<2,TypifyCellType,MyDotProductOp>(lhs().result_type().cell_type(), + rhs().result_type().cell_type()); return InterpretedFunction::Instruction(op); } diff --git a/eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp b/eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp index 8bfa4b07980..47e1dbb58ed 100644 --- a/eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp +++ b/eval/src/vespa/eval/instruction/mixed_112_dot_product.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -17,14 +16,6 @@ using namespace instruction; namespace { -template double my_dot_product(const CT * lhs, const CT * rhs, size_t count); -template <> double my_dot_product(const double * lhs, const double * rhs, size_t count) { - return cblas_ddot(count, lhs, 1, rhs, 1); -} -template <> double my_dot_product(const float * lhs, const float * rhs, size_t count) { - return cblas_sdot(count, lhs, 1, rhs, 1); -} - template ConstArrayRef as_ccar(std::array &array) { return {array.data(), array.size()}; @@ -54,10 +45,11 @@ double my_mixed_112_dot_product_fallback(const Value::Index &a_idx, const Value: auto outer = a_idx.create_view({}); auto model = c_idx.create_view({&single_dim[0], 1}); outer->lookup({}); + using dot_product = DotProduct; while (outer->next_result(as_car(c_addr_ref[0]), a_space)) { model->lookup(as_ccar(c_addr_ref)); if (model->next_result({}, c_space)) { - result += my_dot_product(b_cells, c_cells + (c_space * dense_size), dense_size) * a_cells[a_space]; + result += dot_product::apply(b_cells, c_cells + (c_space * dense_size), dense_size) * a_cells[a_space]; } } return result; @@ -70,11 +62,12 @@ double my_fast_mixed_112_dot_product(const FastAddrMap *a_map, const FastAddrMap { double result = 0.0; const auto &a_labels = a_map->labels(); + using 
dot_product = DotProduct; for (size_t a_space = 0; a_space < a_labels.size(); ++a_space) { if (a_cells[a_space] != 0.0) { // handle pseudo-sparse input auto c_space = c_map->lookup_singledim(a_labels[a_space]); if (c_space != FastAddrMap::npos()) { - result += my_dot_product(b_cells, c_cells + (c_space * dense_size), dense_size) * a_cells[a_space]; + result += dot_product::apply(b_cells, c_cells + (c_space * dense_size), dense_size) * a_cells[a_space]; } } } diff --git a/eval/src/vespa/eval/instruction/mixed_inner_product_function.cpp b/eval/src/vespa/eval/instruction/mixed_inner_product_function.cpp index 248f909fcf5..5880a90a2cd 100644 --- a/eval/src/vespa/eval/instruction/mixed_inner_product_function.cpp +++ b/eval/src/vespa/eval/instruction/mixed_inner_product_function.cpp @@ -1,9 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "mixed_inner_product_function.h" -#include +#include #include -#include namespace vespalib::eval { @@ -12,31 +11,6 @@ using namespace operation; namespace { -template -struct MyDotProduct { - static double apply(const LCT * lhs, const RCT * rhs, size_t count) { - double result = 0.0; - for (size_t i = 0; i < count; ++i) { - result += lhs[i] * rhs[i]; - } - return result; - } -}; - -template <> -struct MyDotProduct { - static double apply(const double * lhs, const double * rhs, size_t count) { - return cblas_ddot(count, lhs, 1, rhs, 1); - } -}; - -template <> -struct MyDotProduct { - static float apply(const float * lhs, const float * rhs, size_t count) { - return cblas_sdot(count, lhs, 1, rhs, 1); - } -}; - struct MixedInnerProductParam { ValueType res_type; size_t vector_size; @@ -66,8 +40,9 @@ void my_mixed_inner_product_op(InterpretedFunction::State &state, uint64_t param ArrayRef out_cells = state.stash.create_uninitialized_array(num_output_cells); const MCT *m_cp = m_cells.begin(); const VCT *v_cp = v_cells.begin(); + using dot_product = DotProduct; for (OCT 
&out : out_cells) { - out = MyDotProduct::apply(m_cp, v_cp, param.vector_size); + out = dot_product::apply(m_cp, v_cp, param.vector_size); m_cp += param.vector_size; } assert(m_cp == m_cells.end()); diff --git a/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp b/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp index a76eaa38925..41017bc3687 100644 --- a/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp +++ b/eval/src/vespa/eval/instruction/sum_max_dot_product_function.cpp @@ -1,9 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "sum_max_dot_product_function.h" -#include +#include #include -#include namespace vespalib::eval { @@ -16,11 +15,12 @@ void my_sum_max_dot_product_op(InterpretedFunction::State &state, uint64_t dp_si double result = 0.0; auto query_cells = state.peek(1).cells().typify(); auto document_cells = state.peek(0).cells().typify(); + using dot_product = DotProduct; if ((query_cells.size() > 0) && (document_cells.size() > 0)) { for (const float *query = query_cells.begin(); query < query_cells.end(); query += dp_size) { float max_dp = aggr::Max::null_value(); for (const float *document = document_cells.begin(); document < document_cells.end(); document += dp_size) { - max_dp = aggr::Max::combine(max_dp, cblas_sdot(dp_size, query, 1, document, 1)); + max_dp = aggr::Max::combine(max_dp, dot_product::apply(query, document, dp_size)); } result += max_dp; } -- cgit v1.2.3