diff options
author | Arne Juul <arnej@verizonmedia.com> | 2021-02-04 11:44:53 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2021-02-04 13:48:13 +0000 |
commit | e113b62ff818d6530b11f6c3164f32456f2b9fbf (patch) | |
tree | cd7017e758122ab78f8319aaf930b53aa077c438 | |
parent | f5c4d5b8bebae1a68928143c7fde2bf3d3309238 (diff) |
make a separate optimized TensorFunction for sparse merge
4 files changed, 247 insertions, 0 deletions
diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp index 25612b8d5fd..196e8a98679 100644 --- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp +++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp @@ -6,6 +6,7 @@ #include <vespa/eval/instruction/dense_dot_product_function.h> #include <vespa/eval/instruction/sparse_dot_product_function.h> +#include <vespa/eval/instruction/sparse_merge_function.h> #include <vespa/eval/instruction/mixed_inner_product_function.h> #include <vespa/eval/instruction/sum_max_dot_product_function.h> #include <vespa/eval/instruction/dense_xw_product_function.h> @@ -72,6 +73,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const Te child.set(MixedSimpleJoinFunction::optimize(child.get(), stash)); child.set(JoinWithNumberFunction::optimize(child.get(), stash)); child.set(DenseSingleReduceFunction::optimize(child.get(), stash)); + child.set(SparseMergeFunction::optimize(child.get(), stash)); nodes.pop_back(); } } diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt index cac69d23640..3def8907ac8 100644 --- a/eval/src/vespa/eval/instruction/CMakeLists.txt +++ b/eval/src/vespa/eval/instruction/CMakeLists.txt @@ -33,6 +33,7 @@ vespa_add_library(eval_instruction OBJECT remove_trivial_dimension_optimizer.cpp replace_type_function.cpp sparse_dot_product_function.cpp + sparse_merge_function.cpp sum_max_dot_product_function.cpp vector_from_doubles_function.cpp ) diff --git a/eval/src/vespa/eval/instruction/sparse_merge_function.cpp b/eval/src/vespa/eval/instruction/sparse_merge_function.cpp new file mode 100644 index 00000000000..fe2fca36f07 --- /dev/null +++ b/eval/src/vespa/eval/instruction/sparse_merge_function.cpp @@ -0,0 +1,222 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "sparse_merge_function.h" +#include "generic_join.h" +#include <vespa/eval/eval/fast_value.hpp> +#include <vespa/vespalib/util/typify.h> + +namespace vespalib::eval { + +using namespace tensor_function; +using namespace operation; +using namespace instruction; + +namespace { + +struct SparseMergeParam { + const ValueType res_type; + const join_fun_t function; + const size_t num_mapped_dimensions; + std::vector<size_t> all_view_dims; + const ValueBuilderFactory &factory; + + SparseMergeParam(const ValueType &res_type_in, join_fun_t function_in, const ValueBuilderFactory &factory_in) + : res_type(res_type_in), + function(function_in), + num_mapped_dimensions(res_type.count_mapped_dimensions()), + all_view_dims(num_mapped_dimensions), + factory(factory_in) + { + assert(!res_type.is_error()); + for (size_t i = 0; i < num_mapped_dimensions; ++i) { + all_view_dims[i] = i; + } + } + ~SparseMergeParam(); +}; +SparseMergeParam::~SparseMergeParam() = default; + +template <typename CT, typename Fun> +std::unique_ptr<Value> my_sparse_merge_fallback(const Value::Index &a_idx, const Value::Index &b_idx, + ConstArrayRef<CT> a_cells, ConstArrayRef<CT> b_cells, + const SparseMergeParam ¶m) __attribute__((noinline)); +template <typename CT, typename Fun> +std::unique_ptr<Value> my_sparse_merge_fallback(const Value::Index &a_idx, const Value::Index &b_idx, + ConstArrayRef<CT> a_cells, ConstArrayRef<CT> b_cells, + const SparseMergeParam ¶ms) +{ + Fun fun(params.function); + const size_t num_mapped = params.num_mapped_dimensions; + const size_t subspace_size = 1u; + size_t guess_subspaces = std::max(a_idx.size(), b_idx.size()); + auto builder = params.factory.create_transient_value_builder<CT>(params.res_type, num_mapped, subspace_size, guess_subspaces); + std::vector<string_id> address(num_mapped); + std::vector<const string_id *> addr_cref; + std::vector<string_id *> addr_ref; + for (auto & ref : address) { + addr_cref.push_back(&ref); + addr_ref.push_back(&ref); + } + size_t a_subspace; + size_t b_subspace; + auto inner = b_idx.create_view(params.all_view_dims); + auto outer = a_idx.create_view({}); + outer->lookup({}); + while (outer->next_result(addr_ref, a_subspace)) { + ArrayRef<CT> dst = builder->add_subspace(address); + inner->lookup(addr_cref); + if (inner->next_result({}, b_subspace)) { + dst[0] = fun(a_cells[a_subspace], b_cells[b_subspace]); + } else { + dst[0] = a_cells[a_subspace]; + } + } + inner = a_idx.create_view(params.all_view_dims); + outer = b_idx.create_view({}); + outer->lookup({}); + while (outer->next_result(addr_ref, b_subspace)) { + inner->lookup(addr_cref); + if (! inner->next_result({}, a_subspace)) { + ArrayRef<CT> dst = builder->add_subspace(address); + dst[0] = b_cells[b_subspace]; + } + } + return builder->build(std::move(builder)); +} + + +template <typename CT, bool single_dim, typename Fun> +const Value& my_fast_sparse_merge(const FastAddrMap &a_map, const FastAddrMap &b_map, + const CT *a_cells, const CT *b_cells, + const SparseMergeParam ¶ms, + Stash &stash) +{ + Fun fun(params.function); + size_t guess_size = a_map.size() + b_map.size(); + auto &result = stash.create<FastValue<CT,true>>(params.res_type, params.num_mapped_dimensions, 1u, guess_size); + if constexpr (single_dim) { + string_id cur_label; + ConstArrayRef<string_id> addr(&cur_label, 1); + const auto &a_labels = a_map.labels(); + for (size_t i = 0; i < a_labels.size(); ++i) { + cur_label = a_labels[i]; + result.add_mapping(addr, cur_label.hash()); + result.my_cells.push_back_fast(a_cells[i]); + } + const auto &b_labels = b_map.labels(); + for (size_t i = 0; i < b_labels.size(); ++i) { + cur_label = b_labels[i]; + auto result_subspace = result.my_index.map.lookup_singledim(cur_label); + if (result_subspace == FastAddrMap::npos()) { + result.add_mapping(addr, cur_label.hash()); + result.my_cells.push_back_fast(b_cells[i]); + } else { + CT *out_cell = result.my_cells.get(result_subspace); + out_cell[0] = fun(out_cell[0], b_cells[i]); + } + } + } else { + a_map.each_map_entry([&](auto lhs_subspace, auto hash) + { + result.add_mapping(a_map.get_addr(lhs_subspace), hash); + result.my_cells.push_back_fast(a_cells[lhs_subspace]); + }); + b_map.each_map_entry([&](auto rhs_subspace, auto hash) + { + auto rhs_addr = b_map.get_addr(rhs_subspace); + auto result_subspace = result.my_index.map.lookup(rhs_addr, hash); + if (result_subspace == FastAddrMap::npos()) { + result.add_mapping(rhs_addr, hash); + result.my_cells.push_back_fast(b_cells[rhs_subspace]); + } else { + CT *out_cell = result.my_cells.get(result_subspace); + out_cell[0] = fun(out_cell[0], b_cells[rhs_subspace]); + } + }); + } + return result; + + +} + +template <typename CT, bool single_dim, typename Fun> +void my_sparse_merge_op(InterpretedFunction::State &state, uint64_t param_in) { + const auto ¶m = unwrap_param<SparseMergeParam>(param_in); + const auto &a_idx = state.peek(1).index(); + const auto &b_idx = state.peek(0).index(); + auto a_cells = state.peek(1).cells().typify<CT>(); + auto b_cells = state.peek(0).cells().typify<CT>(); + //assert(a_idx.size() == a_cells.size()); + //assert(b_idx.size() == b_cells.size()); + if (__builtin_expect(are_fast(a_idx, b_idx), true)) { + const Value &v = my_fast_sparse_merge<CT,single_dim,Fun>(as_fast(a_idx).map, as_fast(b_idx).map, a_cells.cbegin(), b_cells.cbegin(), param, state.stash); + state.pop_pop_push(v); + } else { + auto up = my_sparse_merge_fallback<CT,Fun>(a_idx, b_idx, a_cells, b_cells, param); + state.pop_pop_push(*state.stash.create<std::unique_ptr<Value>>(std::move(up))); + } +} + +struct SelectSparseMergeOp { + template <typename CT, typename SINGLE_DIM, typename Fun> + static auto invoke() { return my_sparse_merge_op<CT,SINGLE_DIM::value,Fun>; } +}; + +using MyTypify = TypifyValue<TypifyCellType,TypifyBool,operation::TypifyOp2>; + +} // namespace <unnamed> + +SparseMergeFunction::SparseMergeFunction(const tensor_function::Merge &original) + : tensor_function::Merge(original.result_type(), + original.lhs(), + original.rhs(), + original.function()) +{ + assert(compatible_types(result_type(), lhs().result_type(), rhs().result_type())); +} + +InterpretedFunction::Instruction +SparseMergeFunction::compile_self(const ValueBuilderFactory &factory, Stash &stash) const +{ + assert(result_type() == ValueType::join(lhs().result_type(), rhs().result_type())); + const auto ¶m = stash.create<SparseMergeParam>(result_type(), function(), factory); + size_t num_dims = result_type().count_mapped_dimensions(); + auto op = typify_invoke<3,MyTypify,SelectSparseMergeOp>(result_type().cell_type(), + num_dims == 1, + function()); + return InterpretedFunction::Instruction(op, wrap_param<SparseMergeParam>(param)); +} + +bool +SparseMergeFunction::compatible_types(const ValueType &res, const ValueType &lhs, const ValueType &rhs) +{ + if ((lhs.cell_type() == rhs.cell_type()) + && (lhs.count_mapped_dimensions() > 0) + && (lhs.dense_subspace_size() == 1)) + { + assert(rhs.dense_subspace_size() == lhs.dense_subspace_size()); + assert(rhs.count_mapped_dimensions() == lhs.count_mapped_dimensions()); + + assert(res.cell_type() == lhs.cell_type()); + assert(res.dense_subspace_size() == lhs.dense_subspace_size()); + assert(res.count_mapped_dimensions() == lhs.count_mapped_dimensions()); + + return true; + } + return false; +} + +const TensorFunction & +SparseMergeFunction::optimize(const TensorFunction &expr, Stash &stash) +{ + if (auto merge = as<Merge>(expr)) { + const TensorFunction &lhs = merge->lhs(); + const TensorFunction &rhs = merge->rhs(); + if (compatible_types(expr.result_type(), lhs.result_type(), rhs.result_type())) { + return stash.create<SparseMergeFunction>(*merge); + } + } + return expr; +} + +} // namespace diff --git a/eval/src/vespa/eval/instruction/sparse_merge_function.h b/eval/src/vespa/eval/instruction/sparse_merge_function.h new file mode 100644 index 00000000000..d2b26196ed6 --- /dev/null +++ b/eval/src/vespa/eval/instruction/sparse_merge_function.h @@ -0,0 +1,22 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/tensor_function.h> + +namespace vespalib::eval { + +/** + * Tensor function for merging two sparse tensors. + */ +class SparseMergeFunction : public tensor_function::Merge +{ +public: + SparseMergeFunction(const tensor_function::Merge &original); + InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override; + bool result_is_mutable() const override { return true; } + static bool compatible_types(const ValueType &res, const ValueType &lhs, const ValueType &rhs); + static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash); +}; + +} // namespace |