From 0f98e3423f392dfd154d37a15d49e176725e40cf Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Mon, 2 Nov 2020 16:49:28 +0000 Subject: make a specific inline version of sparse no-overlap join --- eval/src/vespa/eval/eval/fast_value.hpp | 58 +++++++++++++- eval/src/vespa/eval/instruction/generic_join.cpp | 98 ++++++++++++++++++++++-- 2 files changed, 148 insertions(+), 8 deletions(-) (limited to 'eval') diff --git a/eval/src/vespa/eval/eval/fast_value.hpp b/eval/src/vespa/eval/eval/fast_value.hpp index f02d02489b8..2f0c9b6f6a8 100644 --- a/eval/src/vespa/eval/eval/fast_value.hpp +++ b/eval/src/vespa/eval/eval/fast_value.hpp @@ -140,6 +140,7 @@ struct IterateView : public Value::Index::View { //----------------------------------------------------------------------------- +using JoinAddrSource = instruction::SparseJoinPlan::Source; // This is the class instructions will look for when optimizing sparse // operations by calling inline functions directly. @@ -155,9 +156,15 @@ struct FastValueIndex final : Value::Index { template static const Value &sparse_only_merge(const ValueType &res_type, const Fun &fun, - const FastValueIndex &lhs, const FastValueIndex &rhs, - ConstArrayRef lhs_cells, ConstArrayRef rhs_cells, - Stash &stash) __attribute((noinline)); + const FastValueIndex &lhs, const FastValueIndex &rhs, + ConstArrayRef lhs_cells, ConstArrayRef rhs_cells, + Stash &stash) __attribute((noinline)); + + template + static const Value &sparse_no_overlap_join(const ValueType &res_type, const Fun &fun, + const FastValueIndex &lhs, const FastValueIndex &rhs, + const std::vector &addr_sources, + ConstArrayRef lhs_cells, ConstArrayRef rhs_cells, Stash &stash); size_t size() const override { return map.size(); } std::unique_ptr create_view(const std::vector &dims) const override; @@ -360,4 +367,49 @@ FastValueIndex::sparse_only_merge(const ValueType &res_type, const Fun &fun, //----------------------------------------------------------------------------- +template +const Value & +FastValueIndex::sparse_no_overlap_join(const ValueType &res_type, const Fun &fun, + const FastValueIndex &lhs, const FastValueIndex &rhs, + const std::vector &addr_sources, + ConstArrayRef lhs_cells, ConstArrayRef rhs_cells, Stash &stash) +{ + using HashedLabelRef = std::reference_wrapper; + auto &result = stash.create>(res_type, res_type.count_mapped_dimensions(), 1, lhs.map.size()*rhs.map.size()); + std::vector output_addr; + lhs.map.each_map_entry([&](auto lhs_subspace, auto) + { + auto l_addr = lhs.map.make_addr(lhs_subspace); + rhs.map.each_map_entry([&](auto rhs_subspace, auto) + { + auto r_addr = rhs.map.make_addr(rhs_subspace); + output_addr.clear(); + size_t l_idx = 0; + size_t r_idx = 0; + for (JoinAddrSource source : addr_sources) { + switch (source) { + case JoinAddrSource::LHS: + output_addr.push_back(l_addr[l_idx++]); + break; + case JoinAddrSource::RHS: + output_addr.push_back(r_addr[r_idx++]); + break; + default: + abort(); + } + } + assert(l_idx == l_addr.size()); + assert(r_idx == r_addr.size()); + + ConstArrayRef output_addr_ref(output_addr); + auto idx = result.my_index.map.add_mapping(output_addr_ref); + if (__builtin_expect((idx == result.my_cells.size), true)) { + auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]); + result.my_cells.push_back_fast(cell_value); + } + }); + }); + return result; +} + } diff --git a/eval/src/vespa/eval/instruction/generic_join.cpp b/eval/src/vespa/eval/instruction/generic_join.cpp index fb5f170c68e..eabe1c362e2 100644 --- a/eval/src/vespa/eval/instruction/generic_join.cpp +++ b/eval/src/vespa/eval/instruction/generic_join.cpp @@ -69,6 +69,90 @@ void my_mixed_join_op(State &state, uint64_t param_in) { //----------------------------------------------------------------------------- +/** possible generic utility */ +template +const T * +recognize_by_type_index(const U & object) +{ + if (std::type_index(typeid(object)) == std::type_index(typeid(T))) { + return static_cast(&object); + } + return nullptr; +} + +template +class RecognizedValues +{ +private: + std::array _pointers; +public: + RecognizedValues(std::array && pointers) + : _pointers(std::move(pointers)) + {} + bool all_converted() const { + for (auto p : _pointers) { + if (p == nullptr) return false; + } + return true; + } + operator bool() const { return all_converted(); } + template const T& get() const { + static_assert(idx < N); + return *_pointers[idx]; + } +}; + +template +RecognizedValues +detect_type(const Args &... args) +{ + return RecognizedValues({(recognize_by_type_index(args))...}); +} + +//----------------------------------------------------------------------------- + +template +void my_sparse_no_overlap_join_op(State &state, uint64_t param_in) { + const auto ¶m = unwrap_param(param_in); + const Value &lhs = state.peek(1); + const Value &rhs = state.peek(0); + auto lhs_cells = lhs.cells().typify(); + auto rhs_cells = rhs.cells().typify(); + const Value::Index &lhs_index = lhs.index(); + const Value::Index &rhs_index = rhs.index(); + if (auto indexes = detect_type(lhs_index, rhs_index)) { + const auto &lhs_fast = indexes.get<0>(); + const auto &rhs_fast = indexes.get<1>(); + return state.pop_pop_push( + FastValueIndex::sparse_no_overlap_join + (param.res_type, Fun(param.function), + lhs_fast, rhs_fast, + param.sparse_plan.sources, + lhs_cells, rhs_cells, state.stash)); + } + Fun fun(param.function); + SparseJoinState sparse(param.sparse_plan, lhs.index(), rhs.index()); + auto guess = lhs.index().size() * rhs.index().size(); + assert(param.dense_plan.out_size == 1); + auto builder = param.factory.create_value_builder(param.res_type, param.sparse_plan.sources.size(), 1, guess); + auto outer = sparse.first_index.create_view({}); + assert(sparse.second_view_dims.empty()); + auto inner = sparse.second_index.create_view({}); + outer->lookup({}); + while (outer->next_result(sparse.first_address, sparse.first_subspace)) { + inner->lookup({}); + while (inner->next_result(sparse.second_only_address, sparse.second_subspace)) { + auto cell_value = fun(lhs_cells[sparse.lhs_subspace], rhs_cells[sparse.rhs_subspace]); + builder->add_subspace(sparse.full_address)[0] = cell_value; + } + } + auto &result = state.stash.create>(builder->build(std::move(builder))); + const Value &result_ref = *(result.get()); + state.pop_pop_push(result_ref); +}; + +//----------------------------------------------------------------------------- + template void my_sparse_full_overlap_join_op(State &state, uint64_t param_in) { const auto ¶m = unwrap_param(param_in); @@ -78,11 +162,9 @@ void my_sparse_full_overlap_join_op(State &state, uint64_t param_in) { auto rhs_cells = rhs.cells().typify(); const Value::Index &lhs_index = lhs.index(); const Value::Index &rhs_index = rhs.index(); - if ((std::type_index(typeid(lhs_index)) == std::type_index(typeid(FastValueIndex))) && - (std::type_index(typeid(rhs_index)) == std::type_index(typeid(FastValueIndex)))) - { - const FastValueIndex &lhs_fast = static_cast(lhs_index); - const FastValueIndex &rhs_fast = static_cast(rhs_index); + if (auto indexes = detect_type(lhs_index, rhs_index)) { + const auto &lhs_fast = indexes.get<0>(); + const auto &rhs_fast = indexes.get<1>(); return (rhs_fast.map.size() < lhs_fast.map.size()) ? state.pop_pop_push(FastValueIndex::sparse_full_overlap_join> (param.res_type, SwapArgs2(param.function), rhs_fast, lhs_fast, rhs_cells, lhs_cells, state.stash)) @@ -145,6 +227,12 @@ struct SelectGenericJoinOp { { return my_sparse_full_overlap_join_op; } + if ((param.dense_plan.out_size == 1) && + (param.sparse_plan.lhs_overlap.size() == 0) && + (param.sparse_plan.rhs_overlap.size() == 0)) + { + return my_sparse_no_overlap_join_op; + } return my_mixed_join_op; } }; -- cgit v1.2.3 From 9856a29b4685a0119c5f73ba93e5fd6bb16d13ea Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Wed, 4 Nov 2020 07:16:45 +0000 Subject: move detect_type to its own header file --- eval/src/vespa/eval/instruction/detect_type.h | 75 +++++++++++++++++++++++ eval/src/vespa/eval/instruction/generic_join.cpp | 44 +------------ eval/src/vespa/eval/instruction/generic_merge.cpp | 41 +------------ 3 files changed, 77 insertions(+), 83 deletions(-) create mode 100644 eval/src/vespa/eval/instruction/detect_type.h (limited to 'eval') diff --git a/eval/src/vespa/eval/instruction/detect_type.h b/eval/src/vespa/eval/instruction/detect_type.h new file mode 100644 index 00000000000..22c280f17ca --- /dev/null +++ b/eval/src/vespa/eval/instruction/detect_type.h @@ -0,0 +1,75 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include + +#pragma once + +namespace vespalib::eval::instruction { + +/* + * Utilities for detecting implementation class by comparing + * typeindex(typeid(T)); for now these are local to this + * namespace, but we can consider moving them to a more + * common place (probably vespalib) if we see more use-cases. + */ + +/** + * Recognize a (const) instance of type T. This is cheaper than + * dynamic_cast, but requires the object to be exactly of class T. + * Returns a pointer to the object as T if recognized, nullptr + * otherwise. + **/ +template +const T * +recognize_by_type_index(const U & object) +{ + if (std::type_index(typeid(object)) == std::type_index(typeid(T))) { + return static_cast(&object); + } + return nullptr; +} + +/** + * Packs N recognized values into one object, used as return value + * from detect_type. + * + * Use all_converted() or the equivalent bool cast operator to check + * if all objects were recognized. After this check is successful use + * get<0>(), get<1>() etc to get a reference to the objects. + **/ +template +class RecognizedValues +{ +private: + std::array _pointers; +public: + RecognizedValues(std::array && pointers) + : _pointers(std::move(pointers)) + {} + bool all_converted() const { + for (auto p : _pointers) { + if (p == nullptr) return false; + } + return true; + } + operator bool() const { return all_converted(); } + template const T& get() const { + static_assert(idx < N); + return *_pointers[idx]; + } +}; + +/** + * For all arguments, detect if they have typeid(T), convert to T if + * possible, and return a RecognizedValues packing the converted + * values. + **/ +template +RecognizedValues +detect_type(const Args &... args) +{ + return RecognizedValues({(recognize_by_type_index(args))...}); +} + +} // namespace diff --git a/eval/src/vespa/eval/instruction/generic_join.cpp b/eval/src/vespa/eval/instruction/generic_join.cpp index eabe1c362e2..25829576ba5 100644 --- a/eval/src/vespa/eval/instruction/generic_join.cpp +++ b/eval/src/vespa/eval/instruction/generic_join.cpp @@ -1,6 +1,7 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "generic_join.h" +#include "detect_type.h" #include #include #include @@ -9,7 +10,6 @@ #include #include #include -#include using namespace vespalib::eval::tensor_function; @@ -69,48 +69,6 @@ void my_mixed_join_op(State &state, uint64_t param_in) { //----------------------------------------------------------------------------- -/** possible generic utility */ -template -const T * -recognize_by_type_index(const U & object) -{ - if (std::type_index(typeid(object)) == std::type_index(typeid(T))) { - return static_cast(&object); - } - return nullptr; -} - -template -class RecognizedValues -{ -private: - std::array _pointers; -public: - RecognizedValues(std::array && pointers) - : _pointers(std::move(pointers)) - {} - bool all_converted() const { - for (auto p : _pointers) { - if (p == nullptr) return false; - } - return true; - } - operator bool() const { return all_converted(); } - template const T& get() const { - static_assert(idx < N); - return *_pointers[idx]; - } -}; - -template -RecognizedValues -detect_type(const Args &... args) -{ - return RecognizedValues({(recognize_by_type_index(args))...}); -} - -//----------------------------------------------------------------------------- - template void my_sparse_no_overlap_join_op(State &state, uint64_t param_in) { const auto ¶m = unwrap_param(param_in); diff --git a/eval/src/vespa/eval/instruction/generic_merge.cpp b/eval/src/vespa/eval/instruction/generic_merge.cpp index f340e7ad506..87be47a9c2e 100644 --- a/eval/src/vespa/eval/instruction/generic_merge.cpp +++ b/eval/src/vespa/eval/instruction/generic_merge.cpp @@ -1,5 +1,6 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "detect_type.h" #include "generic_merge.h" #include #include @@ -118,46 +119,6 @@ void my_mixed_merge_op(State &state, uint64_t param_in) { state.pop_pop_push(result_ref); }; -/** possible generic utility */ -template -const T * -recognize_by_type_index(const U & object) -{ - if (std::type_index(typeid(object)) == std::type_index(typeid(T))) { - return static_cast(&object); - } - return nullptr; -} - -template -class RecognizedValues -{ -private: - std::array _pointers; -public: - RecognizedValues(std::array && pointers) - : _pointers(std::move(pointers)) - {} - bool all_converted() const { - for (auto p : _pointers) { - if (p == nullptr) return false; - } - return true; - } - operator bool() const { return all_converted(); } - template const T& get() const { - static_assert(idx < N); - return *_pointers[idx]; - } -}; - -template -RecognizedValues -detect_type(const Args &... args) -{ - return RecognizedValues({(recognize_by_type_index(args))...}); -} - template void my_sparse_merge_op(State &state, uint64_t param_in) { const auto ¶m = unwrap_param(param_in); -- cgit v1.2.3 From c00c16f4bc82275d32657f7ca40ead26050b6245 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Wed, 4 Nov 2020 10:17:05 +0000 Subject: just loop over subspaces --- eval/src/vespa/eval/eval/fast_value.hpp | 60 +++++++++++++++------------------ 1 file changed, 28 insertions(+), 32 deletions(-) (limited to 'eval') diff --git a/eval/src/vespa/eval/eval/fast_value.hpp b/eval/src/vespa/eval/eval/fast_value.hpp index 2f0c9b6f6a8..559f2e0a7f6 100644 --- a/eval/src/vespa/eval/eval/fast_value.hpp +++ b/eval/src/vespa/eval/eval/fast_value.hpp @@ -377,38 +377,34 @@ FastValueIndex::sparse_no_overlap_join(const ValueType &res_type, const Fun &fun using HashedLabelRef = std::reference_wrapper; auto &result = stash.create>(res_type, res_type.count_mapped_dimensions(), 1, lhs.map.size()*rhs.map.size()); std::vector output_addr; - lhs.map.each_map_entry([&](auto lhs_subspace, auto) - { - auto l_addr = lhs.map.make_addr(lhs_subspace); - rhs.map.each_map_entry([&](auto rhs_subspace, auto) - { - auto r_addr = rhs.map.make_addr(rhs_subspace); - output_addr.clear(); - size_t l_idx = 0; - size_t r_idx = 0; - for (JoinAddrSource source : addr_sources) { - switch (source) { - case JoinAddrSource::LHS: - output_addr.push_back(l_addr[l_idx++]); - break; - case JoinAddrSource::RHS: - output_addr.push_back(r_addr[r_idx++]); - break; - default: - abort(); - } - } - assert(l_idx == l_addr.size()); - assert(r_idx == r_addr.size()); - - ConstArrayRef output_addr_ref(output_addr); - auto idx = result.my_index.map.add_mapping(output_addr_ref); - if (__builtin_expect((idx == result.my_cells.size), true)) { - auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]); - result.my_cells.push_back_fast(cell_value); - } - }); - }); + for (size_t lhs_subspace = 0; lhs_subspace < lhs.map.size(); ++lhs_subspace) { + auto l_addr = lhs.map.make_addr(lhs_subspace); + for (size_t rhs_subspace = 0; rhs_subspace < rhs.map.size(); ++rhs_subspace) { + auto r_addr = rhs.map.make_addr(rhs_subspace); + output_addr.clear(); + size_t l_idx = 0; + size_t r_idx = 0; + for (JoinAddrSource source : addr_sources) { + switch (source) { + case JoinAddrSource::LHS: + output_addr.push_back(l_addr[l_idx++]); + break; + case JoinAddrSource::RHS: + output_addr.push_back(r_addr[r_idx++]); + break; + default: + abort(); + } + } + assert(l_idx == l_addr.size()); + assert(r_idx == r_addr.size()); + auto idx = result.my_index.map.add_mapping(ConstArrayRef(output_addr)); + if (__builtin_expect((idx == result.my_cells.size), true)) { + auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]); + result.my_cells.push_back_fast(cell_value); + } + } + } return result; } -- cgit v1.2.3