diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2020-11-04 14:54:14 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-11-04 14:54:14 +0100 |
commit | 26487c3acc487947b367ad85f94b19487f101ff5 (patch) | |
tree | b41a8c37b82c86c798347814b6d7b41ec6ddb057 /eval | |
parent | 41cc23bb91e30d34efb60a8698ecccb188d12010 (diff) | |
parent | c00c16f4bc82275d32657f7ca40ead26050b6245 (diff) |
Merge pull request #15171 from vespa-engine/arnej/inline-sparse-no-overlap
Arnej/inline sparse no overlap
Diffstat (limited to 'eval')
-rw-r--r-- | eval/src/vespa/eval/eval/fast_value.hpp | 54 | ||||
-rw-r--r-- | eval/src/vespa/eval/instruction/detect_type.h | 75 | ||||
-rw-r--r-- | eval/src/vespa/eval/instruction/generic_join.cpp | 58 | ||||
-rw-r--r-- | eval/src/vespa/eval/instruction/generic_merge.cpp | 41 |
4 files changed, 179 insertions, 49 deletions
diff --git a/eval/src/vespa/eval/eval/fast_value.hpp b/eval/src/vespa/eval/eval/fast_value.hpp index f02d02489b8..559f2e0a7f6 100644 --- a/eval/src/vespa/eval/eval/fast_value.hpp +++ b/eval/src/vespa/eval/eval/fast_value.hpp @@ -140,6 +140,7 @@ struct IterateView : public Value::Index::View { //----------------------------------------------------------------------------- +using JoinAddrSource = instruction::SparseJoinPlan::Source; // This is the class instructions will look for when optimizing sparse // operations by calling inline functions directly. @@ -155,9 +156,15 @@ struct FastValueIndex final : Value::Index { template <typename LCT, typename RCT, typename OCT, typename Fun> static const Value &sparse_only_merge(const ValueType &res_type, const Fun &fun, - const FastValueIndex &lhs, const FastValueIndex &rhs, - ConstArrayRef<LCT> lhs_cells, ConstArrayRef<RCT> rhs_cells, - Stash &stash) __attribute((noinline)); + const FastValueIndex &lhs, const FastValueIndex &rhs, + ConstArrayRef<LCT> lhs_cells, ConstArrayRef<RCT> rhs_cells, + Stash &stash) __attribute((noinline)); + + template <typename LCT, typename RCT, typename OCT, typename Fun> + static const Value &sparse_no_overlap_join(const ValueType &res_type, const Fun &fun, + const FastValueIndex &lhs, const FastValueIndex &rhs, + const std::vector<JoinAddrSource> &addr_sources, + ConstArrayRef<LCT> lhs_cells, ConstArrayRef<RCT> rhs_cells, Stash &stash); size_t size() const override { return map.size(); } std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override; @@ -360,4 +367,45 @@ FastValueIndex::sparse_only_merge(const ValueType &res_type, const Fun &fun, //----------------------------------------------------------------------------- +template <typename LCT, typename RCT, typename OCT, typename Fun> +const Value & +FastValueIndex::sparse_no_overlap_join(const ValueType &res_type, const Fun &fun, + const FastValueIndex &lhs, const FastValueIndex &rhs, + const std::vector<JoinAddrSource> &addr_sources, + ConstArrayRef<LCT> lhs_cells, ConstArrayRef<RCT> rhs_cells, Stash &stash) +{ + using HashedLabelRef = std::reference_wrapper<const FastSparseMap::HashedLabel>; + auto &result = stash.create<FastValue<OCT>>(res_type, res_type.count_mapped_dimensions(), 1, lhs.map.size()*rhs.map.size()); + std::vector<HashedLabelRef> output_addr; + for (size_t lhs_subspace = 0; lhs_subspace < lhs.map.size(); ++lhs_subspace) { + auto l_addr = lhs.map.make_addr(lhs_subspace); + for (size_t rhs_subspace = 0; rhs_subspace < rhs.map.size(); ++rhs_subspace) { + auto r_addr = rhs.map.make_addr(rhs_subspace); + output_addr.clear(); + size_t l_idx = 0; + size_t r_idx = 0; + for (JoinAddrSource source : addr_sources) { + switch (source) { + case JoinAddrSource::LHS: + output_addr.push_back(l_addr[l_idx++]); + break; + case JoinAddrSource::RHS: + output_addr.push_back(r_addr[r_idx++]); + break; + default: + abort(); + } + } + assert(l_idx == l_addr.size()); + assert(r_idx == r_addr.size()); + auto idx = result.my_index.map.add_mapping(ConstArrayRef(output_addr)); + if (__builtin_expect((idx == result.my_cells.size), true)) { + auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]); + result.my_cells.push_back_fast(cell_value); + } + } + } + return result; +} + } diff --git a/eval/src/vespa/eval/instruction/detect_type.h b/eval/src/vespa/eval/instruction/detect_type.h new file mode 100644 index 00000000000..22c280f17ca --- /dev/null +++ b/eval/src/vespa/eval/instruction/detect_type.h @@ -0,0 +1,75 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <typeindex> +#include <array> + +#pragma once + +namespace vespalib::eval::instruction { + +/* + * Utilities for detecting implementation class by comparing + * typeindex(typeid(T)); for now these are local to this + * namespace, but we can consider moving them to a more + * common place (probably vespalib) if we see more use-cases. + */ + +/** + * Recognize a (const) instance of type T. This is cheaper than + * dynamic_cast, but requires the object to be exactly of class T. + * Returns a pointer to the object as T if recognized, nullptr + * otherwise. + **/ +template<typename T, typename U> +const T * +recognize_by_type_index(const U & object) +{ + if (std::type_index(typeid(object)) == std::type_index(typeid(T))) { + return static_cast<const T *>(&object); + } + return nullptr; +} + +/** + * Packs N recognized values into one object, used as return value + * from detect_type<T>. + * + * Use all_converted() or the equivalent bool cast operator to check + * if all objects were recognized. After this check is successful use + * get<0>(), get<1>() etc to get a reference to the objects. + **/ +template<typename T, size_t N> +class RecognizedValues +{ +private: + std::array<const T *, N> _pointers; +public: + RecognizedValues(std::array<const T *, N> && pointers) + : _pointers(std::move(pointers)) + {} + bool all_converted() const { + for (auto p : _pointers) { + if (p == nullptr) return false; + } + return true; + } + operator bool() const { return all_converted(); } + template<size_t idx> const T& get() const { + static_assert(idx < N); + return *_pointers[idx]; + } +}; + +/** + * For all arguments, detect if they have typeid(T), convert to T if + * possible, and return a RecognizedValues packing the converted + * values. + **/ +template<typename T, typename... Args> +RecognizedValues<T, sizeof...(Args)> +detect_type(const Args &... args) +{ + return RecognizedValues<T, sizeof...(Args)>({(recognize_by_type_index<T>(args))...}); +} + +} // namespace diff --git a/eval/src/vespa/eval/instruction/generic_join.cpp b/eval/src/vespa/eval/instruction/generic_join.cpp index fb5f170c68e..25829576ba5 100644 --- a/eval/src/vespa/eval/instruction/generic_join.cpp +++ b/eval/src/vespa/eval/instruction/generic_join.cpp @@ -1,6 +1,7 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "generic_join.h" +#include "detect_type.h" #include <vespa/eval/eval/inline_operation.h> #include <vespa/eval/eval/fast_value.hpp> #include <vespa/eval/eval/wrap_param.h> @@ -9,7 +10,6 @@ #include <vespa/vespalib/util/typify.h> #include <vespa/vespalib/util/visit_ranges.h> #include <cassert> -#include <typeindex> using namespace vespalib::eval::tensor_function; @@ -70,6 +70,48 @@ void my_mixed_join_op(State &state, uint64_t param_in) { //----------------------------------------------------------------------------- template <typename LCT, typename RCT, typename OCT, typename Fun> +void my_sparse_no_overlap_join_op(State &state, uint64_t param_in) { + const auto ¶m = unwrap_param<JoinParam>(param_in); + const Value &lhs = state.peek(1); + const Value &rhs = state.peek(0); + auto lhs_cells = lhs.cells().typify<LCT>(); + auto rhs_cells = rhs.cells().typify<RCT>(); + const Value::Index &lhs_index = lhs.index(); + const Value::Index &rhs_index = rhs.index(); + if (auto indexes = detect_type<FastValueIndex>(lhs_index, rhs_index)) { + const auto &lhs_fast = indexes.get<0>(); + const auto &rhs_fast = indexes.get<1>(); + return state.pop_pop_push( + FastValueIndex::sparse_no_overlap_join<LCT,RCT,OCT,Fun> + (param.res_type, Fun(param.function), + lhs_fast, rhs_fast, + param.sparse_plan.sources, + lhs_cells, rhs_cells, state.stash)); + } + Fun fun(param.function); + SparseJoinState sparse(param.sparse_plan, lhs.index(), rhs.index()); + auto guess = lhs.index().size() * rhs.index().size(); + assert(param.dense_plan.out_size == 1); + auto builder = param.factory.create_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), 1, guess); + auto outer = sparse.first_index.create_view({}); + assert(sparse.second_view_dims.empty()); + auto inner = sparse.second_index.create_view({}); + outer->lookup({}); + while (outer->next_result(sparse.first_address, sparse.first_subspace)) { + inner->lookup({}); + while (inner->next_result(sparse.second_only_address, sparse.second_subspace)) { + auto cell_value = fun(lhs_cells[sparse.lhs_subspace], rhs_cells[sparse.rhs_subspace]); + builder->add_subspace(sparse.full_address)[0] = cell_value; + } + } + auto &result = state.stash.create<std::unique_ptr<Value>>(builder->build(std::move(builder))); + const Value &result_ref = *(result.get()); + state.pop_pop_push(result_ref); +}; + +//----------------------------------------------------------------------------- + +template <typename LCT, typename RCT, typename OCT, typename Fun> void my_sparse_full_overlap_join_op(State &state, uint64_t param_in) { const auto ¶m = unwrap_param<JoinParam>(param_in); const Value &lhs = state.peek(1); @@ -78,11 +120,9 @@ void my_sparse_full_overlap_join_op(State &state, uint64_t param_in) { auto rhs_cells = rhs.cells().typify<RCT>(); const Value::Index &lhs_index = lhs.index(); const Value::Index &rhs_index = rhs.index(); - if ((std::type_index(typeid(lhs_index)) == std::type_index(typeid(FastValueIndex))) && - (std::type_index(typeid(rhs_index)) == std::type_index(typeid(FastValueIndex)))) - { - const FastValueIndex &lhs_fast = static_cast<const FastValueIndex&>(lhs_index); - const FastValueIndex &rhs_fast = static_cast<const FastValueIndex&>(rhs_index); + if (auto indexes = detect_type<FastValueIndex>(lhs_index, rhs_index)) { + const auto &lhs_fast = indexes.get<0>(); + const auto &rhs_fast = indexes.get<1>(); return (rhs_fast.map.size() < lhs_fast.map.size()) ? state.pop_pop_push(FastValueIndex::sparse_full_overlap_join<RCT,LCT,OCT,SwapArgs2<Fun>> (param.res_type, SwapArgs2<Fun>(param.function), rhs_fast, lhs_fast, rhs_cells, lhs_cells, state.stash)) @@ -145,6 +185,12 @@ struct SelectGenericJoinOp { { return my_sparse_full_overlap_join_op<LCT,RCT,OCT,Fun>; } + if ((param.dense_plan.out_size == 1) && + (param.sparse_plan.lhs_overlap.size() == 0) && + (param.sparse_plan.rhs_overlap.size() == 0)) + { + return my_sparse_no_overlap_join_op<LCT,RCT,OCT,Fun>; + } return my_mixed_join_op<LCT,RCT,OCT,Fun>; } }; diff --git a/eval/src/vespa/eval/instruction/generic_merge.cpp b/eval/src/vespa/eval/instruction/generic_merge.cpp index f340e7ad506..87be47a9c2e 100644 --- a/eval/src/vespa/eval/instruction/generic_merge.cpp +++ b/eval/src/vespa/eval/instruction/generic_merge.cpp @@ -1,5 +1,6 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "detect_type.h" #include "generic_merge.h" #include <vespa/eval/eval/inline_operation.h> #include <vespa/eval/eval/fast_value.hpp> @@ -118,46 +119,6 @@ void my_mixed_merge_op(State &state, uint64_t param_in) { state.pop_pop_push(result_ref); }; -/** possible generic utility */ -template<typename T, typename U> -const T * -recognize_by_type_index(const U & object) -{ - if (std::type_index(typeid(object)) == std::type_index(typeid(T))) { - return static_cast<const T *>(&object); - } - return nullptr; -} - -template<typename T, size_t N> -class RecognizedValues -{ -private: - std::array<const T *, N> _pointers; -public: - RecognizedValues(std::array<const T *, N> && pointers) - : _pointers(std::move(pointers)) - {} - bool all_converted() const { - for (auto p : _pointers) { - if (p == nullptr) return false; - } - return true; - } - operator bool() const { return all_converted(); } - template<size_t idx> const T& get() const { - static_assert(idx < N); - return *_pointers[idx]; - } -}; - -template<typename T, typename... Args> -RecognizedValues<T, sizeof...(Args)> -detect_type(const Args &... args) -{ - return RecognizedValues<T, sizeof...(Args)>({(recognize_by_type_index<T>(args))...}); -} - template <typename LCT, typename RCT, typename OCT, typename Fun> void my_sparse_merge_op(State &state, uint64_t param_in) { const auto ¶m = unwrap_param<MergeParam>(param_in); |