author     Arne H Juul <arnej27959@users.noreply.github.com>    2021-02-03 11:53:20 +0100
committer  GitHub <noreply@github.com>                          2021-02-03 11:53:20 +0100
commit     5bc79680f181742971c35eda70a136c81b3affba (patch)
tree       ed1b0ad2ed5ee4c92ed0f926f7a5703a6b99e70a /eval
parent     986dc62765dc2264ab07c2f4744d86d48e707193 (diff)
parent     cfe138a74ec1db72993ac4b1a6bca7ca63bbc781 (diff)
Merge pull request #16344 from vespa-engine/havardpe/faster-single-dim-matching
added some optimizations for single-dimension sparse matching
Diffstat (limited to 'eval')
4 files changed, 109 insertions, 64 deletions
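
Before the per-file hunks, the core idea: when a sparse tensor has exactly one mapped dimension, each address is a single label, so the dot product can iterate the smaller operand and probe the bigger one directly by label id, skipping generic address hashing. A minimal standalone sketch of that idea (function and parameter names are illustrative, not the Vespa code below):

    // Hedged sketch of the single-dimension fast path this commit adds;
    // all names here (single_dim_sparse_dot, big_index) are illustrative,
    // not part of the Vespa codebase.
    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    double single_dim_sparse_dot(const std::vector<uint32_t> &small_labels,
                                 const std::vector<double> &small_cells,
                                 const std::unordered_map<uint32_t, size_t> &big_index,
                                 const std::vector<double> &big_cells)
    {
        double result = 0.0;
        // iterate the smaller operand; probe the bigger operand per label
        for (size_t i = 0; i < small_labels.size(); ++i) {
            auto pos = big_index.find(small_labels[i]);
            if (pos != big_index.end()) {
                result += small_cells[i] * big_cells[pos->second];
            }
        }
        return result;
    }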
diff --git a/eval/src/tests/instruction/sparse_dot_product_function/sparse_dot_product_function_test.cpp b/eval/src/tests/instruction/sparse_dot_product_function/sparse_dot_product_function_test.cpp
index 65eab2778aa..ac333f5224a 100644
--- a/eval/src/tests/instruction/sparse_dot_product_function/sparse_dot_product_function_test.cpp
+++ b/eval/src/tests/instruction/sparse_dot_product_function/sparse_dot_product_function_test.cpp
@@ -17,10 +17,8 @@ const ValueBuilderFactory &test_factory = SimpleValueBuilderFactory::get();
 
 EvalFixture::ParamRepo make_params() {
     return EvalFixture::ParamRepo()
-        .add("v1_x", GenSpec().map("x", 32, 1).seq_bias(3.0).gen())
-        .add("v1_x_f", GenSpec().map("x", 32, 1).seq_bias(3.0).cells_float().gen())
-        .add("v2_x", GenSpec().map("x", 16, 2).seq_bias(7.0).gen())
-        .add("v2_x_f", GenSpec().map("x", 16, 2).seq_bias(7.0).cells_float().gen())
+        .add_variants("v1_x", GenSpec().map("x", 32, 1).seq_bias(3.0))
+        .add_variants("v2_x", GenSpec().map("x", 16, 2).seq_bias(7.0))
         .add("v3_y", GenSpec().map("y", 10, 1).gen())
         .add("v4_xd", GenSpec().idx("x", 10).gen())
         .add("m1_xy", GenSpec().map("x", 32, 1).map("y", 16, 2).seq_bias(3.0).gen())
@@ -53,8 +51,6 @@ TEST(SparseDotProduct, expression_can_be_optimized)
 {
     assert_optimized("reduce(v1_x*v2_x,sum,x)");
     assert_optimized("reduce(v2_x*v1_x,sum)");
-    assert_optimized("reduce(v1_x*v2_x_f,sum)");
-    assert_optimized("reduce(v1_x_f*v2_x,sum)");
     assert_optimized("reduce(v1_x_f*v2_x_f,sum)");
 }
 
@@ -80,6 +76,12 @@ TEST(SparseDotProduct, similar_expressions_are_not_optimized)
     assert_not_optimized("reduce(m3_xym*m3_xym,sum)");
 }
 
+TEST(SparseDotProduct, mixed_cell_types_are_not_optimized)
+{
+    assert_not_optimized("reduce(v1_x*v2_x_f,sum)");
+    assert_not_optimized("reduce(v1_x_f*v2_x,sum)");
+}
+
 //-----------------------------------------------------------------------------
 
 GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/vespa/eval/eval/fast_addr_map.h b/eval/src/vespa/eval/eval/fast_addr_map.h
index d8f68d2a37c..7df6bd7fafc 100644
--- a/eval/src/vespa/eval/eval/fast_addr_map.h
+++ b/eval/src/vespa/eval/eval/fast_addr_map.h
@@ -20,9 +20,13 @@ namespace vespalib::eval {
 class FastAddrMap {
 public:
-    // label hasing functions
-    static constexpr uint32_t hash_label(string_id label) { return label.hash(); }
-    static constexpr uint32_t hash_label(const string_id *label) { return label->hash(); }
+    // label extracting functions
+    static constexpr string_id self(string_id label) { return label; }
+    static constexpr string_id self(const string_id *label) { return *label; }
+
+    // label hashing functions
+    static constexpr uint32_t hash_label(string_id label) { return label.value(); }
+    static constexpr uint32_t hash_label(const string_id *label) { return label->value(); }
     static constexpr uint32_t combine_label_hash(uint32_t full_hash, uint32_t next_hash) {
         return ((full_hash * 31) + next_hash);
     }
@@ -71,27 +75,27 @@ public:
         template <typename T>
         constexpr uint32_t operator()(const AltKey<T> &key) const { return key.hash; }
         constexpr uint32_t operator()(const Entry &entry) const { return entry.hash; }
+        constexpr uint32_t operator()(string_id label) const { return label.value(); }
     };
 
     // equality functor for sparse hash set
     struct Equal {
         const LabelView &label_view;
         Equal(const LabelView &label_view_in) : label_view(label_view_in) {}
-        static constexpr bool eq_labels(string_id a, string_id b) { return (a == b); }
-        static constexpr bool eq_labels(string_id a, const string_id *b) { return (a == *b); }
         template <typename T>
         bool operator()(const Entry &a, const AltKey<T> &b) const {
-            if ((a.hash != b.hash) || (b.key.size() != label_view.addr_size)) {
+            if (a.hash != b.hash) {
                 return false;
             }
             auto a_key = label_view.get_addr(a.tag.idx);
             for (size_t i = 0; i < a_key.size(); ++i) {
-                if (!eq_labels(a_key[i], b.key[i])) {
+                if (a_key[i] != self(b.key[i])) {
                     return false;
                 }
             }
             return true;
         }
+        bool operator()(const Entry &a, string_id b) const { return (a.hash == b.value()); }
     };
 
     using HashType = hashtable<Entry, Entry, Hash, Equal, Identity, hashtable_base::and_modulator>;
 
@@ -101,8 +105,8 @@ private:
     HashType _map;
 
 public:
-    FastAddrMap(size_t num_mapped_dims, const std::vector<string_id> &labels, size_t expected_subspaces)
-        : _labels(num_mapped_dims, labels),
+    FastAddrMap(size_t num_mapped_dims, const std::vector<string_id> &labels_in, size_t expected_subspaces)
+        : _labels(num_mapped_dims, labels_in),
           _map(expected_subspaces * 2, Hash(), Equal(_labels)) {}
     ~FastAddrMap();
     FastAddrMap(const FastAddrMap &) = delete;
@@ -113,15 +117,24 @@ public:
     ConstArrayRef<string_id> get_addr(size_t idx) const { return _labels.get_addr(idx); }
     size_t size() const { return _map.size(); }
     constexpr size_t addr_size() const { return _labels.addr_size; }
+    const std::vector<string_id> &labels() const { return _labels.labels; }
     template <typename T>
     size_t lookup(ConstArrayRef<T> addr, uint32_t hash) const {
+        // assert(addr_size() == addr.size());
         AltKey<T> key{addr, hash};
         auto pos = _map.find(key);
         return (pos == _map.end()) ? npos() : pos->tag.idx;
     }
+    size_t lookup_singledim(string_id addr) const {
+        // assert(addr_size() == 1);
+        auto pos = _map.find(addr);
+        return (pos == _map.end()) ? npos() : pos->tag.idx;
+    }
     template <typename T>
     size_t lookup(ConstArrayRef<T> addr) const {
-        return lookup(addr, hash_labels(addr));
+        return (addr.size() == 1)
+            ? lookup_singledim(self(addr[0]))
+            : lookup(addr, hash_labels(addr));
     }
     void add_mapping(uint32_t hash) {
         uint32_t idx = _map.size();
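
The fast_addr_map.h hunks above let the hash table be probed with a bare string_id: hash_label() now returns the label's id value directly, and the extra Hash/Equal overloads accept a single label without building a full address key. The same pattern, sketched with C++20 transparent lookup on a standard container (Entry, EntryHash, EntryEq are hypothetical stand-ins, not vespalib's hashtable):

    // Hedged sketch of heterogeneous lookup: the set stores full entries,
    // but find() can be called with a bare label id (C++20 transparent
    // hashing). All type names here are hypothetical.
    #include <cstdint>
    #include <unordered_set>

    struct Entry {
        uint32_t label;    // single-dimension address == one label id
        size_t   subspace; // dense index of the cells for this label
    };

    struct EntryHash {
        using is_transparent = void;
        size_t operator()(const Entry &e) const { return e.label; }
        size_t operator()(uint32_t label) const { return label; }
    };

    struct EntryEq {
        using is_transparent = void;
        bool operator()(const Entry &a, const Entry &b) const { return a.label == b.label; }
        bool operator()(const Entry &a, uint32_t b) const { return a.label == b; }
        bool operator()(uint32_t a, const Entry &b) const { return a == b.label; }
    };

    using SingleDimIndex = std::unordered_set<Entry, EntryHash, EntryEq>;
    // usage: auto pos = index.find(label_id); // no temporary Entry is built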
diff --git a/eval/src/vespa/eval/eval/fast_value.hpp b/eval/src/vespa/eval/eval/fast_value.hpp
index 5657c93f53f..88319df7590 100644
--- a/eval/src/vespa/eval/eval/fast_value.hpp
+++ b/eval/src/vespa/eval/eval/fast_value.hpp
@@ -6,6 +6,7 @@
 #include <vespa/eval/instruction/generic_join.h>
 #include <vespa/vespalib/stllike/hashtable.hpp>
 #include <vespa/vespalib/util/shared_string_repo.h>
+#include <typeindex>
 
 namespace vespalib::eval {
 
@@ -135,9 +136,9 @@ struct FastIterateView : public Value::Index::View {
 //-----------------------------------------------------------------------------
 
 using JoinAddrSource = instruction::SparseJoinPlan::Source;
+
 // This is the class instructions will look for when optimizing sparse
 // operations by calling inline functions directly.
-
 struct FastValueIndex final : Value::Index {
     FastAddrMap map;
     FastValueIndex(size_t num_mapped_dims_in, const std::vector<string_id> &labels, size_t expected_subspaces_in)
@@ -164,6 +165,18 @@ struct FastValueIndex final : Value::Index {
     std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override;
 };
 
+inline bool is_fast(const Value::Index &index) {
+    return (std::type_index(typeid(index)) == std::type_index(typeid(FastValueIndex)));
+}
+
+inline bool are_fast(const Value::Index &a, const Value::Index &b) {
+    return (is_fast(a) && is_fast(b));
+}
+
+constexpr const FastValueIndex &as_fast(const Value::Index &index) {
+    return static_cast<const FastValueIndex &>(index);
+}
+
 //-----------------------------------------------------------------------------
 
 template <typename T>
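
The new is_fast() helper uses an exact-type check rather than dynamic_cast: since FastValueIndex is final, comparing typeid accepts exactly that type and nothing else. A minimal illustration of the pattern (Index/FastIndex are hypothetical stand-ins, not Vespa's types):

    // Hedged sketch of the exact-type test behind is_fast(); types here
    // are illustrative only.
    #include <typeindex>
    #include <typeinfo>

    struct Index { virtual ~Index() = default; };
    struct FastIndex final : Index {};
    struct OtherIndex final : Index {};

    bool is_exactly_fast(const Index &idx) {
        // typeid on a polymorphic reference inspects the dynamic type
        return std::type_index(typeid(idx)) == std::type_index(typeid(FastIndex));
    }
    // is_exactly_fast(FastIndex{})  -> true
    // is_exactly_fast(OtherIndex{}) -> false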
diff --git a/eval/src/vespa/eval/instruction/sparse_dot_product_function.cpp b/eval/src/vespa/eval/instruction/sparse_dot_product_function.cpp
index 93ae2856372..7cc4417bdbb 100644
--- a/eval/src/vespa/eval/instruction/sparse_dot_product_function.cpp
+++ b/eval/src/vespa/eval/instruction/sparse_dot_product_function.cpp
@@ -2,8 +2,8 @@
 
 #include "sparse_dot_product_function.h"
 #include "generic_join.h"
-#include "detect_type.h"
 #include <vespa/eval/eval/fast_value.hpp>
+#include <vespa/vespalib/util/typify.h>
 
 namespace vespalib::eval {
 
@@ -13,62 +13,76 @@ using namespace instruction;
 
 namespace {
 
-template <typename SCT, typename BCT>
-double my_fast_sparse_dot_product(const FastValueIndex &small_idx, const FastValueIndex &big_idx,
-                                  const SCT *small_cells, const BCT *big_cells)
+template <typename CT>
+double my_sparse_dot_product_fallback(const Value::Index &lhs_idx, const Value::Index &rhs_idx,
+                                      const CT *lhs_cells, const CT *rhs_cells, size_t num_mapped_dims) __attribute__((noinline));
+template <typename CT>
+double my_sparse_dot_product_fallback(const Value::Index &lhs_idx, const Value::Index &rhs_idx,
+                                      const CT *lhs_cells, const CT *rhs_cells, size_t num_mapped_dims)
 {
     double result = 0.0;
-    small_idx.map.each_map_entry([&](auto small_subspace, auto hash) {
-        auto small_addr = small_idx.map.get_addr(small_subspace);
-        auto big_subspace = big_idx.map.lookup(small_addr, hash);
-        if (big_subspace != FastAddrMap::npos()) {
-            result += (small_cells[small_subspace] * big_cells[big_subspace]);
-        }
-    });
+    SparseJoinPlan plan(num_mapped_dims);
+    SparseJoinState sparse(plan, lhs_idx, rhs_idx);
+    auto outer = sparse.first_index.create_view({});
+    auto inner = sparse.second_index.create_view(sparse.second_view_dims);
+    outer->lookup({});
+    while (outer->next_result(sparse.first_address, sparse.first_subspace)) {
+        inner->lookup(sparse.address_overlap);
+        if (inner->next_result(sparse.second_only_address, sparse.second_subspace)) {
+            result += (lhs_cells[sparse.lhs_subspace] * rhs_cells[sparse.rhs_subspace]);
+        }
+    }
     return result;
 }
 
-template <typename LCT, typename RCT>
-void my_sparse_dot_product_op(InterpretedFunction::State &state, uint64_t num_mapped_dims) {
-    const auto &lhs_idx = state.peek(1).index();
-    const auto &rhs_idx = state.peek(0).index();
-    const LCT *lhs_cells = state.peek(1).cells().typify<LCT>().cbegin();
-    const RCT *rhs_cells = state.peek(0).cells().typify<RCT>().cbegin();
-    if (auto indexes = detect_type<FastValueIndex>(lhs_idx, rhs_idx)) {
-#if __has_cpp_attribute(likely)
-        [[likely]];
-#endif
-        const auto &lhs_fast = indexes.get<0>();
-        const auto &rhs_fast = indexes.get<1>();
-        double result = (rhs_fast.map.size() < lhs_fast.map.size())
-            ? my_fast_sparse_dot_product(rhs_fast, lhs_fast, rhs_cells, lhs_cells)
-            : my_fast_sparse_dot_product(lhs_fast, rhs_fast, lhs_cells, rhs_cells);
-        state.pop_pop_push(state.stash.create<ScalarValue<double>>(result));
-    } else {
-#if __has_cpp_attribute(unlikely)
-        [[unlikely]];
-#endif
-        double result = 0.0;
-        SparseJoinPlan plan(num_mapped_dims);
-        SparseJoinState sparse(plan, lhs_idx, rhs_idx);
-        auto outer = sparse.first_index.create_view({});
-        auto inner = sparse.second_index.create_view(sparse.second_view_dims);
-        outer->lookup({});
-        while (outer->next_result(sparse.first_address, sparse.first_subspace)) {
-            inner->lookup(sparse.address_overlap);
-            if (inner->next_result(sparse.second_only_address, sparse.second_subspace)) {
-                result += (lhs_cells[sparse.lhs_subspace] * rhs_cells[sparse.rhs_subspace]);
+template <typename CT, bool single_dim>
+double my_fast_sparse_dot_product(const FastAddrMap *small_map, const FastAddrMap *big_map,
+                                  const CT *small_cells, const CT *big_cells)
+{
+    double result = 0.0;
+    if (big_map->size() < small_map->size()) {
+        std::swap(small_map, big_map);
+        std::swap(small_cells, big_cells);
+    }
+    if constexpr (single_dim) {
+        const auto &labels = small_map->labels();
+        for (size_t i = 0; i < labels.size(); ++i) {
+            auto big_subspace = big_map->lookup_singledim(labels[i]);
+            if (big_subspace != FastAddrMap::npos()) {
+                result += (small_cells[i] * big_cells[big_subspace]);
             }
         }
-        state.pop_pop_push(state.stash.create<ScalarValue<double>>(result));
+    } else {
+        small_map->each_map_entry([&](auto small_subspace, auto hash) {
+            auto small_addr = small_map->get_addr(small_subspace);
+            auto big_subspace = big_map->lookup(small_addr, hash);
+            if (big_subspace != FastAddrMap::npos()) {
+                result += (small_cells[small_subspace] * big_cells[big_subspace]);
+            }
+        });
     }
+    return result;
+}
+
+template <typename CT, bool single_dim>
+void my_sparse_dot_product_op(InterpretedFunction::State &state, uint64_t num_mapped_dims) {
+    const auto &lhs_idx = state.peek(1).index();
+    const auto &rhs_idx = state.peek(0).index();
+    const CT *lhs_cells = state.peek(1).cells().typify<CT>().cbegin();
+    const CT *rhs_cells = state.peek(0).cells().typify<CT>().cbegin();
+    double result = __builtin_expect(are_fast(lhs_idx, rhs_idx), true)
+        ? my_fast_sparse_dot_product<CT,single_dim>(&as_fast(lhs_idx).map, &as_fast(rhs_idx).map, lhs_cells, rhs_cells)
+        : my_sparse_dot_product_fallback<CT>(lhs_idx, rhs_idx, lhs_cells, rhs_cells, num_mapped_dims);
+    state.pop_pop_push(state.stash.create<ScalarValue<double>>(result));
 }
 
 struct MyGetFun {
-    template <typename LCT, typename RCT>
-    static auto invoke() { return my_sparse_dot_product_op<LCT,RCT>; }
+    template <typename CT, typename SINGLE_DIM>
+    static auto invoke() { return my_sparse_dot_product_op<CT,SINGLE_DIM::value>; }
 };
 
+using MyTypify = TypifyValue<TypifyCellType,TypifyBool>;
+
 } // namespace <unnamed>
 
 SparseDotProductFunction::SparseDotProductFunction(const TensorFunction &lhs_in,
@@ -80,15 +94,18 @@ SparseDotProductFunction::SparseDotProductFunction(const TensorFunction &lhs_in,
 InterpretedFunction::Instruction
 SparseDotProductFunction::compile_self(const ValueBuilderFactory &, Stash &) const
 {
-    auto op = typify_invoke<2,TypifyCellType,MyGetFun>(lhs().result_type().cell_type(), rhs().result_type().cell_type());
-    return InterpretedFunction::Instruction(op, lhs().result_type().count_mapped_dimensions());
+    size_t num_dims = lhs().result_type().count_mapped_dimensions();
+    auto op = typify_invoke<2,MyTypify,MyGetFun>(lhs().result_type().cell_type(),
+                                                 (num_dims == 1));
+    return InterpretedFunction::Instruction(op, num_dims);
 }
 
 bool
 SparseDotProductFunction::compatible_types(const ValueType &res, const ValueType &lhs, const ValueType &rhs)
 {
     return (res.is_scalar() && (res.cell_type() == CellType::DOUBLE) &&
-            lhs.is_sparse() && (rhs.dimensions() == lhs.dimensions()));
+            lhs.is_sparse() && (rhs.dimensions() == lhs.dimensions()) &&
+            lhs.cell_type() == rhs.cell_type());
 }
 
 const TensorFunction &
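
Finally, compile_self() above folds two runtime facts, the cell type and whether there is exactly one mapped dimension, into the choice of a pre-compiled template instantiation. A minimal stand-in for that typify-style dispatch (select/op are hypothetical names, not Vespa's actual TypifyValue machinery):

    // Hedged sketch of value-to-type dispatch: runtime flags pick one of
    // four template instantiations compiled ahead of time. Names are
    // illustrative only.
    #include <cstdio>

    template <typename CT, bool single_dim>
    void op() { std::printf("%zu-byte cells, single_dim=%d\n", sizeof(CT), int(single_dim)); }

    using op_fn = void (*)();

    op_fn select(bool is_float, bool single_dim) {
        if (is_float) {
            return single_dim ? op<float, true> : op<float, false>;
        }
        return single_dim ? op<double, true> : op<double, false>;
    }

    int main() {
        select(true, true)();   // dispatches to op<float, true>
        select(false, false)(); // dispatches to op<double, false>
    }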