diff options
author | Håvard Pettersen <3535158+havardpe@users.noreply.github.com> | 2020-09-18 14:59:48 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-09-18 14:59:48 +0200 |
commit | 1af05b8eaa94e766c3cb19be0a8b98d8f3a2f0fc (patch) | |
tree | ceb3cf0226be468012a0f839f0ea2c36eb647da4 | |
parent | 42adcf6286491f57d6fa4ea087c760d6a8f84d26 (diff) | |
parent | 6b81dee8d14d17338c863abd646699c3a3811712 (diff) |
Merge pull request #14440 from vespa-engine/havardpe/general-join-for-mixed-tensors
general join for mixed tensors
-rw-r--r-- | eval/src/tests/eval/simple_value/simple_value_test.cpp | 102 | ||||
-rw-r--r-- | eval/src/tests/eval/value_type/value_type_test.cpp | 24 | ||||
-rw-r--r-- | eval/src/vespa/eval/eval/simple_value.cpp | 200 | ||||
-rw-r--r-- | eval/src/vespa/eval/eval/simple_value.h | 74 | ||||
-rw-r--r-- | eval/src/vespa/eval/eval/value_type.cpp | 15 | ||||
-rw-r--r-- | eval/src/vespa/eval/eval/value_type.h | 3 | ||||
-rw-r--r-- | eval/src/vespa/eval/tensor/dense/dense_multi_matmul_function.cpp | 8 | ||||
-rw-r--r-- | eval/src/vespa/eval/tensor/dense/dense_simple_expand_function.cpp | 4 | ||||
-rw-r--r-- | eval/src/vespa/eval/tensor/dense/dense_simple_join_function.cpp | 4 |
9 files changed, 385 insertions, 49 deletions
diff --git a/eval/src/tests/eval/simple_value/simple_value_test.cpp b/eval/src/tests/eval/simple_value/simple_value_test.cpp index 988e0a453bc..32a099afce3 100644 --- a/eval/src/tests/eval/simple_value/simple_value_test.cpp +++ b/eval/src/tests/eval/simple_value/simple_value_test.cpp @@ -2,11 +2,15 @@ #include <vespa/eval/eval/simple_value.h> #include <vespa/eval/eval/test/tensor_model.hpp> +#include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/gtest/gtest.h> +using namespace vespalib; using namespace vespalib::eval; using namespace vespalib::eval::test; +using vespalib::make_string_short::fmt; + std::vector<Layout> layouts = { {}, {x(3)}, @@ -22,6 +26,48 @@ std::vector<Layout> layouts = { float_cells({x({"a","b","c"}),y(5),z({"i","j","k","l"})}) }; +std::vector<Layout> join_layouts = { + {}, {}, + {x(5)}, {x(5)}, + {x(5)}, {y(5)}, + {x(5)}, {x(5),y(5)}, + {y(3)}, {x(2),z(3)}, + {x(3),y(5)}, {y(5),z(7)}, + float_cells({x(3),y(5)}), {y(5),z(7)}, + {x(3),y(5)}, float_cells({y(5),z(7)}), + float_cells({x(3),y(5)}), float_cells({y(5),z(7)}), + {x({"a","b","c"})}, {x({"a","b","c"})}, + {x({"a","b","c"})}, {x({"a","b"})}, + {x({"a","b","c"})}, {y({"foo","bar","baz"})}, + {x({"a","b","c"})}, {x({"a","b","c"}),y({"foo","bar","baz"})}, + {x({"a","b"}),y({"foo","bar","baz"})}, {x({"a","b","c"}),y({"foo","bar"})}, + {x({"a","b"}),y({"foo","bar","baz"})}, {y({"foo","bar"}),z({"i","j","k","l"})}, + float_cells({x({"a","b"}),y({"foo","bar","baz"})}), {y({"foo","bar"}),z({"i","j","k","l"})}, + {x({"a","b"}),y({"foo","bar","baz"})}, float_cells({y({"foo","bar"}),z({"i","j","k","l"})}), + float_cells({x({"a","b"}),y({"foo","bar","baz"})}), float_cells({y({"foo","bar"}),z({"i","j","k","l"})}), + {x(3),y({"foo", "bar"})}, {y({"foo", "bar"}),z(7)}, + {x({"a","b","c"}),y(5)}, {y(5),z({"i","j","k","l"})}, + float_cells({x({"a","b","c"}),y(5)}), {y(5),z({"i","j","k","l"})}, + {x({"a","b","c"}),y(5)}, float_cells({y(5),z({"i","j","k","l"})}), + 
float_cells({x({"a","b","c"}),y(5)}), float_cells({y(5),z({"i","j","k","l"})}) +}; + +TensorSpec simple_tensor_join(const TensorSpec &a, const TensorSpec &b, join_fun_t function) { + Stash stash; + const auto &engine = SimpleTensorEngine::ref(); + auto lhs = engine.from_spec(a); + auto rhs = engine.from_spec(b); + const auto &result = engine.join(*lhs, *rhs, function, stash); + return engine.to_spec(result); +} + +TensorSpec simple_value_new_join(const TensorSpec &a, const TensorSpec &b, join_fun_t function) { + auto lhs = new_value_from_spec(a, SimpleValueBuilderFactory()); + auto rhs = new_value_from_spec(b, SimpleValueBuilderFactory()); + auto result = new_join(*lhs, *rhs, function, SimpleValueBuilderFactory()); + return spec_from_new_value(*result); +} + TEST(SimpleValueTest, simple_values_can_be_converted_from_and_to_tensor_spec) { for (const auto &layout: layouts) { TensorSpec expect = spec(layout, N()); @@ -62,4 +108,60 @@ TEST(SimpleValueTest, simple_value_can_be_built_and_inspected) { EXPECT_FALSE(view->next_result({&label}, subspace)); } +TEST(SimpleValueTest, dense_join_plan_can_be_created) { + auto lhs = ValueType::from_spec("tensor(a{},b[6],c[5],e[3],f[2],g{})"); + auto rhs = ValueType::from_spec("tensor(a{},b[6],c[5],d[4],h{})"); + auto plan = DenseJoinPlan(lhs, rhs); + std::vector<size_t> expect_loop = {30,4,6}; + std::vector<size_t> expect_lhs_stride = {6,0,1}; + std::vector<size_t> expect_rhs_stride = {4,1,0}; + EXPECT_EQ(plan.lhs_size, 180); + EXPECT_EQ(plan.rhs_size, 120); + EXPECT_EQ(plan.out_size, 720); + EXPECT_EQ(plan.loop_cnt, expect_loop); + EXPECT_EQ(plan.lhs_stride, expect_lhs_stride); + EXPECT_EQ(plan.rhs_stride, expect_rhs_stride); +} + +TEST(SimpleValueTest, sparse_join_plan_can_be_created) { + auto lhs = ValueType::from_spec("tensor(a{},b[6],c[5],e[3],f[2],g{})"); + auto rhs = ValueType::from_spec("tensor(b[6],c[5],d[4],g{},h{})"); + auto plan = SparseJoinPlan(lhs, rhs); + using SRC = SparseJoinPlan::Source; + std::vector<SRC> 
expect_sources = {SRC::LHS,SRC::BOTH,SRC::RHS}; + std::vector<size_t> expect_lhs_overlap = {1}; + std::vector<size_t> expect_rhs_overlap = {0}; + EXPECT_EQ(plan.sources, expect_sources); + EXPECT_EQ(plan.lhs_overlap, expect_lhs_overlap); + EXPECT_EQ(plan.rhs_overlap, expect_rhs_overlap); +} + +TEST(SimpleValueTest, dense_join_plan_can_be_executed) { + auto plan = DenseJoinPlan(ValueType::from_spec("tensor(a[2])"), + ValueType::from_spec("tensor(b[3])")); + std::vector<int> a({1, 2}); + std::vector<int> b({3, 4, 5}); + std::vector<int> c(6, 0); + std::vector<int> expect = {3,4,5,6,8,10}; + ASSERT_EQ(plan.out_size, 6); + int *dst = &c[0]; + auto cell_join = [&](size_t a_idx, size_t b_idx) { *dst++ = (a[a_idx] * b[b_idx]); }; + plan.execute(0, 0, cell_join); + EXPECT_EQ(c, expect); +} + +TEST(SimpleValueTest, new_generic_join_works_for_simple_values) { + ASSERT_TRUE((join_layouts.size() % 2) == 0); + for (size_t i = 0; i < join_layouts.size(); i += 2) { + TensorSpec lhs = spec(join_layouts[i], Div16(N())); + TensorSpec rhs = spec(join_layouts[i + 1], Div16(N())); + for (auto fun: {operation::Add::f, operation::Sub::f, operation::Mul::f, operation::Div::f}) { + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + auto expect = simple_tensor_join(lhs, rhs, fun); + auto actual = simple_value_new_join(lhs, rhs, fun); + EXPECT_EQ(actual, expect); + } + } +} + GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/tests/eval/value_type/value_type_test.cpp b/eval/src/tests/eval/value_type/value_type_test.cpp index c783c32e699..2b103a91b81 100644 --- a/eval/src/tests/eval/value_type/value_type_test.cpp +++ b/eval/src/tests/eval/value_type/value_type_test.cpp @@ -266,18 +266,28 @@ TEST("require that dimension names can be obtained") { EXPECT_EQUAL(type("tensor<float>(y[10],x[30],z{})").dimension_names(), str_list({"x", "y", "z"})); } -TEST("require that nontrivial dimensions can be obtained") { +TEST("require that nontrivial 
indexed dimensions can be obtained") { auto my_check = [](const auto &list) { - ASSERT_EQUAL(list.size(), 2u); + ASSERT_EQUAL(list.size(), 1u); EXPECT_EQUAL(list[0].name, "x"); EXPECT_EQUAL(list[0].size, 10u); - EXPECT_EQUAL(list[1].name, "y"); - EXPECT_TRUE(list[1].is_mapped()); }; - EXPECT_TRUE(type("double").nontrivial_dimensions().empty()); - TEST_DO(my_check(type("tensor(x[10],y{})").nontrivial_dimensions())); - TEST_DO(my_check(type("tensor(a[1],b[1],x[10],y{},z[1])").nontrivial_dimensions())); + EXPECT_TRUE(type("double").nontrivial_indexed_dimensions().empty()); + TEST_DO(my_check(type("tensor(x[10],y{})").nontrivial_indexed_dimensions())); + TEST_DO(my_check(type("tensor(a[1],b[1],x[10],y{},z[1])").nontrivial_indexed_dimensions())); +} + +TEST("require that mapped dimensions can be obtained") { + auto my_check = [](const auto &list) + { + ASSERT_EQUAL(list.size(), 1u); + EXPECT_EQUAL(list[0].name, "x"); + EXPECT_TRUE(list[0].is_mapped()); + }; + EXPECT_TRUE(type("double").mapped_dimensions().empty()); + TEST_DO(my_check(type("tensor(x{},y[10])").mapped_dimensions())); + TEST_DO(my_check(type("tensor(a[1],b[1],x{},y[10],z[1])").mapped_dimensions())); } TEST("require that dimension index can be obtained") { diff --git a/eval/src/vespa/eval/eval/simple_value.cpp b/eval/src/vespa/eval/eval/simple_value.cpp index c2e1f2b5adc..e8ab26078e6 100644 --- a/eval/src/vespa/eval/eval/simple_value.cpp +++ b/eval/src/vespa/eval/eval/simple_value.cpp @@ -2,8 +2,10 @@ #include "simple_value.h" #include "tensor_spec.h" -#include <vespa/vespalib/util/stash.h> +#include "inline_operation.h" #include <vespa/vespalib/util/typify.h> +#include <vespa/vespalib/util/visit_ranges.h> +#include <vespa/vespalib/util/overload.h> #include <vespa/log/log.h> LOG_SETUP(".eval.simple_value"); @@ -16,10 +18,10 @@ namespace { struct CreateSimpleValueBuilderBase { template <typename T> static std::unique_ptr<ValueBuilderBase> invoke(const ValueType &type, - size_t num_mapped_in, size_t 
subspace_size_in) + size_t num_mapped_dims_in, size_t subspace_size_in) { assert(check_cell_type<T>(type.cell_type())); - return std::make_unique<SimpleValueT<T>>(type, num_mapped_in, subspace_size_in); + return std::make_unique<SimpleValueT<T>>(type, num_mapped_dims_in, subspace_size_in); } }; @@ -100,15 +102,15 @@ private: using Itr = Map::const_iterator; const Map &_index; - size_t _num_mapped; + size_t _num_mapped_dims; std::vector<size_t> _match_dims; std::vector<size_t> _extract_dims; Addr _query; Itr _pos; - bool is_direct_lookup() const { return (_match_dims.size() == _num_mapped); } + bool is_direct_lookup() const { return (_match_dims.size() == _num_mapped_dims); } bool is_match() const { - assert(_pos->first.size() == _num_mapped); + assert(_pos->first.size() == _num_mapped_dims); for (size_t idx: _match_dims) { if (_query[idx] != _pos->first[idx]) { return false; @@ -118,11 +120,11 @@ private: } public: - SimpleValueView(const Map &index, const std::vector<size_t> &match_dims, size_t num_mapped) - : _index(index), _num_mapped(num_mapped), _match_dims(match_dims), _extract_dims(), _query(num_mapped, ""), _pos(_index.end()) + SimpleValueView(const Map &index, const std::vector<size_t> &match_dims, size_t num_mapped_dims) + : _index(index), _num_mapped_dims(num_mapped_dims), _match_dims(match_dims), _extract_dims(), _query(num_mapped_dims, ""), _pos(_index.end()) { auto pos = _match_dims.begin(); - for (size_t i = 0; i < _num_mapped; ++i) { + for (size_t i = 0; i < _num_mapped_dims; ++i) { if ((pos == _match_dims.end()) || (*pos != i)) { _extract_dims.push_back(i); } else { @@ -130,7 +132,7 @@ public: } } assert(pos == _match_dims.end()); - assert((_match_dims.size() + _extract_dims.size()) == _num_mapped); + assert((_match_dims.size() + _extract_dims.size()) == _num_mapped_dims); } void lookup(const std::vector<const vespalib::stringref*> &addr) override { @@ -166,7 +168,80 @@ public: } }; -} +// Contains various state needed to perform the sparse part 
(all +// mapped dimensions) of the join operation. Performs swapping of +// sparse indexes to ensure that we look up entries from the smallest +// index in the largest index. +struct SparseJoinState { + bool swapped; + const NewValue::Index &first_index; + const NewValue::Index &second_index; + const std::vector<size_t> &second_view_dims; + std::vector<vespalib::stringref> full_address; + std::vector<vespalib::stringref*> first_address; + std::vector<const vespalib::stringref*> address_overlap; + std::vector<vespalib::stringref*> second_only_address; + size_t lhs_subspace; + size_t rhs_subspace; + size_t &first_subspace; + size_t &second_subspace; + + SparseJoinState(const SparseJoinPlan &plan, const NewValue::Index &lhs, const NewValue::Index &rhs) + : swapped(rhs.size() < lhs.size()), + first_index(swapped ? rhs : lhs), second_index(swapped ? lhs : rhs), + second_view_dims(swapped ? plan.lhs_overlap : plan.rhs_overlap), + full_address(plan.sources.size()), + first_address(), address_overlap(), second_only_address(), + lhs_subspace(), rhs_subspace(), + first_subspace(swapped ? rhs_subspace : lhs_subspace), + second_subspace(swapped ? lhs_subspace : rhs_subspace) + { + auto first_source = swapped ? SparseJoinPlan::Source::RHS : SparseJoinPlan::Source::LHS; + for (size_t i = 0; i < full_address.size(); ++i) { + if (plan.sources[i] == SparseJoinPlan::Source::BOTH) { + first_address.push_back(&full_address[i]); + address_overlap.push_back(&full_address[i]); + } else if (plan.sources[i] == first_source) { + first_address.push_back(&full_address[i]); + } else { + second_only_address.push_back(&full_address[i]); + } + } + } + ~SparseJoinState(); +}; +SparseJoinState::~SparseJoinState() = default; + +// Treats all values as mixed tensors. Needs output cell type as well +// as input cell types since output cell type cannot always be +// directly inferred. 
+struct GenericJoin { + template <typename LCT, typename RCT, typename OCT, typename Fun> static std::unique_ptr<NewValue> + invoke(const NewValue &lhs, const NewValue &rhs, join_fun_t function, + const SparseJoinPlan &sparse_plan, const DenseJoinPlan &dense_plan, + const ValueType &res_type, const ValueBuilderFactory &factory) + { + Fun fun(function); + auto lhs_cells = lhs.cells().typify<LCT>(); + auto rhs_cells = rhs.cells().typify<RCT>(); + SparseJoinState state(sparse_plan, lhs.index(), rhs.index()); + auto builder = factory.create_value_builder<OCT>(res_type, sparse_plan.sources.size(), dense_plan.out_size, state.first_index.size()); + auto outer = state.first_index.create_view({}); + auto inner = state.second_index.create_view(state.second_view_dims); + outer->lookup({}); + while (outer->next_result(state.first_address, state.first_subspace)) { + inner->lookup(state.address_overlap); + while (inner->next_result(state.second_only_address, state.second_subspace)) { + OCT *dst = builder->add_subspace(state.full_address).begin(); + auto join_cells = [&](size_t lhs_idx, size_t rhs_idx) { *dst++ = fun(lhs_cells[lhs_idx], rhs_cells[rhs_idx]); }; + dense_plan.execute(dense_plan.lhs_size * state.lhs_subspace, dense_plan.rhs_size * state.rhs_subspace, join_cells); + } + } + return builder->build(std::move(builder)); + } +}; + +} // namespace <unnamed> //----------------------------------------------------------------------------- @@ -182,13 +257,13 @@ SimpleValue::add_mapping(const std::vector<vespalib::stringref> &addr) assert(res.second); } -SimpleValue::SimpleValue(const ValueType &type, size_t num_mapped_in, size_t subspace_size_in) +SimpleValue::SimpleValue(const ValueType &type, size_t num_mapped_dims_in, size_t subspace_size_in) : _type(type), - _num_mapped(num_mapped_in), + _num_mapped_dims(num_mapped_dims_in), _subspace_size(subspace_size_in), _index() { - assert(_type.count_mapped_dimensions() == _num_mapped); + assert(_type.count_mapped_dimensions() == 
_num_mapped_dims); assert(_type.dense_subspace_size() == _subspace_size); } @@ -197,14 +272,14 @@ SimpleValue::~SimpleValue() = default; std::unique_ptr<NewValue::Index::View> SimpleValue::create_view(const std::vector<size_t> &dims) const { - return std::make_unique<SimpleValueView>(_index, dims, _num_mapped); + return std::make_unique<SimpleValueView>(_index, dims, _num_mapped_dims); } //----------------------------------------------------------------------------- template <typename T> -SimpleValueT<T>::SimpleValueT(const ValueType &type, size_t num_mapped_in, size_t subspace_size_in) - : SimpleValue(type, num_mapped_in, subspace_size_in), +SimpleValueT<T>::SimpleValueT(const ValueType &type, size_t num_mapped_dims_in, size_t subspace_size_in) + : SimpleValue(type, num_mapped_dims_in, subspace_size_in), _cells() { } @@ -227,19 +302,96 @@ SimpleValueT<T>::add_subspace(const std::vector<vespalib::stringref> &addr) std::unique_ptr<ValueBuilderBase> SimpleValueBuilderFactory::create_value_builder_base(const ValueType &type, - size_t num_mapped_in, size_t subspace_size_in, size_t) const + size_t num_mapped_dims_in, size_t subspace_size_in, size_t) const +{ + return typify_invoke<1,TypifyCellType,CreateSimpleValueBuilderBase>(type.cell_type(), type, num_mapped_dims_in, subspace_size_in); +} + +//----------------------------------------------------------------------------- + +DenseJoinPlan::DenseJoinPlan(const ValueType &lhs_type, const ValueType &rhs_type) + : lhs_size(1), rhs_size(1), out_size(1), loop_cnt(), lhs_stride(), rhs_stride() +{ + enum class Case { NONE, LHS, RHS, BOTH }; + Case prev_case = Case::NONE; + auto update_plan = [&](Case my_case, size_t my_size, size_t in_lhs, size_t in_rhs) { + if (my_case == prev_case) { + assert(!loop_cnt.empty()); + loop_cnt.back() *= my_size; + } else { + loop_cnt.push_back(my_size); + lhs_stride.push_back(in_lhs); + rhs_stride.push_back(in_rhs); + prev_case = my_case; + } + }; + auto visitor = overload + { + 
[&](visit_ranges_first, const auto &a) { update_plan(Case::LHS, a.size, 1, 0); }, + [&](visit_ranges_second, const auto &b) { update_plan(Case::RHS, b.size, 0, 1); }, + [&](visit_ranges_both, const auto &a, const auto &) { update_plan(Case::BOTH, a.size, 1, 1); } + }; + auto lhs_dims = lhs_type.nontrivial_indexed_dimensions(); + auto rhs_dims = rhs_type.nontrivial_indexed_dimensions(); + visit_ranges(visitor, lhs_dims.begin(), lhs_dims.end(), rhs_dims.begin(), rhs_dims.end(), + [](const auto &a, const auto &b){ return (a.name < b.name); }); + for (size_t i = loop_cnt.size(); i-- > 0; ) { + out_size *= loop_cnt[i]; + if (lhs_stride[i] != 0) { + lhs_stride[i] = lhs_size; + lhs_size *= loop_cnt[i]; + } + if (rhs_stride[i] != 0) { + rhs_stride[i] = rhs_size; + rhs_size *= loop_cnt[i]; + } + } +} + +DenseJoinPlan::~DenseJoinPlan() = default; + +//----------------------------------------------------------------------------- + +SparseJoinPlan::SparseJoinPlan(const ValueType &lhs_type, const ValueType &rhs_type) + : sources(), lhs_overlap(), rhs_overlap() { - return typify_invoke<1,TypifyCellType,CreateSimpleValueBuilderBase>(type.cell_type(), type, num_mapped_in, subspace_size_in); + size_t lhs_idx = 0; + size_t rhs_idx = 0; + auto visitor = overload + { + [&](visit_ranges_first, const auto &) { + sources.push_back(Source::LHS); + ++lhs_idx; + }, + [&](visit_ranges_second, const auto &) { + sources.push_back(Source::RHS); + ++rhs_idx; + }, + [&](visit_ranges_both, const auto &, const auto &) { + sources.push_back(Source::BOTH); + lhs_overlap.push_back(lhs_idx++); + rhs_overlap.push_back(rhs_idx++); + } + }; + auto lhs_dims = lhs_type.mapped_dimensions(); + auto rhs_dims = rhs_type.mapped_dimensions(); + visit_ranges(visitor, lhs_dims.begin(), lhs_dims.end(), rhs_dims.begin(), rhs_dims.end(), + [](const auto &a, const auto &b){ return (a.name < b.name); }); } +SparseJoinPlan::~SparseJoinPlan() = default; + 
//----------------------------------------------------------------------------- +using JoinTypify = TypifyValue<TypifyCellType,operation::TypifyOp2>; + std::unique_ptr<NewValue> new_join(const NewValue &a, const NewValue &b, join_fun_t function, const ValueBuilderFactory &factory) { - (void) a; - (void) b; - (void) function; - (void) factory; - return std::unique_ptr<NewValue>(nullptr); + auto res_type = ValueType::join(a.type(), b.type()); + assert(!res_type.is_error()); + SparseJoinPlan sparse_plan(a.type(), b.type()); + DenseJoinPlan dense_plan(a.type(), b.type()); + return typify_invoke<4,JoinTypify,GenericJoin>(a.type().cell_type(), b.type().cell_type(), res_type.cell_type(), function, + a, b, function, sparse_plan, dense_plan, res_type, factory); } //----------------------------------------------------------------------------- diff --git a/eval/src/vespa/eval/eval/simple_value.h b/eval/src/vespa/eval/eval/simple_value.h index d080a0e31c2..892dd6f1da6 100644 --- a/eval/src/vespa/eval/eval/simple_value.h +++ b/eval/src/vespa/eval/eval/simple_value.h @@ -113,10 +113,10 @@ struct ValueBuilder : ValueBuilderBase { struct ValueBuilderFactory { template <typename T> std::unique_ptr<ValueBuilder<T>> create_value_builder(const ValueType &type, - size_t num_mapped_in, size_t subspace_size_in, size_t expect_subspaces) const + size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces) const { assert(check_cell_type<T>(type.cell_type())); - auto base = create_value_builder_base(type, num_mapped_in, subspace_size_in, expect_subspaces); + auto base = create_value_builder_base(type, num_mapped_dims_in, subspace_size_in, expected_subspaces); ValueBuilder<T> *builder = dynamic_cast<ValueBuilder<T>*>(base.get()); assert(builder); base.release(); @@ -130,7 +130,7 @@ struct ValueBuilderFactory { virtual ~ValueBuilderFactory() {} protected: virtual std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type, - size_t num_mapped_in, size_t 
subspace_size_in, size_t expect_subspaces) const = 0; + size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces) const = 0; }; /** @@ -145,14 +145,14 @@ class SimpleValue : public NewValue, public NewValue::Index private: using Addr = std::vector<vespalib::string>; ValueType _type; - size_t _num_mapped; + size_t _num_mapped_dims; size_t _subspace_size; std::map<Addr,size_t> _index; protected: size_t subspace_size() const { return _subspace_size; } void add_mapping(const std::vector<vespalib::stringref> &addr); public: - SimpleValue(const ValueType &type, size_t num_mapped_in, size_t subspace_size_in); + SimpleValue(const ValueType &type, size_t num_mapped_dims_in, size_t subspace_size_in); ~SimpleValue() override; const ValueType &type() const override { return _type; } const NewValue::Index &index() const override { return *this; } @@ -169,7 +169,7 @@ class SimpleValueT : public SimpleValue, public ValueBuilder<T> private: std::vector<T> _cells; public: - SimpleValueT(const ValueType &type, size_t num_mapped_in, size_t subspace_size_in); + SimpleValueT(const ValueType &type, size_t num_mapped_dims_in, size_t subspace_size_in); ~SimpleValueT() override; TypedCells cells() const override { return TypedCells(ConstArrayRef<T>(_cells)); } ArrayRef<T> add_subspace(const std::vector<vespalib::stringref> &addr) override; @@ -188,7 +188,67 @@ struct SimpleValueBuilderFactory : ValueBuilderFactory { ~SimpleValueBuilderFactory() override {} protected: std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type, - size_t num_mapped_in, size_t subspace_size_in, size_t expect_subspaces) const override; + size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces) const override; +}; + +/** + * Plan for how to traverse two partially overlapping dense subspaces + * in parallel, identifying all matching cell index combinations, in + * the exact order the joined cells will be stored in the result. 
The + * plan can be made up-front during tensor function compilation. + **/ +struct DenseJoinPlan { + size_t lhs_size; + size_t rhs_size; + size_t out_size; + std::vector<size_t> loop_cnt; + std::vector<size_t> lhs_stride; + std::vector<size_t> rhs_stride; + DenseJoinPlan(const ValueType &lhs_type, const ValueType &rhs_type); + ~DenseJoinPlan(); + template <typename F> void execute(size_t lhs, size_t rhs, F &&f) const { + switch(loops_left(0)) { + case 0: return execute_few<F, 0>(0, lhs, rhs, std::forward<F>(f)); + case 1: return execute_few<F, 1>(0, lhs, rhs, std::forward<F>(f)); + case 2: return execute_few<F, 2>(0, lhs, rhs, std::forward<F>(f)); + case 3: return execute_few<F, 3>(0, lhs, rhs, std::forward<F>(f)); + default: return execute_many<F>(0, lhs, rhs, std::forward<F>(f)); + } + } +private: + size_t loops_left(size_t idx) const { return (loop_cnt.size() - idx); } + template <typename F, size_t N> void execute_few(size_t idx, size_t lhs, size_t rhs, F &&f) const { + if constexpr (N == 0) { + f(lhs, rhs); + } else { + for (size_t i = 0; i < loop_cnt[idx]; ++i, lhs += lhs_stride[idx], rhs += rhs_stride[idx]) { + execute_few<F, N - 1>(idx + 1, lhs, rhs, std::forward<F>(f)); + } + } + } + template <typename F> void execute_many(size_t idx, size_t lhs, size_t rhs, F &&f) const { + for (size_t i = 0; i < loop_cnt[idx]; ++i, lhs += lhs_stride[idx], rhs += rhs_stride[idx]) { + if (loops_left(idx + 1) == 3) { + execute_few<F, 3>(idx + 1, lhs, rhs, std::forward<F>(f)); + } else { + execute_many<F>(idx + 1, lhs, rhs, std::forward<F>(f)); + } + } + } +}; + +/** + * Plan for how to join the sparse part (all mapped dimensions) + * between two values. The plan can be made up-front during tensor + * function compilation. 
+ **/ +struct SparseJoinPlan { + enum class Source { LHS, RHS, BOTH }; + std::vector<Source> sources; + std::vector<size_t> lhs_overlap; + std::vector<size_t> rhs_overlap; + SparseJoinPlan(const ValueType &lhs_type, const ValueType &rhs_type); + ~SparseJoinPlan(); }; /** diff --git a/eval/src/vespa/eval/eval/value_type.cpp b/eval/src/vespa/eval/eval/value_type.cpp index 2b24a88f473..30a36fcba1d 100644 --- a/eval/src/vespa/eval/eval/value_type.cpp +++ b/eval/src/vespa/eval/eval/value_type.cpp @@ -198,10 +198,21 @@ ValueType::dense_subspace_size() const } std::vector<ValueType::Dimension> -ValueType::nontrivial_dimensions() const { +ValueType::nontrivial_indexed_dimensions() const { std::vector<ValueType::Dimension> result; for (const auto &dim: dimensions()) { - if (!dim.is_trivial()) { + if (dim.is_indexed() && !dim.is_trivial()) { + result.push_back(dim); + } + } + return result; +} + +std::vector<ValueType::Dimension> +ValueType::mapped_dimensions() const { + std::vector<ValueType::Dimension> result; + for (const auto &dim: dimensions()) { + if (dim.is_mapped()) { result.push_back(dim); } } diff --git a/eval/src/vespa/eval/eval/value_type.h b/eval/src/vespa/eval/eval/value_type.h index b89c5859593..4199b3a3381 100644 --- a/eval/src/vespa/eval/eval/value_type.h +++ b/eval/src/vespa/eval/eval/value_type.h @@ -63,7 +63,8 @@ public: size_t count_mapped_dimensions() const; size_t dense_subspace_size() const; const std::vector<Dimension> &dimensions() const { return _dimensions; } - std::vector<Dimension> nontrivial_dimensions() const; + std::vector<Dimension> nontrivial_indexed_dimensions() const; + std::vector<Dimension> mapped_dimensions() const; size_t dimension_index(const vespalib::string &name) const; std::vector<vespalib::string> dimension_names() const; bool operator==(const ValueType &rhs) const { diff --git a/eval/src/vespa/eval/tensor/dense/dense_multi_matmul_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_multi_matmul_function.cpp index 
23e54e7fc02..b8832305640 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_multi_matmul_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_multi_matmul_function.cpp @@ -128,8 +128,8 @@ bool check_input_type(const ValueType &type, const DimList &relevant) { } bool is_multi_matmul(const ValueType &a, const ValueType &b, const vespalib::string &reduce_dim) { - auto dims_a = a.nontrivial_dimensions(); - auto dims_b = b.nontrivial_dimensions(); + auto dims_a = a.nontrivial_indexed_dimensions(); + auto dims_b = b.nontrivial_indexed_dimensions(); if (check_input_type(a, dims_a) && check_input_type(b, dims_b) && (a.cell_type() == b.cell_type())) { CommonDim cd_a(dims_a, reduce_dim); CommonDim cd_b(dims_b, reduce_dim); @@ -144,8 +144,8 @@ bool is_multi_matmul(const ValueType &a, const ValueType &b, const vespalib::str const TensorFunction &create_multi_matmul(const TensorFunction &a, const TensorFunction &b, const vespalib::string &reduce_dim, const ValueType &result_type, Stash &stash) { - auto dims_a = a.result_type().nontrivial_dimensions(); - auto dims_b = b.result_type().nontrivial_dimensions(); + auto dims_a = a.result_type().nontrivial_indexed_dimensions(); + auto dims_b = b.result_type().nontrivial_indexed_dimensions(); CommonDim cd_a(dims_a, reduce_dim); CommonDim cd_b(dims_b, reduce_dim); DimPrefix prefix(dims_a, dims_b); diff --git a/eval/src/vespa/eval/tensor/dense/dense_simple_expand_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_simple_expand_function.cpp index e4e3ffc27d6..e57cfd25325 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_simple_expand_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_simple_expand_function.cpp @@ -72,8 +72,8 @@ using MyTypify = TypifyValue<TypifyCellType,TypifyOp2,TypifyBool>; //----------------------------------------------------------------------------- std::optional<Inner> detect_simple_expand(const TensorFunction &lhs, const TensorFunction &rhs) { - std::vector<ValueType::Dimension> a = 
lhs.result_type().nontrivial_dimensions(); - std::vector<ValueType::Dimension> b = rhs.result_type().nontrivial_dimensions(); + std::vector<ValueType::Dimension> a = lhs.result_type().nontrivial_indexed_dimensions(); + std::vector<ValueType::Dimension> b = rhs.result_type().nontrivial_indexed_dimensions(); if (a.empty() || b.empty()) { return std::nullopt; } else if (a.back().name < b.front().name) { diff --git a/eval/src/vespa/eval/tensor/dense/dense_simple_join_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_simple_join_function.cpp index c407ef6cdff..05de3d07c96 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_simple_join_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_simple_join_function.cpp @@ -130,8 +130,8 @@ Primary select_primary(const TensorFunction &lhs, const TensorFunction &rhs, Val } std::optional<Overlap> detect_overlap(const TensorFunction &primary, const TensorFunction &secondary) { - std::vector<ValueType::Dimension> a = primary.result_type().nontrivial_dimensions(); - std::vector<ValueType::Dimension> b = secondary.result_type().nontrivial_dimensions(); + std::vector<ValueType::Dimension> a = primary.result_type().nontrivial_indexed_dimensions(); + std::vector<ValueType::Dimension> b = secondary.result_type().nontrivial_indexed_dimensions(); if (b.size() > a.size()) { return std::nullopt; } else if (b == a) { |