diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2023-08-28 15:28:01 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-28 15:28:01 +0200 |
commit | c0e937263e09b4723f3313665fa2480046a069c8 (patch) | |
tree | 80f6a7ed84ebe83ab83fdcb1bc69db10ab8cd0ae /eval | |
parent | c5d373464251e232c99ba70be72b851b2b5bda7f (diff) | |
parent | 387a1206484925aee3de22988655ce5c588bc3ab (diff) |
Merge pull request #28156 from vespa-engine/havardpe/dense-join-reduce-plan
dense join reduce plan
Diffstat (limited to 'eval')
-rw-r--r-- | eval/CMakeLists.txt | 1 | ||||
-rw-r--r-- | eval/src/tests/eval/nested_loop/nested_loop_test.cpp | 80 | ||||
-rw-r--r-- | eval/src/tests/eval/value_type/value_type_test.cpp | 14 | ||||
-rw-r--r-- | eval/src/tests/instruction/dense_join_reduce_plan/CMakeLists.txt | 9 | ||||
-rw-r--r-- | eval/src/tests/instruction/dense_join_reduce_plan/dense_join_reduce_plan_test.cpp | 101 | ||||
-rw-r--r-- | eval/src/vespa/eval/eval/nested_loop.h | 37 | ||||
-rw-r--r-- | eval/src/vespa/eval/eval/value_type.cpp | 31 | ||||
-rw-r--r-- | eval/src/vespa/eval/eval/value_type.h | 1 | ||||
-rw-r--r-- | eval/src/vespa/eval/instruction/CMakeLists.txt | 1 | ||||
-rw-r--r-- | eval/src/vespa/eval/instruction/dense_join_reduce_plan.cpp | 95 | ||||
-rw-r--r-- | eval/src/vespa/eval/instruction/dense_join_reduce_plan.h | 27 |
11 files changed, 372 insertions, 25 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index e149727d94f..9b6bcc990fc 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -50,6 +50,7 @@ vespa_define_module( src/tests/instruction/dense_dot_product_function src/tests/instruction/dense_hamming_distance src/tests/instruction/dense_inplace_join_function + src/tests/instruction/dense_join_reduce_plan src/tests/instruction/dense_matmul_function src/tests/instruction/dense_multi_matmul_function src/tests/instruction/dense_replace_type_function diff --git a/eval/src/tests/eval/nested_loop/nested_loop_test.cpp b/eval/src/tests/eval/nested_loop/nested_loop_test.cpp index caa76816599..5c5d4b219c5 100644 --- a/eval/src/tests/eval/nested_loop/nested_loop_test.cpp +++ b/eval/src/tests/eval/nested_loop/nested_loop_test.cpp @@ -5,7 +5,7 @@ using namespace vespalib::eval; -std::vector<size_t> run_single(size_t idx_in, const std::vector<size_t> &loop, const std::vector<size_t> &stride) { +std::vector<size_t> run_loop(size_t idx_in, const std::vector<size_t> &loop, const std::vector<size_t> &stride) { std::vector<size_t> result; auto capture = [&](size_t idx_out) { result.push_back(idx_out); }; assert(loop.size() == stride.size()); @@ -13,8 +13,8 @@ std::vector<size_t> run_single(size_t idx_in, const std::vector<size_t> &loop, c return result; } -std::vector<std::pair<size_t,size_t>> run_double(size_t idx1_in, size_t idx2_in, const std::vector<size_t> &loop, - const std::vector<size_t> &stride1, const std::vector<size_t> &stride2) +std::vector<std::pair<size_t,size_t>> run_two_loops(size_t idx1_in, size_t idx2_in, const std::vector<size_t> &loop, + const std::vector<size_t> &stride1, const std::vector<size_t> &stride2) { std::vector<std::pair<size_t,size_t>> result; auto capture = [&](size_t idx1_out, size_t idx2_out) { result.emplace_back(idx1_out, idx2_out); }; @@ -24,37 +24,77 @@ std::vector<std::pair<size_t,size_t>> run_double(size_t idx1_in, size_t idx2_in, return result; } -void verify_double(size_t idx1_in, size_t idx2_in, const std::vector<size_t> &loop, - const std::vector<size_t> &stride1, const std::vector<size_t> &stride2) +void add_entry(std::vector<std::vector<size_t>> &result, std::vector<size_t> value) { + result.push_back(std::move(value)); +} + +std::vector<std::vector<size_t>> run_three_loops(size_t idx1_in, size_t idx2_in, size_t idx3_in, const std::vector<size_t> &loop, + const std::vector<size_t> &stride1, const std::vector<size_t> &stride2, const std::vector<size_t> &stride3) { - auto res1 = run_single(idx1_in, loop, stride1); - auto res2 = run_single(idx2_in, loop, stride2); + std::vector<std::vector<size_t>> result; + auto capture = [&](size_t idx1_out, size_t idx2_out, size_t idx3_out) { add_entry(result, {idx1_out, idx2_out, idx3_out}); }; + assert(loop.size() == stride1.size()); + assert(loop.size() == stride2.size()); + assert(loop.size() == stride3.size()); + run_nested_loop(idx1_in, idx2_in, idx3_in, loop, stride1, stride2, stride3, capture); + return result; +} + +void verify_two(size_t idx1_in, size_t idx2_in, const std::vector<size_t> &loop, + const std::vector<size_t> &stride1, const std::vector<size_t> &stride2) +{ + auto res1 = run_loop(idx1_in, loop, stride1); + auto res2 = run_loop(idx2_in, loop, stride2); ASSERT_EQ(res1.size(), res2.size()); std::vector<std::pair<size_t,size_t>> expect; for (size_t i = 0; i < res1.size(); ++i) { expect.emplace_back(res1[i], res2[i]); } - auto actual = run_double(idx1_in, idx2_in, loop, stride1, stride2); + auto actual = run_two_loops(idx1_in, idx2_in, loop, stride1, stride2); + EXPECT_EQ(actual, expect); +} + +void verify_three(size_t idx1_in, size_t idx2_in, size_t idx3_in, const std::vector<size_t> &loop, + const std::vector<size_t> &stride1, const std::vector<size_t> &stride2, const std::vector<size_t> &stride3) +{ + auto res1 = run_loop(idx1_in, loop, stride1); + auto res2 = run_loop(idx2_in, loop, stride2); + auto res3 = run_loop(idx3_in, loop, stride3); + ASSERT_EQ(res1.size(), res2.size()); + ASSERT_EQ(res1.size(), res3.size()); + std::vector<std::vector<size_t>> expect; + for (size_t i = 0; i < res1.size(); ++i) { + add_entry(expect, {res1[i], res2[i], res3[i]}); + } + auto actual = run_three_loops(idx1_in, idx2_in, idx3_in, loop, stride1, stride2, stride3); EXPECT_EQ(actual, expect); } std::vector<size_t> v(std::vector<size_t> vec) { return vec; } -TEST(NestedLoopTest, single_nested_loop_can_be_executed) { - EXPECT_EQ(v({123}), run_single(123, {}, {})); - EXPECT_EQ(v({10,11}), run_single(10, {2}, {1})); - EXPECT_EQ(v({100,110,101,111}), run_single(100, {2,2}, {1,10})); - EXPECT_EQ(v({100,110,100,110,101,111,101,111}), run_single(100, {2,2,2}, {1,0,10})); +TEST(NestedLoopTest, nested_loop_can_be_executed) { + EXPECT_EQ(v({123}), run_loop(123, {}, {})); + EXPECT_EQ(v({10,11}), run_loop(10, {2}, {1})); + EXPECT_EQ(v({100,110,101,111}), run_loop(100, {2,2}, {1,10})); + EXPECT_EQ(v({100,110,100,110,101,111,101,111}), run_loop(100, {2,2,2}, {1,0,10})); EXPECT_EQ(v({100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}), - run_single(100, {2,2,2,2}, {8,4,2,1})); + run_loop(100, {2,2,2,2}, {8,4,2,1})); +} + +TEST(NestedLoopTest, two_parallel_nested_loops_can_be_executed) { + verify_two(10, 20, {}, {}, {}); + verify_two(10, 20, {3}, {5}, {7}); + verify_two(10, 20, {3,3}, {2,3}, {7,5}); + verify_two(10, 20, {3,3,2}, {2,0,3}, {0,7,5}); + verify_two(10, 20, {2,3,2,3}, {7,2,1,3}, {3,7,5,1}); } -TEST(NestedLoopTest, double_nested_loop_can_be_executed) { - verify_double(10, 20, {}, {}, {}); - verify_double(10, 20, {3}, {5}, {7}); - verify_double(10, 20, {3,3}, {2,3}, {7,5}); - verify_double(10, 20, {3,3,2}, {2,0,3}, {0,7,5}); - verify_double(10, 20, {2,3,2,3}, {7,2,1,3}, {3,7,5,1}); +TEST(NestedLoopTest, three_parallel_nested_loops_can_be_executed) { + verify_three(10, 20, 30, {}, {}, {}, {}); + verify_three(10, 20, 30, {3}, {5}, {7}, {3}); + verify_three(10, 20, 30, {3,3}, {2,3}, {7,5}, {5, 3}); + verify_three(10, 20, 30, {3,3,2}, {2,0,3}, {0,7,5}, {5, 3, 0}); + verify_three(10, 20, 30, {2,3,2,3}, {7,2,1,3}, {3,7,5,1}, {1,5,7,3}); } GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/tests/eval/value_type/value_type_test.cpp b/eval/src/tests/eval/value_type/value_type_test.cpp index feb6fb5c368..2e505a9d16d 100644 --- a/eval/src/tests/eval/value_type/value_type_test.cpp +++ b/eval/src/tests/eval/value_type/value_type_test.cpp @@ -363,6 +363,20 @@ TEST("require that dimension index can be obtained") { EXPECT_EQUAL(type("tensor(y[10],x{},z[5])").dimension_index("w"), ValueType::Dimension::npos); } +TEST("require that dimension stride can be calculated") { + EXPECT_EQUAL(type("error").stride_of("x"), 0u); + EXPECT_EQUAL(type("double").stride_of("x"), 0u); + EXPECT_EQUAL(type("tensor()").stride_of("x"), 0u); + EXPECT_EQUAL(type("tensor(x{})").stride_of("x"), 0u); + EXPECT_EQUAL(type("tensor(x[10])").stride_of("x"), 1u); + EXPECT_EQUAL(type("tensor(x[10])").stride_of("y"), 0u); + EXPECT_EQUAL(type("tensor(x[10],y[5])").stride_of("x"), 5u); + EXPECT_EQUAL(type("tensor(x[10],y[5],z[3])").stride_of("x"), 15u); + EXPECT_EQUAL(type("tensor(x[10],y[5],z[3])").stride_of("y"), 3u); + EXPECT_EQUAL(type("tensor(x[10],y[5],z[3])").stride_of("z"), 1u); + EXPECT_EQUAL(type("tensor(x[10],y{},z[3])").stride_of("x"), 3u); +} + void verify_predicates(const ValueType &type, bool expect_error, bool expect_double, bool expect_tensor, bool expect_sparse, bool expect_dense, bool expect_mixed) diff --git a/eval/src/tests/instruction/dense_join_reduce_plan/CMakeLists.txt b/eval/src/tests/instruction/dense_join_reduce_plan/CMakeLists.txt new file mode 100644 index 00000000000..9de33bcf8a0 --- /dev/null +++ b/eval/src/tests/instruction/dense_join_reduce_plan/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_dense_join_reduce_plan_test_app TEST + SOURCES + dense_join_reduce_plan_test.cpp + DEPENDS + vespaeval + GTest::GTest +) +vespa_add_test(NAME eval_dense_join_reduce_plan_test_app COMMAND eval_dense_join_reduce_plan_test_app) diff --git a/eval/src/tests/instruction/dense_join_reduce_plan/dense_join_reduce_plan_test.cpp b/eval/src/tests/instruction/dense_join_reduce_plan/dense_join_reduce_plan_test.cpp new file mode 100644 index 00000000000..9851e209ba5 --- /dev/null +++ b/eval/src/tests/instruction/dense_join_reduce_plan/dense_join_reduce_plan_test.cpp @@ -0,0 +1,101 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/eval/instruction/dense_join_reduce_plan.h> +#include <vespa/vespalib/gtest/gtest.h> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::instruction; + +ValueType type(const vespalib::string &type_spec) { + return ValueType::from_spec(type_spec); +} + +TEST(DenseJoinReducePlanTest, make_trivial_plan) { + auto plan = DenseJoinReducePlan(type("double"), type("double"), type("double")); + EXPECT_TRUE(plan.distinct_result()); + EXPECT_EQ(plan.lhs_size, 1); + EXPECT_EQ(plan.rhs_size, 1); + EXPECT_EQ(plan.res_size, 1); + EXPECT_TRUE(plan.loop_cnt.empty()); + EXPECT_TRUE(plan.lhs_stride.empty()); + EXPECT_TRUE(plan.rhs_stride.empty()); + EXPECT_TRUE(plan.res_stride.empty()); +} + +TEST(DenseJoinReducePlanTest, execute_trivial_plan) { + auto plan = DenseJoinReducePlan(type("double"), type("double"), type("double")); + size_t res = 0; + auto join_reduce = [&](size_t a_idx, size_t b_idx, size_t c_idx) { + res += (12 + a_idx + b_idx + c_idx); + }; + plan.execute(5, 10, 15, join_reduce); + EXPECT_EQ(res, 42); +} + +TEST(DenseJoinReducePlanTest, make_simple_plan) { + auto plan = DenseJoinReducePlan(type("tensor(a[2])"), type("tensor(b[3])"), type("tensor(a[2])")); + SmallVector<size_t> expect_loop = {2,3}; + SmallVector<size_t> expect_lhs_stride = {1,0}; + SmallVector<size_t> expect_rhs_stride = {0,1}; + SmallVector<size_t> expect_res_stride = {1,0}; + EXPECT_FALSE(plan.distinct_result()); + EXPECT_EQ(plan.lhs_size, 2); + EXPECT_EQ(plan.rhs_size, 3); + EXPECT_EQ(plan.res_size, 2); + EXPECT_EQ(plan.loop_cnt, expect_loop); + EXPECT_EQ(plan.lhs_stride, expect_lhs_stride); + EXPECT_EQ(plan.rhs_stride, expect_rhs_stride); + EXPECT_EQ(plan.res_stride, expect_res_stride); +} + +TEST(DenseJoinReducePlanTest, execute_simple_plan) { + auto plan = DenseJoinReducePlan(type("tensor(a[2])"), type("tensor(b[3])"), type("tensor(a[2])")); + std::vector<int> a({1, 2}); + std::vector<int> b({3, 4, 5}); + std::vector<int> c(2, 0); + std::vector<int> expect = {12, 24}; + ASSERT_EQ(plan.res_size, 2); + auto join_reduce = [&](size_t a_idx, size_t b_idx, size_t c_idx) { c[c_idx] += (a[a_idx] * b[b_idx]); }; + plan.execute(0, 0, 0, join_reduce); + EXPECT_EQ(c, expect); +} + +TEST(DenseJoinReducePlanTest, make_distinct_plan) { + auto plan = DenseJoinReducePlan(type("tensor(a[2])"), + type("tensor(b[3])"), + type("tensor(a[2],b[3])")); + SmallVector<size_t> expect_loop = {2,3}; + SmallVector<size_t> expect_lhs_stride = {1,0}; + SmallVector<size_t> expect_rhs_stride = {0,1}; + SmallVector<size_t> expect_res_stride = {3,1}; + EXPECT_TRUE(plan.distinct_result()); + EXPECT_EQ(plan.lhs_size, 2); + EXPECT_EQ(plan.rhs_size, 3); + EXPECT_EQ(plan.res_size, 6); + EXPECT_EQ(plan.loop_cnt, expect_loop); + EXPECT_EQ(plan.lhs_stride, expect_lhs_stride); + EXPECT_EQ(plan.rhs_stride, expect_rhs_stride); + EXPECT_EQ(plan.res_stride, expect_res_stride); +} + +TEST(DenseJoinReducePlanTest, make_complex_plan) { + auto lhs = type("tensor(a{},b[6],c[5],e[3],f[2],g{})"); + auto rhs = type("tensor(a{},b[6],c[5],d[4],h{})"); + auto res = type("tensor(a{},b[6],c[5],d[4],e[3])"); + auto plan = DenseJoinReducePlan(lhs, rhs, res); + SmallVector<size_t> expect_loop = {30,4,3,2}; + SmallVector<size_t> expect_lhs_stride = {6,0,2,1}; + SmallVector<size_t> expect_rhs_stride = {4,1,0,0}; + SmallVector<size_t> expect_res_stride = {12,3,1,0}; + EXPECT_FALSE(plan.distinct_result()); + EXPECT_EQ(plan.lhs_size, 180); + EXPECT_EQ(plan.rhs_size, 120); + EXPECT_EQ(plan.res_size, 360); + EXPECT_EQ(plan.loop_cnt, expect_loop); + EXPECT_EQ(plan.lhs_stride, expect_lhs_stride); + EXPECT_EQ(plan.rhs_stride, expect_rhs_stride); + EXPECT_EQ(plan.res_stride, expect_res_stride); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/vespa/eval/eval/nested_loop.h b/eval/src/vespa/eval/eval/nested_loop.h index 92e84f5c052..695b212dd90 100644 --- a/eval/src/vespa/eval/eval/nested_loop.h +++ b/eval/src/vespa/eval/eval/nested_loop.h @@ -65,6 +65,28 @@ template <typename F> void execute_many(size_t idx1, size_t idx2, const size_t * //----------------------------------------------------------------------------- +template <typename F, size_t N> void execute_few(size_t idx1, size_t idx2, size_t idx3, const size_t *loop, const size_t *stride1, const size_t *stride2, const size_t *stride3, const F &f) { + if constexpr (N == 0) { + f(idx1, idx2, idx3); + } else { + for (size_t i = 0; i < *loop; ++i, idx1 += *stride1, idx2 += *stride2, idx3 += *stride3) { + execute_few<F, N - 1>(idx1, idx2, idx3, loop + 1, stride1 + 1, stride2 + 1, stride3 + 1, f); + } + } +} + +template <typename F> void execute_many(size_t idx1, size_t idx2, size_t idx3, const size_t *loop, const size_t *stride1, const size_t *stride2, const size_t *stride3, size_t levels, const F &f) { + for (size_t i = 0; i < *loop; ++i, idx1 += *stride1, idx2 += *stride2, idx3 += *stride3) { + if ((levels - 1) == 3) { + execute_few<F, 3>(idx1, idx2, idx3, loop + 1, stride1 + 1, stride2 + 1, stride3 + 1, f); + } else { + execute_many<F>(idx1, idx2, idx3, loop + 1, stride1 + 1, stride2 + 1, stride3 + 1, levels - 1, f); + } + } +} + +//----------------------------------------------------------------------------- + } // implementation details // Run a nested loop and pass indexes to 'f' @@ -95,4 +117,19 @@ void run_nested_loop(size_t idx1, size_t idx2, const V &loop, const V &stride1, } } +// Run three nested loops in parallel and all three indexes to +// 'f'. Note that 'loop' is shared, which means that only individual +// strides may differ between the three loops. +template <typename F, typename V> +void run_nested_loop(size_t idx1, size_t idx2, size_t idx3, const V &loop, const V &stride1, const V &stride2, const V &stride3, const F &f) { + size_t levels = loop.size(); + switch(levels) { + case 0: return f(idx1, idx2, idx3); + case 1: return nested_loop::execute_few<F, 1>(idx1, idx2, idx3, &loop[0], &stride1[0], &stride2[0], &stride3[0], f); + case 2: return nested_loop::execute_few<F, 2>(idx1, idx2, idx3, &loop[0], &stride1[0], &stride2[0], &stride3[0], f); + case 3: return nested_loop::execute_few<F, 3>(idx1, idx2, idx3, &loop[0], &stride1[0], &stride2[0], &stride3[0], f); + default: return nested_loop::execute_many<F>(idx1, idx2, idx3, &loop[0], &stride1[0], &stride2[0], &stride3[0], levels, f); + } +} + } // namespace diff --git a/eval/src/vespa/eval/eval/value_type.cpp b/eval/src/vespa/eval/eval/value_type.cpp index 7d088b22e06..ae0ea1a0cd6 100644 --- a/eval/src/vespa/eval/eval/value_type.cpp +++ b/eval/src/vespa/eval/eval/value_type.cpp @@ -155,7 +155,8 @@ ValueType::error_if(bool has_error, ValueType else_type) ValueType::~ValueType() = default; bool -ValueType::is_double() const { +ValueType::is_double() const +{ if (!_error && _dimensions.empty()) { assert(_cell_type == CellType::DOUBLE); return true; @@ -240,7 +241,8 @@ ValueType::dense_subspace_size() const } std::vector<ValueType::Dimension> -ValueType::nontrivial_indexed_dimensions() const { +ValueType::nontrivial_indexed_dimensions() const +{ std::vector<ValueType::Dimension> result; for (const auto &dim: dimensions()) { if (dim.is_indexed() && !dim.is_trivial()) { @@ -251,7 +253,8 @@ ValueType::nontrivial_indexed_dimensions() const { } std::vector<ValueType::Dimension> -ValueType::indexed_dimensions() const { +ValueType::indexed_dimensions() const +{ std::vector<ValueType::Dimension> result; for (const auto &dim: dimensions()) { if (dim.is_indexed()) { @@ -262,7 +265,8 @@ ValueType::indexed_dimensions() const { } std::vector<ValueType::Dimension> -ValueType::mapped_dimensions() const { +ValueType::mapped_dimensions() const +{ std::vector<ValueType::Dimension> result; for (const auto &dim: dimensions()) { if (dim.is_mapped()) { @@ -273,10 +277,27 @@ ValueType::mapped_dimensions() const { } size_t -ValueType::dimension_index(const vespalib::string &name) const { +ValueType::dimension_index(const vespalib::string &name) const +{ return my_dimension_index(_dimensions, name); } +size_t +ValueType::stride_of(const vespalib::string &name) const +{ + size_t stride = 0; + for (const auto &dim: dimensions()) { + if (dim.is_indexed()) { + if (stride == 0 && dim.name == name) { + stride = 1; + } else { + stride *= dim.size; + } + } + } + return stride; +} + std::vector<vespalib::string> ValueType::dimension_names() const { diff --git a/eval/src/vespa/eval/eval/value_type.h b/eval/src/vespa/eval/eval/value_type.h index ddbdcfb4a5f..3bdfcbdd980 100644 --- a/eval/src/vespa/eval/eval/value_type.h +++ b/eval/src/vespa/eval/eval/value_type.h @@ -69,6 +69,7 @@ public: std::vector<Dimension> indexed_dimensions() const; std::vector<Dimension> mapped_dimensions() const; size_t dimension_index(const vespalib::string &name) const; + size_t stride_of(const vespalib::string &name) const; bool has_dimension(const vespalib::string &name) const { return (dimension_index(name) != Dimension::npos); } diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt index 487539aeff1..5fea5052eba 100644 --- a/eval/src/vespa/eval/instruction/CMakeLists.txt +++ b/eval/src/vespa/eval/instruction/CMakeLists.txt @@ -7,6 +7,7 @@ vespa_add_library(eval_instruction OBJECT dense_cell_range_function.cpp dense_dot_product_function.cpp dense_hamming_distance.cpp + dense_join_reduce_plan.cpp dense_lambda_peek_function.cpp dense_lambda_peek_optimizer.cpp dense_matmul_function.cpp diff --git a/eval/src/vespa/eval/instruction/dense_join_reduce_plan.cpp b/eval/src/vespa/eval/instruction/dense_join_reduce_plan.cpp new file mode 100644 index 00000000000..20b7d3364a8 --- /dev/null +++ b/eval/src/vespa/eval/instruction/dense_join_reduce_plan.cpp @@ -0,0 +1,95 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "dense_join_reduce_plan.h" +#include <vespa/vespalib/util/overload.h> +#include <vespa/vespalib/util/visit_ranges.h> +#include <cassert> + +namespace vespalib::eval::instruction { + +namespace { + +using Dim = ValueType::Dimension; +using Dims = std::vector<ValueType::Dimension>; + +void visit(auto &v, const Dims &a, const Dims &b) { + visit_ranges(v, a.begin(), a.end(), b.begin(), b.end(), + [](const auto &x, const auto &y){ return (x.name < y.name); }); +} + +Dims merge(const Dims &first, const Dims &second) { + Dims result; + auto visitor = overload { + [&result](visit_ranges_either, const Dim &dim) { result.push_back(dim); }, + [&result](visit_ranges_both, const Dim &dim, const Dim &) { result.push_back(dim); } + }; + visit(visitor, first, second); + return result; +} + +size_t count_only_in_second(const Dims &first, const Dims &second) { + size_t result = 0; + auto visitor = overload { + [](visit_ranges_first, const Dim &) {}, + [&result](visit_ranges_second, const Dim &) { ++result; }, + [](visit_ranges_both, const Dim &, const Dim &) {} + }; + visit(visitor, first, second); + return result; +} + +struct Strides { + size_t lhs; + size_t rhs; + size_t res; + Strides() noexcept : lhs(0), rhs(0), res(0) {} + Strides(size_t lhs_in, size_t rhs_in, size_t res_in) noexcept + : lhs(lhs_in), rhs(rhs_in), res(res_in) {} + bool can_combine_with(const Strides &prev) const noexcept { + return ((lhs > 0) == (prev.lhs > 0)) && + ((rhs > 0) == (prev.rhs > 0)) && + ((res > 0) == (prev.res > 0)); + } +}; + +} // <unnamed> + +DenseJoinReducePlan::DenseJoinReducePlan(const ValueType &lhs, const ValueType &rhs, const ValueType &res) + : lhs_size(lhs.dense_subspace_size()), rhs_size(rhs.dense_subspace_size()), res_size(res.dense_subspace_size()), + loop_cnt(), lhs_stride(), rhs_stride(), res_stride() +{ + auto dims = merge(lhs.nontrivial_indexed_dimensions(), rhs.nontrivial_indexed_dimensions()); + assert(count_only_in_second(dims, res.nontrivial_indexed_dimensions()) == 0); + Strides prev_strides; + for (const auto &dim: dims) { + Strides strides(lhs.stride_of(dim.name), rhs.stride_of(dim.name), res.stride_of(dim.name)); + if (strides.can_combine_with(prev_strides)) { + assert(!loop_cnt.empty()); + loop_cnt.back() *= dim.size; + lhs_stride.back() = strides.lhs; + rhs_stride.back() = strides.rhs; + res_stride.back() = strides.res; + } else { + loop_cnt.push_back(dim.size); + lhs_stride.push_back(strides.lhs); + rhs_stride.push_back(strides.rhs); + res_stride.push_back(strides.res); + } + prev_strides = strides; + } +} + +DenseJoinReducePlan::~DenseJoinReducePlan() = default; + +bool +DenseJoinReducePlan::distinct_result() const +{ + for (size_t stride: res_stride) { + if (stride == 0) { + return false; + } + } + return true; +} + +} // namespace diff --git a/eval/src/vespa/eval/instruction/dense_join_reduce_plan.h b/eval/src/vespa/eval/instruction/dense_join_reduce_plan.h new file mode 100644 index 00000000000..8f9d5218630 --- /dev/null +++ b/eval/src/vespa/eval/instruction/dense_join_reduce_plan.h @@ -0,0 +1,27 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/value_type.h> +#include <vespa/eval/eval/nested_loop.h> +#include <vespa/vespalib/util/small_vector.h> + +namespace vespalib::eval::instruction { + +struct DenseJoinReducePlan { + size_t lhs_size; + size_t rhs_size; + size_t res_size; + SmallVector<size_t> loop_cnt; + SmallVector<size_t> lhs_stride; + SmallVector<size_t> rhs_stride; + SmallVector<size_t> res_stride; + DenseJoinReducePlan(const ValueType &lhs, const ValueType &rhs, const ValueType &res); + ~DenseJoinReducePlan(); + template <typename F> void execute(size_t lhs, size_t rhs, size_t res, const F &f) const { + run_nested_loop(lhs, rhs, res, loop_cnt, lhs_stride, rhs_stride, res_stride, f); + } + bool distinct_result() const; +}; + +} // namespace |