diff options
Diffstat (limited to 'eval/src/tests/instruction/universal_dot_product/universal_dot_product_test.cpp')
-rw-r--r-- | eval/src/tests/instruction/universal_dot_product/universal_dot_product_test.cpp | 220 |
1 files changed, 146 insertions, 74 deletions
diff --git a/eval/src/tests/instruction/universal_dot_product/universal_dot_product_test.cpp b/eval/src/tests/instruction/universal_dot_product/universal_dot_product_test.cpp index 6c0726dab37..e1967f012cb 100644 --- a/eval/src/tests/instruction/universal_dot_product/universal_dot_product_test.cpp +++ b/eval/src/tests/instruction/universal_dot_product/universal_dot_product_test.cpp @@ -27,31 +27,7 @@ using vespalib::make_string_short::fmt; const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get(); bool bench = false; double budget = 1.0; - -GenSpec::seq_t N_16ths = [] (size_t i) noexcept { return (i + 33.0) / 16.0; }; - -GenSpec G() { return GenSpec().seq(N_16ths); } - -const std::vector<GenSpec> layouts = { - G(), G(), - G().idx("x", 5), G().idx("x", 5), - G().idx("x", 5), G().idx("y", 5), - G().idx("x", 5), G().idx("x", 5).idx("y", 5), - G().idx("y", 3), G().idx("x", 2).idx("z", 3), - G().idx("x", 3).idx("y", 5), G().idx("y", 5).idx("z", 7), - G().map("x", {"a","b","c"}), G().map("x", {"a","b","c"}), - G().map("x", {"a","b","c"}), G().map("x", {"a","b"}), - G().map("x", {"a","b","c"}), G().map("y", {"foo","bar","baz"}), - G().map("x", {"a","b","c"}), G().map("x", {"a","b","c"}).map("y", {"foo","bar","baz"}), - G().map("x", {"a","b"}).map("y", {"foo","bar","baz"}), G().map("x", {"a","b","c"}).map("y", {"foo","bar"}), - G().map("x", {"a","b"}).map("y", {"foo","bar","baz"}), G().map("y", {"foo","bar"}).map("z", {"i","j","k","l"}), - G().idx("x", 3).map("y", {"foo", "bar"}), G().map("y", {"foo", "bar"}).idx("z", 7), - G().map("x", {"a","b","c"}).idx("y", 5), G().idx("y", 5).map("z", {"i","j","k","l"}) -}; - -const std::vector<std::vector<vespalib::string>> reductions = { - {}, {"x"}, {"y"}, {"z"}, {"x", "y"}, {"x", "z"}, {"y", "z"} -}; +size_t verify_cnt = 0; std::vector<std::string> ns_list = { {"vespalib::eval::instruction::(anonymous namespace)::"}, @@ -76,14 +52,19 @@ std::string strip_ns(const vespalib::string &str) { return tmp; } -TensorSpec 
make_spec(const vespalib::string &param_name, size_t idx) { - return GenSpec::from_desc(param_name).cells_double().seq(N(1 + idx)); +using select_cell_type_t = std::function<CellType(size_t idx)>; +CellType always_double(size_t) { return CellType::DOUBLE; } +select_cell_type_t select(CellType lct) { return [lct](size_t)noexcept{ return lct; }; } +select_cell_type_t select(CellType lct, CellType rct) { return [lct,rct](size_t idx)noexcept{ return idx ? rct : lct; }; } + +TensorSpec make_spec(const vespalib::string &param_name, size_t idx, select_cell_type_t select_cell_type) { + return GenSpec::from_desc(param_name).cells(select_cell_type(idx)).seq(N(1 + idx)); } -TensorSpec eval_ref(const Function &fun) { +TensorSpec eval_ref(const Function &fun, select_cell_type_t select_cell_type) { std::vector<TensorSpec> params; for (size_t i = 0; i < fun.num_params(); ++i) { - params.push_back(make_spec(fun.param_name(i), i)); + params.push_back(make_spec(fun.param_name(i), i, select_cell_type)); } return ReferenceEvaluation::eval(fun, params); } @@ -134,19 +115,58 @@ Optimize universal_only() { return Optimize::specific("universal_only", my_optimizer); } +void verify(const vespalib::string &expr, select_cell_type_t select_cell_type) { + ++verify_cnt; + auto fun = Function::parse(expr); + ASSERT_FALSE(fun->has_error()); + std::vector<Value::UP> values; + for (size_t i = 0; i < fun->num_params(); ++i) { + auto value = value_from_spec(make_spec(fun->param_name(i), i, select_cell_type), prod_factory); + values.push_back(std::move(value)); + } + SimpleObjectParams params({}); + std::vector<ValueType> param_types; + for (auto &&up: values) { + params.params.emplace_back(*up); + param_types.push_back(up->type()); + } + NodeTypes node_types(*fun, param_types); + const ValueType &expected_type = node_types.get_type(fun->root()); + ASSERT_FALSE(expected_type.is_error()); + Stash stash; + size_t count = 0; + const TensorFunction &plain_fun = make_tensor_function(prod_factory, fun->root(), 
node_types, stash); + const TensorFunction &optimized = apply_tensor_function_optimizer(plain_fun, universal_only().optimizer, stash, &count); + ASSERT_GT(count, 0); + InterpretedFunction ifun(prod_factory, optimized); + InterpretedFunction::Context ctx(ifun); + const Value &actual = ifun.eval(ctx, params); + EXPECT_EQ(actual.type(), expected_type); + EXPECT_EQ(actual.cells().type, expected_type.cell_type()); + if (expected_type.count_mapped_dimensions() == 0) { + EXPECT_EQ(actual.index().size(), TrivialIndex::get().size()); + EXPECT_EQ(actual.cells().size, expected_type.dense_subspace_size()); + } else { + EXPECT_EQ(actual.cells().size, actual.index().size() * expected_type.dense_subspace_size()); + } + auto expected = eval_ref(*fun, select_cell_type); + EXPECT_EQ(spec_from_value(actual), expected); +} +void verify(const vespalib::string &expr) { verify(expr, always_double); } + using cost_list_t = std::vector<std::pair<vespalib::string,double>>; std::vector<std::pair<vespalib::string,cost_list_t>> benchmark_results; void benchmark(const vespalib::string &expr, std::vector<Optimize> list) { + verify(expr); auto fun = Function::parse(expr); ASSERT_FALSE(fun->has_error()); - auto expected = eval_ref(*fun); cost_list_t cost_list; fprintf(stderr, "BENCH: %s\n", expr.c_str()); for (Optimize &optimize: list) { std::vector<Value::UP> values; for (size_t i = 0; i < fun->num_params(); ++i) { - auto value = value_from_spec(make_spec(fun->param_name(i), i), prod_factory); + auto value = value_from_spec(make_spec(fun->param_name(i), i, always_double), prod_factory); values.push_back(std::move(value)); } SimpleObjectParams params({}); @@ -181,8 +201,6 @@ void benchmark(const vespalib::string &expr, std::vector<Optimize> list) { InterpretedFunction ifun(prod_factory, *optimized, &ctf_meta); InterpretedFunction::ProfiledContext pctx(ifun); ASSERT_EQ(ctf_meta.steps.size(), ifun.program_size()); - EXPECT_EQ(spec_from_value(ifun.eval(pctx.context, params)), expected); - 
EXPECT_EQ(spec_from_value(ifun.eval(pctx, params)), expected); std::vector<duration> prev_time(ctf_meta.steps.size(), duration::zero()); std::vector<duration> min_time(ctf_meta.steps.size(), duration::max()); BenchmarkTimer timer(budget); @@ -214,47 +232,98 @@ void benchmark(const vespalib::string &expr, std::vector<Optimize> list) { benchmark_results.emplace_back(expr, std::move(cost_list)); } -TensorSpec perform_dot_product(const TensorSpec &a, const TensorSpec &b, const std::vector<vespalib::string> &dims) -{ - Stash stash; - auto lhs = value_from_spec(a, prod_factory); - auto rhs = value_from_spec(b, prod_factory); - auto res_type = ValueType::join(lhs->type(), rhs->type()).reduce(dims); - EXPECT_FALSE(res_type.is_error()); - UniversalDotProduct dot_product(res_type, - tensor_function::inject(lhs->type(), 0, stash), - tensor_function::inject(rhs->type(), 1, stash)); - auto my_op = dot_product.compile_self(prod_factory, stash); - InterpretedFunction::EvalSingle single(prod_factory, my_op); - return spec_from_value(single.eval(std::vector<Value::CREF>({*lhs,*rhs}))); +TEST(UniversalDotProductTest, test_select_cell_types) { + auto always = always_double; + EXPECT_EQ(always(0), CellType::DOUBLE); + EXPECT_EQ(always(1), CellType::DOUBLE); + EXPECT_EQ(always(0), CellType::DOUBLE); + EXPECT_EQ(always(1), CellType::DOUBLE); + for (CellType lct: CellTypeUtils::list_types()) { + auto sel1 = select(lct); + EXPECT_EQ(sel1(0), lct); + EXPECT_EQ(sel1(1), lct); + EXPECT_EQ(sel1(0), lct); + EXPECT_EQ(sel1(1), lct); + for (CellType rct: CellTypeUtils::list_types()) { + auto sel2 = select(lct, rct); + EXPECT_EQ(sel2(0), lct); + EXPECT_EQ(sel2(1), rct); + EXPECT_EQ(sel2(0), lct); + EXPECT_EQ(sel2(1), rct); + } + } } -TEST(UniversalDotProductTest, generic_dot_product_works_for_various_cases) { - size_t test_cases = 0; - ASSERT_TRUE((layouts.size() % 2) == 0); - for (size_t i = 0; i < layouts.size(); i += 2) { - const auto &l = layouts[i]; - const auto &r = layouts[i+1]; - for 
(CellType lct : CellTypeUtils::list_types()) { - auto lhs = l.cpy().cells(lct); - if (lhs.bad_scalar()) continue; - for (CellType rct : CellTypeUtils::list_types()) { - auto rhs = r.cpy().cells(rct); - if (rhs.bad_scalar()) continue; - for (const std::vector<vespalib::string> &dims: reductions) { - if (ValueType::join(lhs.type(), rhs.type()).reduce(dims).is_error()) continue; - ++test_cases; - SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.gen().to_string().c_str(), rhs.gen().to_string().c_str())); - auto expect = ReferenceOperations::reduce(ReferenceOperations::join(lhs, rhs, operation::Mul::f), Aggr::SUM, dims); - auto actual = perform_dot_product(lhs, rhs, dims); - // fprintf(stderr, "\n===\nLHS: %s\nRHS: %s\n===\nRESULT: %s\n===\n", lhs.gen().to_string().c_str(), rhs.gen().to_string().c_str(), actual.to_string().c_str()); - EXPECT_EQ(actual, expect); - } - } +TEST(UniversalDotProductTest, universal_dot_product_works_for_various_cases) { + // forward, distinct, single + verify("reduce(2.0*3.0, sum)"); + + for (CellType lct: CellTypeUtils::list_types()) { + for (CellType rct: CellTypeUtils::list_types()) { + auto sel2 = select(lct, rct); + // !forward, !distinct, !single + verify("reduce(a4_1x8*a2_1x8,sum,a,x)", sel2); + + // !forward, !distinct, single + verify("reduce(a4_1x8*a2_1x8,sum,a)", sel2); + + // !forward, distinct, !single + verify("reduce(a4_1x8*a2_1x8,sum,x)", sel2); + + // forward, !distinct, !single + verify("reduce(a4_1x8*b2_1x8,sum,b,x)", sel2); + + // forward, !distinct, single + verify("reduce(a4_1x8*b2_1x8,sum,b)", sel2); + + // forward, distinct, !single + verify("reduce(a4_1x8*x8,sum,x)", sel2); } } - EXPECT_GT(test_cases, 500); - fprintf(stderr, "total test cases run: %zu\n", test_cases); + // !forward, distinct, single + + // This case is not possible since 'distinct' implies '!single' as + // long as we reduce anything. The only expression allowed to + // reduce nothing is the scalar case. 
+} + +TEST(UniversalDotProductTest, universal_dot_product_works_with_complex_dimension_nesting) { + verify("reduce(a4_1b4_1c4_1x4y3z2w1*a2_1c1_1x4z2,sum,b,c,x)"); +} + +TEST(UniversalDotProductTest, forwarding_empty_result) { + verify("reduce(x0_0*y8_1,sum,y)"); + verify("reduce(x8_1*y0_0,sum,y)"); + verify("reduce(x0_0z16*y8_1z16,sum,y)"); + verify("reduce(x8_1z16*y0_0z16,sum,y)"); +} + +TEST(UniversalDotProductTest, nonforwarding_empty_result) { + verify("reduce(x0_0y8*x1_1y8,sum,y)"); + verify("reduce(x1_1y8*x0_0y8,sum,y)"); + verify("reduce(x1_7y8z2*x1_1y8z2,sum,y)"); +} + +TEST(UniversalDotProductTest, forwarding_expanding_reduce) { + verify("reduce(5.0*y0_0,sum,y)"); + verify("reduce(5.0*y0_0z1,sum,y)"); + verify("reduce(z16*y0_0,sum,y)"); + verify("reduce(x1_1*y0_0,sum,y)"); + verify("reduce(x0_0*y1_1,sum,y)"); + verify("reduce(x1_1z16*y0_0,sum,y)"); + verify("reduce(x0_0z16*y1_1,sum,y)"); +} + +TEST(UniversalDotProductTest, nonforwarding_expanding_reduce) { + verify("reduce(x0_0*y1_1,sum,x,y)"); + verify("reduce(x1_1*y0_0,sum,x,y)"); + verify("reduce(x1_1*y0_0z1,sum,x,y)"); + verify("reduce(x0_0y16*x1_1y16,sum,x)"); + verify("reduce(x1_1y16*x0_0y16,sum,x)"); + verify("reduce(x1_7*y1_1,sum,x,y)"); + verify("reduce(x1_1*y1_7,sum,x,y)"); + verify("reduce(x1_7y16*x1_1y16,sum,x)"); + verify("reduce(x1_1y16*x1_7y16,sum,x)"); } TEST(UniversalDotProductTest, bench_vector_dot_product) { @@ -264,8 +333,11 @@ TEST(UniversalDotProductTest, bench_vector_dot_product) { } auto optimize_list = std::vector<Optimize>({baseline(), with_universal(), universal_only()}); - benchmark("reduce(1.0*2.0,sum)", optimize_list); + benchmark("reduce(2.0*3.0,sum)", optimize_list); benchmark("reduce(5.0*x128,sum,x)", optimize_list); + benchmark("reduce(a1*x128,sum,x)", optimize_list); + benchmark("reduce(a8*x128,sum,x)", optimize_list); + benchmark("reduce(a1_1b8*x128,sum,x)", optimize_list); benchmark("reduce(x16*x16,sum,x)", optimize_list); benchmark("reduce(x768*x768,sum,x)", 
optimize_list); benchmark("reduce(y64*x8y64,sum,x,y)", optimize_list); @@ -284,8 +356,6 @@ TEST(UniversalDotProductTest, bench_vector_dot_product) { benchmark("reduce(b64_1x8y128*x8y128,sum,y)", optimize_list); benchmark("reduce(b64_1x128*x128,sum,b,x)", optimize_list); benchmark("reduce(a1_1x128*a2_1b64_1x128,sum,a,x)", optimize_list); - benchmark("reduce(x0_0*y8_1,sum,y)", optimize_list); - benchmark("reduce(x8_1*y0_0,sum,y)", optimize_list); size_t max_expr_size = 0; for (const auto &[expr, cost_list]: benchmark_results) { @@ -347,5 +417,7 @@ int main(int argc, char **argv) { --argc; } ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + int result = RUN_ALL_TESTS(); + fprintf(stderr, "verify called %zu times\n", verify_cnt); + return result; } |