diff options
author | Håvard Pettersen <havardpe@yahooinc.com> | 2023-08-25 09:30:20 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@yahooinc.com> | 2023-08-29 13:56:08 +0000 |
commit | 82ef4f4dfde0a5dc0ec722b54d66b70012fa2966 (patch) | |
tree | d3a8370f33c7c0f50a019cb2af726ec94224a953 /eval/src | |
parent | eb72c809f9ef74d8e300f21321486e8fe4f6b527 (diff) |
added universal dot product
note that optimization is not yet active in production
Diffstat (limited to 'eval/src')
8 files changed, 249 insertions, 1 deletions
diff --git a/eval/src/tests/instruction/universal_dot_product/CMakeLists.txt b/eval/src/tests/instruction/universal_dot_product/CMakeLists.txt new file mode 100644 index 00000000000..19023b48d04 --- /dev/null +++ b/eval/src/tests/instruction/universal_dot_product/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_universal_dot_product_test_app TEST + SOURCES + universal_dot_product_test.cpp + DEPENDS + vespaeval + GTest::GTest +) +vespa_add_test(NAME eval_universal_dot_product_test_app COMMAND eval_universal_dot_product_test_app) diff --git a/eval/src/tests/instruction/universal_dot_product/universal_dot_product_test.cpp b/eval/src/tests/instruction/universal_dot_product/universal_dot_product_test.cpp new file mode 100644 index 00000000000..3f60ad69b86 --- /dev/null +++ b/eval/src/tests/instruction/universal_dot_product/universal_dot_product_test.cpp @@ -0,0 +1,89 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/eval/eval/fast_value.h> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/tensor_function.h> +#include <vespa/eval/instruction/universal_dot_product.h> +#include <vespa/eval/eval/test/reference_operations.h> +#include <vespa/eval/eval/test/gen_spec.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/gtest/gtest.h> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::test; + +using vespalib::make_string_short::fmt; + +const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get(); + +GenSpec::seq_t N_16ths = [] (size_t i) noexcept { return (i + 33.0) / 16.0; }; + +GenSpec G() { return GenSpec().seq(N_16ths); } + +const std::vector<GenSpec> layouts = { + G(), G(), + G().idx("x", 5), G().idx("x", 5), + G().idx("x", 5), G().idx("y", 5), + G().idx("x", 5), G().idx("x", 5).idx("y", 5), + G().idx("y", 3), G().idx("x", 2).idx("z", 3), + G().idx("x", 3).idx("y", 5), G().idx("y", 5).idx("z", 7), + G().map("x", {"a","b","c"}), G().map("x", {"a","b","c"}), + G().map("x", {"a","b","c"}), G().map("x", {"a","b"}), + G().map("x", {"a","b","c"}), G().map("y", {"foo","bar","baz"}), + G().map("x", {"a","b","c"}), G().map("x", {"a","b","c"}).map("y", {"foo","bar","baz"}), + G().map("x", {"a","b"}).map("y", {"foo","bar","baz"}), G().map("x", {"a","b","c"}).map("y", {"foo","bar"}), + G().map("x", {"a","b"}).map("y", {"foo","bar","baz"}), G().map("y", {"foo","bar"}).map("z", {"i","j","k","l"}), + G().idx("x", 3).map("y", {"foo", "bar"}), G().map("y", {"foo", "bar"}).idx("z", 7), + G().map("x", {"a","b","c"}).idx("y", 5), G().idx("y", 5).map("z", {"i","j","k","l"}) +}; + +const std::vector<std::vector<vespalib::string>> reductions = { + {}, {"x"}, {"y"}, {"z"}, {"x", "y"}, {"x", "z"}, {"y", "z"} +}; + +TensorSpec perform_dot_product(const TensorSpec &a, const TensorSpec &b, const std::vector<vespalib::string> &dims) +{ + Stash stash; + auto lhs = value_from_spec(a, prod_factory); + auto rhs = value_from_spec(b, prod_factory); + auto res_type = ValueType::join(lhs->type(), rhs->type()).reduce(dims); + EXPECT_FALSE(res_type.is_error()); + UniversalDotProduct dot_product(res_type, + tensor_function::inject(lhs->type(), 0, stash), + tensor_function::inject(rhs->type(), 1, stash)); + auto my_op = dot_product.compile_self(prod_factory, stash); + InterpretedFunction::EvalSingle single(prod_factory, my_op); + return spec_from_value(single.eval(std::vector<Value::CREF>({*lhs,*rhs}))); +} + +TEST(UniversalDotProductTest, generic_dot_product_works_for_various_cases) { + size_t test_cases = 0; + ASSERT_TRUE((layouts.size() % 2) == 0); + for (size_t i = 0; i < layouts.size(); i += 2) { + const auto &l = layouts[i]; + const auto &r = layouts[i+1]; + for (CellType lct : CellTypeUtils::list_types()) { + auto lhs = l.cpy().cells(lct); + if (lhs.bad_scalar()) continue; + for (CellType rct : CellTypeUtils::list_types()) { + auto rhs = r.cpy().cells(rct); + if (rhs.bad_scalar()) continue; + for (const std::vector<vespalib::string> &dims: reductions) { + if (ValueType::join(lhs.type(), rhs.type()).reduce(dims).is_error()) continue; + ++test_cases; + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.gen().to_string().c_str(), rhs.gen().to_string().c_str())); + auto expect = ReferenceOperations::reduce(ReferenceOperations::join(lhs, rhs, operation::Mul::f), Aggr::SUM, dims); + auto actual = perform_dot_product(lhs, rhs, dims); + // fprintf(stderr, "\n===\nLHS: %s\nRHS: %s\n===\nRESULT: %s\n===\n", lhs.gen().to_string().c_str(), rhs.gen().to_string().c_str(), actual.to_string().c_str()); + EXPECT_EQ(actual, expect); + } + } + } + } + EXPECT_GT(test_cases, 500); + fprintf(stderr, "total test cases run: %zu\n", test_cases); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp index 1d0be47a309..3d9152d6b80 100644 --- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp +++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp @@ -37,6 +37,7 @@ #include <vespa/eval/instruction/mixed_l2_distance.h> #include <vespa/eval/instruction/simple_join_count.h> #include <vespa/eval/instruction/mapped_lookup.h> +#include <vespa/eval/instruction/universal_dot_product.h> #include <vespa/log/log.h> LOG_SETUP(".eval.eval.optimize_tensor_function"); @@ -88,6 +89,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const Te child.set(DenseHammingDistance::optimize(child.get(), stash)); child.set(SimpleJoinCount::optimize(child.get(), stash)); child.set(MappedLookup::optimize(child.get(), stash)); + // child.set(UniversalDotProduct::optimize(child.get(), stash)); }); run_optimize_pass(root, [&stash](const Child &child) { diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt index 5fea5052eba..006a363a64f 100644 --- a/eval/src/vespa/eval/instruction/CMakeLists.txt +++ b/eval/src/vespa/eval/instruction/CMakeLists.txt @@ -49,6 +49,7 @@ vespa_add_library(eval_instruction OBJECT sparse_no_overlap_join_function.cpp sparse_singledim_lookup.cpp sum_max_dot_product_function.cpp + universal_dot_product.cpp unpack_bits_function.cpp vector_from_doubles_function.cpp ) diff --git a/eval/src/vespa/eval/instruction/sparse_join_reduce_plan.cpp b/eval/src/vespa/eval/instruction/sparse_join_reduce_plan.cpp index 806e24dd7df..00499e7f997 100644 --- a/eval/src/vespa/eval/instruction/sparse_join_reduce_plan.cpp +++ b/eval/src/vespa/eval/instruction/sparse_join_reduce_plan.cpp @@ -153,7 +153,7 @@ est_fun select_estimate(const std::vector<Est> &est_list) { } // <unnamed> SparseJoinReducePlan::SparseJoinReducePlan(const ValueType &lhs, const ValueType &rhs, const ValueType &res) - : _in_lhs(), _in_rhs(), _in_res(), _estimate() + : _in_lhs(), _in_rhs(), _in_res(), _res_dims(0), _estimate() { auto dims = merge(lhs.mapped_dimensions(), rhs.mapped_dimensions()); assert(count_only_in_second(dims, res.mapped_dimensions()) == 0); @@ -162,6 +162,9 @@ SparseJoinReducePlan::SparseJoinReducePlan(const ValueType &lhs, const ValueType _in_lhs.push_back(lhs.has_dimension(dim.name)); _in_rhs.push_back(rhs.has_dimension(dim.name)); _in_res.push_back(res.has_dimension(dim.name)); + if (_in_res.back()) { + ++_res_dims; + } update_est_list(est_list, _in_lhs.back(), _in_rhs.back(), _in_res.back()); } _estimate = select_estimate(est_list); diff --git a/eval/src/vespa/eval/instruction/sparse_join_reduce_plan.h b/eval/src/vespa/eval/instruction/sparse_join_reduce_plan.h index 8864c56887a..c93bf46e2dc 100644 --- a/eval/src/vespa/eval/instruction/sparse_join_reduce_plan.h +++ b/eval/src/vespa/eval/instruction/sparse_join_reduce_plan.h @@ -21,11 +21,14 @@ private: BitList _in_lhs; BitList _in_rhs; BitList _in_res; + size_t _res_dims; est_fun_t _estimate; public: SparseJoinReducePlan(const ValueType &lhs, const ValueType &rhs, const ValueType &res); ~SparseJoinReducePlan(); + size_t res_dims() const { return _res_dims; } + bool distinct_result() const { return (_res_dims == _in_res.size()); } size_t estimate_result_size(const Value::Index &lhs, const Value::Index &rhs) const { return _estimate(lhs.size(), rhs.size()); } diff --git a/eval/src/vespa/eval/instruction/universal_dot_product.cpp b/eval/src/vespa/eval/instruction/universal_dot_product.cpp new file mode 100644 index 00000000000..79a94d862bf --- /dev/null +++ b/eval/src/vespa/eval/instruction/universal_dot_product.cpp @@ -0,0 +1,119 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "universal_dot_product.h" +#include "sparse_join_reduce_plan.h" +#include "dense_join_reduce_plan.h" +#include <vespa/eval/eval/inline_operation.h> +#include <vespa/eval/eval/fast_value.hpp> + +namespace vespalib::eval { + +using namespace tensor_function; +using namespace instruction; +using namespace operation; + +namespace { + +struct UniversalDotProductParam { + ValueType res_type; + SparseJoinReducePlan sparse_plan; + DenseJoinReducePlan dense_plan; + size_t vector_size; + + UniversalDotProductParam(const ValueType &res_type_in, + const ValueType &lhs_type, + const ValueType &rhs_type) + : res_type(res_type_in), + sparse_plan(lhs_type, rhs_type, res_type), + dense_plan(lhs_type, rhs_type, res_type), + vector_size(1) + { + if (!dense_plan.loop_cnt.empty() && + dense_plan.lhs_stride.back() == 1 && + dense_plan.rhs_stride.back() == 1 && + dense_plan.res_stride.back() == 0) + { + vector_size = dense_plan.loop_cnt.back(); + dense_plan.loop_cnt.pop_back(); + dense_plan.lhs_stride.pop_back(); + dense_plan.rhs_stride.pop_back(); + dense_plan.res_stride.pop_back(); + } + } +}; + +template <typename LCT, typename RCT, typename OCT> +void my_universal_dot_product_op(InterpretedFunction::State &state, uint64_t param_in) { + using dot_product = DotProduct<LCT,RCT>; + const auto ¶m = unwrap_param<UniversalDotProductParam>(param_in); + const auto &lhs = state.peek(1); + const auto &rhs = state.peek(0); + const auto &lhs_index = lhs.index(); + const auto &rhs_index = rhs.index(); + const auto lhs_cells = lhs.cells().typify<LCT>(); + const auto rhs_cells = rhs.cells().typify<RCT>(); + auto &stored_result = state.stash.create<std::unique_ptr<FastValue<OCT,true>>>( + std::make_unique<FastValue<OCT,true>>(param.res_type, param.sparse_plan.res_dims(), param.dense_plan.res_size, + param.sparse_plan.estimate_result_size(lhs_index, rhs_index))); + auto &result = *(stored_result.get()); + ArrayRef<OCT> dst; + auto dense_fun = [&](size_t lhs_idx, size_t rhs_idx, size_t dst_idx) { + dst[dst_idx] += dot_product::apply(&lhs_cells[lhs_idx], &rhs_cells[rhs_idx], param.vector_size); + }; + auto sparse_fun = [&](size_t lhs_subspace, size_t rhs_subspace, ConstArrayRef<string_id> res_addr) { + bool first; + std::tie(dst, first) = result.insert_subspace(res_addr); + if (first) { + std::fill(dst.begin(), dst.end(), OCT{}); + } + param.dense_plan.execute(lhs_subspace * param.dense_plan.lhs_size, + rhs_subspace * param.dense_plan.rhs_size, + 0, dense_fun); + }; + param.sparse_plan.execute(lhs_index, rhs_index, sparse_fun); + state.pop_pop_push(result); +} + +struct SelectUniversalDotProduct { + template <typename LCM, typename RCM, typename SCALAR> static auto invoke(const UniversalDotProductParam &) { + constexpr CellMeta ocm = CellMeta::join(LCM::value, RCM::value).reduce(SCALAR::value); + using LCT = CellValueType<LCM::value.cell_type>; + using RCT = CellValueType<RCM::value.cell_type>; + using OCT = CellValueType<ocm.cell_type>; + return my_universal_dot_product_op<LCT,RCT,OCT>; + } +}; + +} // namespace <unnamed> + +UniversalDotProduct::UniversalDotProduct(const ValueType &res_type_in, + const TensorFunction &lhs_in, + const TensorFunction &rhs_in) + : tensor_function::Op2(res_type_in, lhs_in, rhs_in) +{ +} + +InterpretedFunction::Instruction +UniversalDotProduct::compile_self(const ValueBuilderFactory &, Stash &stash) const +{ + auto ¶m = stash.create<UniversalDotProductParam>(result_type(), lhs().result_type(), rhs().result_type()); + using MyTypify = TypifyValue<TypifyCellMeta,TypifyBool>; + auto op = typify_invoke<3,MyTypify,SelectUniversalDotProduct>(lhs().result_type().cell_meta(), + rhs().result_type().cell_meta(), + result_type().cell_meta().is_scalar, + param); + return InterpretedFunction::Instruction(op, wrap_param<UniversalDotProductParam>(param)); +} + +const TensorFunction & +UniversalDotProduct::optimize(const TensorFunction &expr, Stash &stash) +{ + if (auto reduce = as<Reduce>(expr); reduce && (reduce->aggr() == Aggr::SUM)) { + if (auto join = as<Join>(reduce->child()); join && (join->function() == Mul::f)) { + return stash.create<UniversalDotProduct>(expr.result_type(), join->lhs(), join->rhs()); + } + } + return expr; +} + +} // namespace diff --git a/eval/src/vespa/eval/instruction/universal_dot_product.h b/eval/src/vespa/eval/instruction/universal_dot_product.h new file mode 100644 index 00000000000..ac5aa157f17 --- /dev/null +++ b/eval/src/vespa/eval/instruction/universal_dot_product.h @@ -0,0 +1,22 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/tensor_function.h> + +namespace vespalib::eval { + +/** + * Tensor function performing dot product compatible operations + * (join:mul, reduce:sum) on values of arbitrary complexity. + **/ +class UniversalDotProduct : public tensor_function::Op2 +{ +public: + UniversalDotProduct(const ValueType &res_type, const TensorFunction &lhs, const TensorFunction &rhs); + InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override; + bool result_is_mutable() const override { return true; } + static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash); +}; + +} // namespace |