author    Håvard Pettersen <havardpe@yahooinc.com>  2023-08-25 09:30:20 +0000
committer Håvard Pettersen <havardpe@yahooinc.com>  2023-08-29 13:56:08 +0000
commit    82ef4f4dfde0a5dc0ec722b54d66b70012fa2966 (patch)
tree      d3a8370f33c7c0f50a019cb2af726ec94224a953 /eval
parent    eb72c809f9ef74d8e300f21321486e8fe4f6b527 (diff)
added universal dot product
note that the optimization is not yet active in production
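
For orientation, a hedged illustration of the expression family the new instruction targets: any sum-reduction applied directly to an elementwise product, over any mix of dense, sparse, and mixed dimensions. The concrete expressions below are illustrative assumptions, not taken from the patch; Function::parse is the regular eval API.

// minimal sketch: expressions of the shape reduce(a*b, sum, ...) are
// "dot product compatible" in the sense used by this commit
#include <vespa/eval/eval/function.h>

int main() {
    using vespalib::eval::Function;
    auto full    = Function::parse("reduce(a*b,sum)");     // reduce every dimension
    auto partial = Function::parse("reduce(a*b,sum,y)");   // reduce a single dimension
    auto multi   = Function::parse("reduce(a*b,sum,x,y)"); // reduce several dimensions
    return (full->has_error() || partial->has_error() || multi->has_error()) ? 1 : 0;
}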
Diffstat (limited to 'eval')
-rw-r--r--  eval/CMakeLists.txt                                                             |   1
-rw-r--r--  eval/src/tests/instruction/universal_dot_product/CMakeLists.txt                 |   9
-rw-r--r--  eval/src/tests/instruction/universal_dot_product/universal_dot_product_test.cpp |  89
-rw-r--r--  eval/src/vespa/eval/eval/optimize_tensor_function.cpp                           |   2
-rw-r--r--  eval/src/vespa/eval/instruction/CMakeLists.txt                                  |   1
-rw-r--r--  eval/src/vespa/eval/instruction/sparse_join_reduce_plan.cpp                     |   5
-rw-r--r--  eval/src/vespa/eval/instruction/sparse_join_reduce_plan.h                       |   3
-rw-r--r--  eval/src/vespa/eval/instruction/universal_dot_product.cpp                       | 119
-rw-r--r--  eval/src/vespa/eval/instruction/universal_dot_product.h                         |  22
9 files changed, 250 insertions(+), 1 deletion(-)
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index 9b6bcc990fc..34f1cc6788d 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -89,6 +89,7 @@ vespa_define_module(
src/tests/instruction/sparse_no_overlap_join_function
src/tests/instruction/sparse_singledim_lookup
src/tests/instruction/sum_max_dot_product_function
+ src/tests/instruction/universal_dot_product
src/tests/instruction/unpack_bits_function
src/tests/instruction/vector_from_doubles_function
src/tests/streamed/value
diff --git a/eval/src/tests/instruction/universal_dot_product/CMakeLists.txt b/eval/src/tests/instruction/universal_dot_product/CMakeLists.txt
new file mode 100644
index 00000000000..19023b48d04
--- /dev/null
+++ b/eval/src/tests/instruction/universal_dot_product/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_universal_dot_product_test_app TEST
+ SOURCES
+ universal_dot_product_test.cpp
+ DEPENDS
+ vespaeval
+ GTest::GTest
+)
+vespa_add_test(NAME eval_universal_dot_product_test_app COMMAND eval_universal_dot_product_test_app)
diff --git a/eval/src/tests/instruction/universal_dot_product/universal_dot_product_test.cpp b/eval/src/tests/instruction/universal_dot_product/universal_dot_product_test.cpp
new file mode 100644
index 00000000000..3f60ad69b86
--- /dev/null
+++ b/eval/src/tests/instruction/universal_dot_product/universal_dot_product_test.cpp
@@ -0,0 +1,89 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/eval/eval/interpreted_function.h>
+#include <vespa/eval/eval/tensor_function.h>
+#include <vespa/eval/instruction/universal_dot_product.h>
+#include <vespa/eval/eval/test/reference_operations.h>
+#include <vespa/eval/eval/test/gen_spec.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib;
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+using vespalib::make_string_short::fmt;
+
+const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get();
+
+GenSpec::seq_t N_16ths = [] (size_t i) noexcept { return (i + 33.0) / 16.0; };
+
+GenSpec G() { return GenSpec().seq(N_16ths); }
+
+const std::vector<GenSpec> layouts = {
+ G(), G(),
+ G().idx("x", 5), G().idx("x", 5),
+ G().idx("x", 5), G().idx("y", 5),
+ G().idx("x", 5), G().idx("x", 5).idx("y", 5),
+ G().idx("y", 3), G().idx("x", 2).idx("z", 3),
+ G().idx("x", 3).idx("y", 5), G().idx("y", 5).idx("z", 7),
+ G().map("x", {"a","b","c"}), G().map("x", {"a","b","c"}),
+ G().map("x", {"a","b","c"}), G().map("x", {"a","b"}),
+ G().map("x", {"a","b","c"}), G().map("y", {"foo","bar","baz"}),
+ G().map("x", {"a","b","c"}), G().map("x", {"a","b","c"}).map("y", {"foo","bar","baz"}),
+ G().map("x", {"a","b"}).map("y", {"foo","bar","baz"}), G().map("x", {"a","b","c"}).map("y", {"foo","bar"}),
+ G().map("x", {"a","b"}).map("y", {"foo","bar","baz"}), G().map("y", {"foo","bar"}).map("z", {"i","j","k","l"}),
+ G().idx("x", 3).map("y", {"foo", "bar"}), G().map("y", {"foo", "bar"}).idx("z", 7),
+ G().map("x", {"a","b","c"}).idx("y", 5), G().idx("y", 5).map("z", {"i","j","k","l"})
+};
+
+const std::vector<std::vector<vespalib::string>> reductions = {
+ {}, {"x"}, {"y"}, {"z"}, {"x", "y"}, {"x", "z"}, {"y", "z"}
+};
+
+TensorSpec perform_dot_product(const TensorSpec &a, const TensorSpec &b, const std::vector<vespalib::string> &dims)
+{
+ Stash stash;
+ auto lhs = value_from_spec(a, prod_factory);
+ auto rhs = value_from_spec(b, prod_factory);
+ auto res_type = ValueType::join(lhs->type(), rhs->type()).reduce(dims);
+ EXPECT_FALSE(res_type.is_error());
+ UniversalDotProduct dot_product(res_type,
+ tensor_function::inject(lhs->type(), 0, stash),
+ tensor_function::inject(rhs->type(), 1, stash));
+ auto my_op = dot_product.compile_self(prod_factory, stash);
+ InterpretedFunction::EvalSingle single(prod_factory, my_op);
+ return spec_from_value(single.eval(std::vector<Value::CREF>({*lhs,*rhs})));
+}
+
+TEST(UniversalDotProductTest, generic_dot_product_works_for_various_cases) {
+ size_t test_cases = 0;
+ ASSERT_TRUE((layouts.size() % 2) == 0);
+ for (size_t i = 0; i < layouts.size(); i += 2) {
+ const auto &l = layouts[i];
+ const auto &r = layouts[i+1];
+ for (CellType lct : CellTypeUtils::list_types()) {
+ auto lhs = l.cpy().cells(lct);
+ if (lhs.bad_scalar()) continue;
+ for (CellType rct : CellTypeUtils::list_types()) {
+ auto rhs = r.cpy().cells(rct);
+ if (rhs.bad_scalar()) continue;
+ for (const std::vector<vespalib::string> &dims: reductions) {
+ if (ValueType::join(lhs.type(), rhs.type()).reduce(dims).is_error()) continue;
+ ++test_cases;
+ SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.gen().to_string().c_str(), rhs.gen().to_string().c_str()));
+ auto expect = ReferenceOperations::reduce(ReferenceOperations::join(lhs, rhs, operation::Mul::f), Aggr::SUM, dims);
+ auto actual = perform_dot_product(lhs, rhs, dims);
+ // fprintf(stderr, "\n===\nLHS: %s\nRHS: %s\n===\nRESULT: %s\n===\n", lhs.gen().to_string().c_str(), rhs.gen().to_string().c_str(), actual.to_string().c_str());
+ EXPECT_EQ(actual, expect);
+ }
+ }
+ }
+ }
+ EXPECT_GT(test_cases, 500);
+ fprintf(stderr, "total test cases run: %zu\n", test_cases);
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
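
For a single concrete case, a hedged usage sketch of the perform_dot_product helper above, assuming the same test fixtures (GenSpec converts implicitly to TensorSpec in the eval test utilities, as the test loop itself relies on):

// sketch: dot product of tensor(x[3],y[5]) and tensor(y[5],z[7]), reducing y;
// the expected result has type tensor(x[3],z[7])
auto lhs = GenSpec().idx("x", 3).idx("y", 5);
auto rhs = GenSpec().idx("y", 5).idx("z", 7);
auto expect = ReferenceOperations::reduce(
        ReferenceOperations::join(lhs, rhs, operation::Mul::f), Aggr::SUM, {"y"});
auto actual = perform_dot_product(lhs, rhs, {"y"});
EXPECT_EQ(actual, expect);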
diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
index 1d0be47a309..3d9152d6b80 100644
--- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
@@ -37,6 +37,7 @@
#include <vespa/eval/instruction/mixed_l2_distance.h>
#include <vespa/eval/instruction/simple_join_count.h>
#include <vespa/eval/instruction/mapped_lookup.h>
+#include <vespa/eval/instruction/universal_dot_product.h>
#include <vespa/log/log.h>
LOG_SETUP(".eval.eval.optimize_tensor_function");
@@ -88,6 +89,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const Te
child.set(DenseHammingDistance::optimize(child.get(), stash));
child.set(SimpleJoinCount::optimize(child.get(), stash));
child.set(MappedLookup::optimize(child.get(), stash));
+ // child.set(UniversalDotProduct::optimize(child.get(), stash));
});
run_optimize_pass(root, [&stash](const Child &child)
{
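
Note that the registration for the new optimization is wired in but deliberately left commented out, matching the commit message ("not yet active in production"): the include and the pass hook are in place, so enabling it later is a one-line change.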
diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt
index 5fea5052eba..006a363a64f 100644
--- a/eval/src/vespa/eval/instruction/CMakeLists.txt
+++ b/eval/src/vespa/eval/instruction/CMakeLists.txt
@@ -49,6 +49,7 @@ vespa_add_library(eval_instruction OBJECT
sparse_no_overlap_join_function.cpp
sparse_singledim_lookup.cpp
sum_max_dot_product_function.cpp
+ universal_dot_product.cpp
unpack_bits_function.cpp
vector_from_doubles_function.cpp
)
diff --git a/eval/src/vespa/eval/instruction/sparse_join_reduce_plan.cpp b/eval/src/vespa/eval/instruction/sparse_join_reduce_plan.cpp
index 806e24dd7df..00499e7f997 100644
--- a/eval/src/vespa/eval/instruction/sparse_join_reduce_plan.cpp
+++ b/eval/src/vespa/eval/instruction/sparse_join_reduce_plan.cpp
@@ -153,7 +153,7 @@ est_fun select_estimate(const std::vector<Est> &est_list) {
} // <unnamed>
SparseJoinReducePlan::SparseJoinReducePlan(const ValueType &lhs, const ValueType &rhs, const ValueType &res)
- : _in_lhs(), _in_rhs(), _in_res(), _estimate()
+ : _in_lhs(), _in_rhs(), _in_res(), _res_dims(0), _estimate()
{
auto dims = merge(lhs.mapped_dimensions(), rhs.mapped_dimensions());
assert(count_only_in_second(dims, res.mapped_dimensions()) == 0);
@@ -162,6 +162,9 @@ SparseJoinReducePlan::SparseJoinReducePlan(const ValueType &lhs, const ValueType
_in_lhs.push_back(lhs.has_dimension(dim.name));
_in_rhs.push_back(rhs.has_dimension(dim.name));
_in_res.push_back(res.has_dimension(dim.name));
+ if (_in_res.back()) {
+ ++_res_dims;
+ }
update_est_list(est_list, _in_lhs.back(), _in_rhs.back(), _in_res.back());
}
_estimate = select_estimate(est_list);
diff --git a/eval/src/vespa/eval/instruction/sparse_join_reduce_plan.h b/eval/src/vespa/eval/instruction/sparse_join_reduce_plan.h
index 8864c56887a..c93bf46e2dc 100644
--- a/eval/src/vespa/eval/instruction/sparse_join_reduce_plan.h
+++ b/eval/src/vespa/eval/instruction/sparse_join_reduce_plan.h
@@ -21,11 +21,14 @@ private:
BitList _in_lhs;
BitList _in_rhs;
BitList _in_res;
+ size_t _res_dims;
est_fun_t _estimate;
public:
SparseJoinReducePlan(const ValueType &lhs, const ValueType &rhs, const ValueType &res);
~SparseJoinReducePlan();
+ size_t res_dims() const { return _res_dims; }
+ bool distinct_result() const { return (_res_dims == _in_res.size()); }
size_t estimate_result_size(const Value::Index &lhs, const Value::Index &rhs) const {
return _estimate(lhs.size(), rhs.size());
}
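
A hedged worked example of the two new accessors (the tensor types are assumptions chosen for illustration; ValueType::from_spec is the regular eval API):

// merged mapped dimensions are {x,y,z}; y is reduced away,
// so only two of the three survive into the result
SparseJoinReducePlan plan(ValueType::from_spec("tensor(x{},y{})"),   // lhs
                          ValueType::from_spec("tensor(y{},z{})"),   // rhs
                          ValueType::from_spec("tensor(x{},z{})"));  // res
// plan.res_dims() == 2
// plan.distinct_result() == false: since y was reduced, several joined
// address pairs may land on the same result address; keeping all merged
// dimensions (res == tensor(x{},y{},z{})) would make it true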
diff --git a/eval/src/vespa/eval/instruction/universal_dot_product.cpp b/eval/src/vespa/eval/instruction/universal_dot_product.cpp
new file mode 100644
index 00000000000..79a94d862bf
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/universal_dot_product.cpp
@@ -0,0 +1,119 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "universal_dot_product.h"
+#include "sparse_join_reduce_plan.h"
+#include "dense_join_reduce_plan.h"
+#include <vespa/eval/eval/inline_operation.h>
+#include <vespa/eval/eval/fast_value.hpp>
+
+namespace vespalib::eval {
+
+using namespace tensor_function;
+using namespace instruction;
+using namespace operation;
+
+namespace {
+
+struct UniversalDotProductParam {
+ ValueType res_type;
+ SparseJoinReducePlan sparse_plan;
+ DenseJoinReducePlan dense_plan;
+ size_t vector_size;
+
+ UniversalDotProductParam(const ValueType &res_type_in,
+ const ValueType &lhs_type,
+ const ValueType &rhs_type)
+ : res_type(res_type_in),
+ sparse_plan(lhs_type, rhs_type, res_type),
+ dense_plan(lhs_type, rhs_type, res_type),
+ vector_size(1)
+ {
+ if (!dense_plan.loop_cnt.empty() &&
+ dense_plan.lhs_stride.back() == 1 &&
+ dense_plan.rhs_stride.back() == 1 &&
+ dense_plan.res_stride.back() == 0)
+ {
+ vector_size = dense_plan.loop_cnt.back();
+ dense_plan.loop_cnt.pop_back();
+ dense_plan.lhs_stride.pop_back();
+ dense_plan.rhs_stride.pop_back();
+ dense_plan.res_stride.pop_back();
+ }
+ }
+};
+
+template <typename LCT, typename RCT, typename OCT>
+void my_universal_dot_product_op(InterpretedFunction::State &state, uint64_t param_in) {
+ using dot_product = DotProduct<LCT,RCT>;
+ const auto &param = unwrap_param<UniversalDotProductParam>(param_in);
+ const auto &lhs = state.peek(1);
+ const auto &rhs = state.peek(0);
+ const auto &lhs_index = lhs.index();
+ const auto &rhs_index = rhs.index();
+ const auto lhs_cells = lhs.cells().typify<LCT>();
+ const auto rhs_cells = rhs.cells().typify<RCT>();
+ auto &stored_result = state.stash.create<std::unique_ptr<FastValue<OCT,true>>>(
+ std::make_unique<FastValue<OCT,true>>(param.res_type, param.sparse_plan.res_dims(), param.dense_plan.res_size,
+ param.sparse_plan.estimate_result_size(lhs_index, rhs_index)));
+ auto &result = *(stored_result.get());
+ ArrayRef<OCT> dst;
+ auto dense_fun = [&](size_t lhs_idx, size_t rhs_idx, size_t dst_idx) {
+ dst[dst_idx] += dot_product::apply(&lhs_cells[lhs_idx], &rhs_cells[rhs_idx], param.vector_size);
+ };
+ auto sparse_fun = [&](size_t lhs_subspace, size_t rhs_subspace, ConstArrayRef<string_id> res_addr) {
+ bool first;
+ std::tie(dst, first) = result.insert_subspace(res_addr);
+ if (first) {
+ std::fill(dst.begin(), dst.end(), OCT{});
+ }
+ param.dense_plan.execute(lhs_subspace * param.dense_plan.lhs_size,
+ rhs_subspace * param.dense_plan.rhs_size,
+ 0, dense_fun);
+ };
+ param.sparse_plan.execute(lhs_index, rhs_index, sparse_fun);
+ state.pop_pop_push(result);
+}
+
+struct SelectUniversalDotProduct {
+ template <typename LCM, typename RCM, typename SCALAR> static auto invoke(const UniversalDotProductParam &) {
+ constexpr CellMeta ocm = CellMeta::join(LCM::value, RCM::value).reduce(SCALAR::value);
+ using LCT = CellValueType<LCM::value.cell_type>;
+ using RCT = CellValueType<RCM::value.cell_type>;
+ using OCT = CellValueType<ocm.cell_type>;
+ return my_universal_dot_product_op<LCT,RCT,OCT>;
+ }
+};
+
+} // namespace <unnamed>
+
+UniversalDotProduct::UniversalDotProduct(const ValueType &res_type_in,
+ const TensorFunction &lhs_in,
+ const TensorFunction &rhs_in)
+ : tensor_function::Op2(res_type_in, lhs_in, rhs_in)
+{
+}
+
+InterpretedFunction::Instruction
+UniversalDotProduct::compile_self(const ValueBuilderFactory &, Stash &stash) const
+{
+ auto &param = stash.create<UniversalDotProductParam>(result_type(), lhs().result_type(), rhs().result_type());
+ using MyTypify = TypifyValue<TypifyCellMeta,TypifyBool>;
+ auto op = typify_invoke<3,MyTypify,SelectUniversalDotProduct>(lhs().result_type().cell_meta(),
+ rhs().result_type().cell_meta(),
+ result_type().cell_meta().is_scalar,
+ param);
+ return InterpretedFunction::Instruction(op, wrap_param<UniversalDotProductParam>(param));
+}
+
+const TensorFunction &
+UniversalDotProduct::optimize(const TensorFunction &expr, Stash &stash)
+{
+ if (auto reduce = as<Reduce>(expr); reduce && (reduce->aggr() == Aggr::SUM)) {
+ if (auto join = as<Join>(reduce->child()); join && (join->function() == Mul::f)) {
+ return stash.create<UniversalDotProduct>(expr.result_type(), join->lhs(), join->rhs());
+ }
+ }
+ return expr;
+}
+
+} // namespace
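
One detail of UniversalDotProductParam worth spelling out: the constructor peels off the innermost dense loop whenever both inputs step through it contiguously while the result stands still, turning that loop into a single vectorized dot product. A hedged sketch with assumed types (DenseJoinReducePlan and its public loop/stride vectors are used exactly as in the code above):

// assumed case: lhs = tensor(x[3],y[5]), rhs = tensor(y[5]), reduce y
auto lhs = ValueType::from_spec("tensor(x[3],y[5])");
auto rhs = ValueType::from_spec("tensor(y[5])");
auto res = ValueType::from_spec("tensor(x[3])");
DenseJoinReducePlan plan(lhs, rhs, res);
// innermost loop is y: loop_cnt.back() == 5, lhs_stride.back() == 1,
// rhs_stride.back() == 1, res_stride.back() == 0, so the param constructor
// pops that loop and sets vector_size = 5; each remaining x iteration then
// calls DotProduct::apply on 5 contiguous cells instead of accumulating
// element by element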
diff --git a/eval/src/vespa/eval/instruction/universal_dot_product.h b/eval/src/vespa/eval/instruction/universal_dot_product.h
new file mode 100644
index 00000000000..ac5aa157f17
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/universal_dot_product.h
@@ -0,0 +1,22 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_function.h>
+
+namespace vespalib::eval {
+
+/**
+ * Tensor function performing dot product compatible operations
+ * (join:mul, reduce:sum) on values of arbitrary complexity.
+ **/
+class UniversalDotProduct : public tensor_function::Op2
+{
+public:
+ UniversalDotProduct(const ValueType &res_type, const TensorFunction &lhs, const TensorFunction &rhs);
+ InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override;
+ bool result_is_mutable() const override { return true; }
+ static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash);
+};
+
+} // namespace
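
Finally, a hedged sketch of driving the optimize entry point by hand, mirroring how the commented-out production hook would invoke it (the tensor_function helpers are the same ones the test uses; the concrete types are assumptions):

#include <vespa/eval/eval/operation.h>
#include <vespa/eval/eval/tensor_function.h>
#include <vespa/eval/instruction/universal_dot_product.h>

using namespace vespalib;
using namespace vespalib::eval;

void sketch(Stash &stash) {
    // build reduce(a*b, sum, y) by hand over mixed tensors (assumed types)
    const auto &a = tensor_function::inject(ValueType::from_spec("tensor(x{},y[5])"), 0, stash);
    const auto &b = tensor_function::inject(ValueType::from_spec("tensor(y[5],z{})"), 1, stash);
    const auto &mul = tensor_function::join(a, b, operation::Mul::f, stash);
    const auto &sum_y = tensor_function::reduce(mul, Aggr::SUM, {"y"}, stash);
    // matches Reduce(SUM) directly over Join(Mul), so the two-step
    // join+reduce is replaced by a single UniversalDotProduct node
    const TensorFunction &optimized = UniversalDotProduct::optimize(sum_y, stash);
    (void) optimized;
}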