aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2021-12-08 05:57:18 +0100
committerGitHub <noreply@github.com>2021-12-08 05:57:18 +0100
commit373ccd9082b206b828565b20f2af45a9a886226a (patch)
tree28c62882b8daa303c5bdceae4965ac1a38d522ce
parent83d883b3a3ef519c3c1ee125ceb9f2df2c850958 (diff)
parent0d3163f0adc4d29bb815c4b19bfea4359ac24504 (diff)
Merge pull request #20372 from vespa-engine/havardpe/optimize-L2-distancev7.513.4
optimize squared euclidean distance between tensors
-rw-r--r--eval/CMakeLists.txt1
-rw-r--r--eval/src/tests/instruction/l2_distance/CMakeLists.txt10
-rw-r--r--eval/src/tests/instruction/l2_distance/l2_distance_test.cpp96
-rw-r--r--eval/src/vespa/eval/eval/optimize_tensor_function.cpp7
-rw-r--r--eval/src/vespa/eval/instruction/CMakeLists.txt1
-rw-r--r--eval/src/vespa/eval/instruction/l2_distance.cpp96
-rw-r--r--eval/src/vespa/eval/instruction/l2_distance.h21
7 files changed, 231 insertions, 1 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index 99c7e9c68b8..2e0af3acfa7 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -70,6 +70,7 @@ vespa_define_module(
src/tests/instruction/index_lookup_table
src/tests/instruction/inplace_map_function
src/tests/instruction/join_with_number
+ src/tests/instruction/l2_distance
src/tests/instruction/mixed_inner_product_function
src/tests/instruction/mixed_simple_join_function
src/tests/instruction/pow_as_map_optimizer
diff --git a/eval/src/tests/instruction/l2_distance/CMakeLists.txt b/eval/src/tests/instruction/l2_distance/CMakeLists.txt
new file mode 100644
index 00000000000..1e0fc69a3f9
--- /dev/null
+++ b/eval/src/tests/instruction/l2_distance/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+vespa_add_executable(eval_l2_distance_test_app TEST
+ SOURCES
+ l2_distance_test.cpp
+ DEPENDS
+ vespaeval
+ GTest::GTest
+)
+vespa_add_test(NAME eval_l2_distance_test_app COMMAND eval_l2_distance_test_app)
diff --git a/eval/src/tests/instruction/l2_distance/l2_distance_test.cpp b/eval/src/tests/instruction/l2_distance/l2_distance_test.cpp
new file mode 100644
index 00000000000..2cba9dfb18e
--- /dev/null
+++ b/eval/src/tests/instruction/l2_distance/l2_distance_test.cpp
@@ -0,0 +1,96 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/tensor_function.h>
+#include <vespa/eval/eval/test/eval_fixture.h>
+#include <vespa/eval/eval/test/gen_spec.h>
+#include <vespa/eval/instruction/l2_distance.h>
+#include <vespa/vespalib/util/stash.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+#include <vespa/vespalib/util/require.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib;
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get();
+
+//-----------------------------------------------------------------------------
+
+void verify(const TensorSpec &a, const TensorSpec &b, const vespalib::string &expr, bool optimized = true) {
+ EvalFixture::ParamRepo param_repo;
+ param_repo.add("a", a).add("b", b);
+ EvalFixture fast_fixture(prod_factory, expr, param_repo, true);
+ EXPECT_EQ(fast_fixture.result(), EvalFixture::ref(expr, param_repo));
+ EXPECT_EQ(fast_fixture.find_all<L2Distance>().size(), optimized ? 1 : 0);
+}
+
+void verify_cell_types(GenSpec a, GenSpec b, const vespalib::string &expr, bool optimized = true) {
+ for (CellType act : CellTypeUtils::list_types()) {
+ for (CellType bct : CellTypeUtils::list_types()) {
+ if (optimized && (act == bct) && (act != CellType::BFLOAT16)) {
+ verify(a.cpy().cells(act), b.cpy().cells(bct), expr, true);
+ } else {
+ verify(a.cpy().cells(act), b.cpy().cells(bct), expr, false);
+ }
+ }
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+GenSpec gen(const vespalib::string &desc, int bias) {
+ return GenSpec::from_desc(desc).cells(CellType::FLOAT).seq(N(bias));
+}
+
+//-----------------------------------------------------------------------------
+
+vespalib::string sq_l2 = "reduce((a-b)^2,sum)";
+vespalib::string alt_sq_l2 = "reduce(map((a-b),f(x)(x*x)),sum)";
+
+//-----------------------------------------------------------------------------
+
+TEST(L2DistanceTest, squared_l2_distance_can_be_optimized) {
+ verify_cell_types(gen("x5", 3), gen("x5", 7), sq_l2);
+ verify_cell_types(gen("x5", 3), gen("x5", 7), alt_sq_l2);
+}
+
+TEST(L2DistanceTest, trivial_dimensions_are_ignored) {
+ verify(gen("x5y1", 3), gen("x5", 7), sq_l2);
+ verify(gen("x5", 3), gen("x5y1", 7), sq_l2);
+}
+
+TEST(L2DistanceTest, multiple_dimensions_can_be_used) {
+ verify(gen("x5y3", 3), gen("x5y3", 7), sq_l2);
+}
+
+//-----------------------------------------------------------------------------
+
+TEST(L2DistanceTest, inputs_must_be_dense) {
+ verify(gen("x5_1", 3), gen("x5_1", 7), sq_l2, false);
+ verify(gen("x5_1y3", 3), gen("x5_1y3", 7), sq_l2, false);
+ verify(gen("x5", 3), GenSpec(7), sq_l2, false);
+ verify(GenSpec(3), gen("x5", 7), sq_l2, false);
+}
+
+TEST(L2DistanceTest, result_must_be_double) {
+ verify(gen("x5y1", 3), gen("x5y1", 7), "reduce((a-b)^2,sum,x)", false);
+ verify(gen("x5y1_1", 3), gen("x5y1_1", 7), "reduce((a-b)^2,sum,x)", false);
+}
+
+TEST(L2DistanceTest, dimensions_must_match) {
+ verify(gen("x5y3", 3), gen("x5", 7), sq_l2, false);
+ verify(gen("x5", 3), gen("x5y3", 7), sq_l2, false);
+}
+
+TEST(L2DistanceTest, similar_expressions_are_not_optimized) {
+ verify(gen("x5", 3), gen("x5", 7), "reduce((a-b)^2,prod)", false);
+ verify(gen("x5", 3), gen("x5", 7), "reduce((a-b)^3,sum)", false);
+ verify(gen("x5", 3), gen("x5", 7), "reduce((a+b)^2,sum)", false);
+}
+
+//-----------------------------------------------------------------------------
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
index 09814cc0b06..e1520d4deb2 100644
--- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
@@ -30,6 +30,7 @@
#include <vespa/eval/instruction/dense_tensor_create_function.h>
#include <vespa/eval/instruction/dense_tensor_peek_function.h>
#include <vespa/eval/instruction/dense_hamming_distance.h>
+#include <vespa/eval/instruction/l2_distance.h>
#include <vespa/log/log.h>
LOG_SETUP(".eval.eval.optimize_tensor_function");
@@ -56,11 +57,16 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const Te
Child root(expr);
run_optimize_pass(root, [&stash](const Child &child)
{
+ child.set(PowAsMapOptimizer::optimize(child.get(), stash));
+ });
+ run_optimize_pass(root, [&stash](const Child &child)
+ {
child.set(SumMaxDotProductFunction::optimize(child.get(), stash));
});
run_optimize_pass(root, [&stash](const Child &child)
{
child.set(BestSimilarityFunction::optimize(child.get(), stash));
+ child.set(L2Distance::optimize(child.get(), stash));
});
run_optimize_pass(root, [&stash](const Child &child)
{
@@ -83,7 +89,6 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const Te
child.set(DenseLambdaPeekOptimizer::optimize(child.get(), stash));
child.set(UnpackBitsFunction::optimize(child.get(), stash));
child.set(FastRenameOptimizer::optimize(child.get(), stash));
- child.set(PowAsMapOptimizer::optimize(child.get(), stash));
child.set(InplaceMapFunction::optimize(child.get(), stash));
child.set(MixedSimpleJoinFunction::optimize(child.get(), stash));
child.set(JoinWithNumberFunction::optimize(child.get(), stash));
diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt
index a462ece4734..56184c113d4 100644
--- a/eval/src/vespa/eval/instruction/CMakeLists.txt
+++ b/eval/src/vespa/eval/instruction/CMakeLists.txt
@@ -30,6 +30,7 @@ vespa_add_library(eval_instruction OBJECT
index_lookup_table.cpp
inplace_map_function.cpp
join_with_number_function.cpp
+ l2_distance.cpp
mixed_inner_product_function.cpp
mixed_simple_join_function.cpp
pow_as_map_optimizer.cpp
diff --git a/eval/src/vespa/eval/instruction/l2_distance.cpp b/eval/src/vespa/eval/instruction/l2_distance.cpp
new file mode 100644
index 00000000000..3f1e7632431
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/l2_distance.cpp
@@ -0,0 +1,96 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "l2_distance.h"
+#include <vespa/eval/eval/operation.h>
+#include <vespa/eval/eval/value.h>
+#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
+#include <vespa/vespalib/util/require.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".eval.instruction.l2_distance");
+
+namespace vespalib::eval {
+
+using namespace tensor_function;
+
+namespace {
+
+static const auto &hw = hwaccelrated::IAccelrated::getAccelerator();
+
+template <typename T>
+double sq_l2(const Value &lhs, const Value &rhs, size_t len) {
+ return hw.squaredEuclideanDistance((const T *)lhs.cells().data, (const T *)rhs.cells().data, len);
+}
+
+template <>
+double sq_l2<Int8Float>(const Value &lhs, const Value &rhs, size_t len) {
+ return sq_l2<int8_t>(lhs, rhs, len);
+}
+
+template <typename CT>
+void my_squared_l2_distance_op(InterpretedFunction::State &state, uint64_t vector_size) {
+ double result = sq_l2<CT>(state.peek(1), state.peek(0), vector_size);
+ state.pop_pop_push(state.stash.create<DoubleValue>(result));
+}
+
+struct SelectOp {
+ template <typename CT>
+ static InterpretedFunction::op_function invoke() {
+ constexpr bool is_bfloat16 = std::is_same_v<CT, BFloat16>;
+ if constexpr (!is_bfloat16) {
+ return my_squared_l2_distance_op<CT>;
+ } else {
+ abort();
+ }
+ }
+};
+
+bool compatible_cell_types(CellType lhs, CellType rhs) {
+ return ((lhs == rhs) && ((lhs == CellType::INT8) ||
+ (lhs == CellType::FLOAT) ||
+ (lhs == CellType::DOUBLE)));
+}
+
+bool compatible_types(const ValueType &lhs, const ValueType &rhs) {
+ return (compatible_cell_types(lhs.cell_type(), rhs.cell_type()) &&
+ lhs.is_dense() && rhs.is_dense() &&
+ (lhs.nontrivial_indexed_dimensions() == rhs.nontrivial_indexed_dimensions()));
+}
+
+} // namespace <unnamed>
+
+L2Distance::L2Distance(const TensorFunction &lhs_in, const TensorFunction &rhs_in)
+ : tensor_function::Op2(ValueType::double_type(), lhs_in, rhs_in)
+{
+}
+
+InterpretedFunction::Instruction
+L2Distance::compile_self(const ValueBuilderFactory &, Stash &) const
+{
+ auto lhs_t = lhs().result_type();
+ auto rhs_t = rhs().result_type();
+ REQUIRE_EQ(lhs_t.cell_type(), rhs_t.cell_type());
+ REQUIRE_EQ(lhs_t.dense_subspace_size(), rhs_t.dense_subspace_size());
+ auto op = typify_invoke<1, TypifyCellType, SelectOp>(lhs_t.cell_type());
+ return InterpretedFunction::Instruction(op, lhs_t.dense_subspace_size());
+}
+
+const TensorFunction &
+L2Distance::optimize(const TensorFunction &expr, Stash &stash)
+{
+ auto reduce = as<Reduce>(expr);
+ if (reduce && (reduce->aggr() == Aggr::SUM) && expr.result_type().is_double()) {
+ auto map = as<Map>(reduce->child());
+ if (map && (map->function() == operation::Square::f)) {
+ auto join = as<Join>(map->child());
+ if (join && (join->function() == operation::Sub::f)) {
+ if (compatible_types(join->lhs().result_type(), join->rhs().result_type())) {
+ return stash.create<L2Distance>(join->lhs(), join->rhs());
+ }
+ }
+ }
+ }
+ return expr;
+}
+
+} // namespace
diff --git a/eval/src/vespa/eval/instruction/l2_distance.h b/eval/src/vespa/eval/instruction/l2_distance.h
new file mode 100644
index 00000000000..95b11b6c229
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/l2_distance.h
@@ -0,0 +1,21 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_function.h>
+
+namespace vespalib::eval {
+
+/**
+ * Tensor function for a squared euclidean distance producing a scalar result.
+ **/
+class L2Distance : public tensor_function::Op2
+{
+public:
+ L2Distance(const TensorFunction &lhs_in, const TensorFunction &rhs_in);
+ InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override;
+ bool result_is_mutable() const override { return true; }
+ static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash);
+};
+
+} // namespace