1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
|
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "l2_distance.h"
#include <vespa/eval/eval/operation.h>
#include <vespa/eval/eval/value.h>
#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
#include <vespa/vespalib/util/require.h>
#include <vespa/log/log.h>
LOG_SETUP(".eval.instruction.l2_distance");
namespace vespalib::eval {
using namespace tensor_function;
namespace {
// File-local reference to the accelerator object returned by
// IAccelrated::getAccelerator(). It is bound once, when this translation
// unit's namespace-scope statics are initialized, and is what the sq_l2
// helpers call to do the actual distance computation.
static const auto &hw = hwaccelrated::IAccelrated::getAccelerator();
// Squared Euclidean distance between the cells of two values.
//
// The raw cell buffers of 'lhs' and 'rhs' are reinterpreted as arrays of T
// and the first 'len' elements of each are handed to the accelerator's
// squaredEuclideanDistance. No check is made here that the cells actually
// hold elements of type T or that at least 'len' elements are present.
template <typename T>
double sq_l2(const Value &lhs, const Value &rhs, size_t len) {
    const T *lhs_cells = static_cast<const T *>(lhs.cells().data);
    const T *rhs_cells = static_cast<const T *>(rhs.cells().data);
    return hw.squaredEuclideanDistance(lhs_cells, rhs_cells, len);
}
// Int8Float specialization: the cell buffers are passed to the accelerator
// as plain int8_t arrays rather than as Int8Float.
template <>
double sq_l2<Int8Float>(const Value &lhs, const Value &rhs, size_t len) {
    const int8_t *lhs_cells = static_cast<const int8_t *>(lhs.cells().data);
    const int8_t *rhs_cells = static_cast<const int8_t *>(rhs.cells().data);
    return hw.squaredEuclideanDistance(lhs_cells, rhs_cells, len);
}
// Interpreted-function op: takes the two values on top of the stack
// (peek(1) is the left operand, peek(0) the right), computes their squared
// L2 distance over 'vector_size' cells of type CT, and replaces both
// operands with a single stash-allocated DoubleValue holding the result.
template <typename CT>
void my_squared_l2_distance_op(InterpretedFunction::State &state, uint64_t vector_size) {
    const Value &lhs = state.peek(1);
    const Value &rhs = state.peek(0);
    const double distance = sq_l2<CT>(lhs, rhs, vector_size);
    const Value &outcome = state.stash.create<DoubleValue>(distance);
    state.pop_pop_push(outcome);
}
// Typify target: maps a cell type CT to the matching op function.
struct SelectOp {
    // Returns my_squared_l2_distance_op<CT> for every cell type except
    // BFloat16, for which no op is provided and the process is aborted.
    template <typename CT>
    static InterpretedFunction::op_function invoke() {
        if constexpr (std::is_same_v<CT, BFloat16>) {
            abort();
        } else {
            return my_squared_l2_distance_op<CT>;
        }
    }
};
// True only when both sides have the same cell type and that type is one of
// INT8, FLOAT or DOUBLE.
bool compatible_cell_types(CellType lhs, CellType rhs) {
    if (lhs != rhs) {
        return false;
    }
    switch (lhs) {
    case CellType::INT8:
    case CellType::FLOAT:
    case CellType::DOUBLE:
        return true;
    default:
        return false;
    }
}
// True when the two value types can be fed to the L2 distance op: their
// cell types must pass compatible_cell_types, both must be dense, and their
// nontrivial indexed dimensions must compare equal.
bool compatible_types(const ValueType &lhs, const ValueType &rhs) {
    if (!compatible_cell_types(lhs.cell_type(), rhs.cell_type())) {
        return false;
    }
    if (!lhs.is_dense() || !rhs.is_dense()) {
        return false;
    }
    return lhs.nontrivial_indexed_dimensions() == rhs.nontrivial_indexed_dimensions();
}
} // namespace <unnamed>
// Builds an L2Distance node over the two child functions. The result type
// handed to the Op2 base is always ValueType::double_type().
L2Distance::L2Distance(const TensorFunction &lhs_in,
                       const TensorFunction &rhs_in)
    : tensor_function::Op2(ValueType::double_type(), lhs_in, rhs_in)
{}
// Produces the interpreted instruction for this node.
//
// Both children are required to have the same cell type and the same dense
// subspace size (REQUIRE_EQ fails otherwise). The op function is selected
// from the cell type through SelectOp, and the dense subspace size is
// passed along as the instruction parameter (the vector length used by the
// op).
InterpretedFunction::Instruction
L2Distance::compile_self(const ValueBuilderFactory &, Stash &) const
{
    // Bind by const reference: the types are only inspected here, so there
    // is no reason to deep-copy the ValueType objects.
    const ValueType &lhs_t = lhs().result_type();
    const ValueType &rhs_t = rhs().result_type();
    REQUIRE_EQ(lhs_t.cell_type(), rhs_t.cell_type());
    REQUIRE_EQ(lhs_t.dense_subspace_size(), rhs_t.dense_subspace_size());
    auto op = typify_invoke<1, TypifyCellType, SelectOp>(lhs_t.cell_type());
    return InterpretedFunction::Instruction(op, lhs_t.dense_subspace_size());
}
// Pattern-matches 'expr' against
//   Reduce(SUM) -> Map(Square) -> Join(Sub)
// with a double result and compatible operand types (see compatible_types).
// On a match, a stash-allocated L2Distance over the join's two children is
// returned; in every other case 'expr' is returned unchanged.
const TensorFunction &
L2Distance::optimize(const TensorFunction &expr, Stash &stash)
{
    const auto *sum = as<Reduce>(expr);
    if (!sum || (sum->aggr() != Aggr::SUM) || !expr.result_type().is_double()) {
        return expr;
    }
    const auto *square = as<Map>(sum->child());
    if (!square || (square->function() != operation::Square::f)) {
        return expr;
    }
    const auto *diff = as<Join>(square->child());
    if (!diff || (diff->function() != operation::Sub::f)) {
        return expr;
    }
    if (!compatible_types(diff->lhs().result_type(), diff->rhs().result_type())) {
        return expr;
    }
    return stash.create<L2Distance>(diff->lhs(), diff->rhs());
}
} // namespace
|