summaryrefslogtreecommitdiffstats
path: root/eval
diff options
context:
space:
mode:
authorHåvard Pettersen <havardpe@oath.com>2021-02-02 14:05:39 +0000
committerHåvard Pettersen <havardpe@oath.com>2021-02-02 15:04:47 +0000
commitb6472df72687995369aeef76f46a2fc137f97909 (patch)
treea570e7679afbc69c95606a5932e67e7cea954889 /eval
parent4b3b7543f0f8acf3f72d25d76e36d63841de278a (diff)
added some optimizations for single-dimension sparse matching
Diffstat (limited to 'eval')
-rw-r--r--eval/src/tests/instruction/sparse_dot_product_function/sparse_dot_product_function_test.cpp14
-rw-r--r--eval/src/vespa/eval/eval/fast_addr_map.h22
-rw-r--r--eval/src/vespa/eval/eval/fast_value.hpp15
-rw-r--r--eval/src/vespa/eval/instruction/sparse_dot_product_function.cpp71
4 files changed, 79 insertions, 43 deletions
diff --git a/eval/src/tests/instruction/sparse_dot_product_function/sparse_dot_product_function_test.cpp b/eval/src/tests/instruction/sparse_dot_product_function/sparse_dot_product_function_test.cpp
index 65eab2778aa..ac333f5224a 100644
--- a/eval/src/tests/instruction/sparse_dot_product_function/sparse_dot_product_function_test.cpp
+++ b/eval/src/tests/instruction/sparse_dot_product_function/sparse_dot_product_function_test.cpp
@@ -17,10 +17,8 @@ const ValueBuilderFactory &test_factory = SimpleValueBuilderFactory::get();
EvalFixture::ParamRepo make_params() {
return EvalFixture::ParamRepo()
- .add("v1_x", GenSpec().map("x", 32, 1).seq_bias(3.0).gen())
- .add("v1_x_f", GenSpec().map("x", 32, 1).seq_bias(3.0).cells_float().gen())
- .add("v2_x", GenSpec().map("x", 16, 2).seq_bias(7.0).gen())
- .add("v2_x_f", GenSpec().map("x", 16, 2).seq_bias(7.0).cells_float().gen())
+ .add_variants("v1_x", GenSpec().map("x", 32, 1).seq_bias(3.0))
+ .add_variants("v2_x", GenSpec().map("x", 16, 2).seq_bias(7.0))
.add("v3_y", GenSpec().map("y", 10, 1).gen())
.add("v4_xd", GenSpec().idx("x", 10).gen())
.add("m1_xy", GenSpec().map("x", 32, 1).map("y", 16, 2).seq_bias(3.0).gen())
@@ -53,8 +51,6 @@ TEST(SparseDotProduct, expression_can_be_optimized)
{
assert_optimized("reduce(v1_x*v2_x,sum,x)");
assert_optimized("reduce(v2_x*v1_x,sum)");
- assert_optimized("reduce(v1_x*v2_x_f,sum)");
- assert_optimized("reduce(v1_x_f*v2_x,sum)");
assert_optimized("reduce(v1_x_f*v2_x_f,sum)");
}
@@ -80,6 +76,12 @@ TEST(SparseDotProduct, similar_expressions_are_not_optimized)
assert_not_optimized("reduce(m3_xym*m3_xym,sum)");
}
+TEST(SparseDotProduct, mixed_cell_types_are_not_optimized)
+{
+ assert_not_optimized("reduce(v1_x*v2_x_f,sum)");
+ assert_not_optimized("reduce(v1_x_f*v2_x,sum)");
+}
+
//-----------------------------------------------------------------------------
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/vespa/eval/eval/fast_addr_map.h b/eval/src/vespa/eval/eval/fast_addr_map.h
index d8f68d2a37c..38df645a752 100644
--- a/eval/src/vespa/eval/eval/fast_addr_map.h
+++ b/eval/src/vespa/eval/eval/fast_addr_map.h
@@ -21,8 +21,8 @@ class FastAddrMap
{
public:
// label hasing functions
- static constexpr uint32_t hash_label(string_id label) { return label.hash(); }
- static constexpr uint32_t hash_label(const string_id *label) { return label->hash(); }
+ static constexpr uint32_t hash_label(string_id label) { return label.value(); }
+ static constexpr uint32_t hash_label(const string_id *label) { return label->value(); }
static constexpr uint32_t combine_label_hash(uint32_t full_hash, uint32_t next_hash) {
return ((full_hash * 31) + next_hash);
}
@@ -71,6 +71,7 @@ public:
template <typename T>
constexpr uint32_t operator()(const AltKey<T> &key) const { return key.hash; }
constexpr uint32_t operator()(const Entry &entry) const { return entry.hash; }
+ constexpr uint32_t operator()(string_id label) const { return label.value(); }
};
// equality functor for sparse hash set
@@ -81,10 +82,14 @@ public:
static constexpr bool eq_labels(string_id a, const string_id *b) { return (a == *b); }
template <typename T>
bool operator()(const Entry &a, const AltKey<T> &b) const {
- if ((a.hash != b.hash) || (b.key.size() != label_view.addr_size)) {
+ if (a.hash != b.hash) {
return false;
}
+ if (label_view.addr_size == 1) {
+ return true;
+ }
auto a_key = label_view.get_addr(a.tag.idx);
+ assert(a_key.size() == b.key.size());
for (size_t i = 0; i < a_key.size(); ++i) {
if (!eq_labels(a_key[i], b.key[i])) {
return false;
@@ -92,6 +97,7 @@ public:
}
return true;
}
+ bool operator()(const Entry &a, string_id b) const { return (a.hash == b.value()); }
};
using HashType = hashtable<Entry, Entry, Hash, Equal, Identity, hashtable_base::and_modulator>;
@@ -101,8 +107,8 @@ private:
HashType _map;
public:
- FastAddrMap(size_t num_mapped_dims, const std::vector<string_id> &labels, size_t expected_subspaces)
- : _labels(num_mapped_dims, labels),
+ FastAddrMap(size_t num_mapped_dims, const std::vector<string_id> &labels_in, size_t expected_subspaces)
+ : _labels(num_mapped_dims, labels_in),
_map(expected_subspaces * 2, Hash(), Equal(_labels)) {}
~FastAddrMap();
FastAddrMap(const FastAddrMap &) = delete;
@@ -113,6 +119,7 @@ public:
ConstArrayRef<string_id> get_addr(size_t idx) const { return _labels.get_addr(idx); }
size_t size() const { return _map.size(); }
constexpr size_t addr_size() const { return _labels.addr_size; }
+ const std::vector<string_id> &labels() const { return _labels.labels; }
template <typename T>
size_t lookup(ConstArrayRef<T> addr, uint32_t hash) const {
AltKey<T> key{addr, hash};
@@ -123,6 +130,11 @@ public:
size_t lookup(ConstArrayRef<T> addr) const {
return lookup(addr, hash_labels(addr));
}
+ size_t lookup_singledim(string_id addr) const {
+ // assert(addr_size() == 1);
+ auto pos = _map.find(addr);
+ return (pos == _map.end()) ? npos() : pos->tag.idx;
+ }
void add_mapping(uint32_t hash) {
uint32_t idx = _map.size();
_map.force_insert(Entry{{idx}, hash});
diff --git a/eval/src/vespa/eval/eval/fast_value.hpp b/eval/src/vespa/eval/eval/fast_value.hpp
index 5657c93f53f..88319df7590 100644
--- a/eval/src/vespa/eval/eval/fast_value.hpp
+++ b/eval/src/vespa/eval/eval/fast_value.hpp
@@ -6,6 +6,7 @@
#include <vespa/eval/instruction/generic_join.h>
#include <vespa/vespalib/stllike/hashtable.hpp>
#include <vespa/vespalib/util/shared_string_repo.h>
+#include <typeindex>
namespace vespalib::eval {
@@ -135,9 +136,9 @@ struct FastIterateView : public Value::Index::View {
//-----------------------------------------------------------------------------
using JoinAddrSource = instruction::SparseJoinPlan::Source;
+
// This is the class instructions will look for when optimizing sparse
// operations by calling inline functions directly.
-
struct FastValueIndex final : Value::Index {
FastAddrMap map;
FastValueIndex(size_t num_mapped_dims_in, const std::vector<string_id> &labels, size_t expected_subspaces_in)
@@ -164,6 +165,18 @@ struct FastValueIndex final : Value::Index {
std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override;
};
+inline bool is_fast(const Value::Index &index) {
+ return (std::type_index(typeid(index)) == std::type_index(typeid(FastValueIndex)));
+}
+
+inline bool are_fast(const Value::Index &a, const Value::Index &b) {
+ return (is_fast(a) && is_fast(b));
+}
+
+constexpr const FastValueIndex &as_fast(const Value::Index &index) {
+ return static_cast<const FastValueIndex &>(index);
+}
+
//-----------------------------------------------------------------------------
template <typename T>
diff --git a/eval/src/vespa/eval/instruction/sparse_dot_product_function.cpp b/eval/src/vespa/eval/instruction/sparse_dot_product_function.cpp
index 93ae2856372..2924027c5d6 100644
--- a/eval/src/vespa/eval/instruction/sparse_dot_product_function.cpp
+++ b/eval/src/vespa/eval/instruction/sparse_dot_product_function.cpp
@@ -2,8 +2,8 @@
#include "sparse_dot_product_function.h"
#include "generic_join.h"
-#include "detect_type.h"
#include <vespa/eval/eval/fast_value.hpp>
+#include <vespa/vespalib/util/typify.h>
namespace vespalib::eval {
@@ -13,41 +13,45 @@ using namespace instruction;
namespace {
-template <typename SCT, typename BCT>
-double my_fast_sparse_dot_product(const FastValueIndex &small_idx, const FastValueIndex &big_idx,
- const SCT *small_cells, const BCT *big_cells)
+template <typename CT, bool single_dim>
+double my_fast_sparse_dot_product(const FastValueIndex *small_idx, const FastValueIndex *big_idx,
+ const CT *small_cells, const CT *big_cells)
{
double result = 0.0;
- small_idx.map.each_map_entry([&](auto small_subspace, auto hash) {
- auto small_addr = small_idx.map.get_addr(small_subspace);
- auto big_subspace = big_idx.map.lookup(small_addr, hash);
- if (big_subspace != FastAddrMap::npos()) {
- result += (small_cells[small_subspace] * big_cells[big_subspace]);
- }
- });
+ if (big_idx->map.size() < small_idx->map.size()) {
+ std::swap(small_idx, big_idx);
+ std::swap(small_cells, big_cells);
+ }
+ if (single_dim) {
+ const auto &labels = small_idx->map.labels();
+ for (size_t i = 0; i < labels.size(); ++i) {
+ auto big_subspace = big_idx->map.lookup_singledim(labels[i]);
+ if (big_subspace != FastAddrMap::npos()) {
+ result += (small_cells[i] * big_cells[big_subspace]);
+ }
+ }
+ } else {
+ small_idx->map.each_map_entry([&](auto small_subspace, auto hash) {
+ auto small_addr = small_idx->map.get_addr(small_subspace);
+ auto big_subspace = big_idx->map.lookup(small_addr, hash);
+ if (big_subspace != FastAddrMap::npos()) {
+ result += (small_cells[small_subspace] * big_cells[big_subspace]);
+ }
+ });
+ }
return result;
}
-template <typename LCT, typename RCT>
+template <typename CT, bool single_dim>
void my_sparse_dot_product_op(InterpretedFunction::State &state, uint64_t num_mapped_dims) {
const auto &lhs_idx = state.peek(1).index();
const auto &rhs_idx = state.peek(0).index();
- const LCT *lhs_cells = state.peek(1).cells().typify<LCT>().cbegin();
- const RCT *rhs_cells = state.peek(0).cells().typify<RCT>().cbegin();
- if (auto indexes = detect_type<FastValueIndex>(lhs_idx, rhs_idx)) {
-#if __has_cpp_attribute(likely)
- [[likely]];
-#endif
- const auto &lhs_fast = indexes.get<0>();
- const auto &rhs_fast = indexes.get<1>();
- double result = (rhs_fast.map.size() < lhs_fast.map.size())
- ? my_fast_sparse_dot_product(rhs_fast, lhs_fast, rhs_cells, lhs_cells)
- : my_fast_sparse_dot_product(lhs_fast, rhs_fast, lhs_cells, rhs_cells);
+ const CT *lhs_cells = state.peek(1).cells().typify<CT>().cbegin();
+ const CT *rhs_cells = state.peek(0).cells().typify<CT>().cbegin();
+ if (__builtin_expect(are_fast(lhs_idx, rhs_idx), true)) {
+ double result = my_fast_sparse_dot_product<CT,single_dim>(&as_fast(lhs_idx), &as_fast(rhs_idx), lhs_cells, rhs_cells);
state.pop_pop_push(state.stash.create<ScalarValue<double>>(result));
} else {
-#if __has_cpp_attribute(unlikely)
- [[unlikely]];
-#endif
double result = 0.0;
SparseJoinPlan plan(num_mapped_dims);
SparseJoinState sparse(plan, lhs_idx, rhs_idx);
@@ -65,10 +69,12 @@ void my_sparse_dot_product_op(InterpretedFunction::State &state, uint64_t num_ma
}
struct MyGetFun {
- template <typename LCT, typename RCT>
- static auto invoke() { return my_sparse_dot_product_op<LCT,RCT>; }
+ template <typename CT, typename SINGLE_DIM>
+ static auto invoke() { return my_sparse_dot_product_op<CT,SINGLE_DIM::value>; }
};
+using MyTypify = TypifyValue<TypifyCellType,TypifyBool>;
+
} // namespace <unnamed>
SparseDotProductFunction::SparseDotProductFunction(const TensorFunction &lhs_in,
@@ -80,15 +86,18 @@ SparseDotProductFunction::SparseDotProductFunction(const TensorFunction &lhs_in,
InterpretedFunction::Instruction
SparseDotProductFunction::compile_self(const ValueBuilderFactory &, Stash &) const
{
- auto op = typify_invoke<2,TypifyCellType,MyGetFun>(lhs().result_type().cell_type(), rhs().result_type().cell_type());
- return InterpretedFunction::Instruction(op, lhs().result_type().count_mapped_dimensions());
+ size_t num_dims = lhs().result_type().count_mapped_dimensions();
+ auto op = typify_invoke<2,MyTypify,MyGetFun>(lhs().result_type().cell_type(),
+ (num_dims == 1));
+ return InterpretedFunction::Instruction(op, num_dims);
}
bool
SparseDotProductFunction::compatible_types(const ValueType &res, const ValueType &lhs, const ValueType &rhs)
{
return (res.is_scalar() && (res.cell_type() == CellType::DOUBLE) &&
- lhs.is_sparse() && (rhs.dimensions() == lhs.dimensions()));
+ lhs.is_sparse() && (rhs.dimensions() == lhs.dimensions()) &&
+ lhs.cell_type() == rhs.cell_type());
}
const TensorFunction &