author  Arne H Juul <arnej27959@users.noreply.github.com>  2021-02-03 11:53:20 +0100
committer  GitHub <noreply@github.com>  2021-02-03 11:53:20 +0100
commit  5bc79680f181742971c35eda70a136c81b3affba (patch)
tree  ed1b0ad2ed5ee4c92ed0f926f7a5703a6b99e70a /eval
parent  986dc62765dc2264ab07c2f4744d86d48e707193 (diff)
parent  cfe138a74ec1db72993ac4b1a6bca7ca63bbc781 (diff)
Merge pull request #16344 from vespa-engine/havardpe/faster-single-dim-matching
added some optimizations for single-dimension sparse matching
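
In broad strokes (a reading of the diff below, not an authoritative design note): when a sparse tensor has exactly one mapped dimension, a full address is just a single string_id, so the label's enum value can serve directly as its hash and the per-label hashing/equality loop disappears. A minimal sketch of the idea, using simplified stand-ins for the vespalib types:

    #include <cstdint>
    #include <vector>

    // Simplified stand-in for vespalib's string_id (illustration only).
    struct string_id {
        uint32_t id;
        uint32_t value() const { return id; }
    };

    // Multi-dimension path: fold every label of the address into one hash.
    uint32_t hash_addr(const std::vector<string_id> &addr) {
        uint32_t full_hash = 0;
        for (auto label : addr) {
            full_hash = (full_hash * 31) + label.value(); // cf. combine_label_hash
        }
        return full_hash;
    }

    // Single-dimension path: the address *is* one label; its value is the hash.
    uint32_t hash_single(string_id label) { return label.value(); }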
Diffstat (limited to 'eval')
-rw-r--r--  eval/src/tests/instruction/sparse_dot_product_function/sparse_dot_product_function_test.cpp  |  14
-rw-r--r--  eval/src/vespa/eval/eval/fast_addr_map.h  |  33
-rw-r--r--  eval/src/vespa/eval/eval/fast_value.hpp  |  15
-rw-r--r--  eval/src/vespa/eval/instruction/sparse_dot_product_function.cpp  |  111
4 files changed, 109 insertions, 64 deletions
diff --git a/eval/src/tests/instruction/sparse_dot_product_function/sparse_dot_product_function_test.cpp b/eval/src/tests/instruction/sparse_dot_product_function/sparse_dot_product_function_test.cpp
index 65eab2778aa..ac333f5224a 100644
--- a/eval/src/tests/instruction/sparse_dot_product_function/sparse_dot_product_function_test.cpp
+++ b/eval/src/tests/instruction/sparse_dot_product_function/sparse_dot_product_function_test.cpp
@@ -17,10 +17,8 @@ const ValueBuilderFactory &test_factory = SimpleValueBuilderFactory::get();
EvalFixture::ParamRepo make_params() {
return EvalFixture::ParamRepo()
- .add("v1_x", GenSpec().map("x", 32, 1).seq_bias(3.0).gen())
- .add("v1_x_f", GenSpec().map("x", 32, 1).seq_bias(3.0).cells_float().gen())
- .add("v2_x", GenSpec().map("x", 16, 2).seq_bias(7.0).gen())
- .add("v2_x_f", GenSpec().map("x", 16, 2).seq_bias(7.0).cells_float().gen())
+ .add_variants("v1_x", GenSpec().map("x", 32, 1).seq_bias(3.0))
+ .add_variants("v2_x", GenSpec().map("x", 16, 2).seq_bias(7.0))
.add("v3_y", GenSpec().map("y", 10, 1).gen())
.add("v4_xd", GenSpec().idx("x", 10).gen())
.add("m1_xy", GenSpec().map("x", 32, 1).map("y", 16, 2).seq_bias(3.0).gen())
@@ -53,8 +51,6 @@ TEST(SparseDotProduct, expression_can_be_optimized)
{
assert_optimized("reduce(v1_x*v2_x,sum,x)");
assert_optimized("reduce(v2_x*v1_x,sum)");
- assert_optimized("reduce(v1_x*v2_x_f,sum)");
- assert_optimized("reduce(v1_x_f*v2_x,sum)");
assert_optimized("reduce(v1_x_f*v2_x_f,sum)");
}
@@ -80,6 +76,12 @@ TEST(SparseDotProduct, similar_expressions_are_not_optimized)
assert_not_optimized("reduce(m3_xym*m3_xym,sum)");
}
+TEST(SparseDotProduct, mixed_cell_types_are_not_optimized)
+{
+ assert_not_optimized("reduce(v1_x*v2_x_f,sum)");
+ assert_not_optimized("reduce(v1_x_f*v2_x,sum)");
+}
+
//-----------------------------------------------------------------------------
GTEST_MAIN_RUN_ALL_TESTS()
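
Worth noting (an inference from the diff, not stated in the PR): the mixed-cell-type cases move from the optimized group to a new not-optimized test because the rewritten kernel further down is templated on a single cell type CT rather than an (LCT, RCT) pair; see the tightened compatible_types() check at the bottom of this diff. A simplified illustration of the signature change:

    #include <cstddef>

    // Illustration only, not Vespa code. Before, the kernel was instantiated
    // per (LCT, RCT) pair, i.e. 4 float/double combinations. After, a single
    // CT gives 2 instantiations; mixed inputs fail compatible_types() and
    // run through the generic join path instead.
    template <typename CT>
    double dot_same_type(const CT *a, const CT *b, size_t n) {
        double result = 0.0;
        for (size_t i = 0; i < n; ++i) {
            result += a[i] * b[i];
        }
        return result;
    }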
diff --git a/eval/src/vespa/eval/eval/fast_addr_map.h b/eval/src/vespa/eval/eval/fast_addr_map.h
index d8f68d2a37c..7df6bd7fafc 100644
--- a/eval/src/vespa/eval/eval/fast_addr_map.h
+++ b/eval/src/vespa/eval/eval/fast_addr_map.h
@@ -20,9 +20,13 @@ namespace vespalib::eval {
class FastAddrMap
{
public:
- // label hasing functions
- static constexpr uint32_t hash_label(string_id label) { return label.hash(); }
- static constexpr uint32_t hash_label(const string_id *label) { return label->hash(); }
+ // label extracting functions
+ static constexpr string_id self(string_id label) { return label; }
+ static constexpr string_id self(const string_id *label) { return *label; }
+
+ // label hashing functions
+ static constexpr uint32_t hash_label(string_id label) { return label.value(); }
+ static constexpr uint32_t hash_label(const string_id *label) { return label->value(); }
static constexpr uint32_t combine_label_hash(uint32_t full_hash, uint32_t next_hash) {
return ((full_hash * 31) + next_hash);
}
@@ -71,27 +75,27 @@ public:
template <typename T>
constexpr uint32_t operator()(const AltKey<T> &key) const { return key.hash; }
constexpr uint32_t operator()(const Entry &entry) const { return entry.hash; }
+ constexpr uint32_t operator()(string_id label) const { return label.value(); }
};
// equality functor for sparse hash set
struct Equal {
const LabelView &label_view;
Equal(const LabelView &label_view_in) : label_view(label_view_in) {}
- static constexpr bool eq_labels(string_id a, string_id b) { return (a == b); }
- static constexpr bool eq_labels(string_id a, const string_id *b) { return (a == *b); }
template <typename T>
bool operator()(const Entry &a, const AltKey<T> &b) const {
- if ((a.hash != b.hash) || (b.key.size() != label_view.addr_size)) {
+ if (a.hash != b.hash) {
return false;
}
auto a_key = label_view.get_addr(a.tag.idx);
for (size_t i = 0; i < a_key.size(); ++i) {
- if (!eq_labels(a_key[i], b.key[i])) {
+ if (a_key[i] != self(b.key[i])) {
return false;
}
}
return true;
}
+ bool operator()(const Entry &a, string_id b) const { return (a.hash == b.value()); }
};
using HashType = hashtable<Entry, Entry, Hash, Equal, Identity, hashtable_base::and_modulator>;
@@ -101,8 +105,8 @@ private:
HashType _map;
public:
- FastAddrMap(size_t num_mapped_dims, const std::vector<string_id> &labels, size_t expected_subspaces)
- : _labels(num_mapped_dims, labels),
+ FastAddrMap(size_t num_mapped_dims, const std::vector<string_id> &labels_in, size_t expected_subspaces)
+ : _labels(num_mapped_dims, labels_in),
_map(expected_subspaces * 2, Hash(), Equal(_labels)) {}
~FastAddrMap();
FastAddrMap(const FastAddrMap &) = delete;
@@ -113,15 +117,24 @@ public:
ConstArrayRef<string_id> get_addr(size_t idx) const { return _labels.get_addr(idx); }
size_t size() const { return _map.size(); }
constexpr size_t addr_size() const { return _labels.addr_size; }
+ const std::vector<string_id> &labels() const { return _labels.labels; }
template <typename T>
size_t lookup(ConstArrayRef<T> addr, uint32_t hash) const {
+ // assert(addr_size() == addr.size());
AltKey<T> key{addr, hash};
auto pos = _map.find(key);
return (pos == _map.end()) ? npos() : pos->tag.idx;
}
+ size_t lookup_singledim(string_id addr) const {
+ // assert(addr_size() == 1);
+ auto pos = _map.find(addr);
+ return (pos == _map.end()) ? npos() : pos->tag.idx;
+ }
template <typename T>
size_t lookup(ConstArrayRef<T> addr) const {
- return lookup(addr, hash_labels(addr));
+ return (addr.size() == 1)
+ ? lookup_singledim(self(addr[0]))
+ : lookup(addr, hash_labels(addr));
}
void add_mapping(uint32_t hash) {
uint32_t idx = _map.size();
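
One subtlety in the fast_addr_map.h changes deserves spelling out (again an inference from the code, not a documented guarantee): the new Equal overload for string_id compares only hashes, which is safe because in a single-dimension map each entry's stored hash is exactly its label's value(), and string_id values identify labels uniquely, so hash equality implies label equality. A toy model of that invariant:

    #include <cassert>
    #include <cstdint>

    // Toy model: in a 1-d FastAddrMap the stored hash of an entry equals the
    // label's enum value, so comparing hashes is comparing labels.
    struct string_id { uint32_t id; uint32_t value() const { return id; } };
    struct Entry { uint32_t hash; }; // hash == label.value() when addr_size() == 1

    bool equal_1d(const Entry &a, string_id b) { return a.hash == b.value(); }

    int main() {
        Entry e{42};                      // entry whose single label has value 42
        assert(equal_1d(e, string_id{42}));
        assert(!equal_1d(e, string_id{7}));
    }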
diff --git a/eval/src/vespa/eval/eval/fast_value.hpp b/eval/src/vespa/eval/eval/fast_value.hpp
index 5657c93f53f..88319df7590 100644
--- a/eval/src/vespa/eval/eval/fast_value.hpp
+++ b/eval/src/vespa/eval/eval/fast_value.hpp
@@ -6,6 +6,7 @@
#include <vespa/eval/instruction/generic_join.h>
#include <vespa/vespalib/stllike/hashtable.hpp>
#include <vespa/vespalib/util/shared_string_repo.h>
+#include <typeindex>
namespace vespalib::eval {
@@ -135,9 +136,9 @@ struct FastIterateView : public Value::Index::View {
//-----------------------------------------------------------------------------
using JoinAddrSource = instruction::SparseJoinPlan::Source;
+
// This is the class instructions will look for when optimizing sparse
// operations by calling inline functions directly.
-
struct FastValueIndex final : Value::Index {
FastAddrMap map;
FastValueIndex(size_t num_mapped_dims_in, const std::vector<string_id> &labels, size_t expected_subspaces_in)
@@ -164,6 +165,18 @@ struct FastValueIndex final : Value::Index {
std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override;
};
+inline bool is_fast(const Value::Index &index) {
+ return (std::type_index(typeid(index)) == std::type_index(typeid(FastValueIndex)));
+}
+
+inline bool are_fast(const Value::Index &a, const Value::Index &b) {
+ return (is_fast(a) && is_fast(b));
+}
+
+constexpr const FastValueIndex &as_fast(const Value::Index &index) {
+ return static_cast<const FastValueIndex &>(index);
+}
+
//-----------------------------------------------------------------------------
template <typename T>
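
The is_fast/as_fast helpers replace the detect_type<FastValueIndex> utility that the .cpp file below stops including. The pattern works because FastValueIndex is declared final: an exact dynamic-type comparison via typeid cannot miss a subtype, so a plain static_cast is sufficient after the check. A self-contained sketch of the same pattern:

    #include <typeindex>
    #include <typeinfo>

    struct Index { virtual ~Index() = default; };
    struct FastIndex final : Index {};  // 'final': no subtype can also match

    inline bool is_fast(const Index &index) {
        // Exact dynamic-type check against the concrete class.
        return std::type_index(typeid(index)) == std::type_index(typeid(FastIndex));
    }

    inline const FastIndex &as_fast(const Index &index) {
        // Only valid after is_fast() returned true.
        return static_cast<const FastIndex &>(index);
    }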
diff --git a/eval/src/vespa/eval/instruction/sparse_dot_product_function.cpp b/eval/src/vespa/eval/instruction/sparse_dot_product_function.cpp
index 93ae2856372..7cc4417bdbb 100644
--- a/eval/src/vespa/eval/instruction/sparse_dot_product_function.cpp
+++ b/eval/src/vespa/eval/instruction/sparse_dot_product_function.cpp
@@ -2,8 +2,8 @@
#include "sparse_dot_product_function.h"
#include "generic_join.h"
-#include "detect_type.h"
#include <vespa/eval/eval/fast_value.hpp>
+#include <vespa/vespalib/util/typify.h>
namespace vespalib::eval {
@@ -13,62 +13,76 @@ using namespace instruction;
namespace {
-template <typename SCT, typename BCT>
-double my_fast_sparse_dot_product(const FastValueIndex &small_idx, const FastValueIndex &big_idx,
- const SCT *small_cells, const BCT *big_cells)
+template <typename CT>
+double my_sparse_dot_product_fallback(const Value::Index &lhs_idx, const Value::Index &rhs_idx,
+ const CT *lhs_cells, const CT *rhs_cells, size_t num_mapped_dims) __attribute__((noinline));
+template <typename CT>
+double my_sparse_dot_product_fallback(const Value::Index &lhs_idx, const Value::Index &rhs_idx,
+ const CT *lhs_cells, const CT *rhs_cells, size_t num_mapped_dims)
{
double result = 0.0;
- small_idx.map.each_map_entry([&](auto small_subspace, auto hash) {
- auto small_addr = small_idx.map.get_addr(small_subspace);
- auto big_subspace = big_idx.map.lookup(small_addr, hash);
- if (big_subspace != FastAddrMap::npos()) {
- result += (small_cells[small_subspace] * big_cells[big_subspace]);
- }
- });
+ SparseJoinPlan plan(num_mapped_dims);
+ SparseJoinState sparse(plan, lhs_idx, rhs_idx);
+ auto outer = sparse.first_index.create_view({});
+ auto inner = sparse.second_index.create_view(sparse.second_view_dims);
+ outer->lookup({});
+ while (outer->next_result(sparse.first_address, sparse.first_subspace)) {
+ inner->lookup(sparse.address_overlap);
+ if (inner->next_result(sparse.second_only_address, sparse.second_subspace)) {
+ result += (lhs_cells[sparse.lhs_subspace] * rhs_cells[sparse.rhs_subspace]);
+ }
+ }
return result;
}
-template <typename LCT, typename RCT>
-void my_sparse_dot_product_op(InterpretedFunction::State &state, uint64_t num_mapped_dims) {
- const auto &lhs_idx = state.peek(1).index();
- const auto &rhs_idx = state.peek(0).index();
- const LCT *lhs_cells = state.peek(1).cells().typify<LCT>().cbegin();
- const RCT *rhs_cells = state.peek(0).cells().typify<RCT>().cbegin();
- if (auto indexes = detect_type<FastValueIndex>(lhs_idx, rhs_idx)) {
-#if __has_cpp_attribute(likely)
- [[likely]];
-#endif
- const auto &lhs_fast = indexes.get<0>();
- const auto &rhs_fast = indexes.get<1>();
- double result = (rhs_fast.map.size() < lhs_fast.map.size())
- ? my_fast_sparse_dot_product(rhs_fast, lhs_fast, rhs_cells, lhs_cells)
- : my_fast_sparse_dot_product(lhs_fast, rhs_fast, lhs_cells, rhs_cells);
- state.pop_pop_push(state.stash.create<ScalarValue<double>>(result));
- } else {
-#if __has_cpp_attribute(unlikely)
- [[unlikely]];
-#endif
- double result = 0.0;
- SparseJoinPlan plan(num_mapped_dims);
- SparseJoinState sparse(plan, lhs_idx, rhs_idx);
- auto outer = sparse.first_index.create_view({});
- auto inner = sparse.second_index.create_view(sparse.second_view_dims);
- outer->lookup({});
- while (outer->next_result(sparse.first_address, sparse.first_subspace)) {
- inner->lookup(sparse.address_overlap);
- if (inner->next_result(sparse.second_only_address, sparse.second_subspace)) {
- result += (lhs_cells[sparse.lhs_subspace] * rhs_cells[sparse.rhs_subspace]);
+template <typename CT, bool single_dim>
+double my_fast_sparse_dot_product(const FastAddrMap *small_map, const FastAddrMap *big_map,
+ const CT *small_cells, const CT *big_cells)
+{
+ double result = 0.0;
+ if (big_map->size() < small_map->size()) {
+ std::swap(small_map, big_map);
+ std::swap(small_cells, big_cells);
+ }
+ if constexpr (single_dim) {
+ const auto &labels = small_map->labels();
+ for (size_t i = 0; i < labels.size(); ++i) {
+ auto big_subspace = big_map->lookup_singledim(labels[i]);
+ if (big_subspace != FastAddrMap::npos()) {
+ result += (small_cells[i] * big_cells[big_subspace]);
}
}
- state.pop_pop_push(state.stash.create<ScalarValue<double>>(result));
+ } else {
+ small_map->each_map_entry([&](auto small_subspace, auto hash) {
+ auto small_addr = small_map->get_addr(small_subspace);
+ auto big_subspace = big_map->lookup(small_addr, hash);
+ if (big_subspace != FastAddrMap::npos()) {
+ result += (small_cells[small_subspace] * big_cells[big_subspace]);
+ }
+ });
}
+ return result;
+}
+
+template <typename CT, bool single_dim>
+void my_sparse_dot_product_op(InterpretedFunction::State &state, uint64_t num_mapped_dims) {
+ const auto &lhs_idx = state.peek(1).index();
+ const auto &rhs_idx = state.peek(0).index();
+ const CT *lhs_cells = state.peek(1).cells().typify<CT>().cbegin();
+ const CT *rhs_cells = state.peek(0).cells().typify<CT>().cbegin();
+ double result = __builtin_expect(are_fast(lhs_idx, rhs_idx), true)
+ ? my_fast_sparse_dot_product<CT,single_dim>(&as_fast(lhs_idx).map, &as_fast(rhs_idx).map, lhs_cells, rhs_cells)
+ : my_sparse_dot_product_fallback<CT>(lhs_idx, rhs_idx, lhs_cells, rhs_cells, num_mapped_dims);
+ state.pop_pop_push(state.stash.create<ScalarValue<double>>(result));
}
struct MyGetFun {
- template <typename LCT, typename RCT>
- static auto invoke() { return my_sparse_dot_product_op<LCT,RCT>; }
+ template <typename CT, typename SINGLE_DIM>
+ static auto invoke() { return my_sparse_dot_product_op<CT,SINGLE_DIM::value>; }
};
+using MyTypify = TypifyValue<TypifyCellType,TypifyBool>;
+
} // namespace <unnamed>
SparseDotProductFunction::SparseDotProductFunction(const TensorFunction &lhs_in,
@@ -80,15 +94,18 @@ SparseDotProductFunction::SparseDotProductFunction(const TensorFunction &lhs_in,
InterpretedFunction::Instruction
SparseDotProductFunction::compile_self(const ValueBuilderFactory &, Stash &) const
{
- auto op = typify_invoke<2,TypifyCellType,MyGetFun>(lhs().result_type().cell_type(), rhs().result_type().cell_type());
- return InterpretedFunction::Instruction(op, lhs().result_type().count_mapped_dimensions());
+ size_t num_dims = lhs().result_type().count_mapped_dimensions();
+ auto op = typify_invoke<2,MyTypify,MyGetFun>(lhs().result_type().cell_type(),
+ (num_dims == 1));
+ return InterpretedFunction::Instruction(op, num_dims);
}
bool
SparseDotProductFunction::compatible_types(const ValueType &res, const ValueType &lhs, const ValueType &rhs)
{
return (res.is_scalar() && (res.cell_type() == CellType::DOUBLE) &&
- lhs.is_sparse() && (rhs.dimensions() == lhs.dimensions()));
+ lhs.is_sparse() && (rhs.dimensions() == lhs.dimensions()) &&
+ lhs.cell_type() == rhs.cell_type());
}
const TensorFunction &
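
Finally, on the dispatch in compile_self(): typify_invoke<2,MyTypify,MyGetFun> turns two runtime values (the cell type and the single-dimension flag) into compile-time template arguments and returns the matching kernel. A hand-rolled sketch of what that resolution amounts to here (simplified; vespalib's typifier generalizes this):

    #include <cstdint>

    enum class CellType { DOUBLE, FLOAT };

    // Stand-in for my_sparse_dot_product_op<CT, single_dim> (body elided).
    template <typename CT, bool single_dim>
    void op(uint64_t /*num_mapped_dims*/) {}

    using op_function = void (*)(uint64_t);

    // Maps (cell_type, single_dim) to one of four compiled instantiations.
    op_function select(CellType ct, bool single_dim) {
        if (ct == CellType::DOUBLE) {
            return single_dim ? op<double, true> : op<double, false>;
        }
        return single_dim ? op<float, true> : op<float, false>;
    }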