diff options
author | Håvard Pettersen <havardpe@oath.com> | 2020-05-26 13:52:38 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2020-05-27 08:42:30 +0000 |
commit | 03e79905d9255c9494777baf675eb6cc2383dfa2 (patch) | |
tree | 8826b5ae0edd9f0fd1da575b6454903e8f837a7c /eval | |
parent | 847e0f303d3ff408d48acfc4c6f81e0ec7f0d1dc (diff) |
use index lookup table with shared cache
Diffstat (limited to 'eval')
7 files changed, 307 insertions, 22 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 7471fc51fe7..353a9b3e701 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -46,6 +46,7 @@ vespa_define_module( src/tests/tensor/dense_xw_product_function src/tests/tensor/direct_dense_tensor_builder src/tests/tensor/direct_sparse_tensor_builder + src/tests/tensor/index_lookup_table src/tests/tensor/tensor_add_operation src/tests/tensor/tensor_address src/tests/tensor/tensor_conformance diff --git a/eval/src/tests/tensor/index_lookup_table/CMakeLists.txt b/eval/src/tests/tensor/index_lookup_table/CMakeLists.txt new file mode 100644 index 00000000000..f2dc5d31045 --- /dev/null +++ b/eval/src/tests/tensor/index_lookup_table/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_index_lookup_table_test_app TEST + SOURCES + index_lookup_table_test.cpp + DEPENDS + vespaeval + gtest +) +vespa_add_test(NAME eval_index_lookup_table_test_app COMMAND eval_index_lookup_table_test_app) diff --git a/eval/src/tests/tensor/index_lookup_table/index_lookup_table_test.cpp b/eval/src/tests/tensor/index_lookup_table/index_lookup_table_test.cpp new file mode 100644 index 00000000000..3f806e8252c --- /dev/null +++ b/eval/src/tests/tensor/index_lookup_table/index_lookup_table_test.cpp @@ -0,0 +1,118 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/eval/tensor/dense/index_lookup_table.h> +#include <vespa/eval/eval/function.h> +#include <vespa/eval/eval/value_type.h> +#include <vespa/vespalib/gtest/gtest.h> + +using namespace vespalib::eval; +using namespace vespalib::tensor; + +std::vector<uint32_t> make_table(std::vector<uint32_t> list) { return list; } + +TEST(IndexLookupTableTest, single_dimension_lookup_table_is_correct) +{ + auto idx_fun = Function::parse({"x"}, "5-x"); + auto type = ValueType::from_spec("tensor(x[6])"); + auto table = IndexLookupTable::create(*idx_fun, type); + + EXPECT_EQ(IndexLookupTable::num_cached(), 1); + EXPECT_EQ(IndexLookupTable::count_refs(), 1); + EXPECT_EQ(table->get(), make_table({5,4,3,2,1,0})); +} + +TEST(IndexLookupTableTest, dual_dimension_lookup_table_is_correct) +{ + auto idx_fun = Function::parse({"x","y"}, "5-(x*2+y)"); + auto type = ValueType::from_spec("tensor(x[3],y[2])"); + auto table = IndexLookupTable::create(*idx_fun, type); + + EXPECT_EQ(IndexLookupTable::num_cached(), 1); + EXPECT_EQ(IndexLookupTable::count_refs(), 1); + EXPECT_EQ(table->get(), make_table({5,4,3,2,1,0})); +} + +TEST(IndexLookupTableTest, multi_dimension_lookup_table_is_correct) +{ + auto idx_fun = Function::parse({"a","b","c","d"}, "11-(a*6+b*2+c*2+d)"); + auto type = ValueType::from_spec("tensor(a[2],b[3],c[1],d[2])"); + auto table = IndexLookupTable::create(*idx_fun, type); + + EXPECT_EQ(IndexLookupTable::num_cached(), 1); + EXPECT_EQ(IndexLookupTable::count_refs(), 1); + EXPECT_EQ(table->get(), make_table({11,10,9,8,7,6,5,4,3,2,1,0})); +} + +TEST(IndexLookupTableTest, lookup_tables_can_be_shared) +{ + auto idx_fun1 = Function::parse({"x"}, "5-x"); + auto type1 = ValueType::from_spec("tensor(x[6])"); + auto table1 = IndexLookupTable::create(*idx_fun1, type1); + + auto idx_fun2 = Function::parse({"x"}, "5-x"); + auto type2 = ValueType::from_spec("tensor(x[6])"); + auto table2 = IndexLookupTable::create(*idx_fun2, type2); + + EXPECT_EQ(IndexLookupTable::num_cached(), 1); + EXPECT_EQ(IndexLookupTable::count_refs(), 2); + EXPECT_EQ(&table1->get(), &table2->get()); + EXPECT_EQ(table1->get(), make_table({5,4,3,2,1,0})); +} + +TEST(IndexLookupTableTest, lookup_tables_with_different_index_functions_are_not_shared) +{ + auto idx_fun1 = Function::parse({"x"}, "5-x"); + auto type1 = ValueType::from_spec("tensor(x[6])"); + auto table1 = IndexLookupTable::create(*idx_fun1, type1); + + auto idx_fun2 = Function::parse({"x"}, "x"); + auto type2 = ValueType::from_spec("tensor(x[6])"); + auto table2 = IndexLookupTable::create(*idx_fun2, type2); + + EXPECT_EQ(IndexLookupTable::num_cached(), 2); + EXPECT_EQ(IndexLookupTable::count_refs(), 2); + EXPECT_NE(&table1->get(), &table2->get()); + EXPECT_EQ(table1->get(), make_table({5,4,3,2,1,0})); + EXPECT_EQ(table2->get(), make_table({0,1,2,3,4,5})); +} + +TEST(IndexLookupTableTest, lookup_tables_with_different_value_types_are_not_shared) +{ + auto idx_fun1 = Function::parse({"x"}, "x"); + auto type1 = ValueType::from_spec("tensor(x[6])"); + auto table1 = IndexLookupTable::create(*idx_fun1, type1); + + auto idx_fun2 = Function::parse({"x"}, "x"); + auto type2 = ValueType::from_spec("tensor(x[5])"); + auto table2 = IndexLookupTable::create(*idx_fun2, type2); + + EXPECT_EQ(IndexLookupTable::num_cached(), 2); + EXPECT_EQ(IndexLookupTable::count_refs(), 2); + EXPECT_NE(&table1->get(), &table2->get()); + EXPECT_EQ(table1->get(), make_table({0,1,2,3,4,5})); + EXPECT_EQ(table2->get(), make_table({0,1,2,3,4})); +} + +TEST(IndexLookupTableTest, identical_lookup_tables_might_not_be_shared) +{ + auto idx_fun1 = Function::parse({"x"}, "5-x"); + auto type1 = ValueType::from_spec("tensor(x[6])"); + auto table1 = IndexLookupTable::create(*idx_fun1, type1); + + auto idx_fun2 = Function::parse({"x","y"}, "5-(x*2+y)"); + auto type2 = ValueType::from_spec("tensor(x[3],y[2])"); + auto table2 = IndexLookupTable::create(*idx_fun2, type2); + + EXPECT_EQ(IndexLookupTable::num_cached(), 2); + EXPECT_EQ(IndexLookupTable::count_refs(), 2); + EXPECT_NE(&table1->get(), &table2->get()); + EXPECT_EQ(table1->get(), make_table({5,4,3,2,1,0})); + EXPECT_EQ(table2->get(), make_table({5,4,3,2,1,0})); +} + +TEST(IndexLookupTableTest, unused_lookup_tables_are_discarded) { + EXPECT_EQ(IndexLookupTable::num_cached(), 0); + EXPECT_EQ(IndexLookupTable::count_refs(), 0); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt index 141e5901988..b955b59cc39 100644 --- a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt +++ b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt @@ -24,6 +24,7 @@ vespa_add_library(eval_tensor_dense OBJECT dense_tensor_reduce.cpp dense_tensor_view.cpp dense_xw_product_function.cpp + index_lookup_table.cpp mutable_dense_tensor_view.cpp typed_cells.cpp typed_dense_tensor_builder.cpp diff --git a/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp index 0c7debde4ef..a5f532e643a 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp @@ -1,13 +1,12 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "dense_lambda_peek_function.h" +#include "index_lookup_table.h" #include "dense_tensor_view.h" #include <vespa/eval/eval/value.h> -#include <vespa/eval/eval/llvm/compile_cache.h> namespace vespalib::tensor { -using eval::CompileCache; using eval::Function; using eval::InterpretedFunction; using eval::PassParams; @@ -22,35 +21,25 @@ namespace { struct Self { const ValueType &result_type; - CompileCache::Token::UP compile_token; + IndexLookupTable::Token::UP table_token; Self(const ValueType &result_type_in, const Function &function) : result_type(result_type_in), - compile_token(CompileCache::compile(function, PassParams::ARRAY)) {} -}; - -bool step_params(std::vector<double> ¶ms, const ValueType &type) { - const auto &dims = type.dimensions(); - for (size_t idx = params.size(); idx-- > 0; ) { - if (size_t(params[idx] += 1.0) < dims[idx].size) { - return true; - } else { - params[idx] = 0.0; - } + table_token(IndexLookupTable::create(function, result_type_in)) + { + assert(table_token->get().size() == result_type.dense_subspace_size()); } - return false; -} +}; template <typename DST_CT, typename SRC_CT> void my_lambda_peek_op(InterpretedFunction::State &state, uint64_t param) { const auto *self = (const Self *)(param); + const std::vector<uint32_t> &lookup_table = self->table_token->get(); auto src_cells = DenseTensorView::typify_cells<SRC_CT>(state.peek(0)); - ArrayRef<DST_CT> dst_cells = state.stash.create_array<DST_CT>(self->result_type.dense_subspace_size()); + ArrayRef<DST_CT> dst_cells = state.stash.create_array<DST_CT>(lookup_table.size()); DST_CT *dst = &dst_cells[0]; - std::vector<double> params(self->result_type.dimensions().size(), 0.0); - auto idx_fun = self->compile_token->get().get_function(); - do { - *dst++ = src_cells[size_t(idx_fun(¶ms[0]))]; - } while(step_params(params, self->result_type)); + for (uint32_t idx: lookup_table) { + *dst++ = src_cells[idx]; + } state.pop_push(state.stash.create<DenseTensorView>(self->result_type, TypedCells(dst_cells))); } diff --git a/eval/src/vespa/eval/tensor/dense/index_lookup_table.cpp b/eval/src/vespa/eval/tensor/dense/index_lookup_table.cpp new file mode 100644 index 00000000000..99a05762a34 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/index_lookup_table.cpp @@ -0,0 +1,98 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "index_lookup_table.h" +#include <vespa/eval/eval/value_type.h> +#include <vespa/eval/eval/key_gen.h> +#include <vespa/eval/eval/llvm/compiled_function.h> + +namespace vespalib::tensor { + +using eval::CompiledFunction; +using eval::Function; +using eval::PassParams; +using eval::ValueType; + +namespace { + +bool step_params(std::vector<double> ¶ms, const ValueType &type) { + const auto &dims = type.dimensions(); + for (size_t idx = params.size(); idx-- > 0; ) { + if (size_t(params[idx] += 1.0) < dims[idx].size) { + return true; + } else { + params[idx] = 0.0; + } + } + return false; +} + +std::vector<uint32_t> make_index_table(const Function &idx_fun, const ValueType &type) { + std::vector<uint32_t> result; + result.reserve(type.dense_subspace_size()); + std::vector<double> params(type.dimensions().size(), 0.0); + CompiledFunction cfun(idx_fun, PassParams::ARRAY); + auto fun = cfun.get_function(); + do { + result.push_back(uint32_t(fun(¶ms[0]))); + } while(step_params(params, type)); + assert(result.size() == type.dense_subspace_size()); + return result; +} + +} + +std::mutex IndexLookupTable::_lock{}; +IndexLookupTable::Map IndexLookupTable::_cached{}; + +size_t +IndexLookupTable::num_cached() +{ + std::lock_guard<std::mutex> guard(_lock); + return _cached.size(); +} + +size_t +IndexLookupTable::count_refs() +{ + std::lock_guard<std::mutex> guard(_lock); + size_t refs = 0; + for (const auto &entry: _cached) { + refs += entry.second.num_refs; + } + return refs; +} + +IndexLookupTable::Token::UP +IndexLookupTable::create(const Function &idx_fun, const ValueType &type) +{ + assert(type.is_dense()); + assert(idx_fun.num_params() == type.dimensions().size()); + assert(!CompiledFunction::detect_issues(idx_fun)); + auto key = type.to_spec() + gen_key(idx_fun, PassParams::ARRAY); + { + std::lock_guard<std::mutex> guard(_lock); + auto pos = _cached.find(key); + if (pos != _cached.end()) { + ++(pos->second.num_refs); + return std::make_unique<Token>(pos, Token::ctor_tag()); + } + } + // avoid holding the lock while making the index table + auto table = make_index_table(idx_fun, type); + { + std::lock_guard<std::mutex> guard(_lock); + auto pos = _cached.find(key); + if (pos != _cached.end()) { + ++(pos->second.num_refs); + // in case of a race; return the same table for all callers + return std::make_unique<Token>(pos, Token::ctor_tag()); + } else { + auto res = _cached.emplace(std::move(key), Value::ctor_tag()); + assert(res.second); + res.first->second.data = std::move(table); + return std::make_unique<Token>(res.first, Token::ctor_tag()); + } + } +} + +} diff --git a/eval/src/vespa/eval/tensor/dense/index_lookup_table.h b/eval/src/vespa/eval/tensor/dense/index_lookup_table.h new file mode 100644 index 00000000000..ee5776ef5ca --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/index_lookup_table.h @@ -0,0 +1,69 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> +#include <mutex> +#include <vector> +#include <map> + +namespace vespalib::eval { +class ValueType; +class Function; +} + +namespace vespalib::tensor { + +/** + * Pre-computed index tables used by DenseLambdaPeekFunction. The + * underlying index tables are shared between operations. + **/ +class IndexLookupTable +{ +private: + using Key = vespalib::string; + struct Value { + size_t num_refs; + std::vector<uint32_t> data; + struct ctor_tag {}; + Value(ctor_tag) : num_refs(1), data() {} + }; + using Map = std::map<Key,Value>; + static std::mutex _lock; + static Map _cached; + + static void release(Map::iterator entry) { + std::lock_guard<std::mutex> guard(_lock); + if (--(entry->second.num_refs) == 0) { + _cached.erase(entry); + } + } + +public: + /** + * A token represents shared ownership of a cached index lookup + * table. + **/ + class Token + { + private: + friend class IndexLookupTable; + struct ctor_tag {}; + IndexLookupTable::Map::iterator _entry; + public: + Token(Token &&) = delete; + Token(const Token &) = delete; + Token &operator=(Token &&) = delete; + Token &operator=(const Token &) = delete; + using UP = std::unique_ptr<Token>; + Token(IndexLookupTable::Map::iterator entry, ctor_tag) : _entry(entry) {} + const std::vector<uint32_t> &get() const { return _entry->second.data; } + ~Token() { IndexLookupTable::release(_entry); } + }; + IndexLookupTable() = delete; + static size_t num_cached(); + static size_t count_refs(); + static Token::UP create(const eval::Function &idx_fun, const eval::ValueType &type); +}; + +} |