From 03e79905d9255c9494777baf675eb6cc2383dfa2 Mon Sep 17 00:00:00 2001
From: Håvard Pettersen <havardpe@oath.com>
Date: Tue, 26 May 2020 13:52:38 +0000
Subject: use index lookup table with shared cache

---
 eval/CMakeLists.txt                                |   1 +
 .../tests/tensor/index_lookup_table/CMakeLists.txt |   9 ++
 .../index_lookup_table/index_lookup_table_test.cpp | 118 +++++++++++++++++++++
 eval/src/vespa/eval/tensor/dense/CMakeLists.txt    |   1 +
 .../tensor/dense/dense_lambda_peek_function.cpp    |  33 ++----
 .../vespa/eval/tensor/dense/index_lookup_table.cpp |  98 +++++++++++++++++
 .../vespa/eval/tensor/dense/index_lookup_table.h   |  69 ++++++++++++
 7 files changed, 307 insertions(+), 22 deletions(-)
 create mode 100644 eval/src/tests/tensor/index_lookup_table/CMakeLists.txt
 create mode 100644 eval/src/tests/tensor/index_lookup_table/index_lookup_table_test.cpp
 create mode 100644 eval/src/vespa/eval/tensor/dense/index_lookup_table.cpp
 create mode 100644 eval/src/vespa/eval/tensor/dense/index_lookup_table.h

diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index 7471fc51fe7..353a9b3e701 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -46,6 +46,7 @@ vespa_define_module(
     src/tests/tensor/dense_xw_product_function
     src/tests/tensor/direct_dense_tensor_builder
     src/tests/tensor/direct_sparse_tensor_builder
+    src/tests/tensor/index_lookup_table
     src/tests/tensor/tensor_add_operation
     src/tests/tensor/tensor_address
     src/tests/tensor/tensor_conformance
diff --git a/eval/src/tests/tensor/index_lookup_table/CMakeLists.txt b/eval/src/tests/tensor/index_lookup_table/CMakeLists.txt
new file mode 100644
index 00000000000..f2dc5d31045
--- /dev/null
+++ b/eval/src/tests/tensor/index_lookup_table/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_index_lookup_table_test_app TEST
+    SOURCES
+    index_lookup_table_test.cpp
+    DEPENDS
+    vespaeval
+    gtest
+)
+vespa_add_test(NAME eval_index_lookup_table_test_app COMMAND eval_index_lookup_table_test_app)
diff --git a/eval/src/tests/tensor/index_lookup_table/index_lookup_table_test.cpp b/eval/src/tests/tensor/index_lookup_table/index_lookup_table_test.cpp
new file mode 100644
index 00000000000..3f806e8252c
--- /dev/null
+++ b/eval/src/tests/tensor/index_lookup_table/index_lookup_table_test.cpp
@@ -0,0 +1,118 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/tensor/dense/index_lookup_table.h>
+#include <vespa/eval/eval/function.h>
+#include <vespa/eval/eval/value_type.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib::eval;
+using namespace vespalib::tensor;
+
+std::vector<uint32_t> make_table(std::vector<uint32_t> list) { return list; }
+
+TEST(IndexLookupTableTest, single_dimension_lookup_table_is_correct)
+{
+    auto idx_fun = Function::parse({"x"}, "5-x");
+    auto type = ValueType::from_spec("tensor(x[6])");
+    auto table = IndexLookupTable::create(*idx_fun, type);
+
+    EXPECT_EQ(IndexLookupTable::num_cached(), 1);
+    EXPECT_EQ(IndexLookupTable::count_refs(), 1);
+    EXPECT_EQ(table->get(), make_table({5,4,3,2,1,0}));
+}
+
+TEST(IndexLookupTableTest, dual_dimension_lookup_table_is_correct)
+{
+    auto idx_fun = Function::parse({"x","y"}, "5-(x*2+y)");
+    auto type = ValueType::from_spec("tensor(x[3],y[2])");
+    auto table = IndexLookupTable::create(*idx_fun, type);
+
+    EXPECT_EQ(IndexLookupTable::num_cached(), 1);
+    EXPECT_EQ(IndexLookupTable::count_refs(), 1);
+    EXPECT_EQ(table->get(), make_table({5,4,3,2,1,0}));
+}
+
+TEST(IndexLookupTableTest, multi_dimension_lookup_table_is_correct)
+{
+    auto idx_fun = Function::parse({"a","b","c","d"}, "11-(a*6+b*2+c*2+d)");
+    auto type = ValueType::from_spec("tensor(a[2],b[3],c[1],d[2])");
+    auto table = IndexLookupTable::create(*idx_fun, type);
+
+    EXPECT_EQ(IndexLookupTable::num_cached(), 1);
+    EXPECT_EQ(IndexLookupTable::count_refs(), 1);
+    EXPECT_EQ(table->get(), make_table({11,10,9,8,7,6,5,4,3,2,1,0}));
+}
+
+TEST(IndexLookupTableTest, lookup_tables_can_be_shared)
+{
+    auto idx_fun1 = Function::parse({"x"}, "5-x");
+    auto type1 = ValueType::from_spec("tensor(x[6])");
+    auto table1 = IndexLookupTable::create(*idx_fun1, type1);
+
+    auto idx_fun2 = Function::parse({"x"}, "5-x");
+    auto type2 = ValueType::from_spec("tensor(x[6])");
+    auto table2 = IndexLookupTable::create(*idx_fun2, type2);
+
+    EXPECT_EQ(IndexLookupTable::num_cached(), 1);
+    EXPECT_EQ(IndexLookupTable::count_refs(), 2);
+    EXPECT_EQ(&table1->get(), &table2->get());
+    EXPECT_EQ(table1->get(), make_table({5,4,3,2,1,0}));
+}
+
+TEST(IndexLookupTableTest, lookup_tables_with_different_index_functions_are_not_shared)
+{
+    auto idx_fun1 = Function::parse({"x"}, "5-x");
+    auto type1 = ValueType::from_spec("tensor(x[6])");
+    auto table1 = IndexLookupTable::create(*idx_fun1, type1);
+
+    auto idx_fun2 = Function::parse({"x"}, "x");
+    auto type2 = ValueType::from_spec("tensor(x[6])");
+    auto table2 = IndexLookupTable::create(*idx_fun2, type2);
+
+    EXPECT_EQ(IndexLookupTable::num_cached(), 2);
+    EXPECT_EQ(IndexLookupTable::count_refs(), 2);
+    EXPECT_NE(&table1->get(), &table2->get());
+    EXPECT_EQ(table1->get(), make_table({5,4,3,2,1,0}));
+    EXPECT_EQ(table2->get(), make_table({0,1,2,3,4,5}));
+}
+
+TEST(IndexLookupTableTest, lookup_tables_with_different_value_types_are_not_shared)
+{
+    auto idx_fun1 = Function::parse({"x"}, "x");
+    auto type1 = ValueType::from_spec("tensor(x[6])");
+    auto table1 = IndexLookupTable::create(*idx_fun1, type1);
+
+    auto idx_fun2 = Function::parse({"x"}, "x");
+    auto type2 = ValueType::from_spec("tensor(x[5])");
+    auto table2 = IndexLookupTable::create(*idx_fun2, type2);
+
+    EXPECT_EQ(IndexLookupTable::num_cached(), 2);
+    EXPECT_EQ(IndexLookupTable::count_refs(), 2);
+    EXPECT_NE(&table1->get(), &table2->get());
+    EXPECT_EQ(table1->get(), make_table({0,1,2,3,4,5}));
+    EXPECT_EQ(table2->get(), make_table({0,1,2,3,4}));
+}
+
+TEST(IndexLookupTableTest, identical_lookup_tables_might_not_be_shared)
+{
+    auto idx_fun1 = Function::parse({"x"}, "5-x");
+    auto type1 = ValueType::from_spec("tensor(x[6])");
+    auto table1 = IndexLookupTable::create(*idx_fun1, type1);
+
+    auto idx_fun2 = Function::parse({"x","y"}, "5-(x*2+y)");
+    auto type2 = ValueType::from_spec("tensor(x[3],y[2])");
+    auto table2 = IndexLookupTable::create(*idx_fun2, type2);
+
+    EXPECT_EQ(IndexLookupTable::num_cached(), 2);
+    EXPECT_EQ(IndexLookupTable::count_refs(), 2);
+    EXPECT_NE(&table1->get(), &table2->get());
+    EXPECT_EQ(table1->get(), make_table({5,4,3,2,1,0}));
+    EXPECT_EQ(table2->get(), make_table({5,4,3,2,1,0}));
+}
+
+TEST(IndexLookupTableTest, unused_lookup_tables_are_discarded) {
+    EXPECT_EQ(IndexLookupTable::num_cached(), 0);
+    EXPECT_EQ(IndexLookupTable::count_refs(), 0);
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt
index 141e5901988..b955b59cc39 100644
--- a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt
+++ b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt
@@ -24,6 +24,7 @@ vespa_add_library(eval_tensor_dense OBJECT
     dense_tensor_reduce.cpp
     dense_tensor_view.cpp
     dense_xw_product_function.cpp
+    index_lookup_table.cpp
     mutable_dense_tensor_view.cpp
     typed_cells.cpp
     typed_dense_tensor_builder.cpp
diff --git a/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp
index 0c7debde4ef..a5f532e643a 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_lambda_peek_function.cpp
@@ -1,13 +1,12 @@
 // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 
 #include "dense_lambda_peek_function.h"
+#include "index_lookup_table.h"
 #include "dense_tensor_view.h"
 #include <vespa/eval/eval/value.h>
-#include <vespa/eval/eval/llvm/compile_cache.h>
 
 namespace vespalib::tensor {
 
-using eval::CompileCache;
 using eval::Function;
 using eval::InterpretedFunction;
 using eval::PassParams;
@@ -22,35 +21,25 @@ namespace {
 
 struct Self {
     const ValueType &result_type;
-    CompileCache::Token::UP compile_token;
+    IndexLookupTable::Token::UP table_token;
     Self(const ValueType &result_type_in, const Function &function)
         : result_type(result_type_in),
-          compile_token(CompileCache::compile(function, PassParams::ARRAY)) {}
-};
-
-bool step_params(std::vector<double> &params, const ValueType &type) {
-    const auto &dims = type.dimensions();
-    for (size_t idx = params.size(); idx-- > 0; ) {
-        if (size_t(params[idx] += 1.0) < dims[idx].size) {
-            return true;
-        } else {
-            params[idx] = 0.0;
-        }
+          table_token(IndexLookupTable::create(function, result_type_in))
+    {
+        assert(table_token->get().size() == result_type.dense_subspace_size());
     }
-    return false;
-}
+};
 
 template <typename DST_CT, typename SRC_CT>
 void my_lambda_peek_op(InterpretedFunction::State &state, uint64_t param) {
     const auto *self = (const Self *)(param);
+    const std::vector<uint32_t> &lookup_table = self->table_token->get();
     auto src_cells = DenseTensorView::typify_cells<SRC_CT>(state.peek(0));
-    ArrayRef<DST_CT> dst_cells = state.stash.create_array<DST_CT>(self->result_type.dense_subspace_size());
+    ArrayRef<DST_CT> dst_cells = state.stash.create_array<DST_CT>(lookup_table.size());
     DST_CT *dst = &dst_cells[0];
-    std::vector<double> params(self->result_type.dimensions().size(), 0.0);
-    auto idx_fun = self->compile_token->get().get_function();
-    do {
-        *dst++ = src_cells[size_t(idx_fun(&params[0]))];
-    } while(step_params(params, self->result_type));
+    for (uint32_t idx: lookup_table) {
+        *dst++ = src_cells[idx];
+    }
     state.pop_push(state.stash.create<DenseTensorView>(self->result_type, TypedCells(dst_cells)));
 }
 
diff --git a/eval/src/vespa/eval/tensor/dense/index_lookup_table.cpp b/eval/src/vespa/eval/tensor/dense/index_lookup_table.cpp
new file mode 100644
index 00000000000..99a05762a34
--- /dev/null
+++ b/eval/src/vespa/eval/tensor/dense/index_lookup_table.cpp
@@ -0,0 +1,98 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "index_lookup_table.h"
+#include <vespa/eval/eval/value_type.h>
+#include <vespa/eval/eval/key_gen.h>
+#include <vespa/eval/eval/llvm/compiled_function.h>
+
+namespace vespalib::tensor {
+
+using eval::CompiledFunction;
+using eval::Function;
+using eval::PassParams;
+using eval::ValueType;
+
+namespace {
+
+bool step_params(std::vector<double> &params, const ValueType &type) {
+    const auto &dims = type.dimensions();
+    for (size_t idx = params.size(); idx-- > 0; ) {
+        if (size_t(params[idx] += 1.0) < dims[idx].size) {
+            return true;
+        } else {
+            params[idx] = 0.0;
+        }
+    }
+    return false;
+}
+
+std::vector<uint32_t> make_index_table(const Function &idx_fun, const ValueType &type) {
+    std::vector<uint32_t> result;
+    result.reserve(type.dense_subspace_size());
+    std::vector<double> params(type.dimensions().size(), 0.0);
+    CompiledFunction cfun(idx_fun, PassParams::ARRAY);
+    auto fun = cfun.get_function();
+    do {
+        result.push_back(uint32_t(fun(&params[0])));
+    } while(step_params(params, type));
+    assert(result.size() == type.dense_subspace_size());
+    return result;
+}
+
+}
+
+std::mutex IndexLookupTable::_lock{};
+IndexLookupTable::Map IndexLookupTable::_cached{};
+
+size_t
+IndexLookupTable::num_cached()
+{
+    std::lock_guard<std::mutex> guard(_lock);
+    return _cached.size();
+}
+
+size_t
+IndexLookupTable::count_refs()
+{
+    std::lock_guard<std::mutex> guard(_lock);
+    size_t refs = 0;
+    for (const auto &entry: _cached) {
+        refs += entry.second.num_refs;
+    }
+    return refs;
+}
+
+IndexLookupTable::Token::UP
+IndexLookupTable::create(const Function &idx_fun, const ValueType &type)
+{
+    assert(type.is_dense());
+    assert(idx_fun.num_params() == type.dimensions().size());
+    assert(!CompiledFunction::detect_issues(idx_fun));
+    auto key = type.to_spec() + gen_key(idx_fun, PassParams::ARRAY);
+    {
+        std::lock_guard<std::mutex> guard(_lock);
+        auto pos = _cached.find(key);
+        if (pos != _cached.end()) {
+            ++(pos->second.num_refs);
+            return std::make_unique<Token>(pos, Token::ctor_tag());
+        }
+    }
+    // avoid holding the lock while making the index table
+    auto table = make_index_table(idx_fun, type);
+    {
+        std::lock_guard<std::mutex> guard(_lock);
+        auto pos = _cached.find(key);
+        if (pos != _cached.end()) {
+            ++(pos->second.num_refs);
+            // in case of a race; return the same table for all callers
+            return std::make_unique<Token>(pos, Token::ctor_tag());
+        } else {
+            auto res = _cached.emplace(std::move(key), Value::ctor_tag());
+            assert(res.second);
+            res.first->second.data = std::move(table);
+            return std::make_unique<Token>(res.first, Token::ctor_tag());
+        }
+    }
+}
+
+}
diff --git a/eval/src/vespa/eval/tensor/dense/index_lookup_table.h b/eval/src/vespa/eval/tensor/dense/index_lookup_table.h
new file mode 100644
index 00000000000..ee5776ef5ca
--- /dev/null
+++ b/eval/src/vespa/eval/tensor/dense/index_lookup_table.h
@@ -0,0 +1,69 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <mutex>
+#include <vector>
+#include <map>
+
+namespace vespalib::eval {
+class ValueType;
+class Function;
+}
+
+namespace vespalib::tensor {
+
+/**
+ * Pre-computed index tables used by DenseLambdaPeekFunction. The
+ * underlying index tables are shared between operations.
+ **/
+class IndexLookupTable
+{
+private:
+    using Key = vespalib::string;
+    struct Value {
+        size_t num_refs;
+        std::vector<uint32_t> data;
+        struct ctor_tag {};
+        Value(ctor_tag) : num_refs(1), data() {}
+    };
+    using Map = std::map<Key,Value>;
+    static std::mutex _lock;
+    static Map _cached;
+
+    static void release(Map::iterator entry) {
+        std::lock_guard<std::mutex> guard(_lock);
+        if (--(entry->second.num_refs) == 0) {
+            _cached.erase(entry);
+        }
+    }
+
+public:
+    /**
+     * A token represents shared ownership of a cached index lookup
+     * table.
+     **/
+    class Token
+    {
+    private:
+        friend class IndexLookupTable;
+        struct ctor_tag {};
+        IndexLookupTable::Map::iterator _entry;
+    public:
+        Token(Token &&) = delete;
+        Token(const Token &) = delete;
+        Token &operator=(Token &&) = delete;
+        Token &operator=(const Token &) = delete;
+        using UP = std::unique_ptr<Token>;
+        Token(IndexLookupTable::Map::iterator entry, ctor_tag) : _entry(entry) {}
+        const std::vector<uint32_t> &get() const { return _entry->second.data; }
+        ~Token() { IndexLookupTable::release(_entry); }
+    };
+    IndexLookupTable() = delete;
+    static size_t num_cached();
+    static size_t count_refs();
+    static Token::UP create(const eval::Function &idx_fun, const eval::ValueType &type);
+};
+
+}
-- 
cgit v1.2.3