author     Håvard Pettersen <havardpe@oath.com>    2021-06-21 13:46:26 +0000
committer  Håvard Pettersen <havardpe@oath.com>    2021-06-28 17:57:57 +0000
commit     74f8c30232f86a5ff9ef70748258e6f8a39238b8 (patch)
tree       c024704e927b3bce036d328b46d5b74f3e0ee27a /eval/src
parent     9b7582100d7752185dd94a1dceea5b625c26044c (diff)
unpack bits function
Diffstat (limited to 'eval/src')
6 files changed, 271 insertions, 1 deletions
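The new optimization recognizes a tensor lambda that unpacks the bits of an int8 tensor and replaces it with a single dedicated instruction. The core semantics (bytes visited in increasing index order, bits within a byte emitted from most to least significant) can be sketched in plain C++ roughly as follows; this is a minimal illustration of the same loop used in my_unpack_bits_op below, not the Vespa code itself, and the helper name unpack_bits is made up:

    #include <cstdint>
    #include <vector>

    // Unpack each byte into 8 values (0 or 1), most significant bit first.
    std::vector<int8_t> unpack_bits(const std::vector<int8_t> &packed) {
        std::vector<int8_t> unpacked;
        unpacked.reserve(packed.size() * 8);
        for (int8_t byte : packed) {
            for (int n = 7; n >= 0; --n) {
                unpacked.push_back((uint8_t(byte) >> n) & 1);
            }
        }
        return unpacked;
    }

For example, unpack_bits({-128}) yields {1,0,0,0,0,0,0,0}, since -128 is 0b10000000 as an int8.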
diff --git a/eval/src/tests/instruction/unpack_bits_function/CMakeLists.txt b/eval/src/tests/instruction/unpack_bits_function/CMakeLists.txt
new file mode 100644
index 00000000000..9416ce1fa4c
--- /dev/null
+++ b/eval/src/tests/instruction/unpack_bits_function/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_unpack_bits_function_test_app TEST
+    SOURCES
+    unpack_bits_function_test.cpp
+    DEPENDS
+    vespaeval
+    GTest::GTest
+)
+vespa_add_test(NAME eval_unpack_bits_function_test_app COMMAND eval_unpack_bits_function_test_app)
diff --git a/eval/src/tests/instruction/unpack_bits_function/unpack_bits_function_test.cpp b/eval/src/tests/instruction/unpack_bits_function/unpack_bits_function_test.cpp
new file mode 100644
index 00000000000..8250893225a
--- /dev/null
+++ b/eval/src/tests/instruction/unpack_bits_function/unpack_bits_function_test.cpp
@@ -0,0 +1,87 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/instruction/unpack_bits_function.h>
+#include <vespa/eval/eval/test/eval_fixture.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get();
+const ValueBuilderFactory &test_factory = SimpleValueBuilderFactory::get();
+
+//-----------------------------------------------------------------------------
+
+auto my_seq = Seq({-128, -43, 85, 127});
+
+EvalFixture::ParamRepo make_params() {
+    return EvalFixture::ParamRepo()
+        .add("full", GenSpec(-128).idx("x", 256).cells(CellType::INT8))
+        .add("vx8", GenSpec().seq(my_seq).idx("x", 8).cells(CellType::INT8))
+        .add("vy8", GenSpec().seq(my_seq).idx("y", 8).cells(CellType::INT8))
+        .add("vxf", GenSpec().seq(my_seq).idx("x", 8).cells(CellType::FLOAT));
+}
+EvalFixture::ParamRepo param_repo = make_params();
+
+void assert_optimized(const vespalib::string &expr) {
+    EvalFixture fast_fixture(prod_factory, expr, param_repo, true);
+    EvalFixture test_fixture(test_factory, expr, param_repo, true);
+    EvalFixture slow_fixture(prod_factory, expr, param_repo, false);
+    auto expect = EvalFixture::ref(expr, param_repo);
+    EXPECT_EQ(fast_fixture.result(), expect);
+    EXPECT_EQ(test_fixture.result(), expect);
+    EXPECT_EQ(slow_fixture.result(), expect);
+    EXPECT_EQ(fast_fixture.find_all<UnpackBitsFunction>().size(), 1u);
+    EXPECT_EQ(test_fixture.find_all<UnpackBitsFunction>().size(), 1u);
+    EXPECT_EQ(slow_fixture.find_all<UnpackBitsFunction>().size(), 0u);
+}
+
+void assert_not_optimized(const vespalib::string &expr) {
+    EvalFixture fast_fixture(prod_factory, expr, param_repo, true);
+    EXPECT_EQ(fast_fixture.result(), EvalFixture::ref(expr, param_repo));
+    EXPECT_EQ(fast_fixture.find_all<UnpackBitsFunction>().size(), 0u);
+}
+
+//-----------------------------------------------------------------------------
+
+TEST(UnpackBitsTest, expression_can_be_optimized) {
+    assert_optimized("tensor<int8>(x[2048])(bit(full{x:(x/8)},7-x%8))");
+    assert_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7-x%8))");
+}
+
+TEST(UnpackBitsTest, unpack_bits_can_rename_dimension) {
+    assert_optimized("tensor<int8>(x[64])(bit(vy8{y:(x/8)},7-x%8))");
+}
+
+//-----------------------------------------------------------------------------
+
+TEST(UnpackBitsTest, dimension_sizes_must_be_appropriate) {
+    assert_not_optimized("tensor<int8>(x[60])(bit(vx8{x:(x/8)},7-x%8))");
+    assert_not_optimized("tensor<int8>(x[68])(bit(vx8{x:(x/8)},7-x%8))");
+}
+
+TEST(UnpackBitsTest, source_must_be_int8) {
+    assert_not_optimized("tensor<int8>(x[64])(bit(vxf{x:(x/8)},7-x%8))");
+}
+
+TEST(UnpackBitsTest, result_must_be_int8) {
+    assert_not_optimized("tensor<float>(x[64])(bit(vx8{x:(x/8)},7-x%8))");
+}
+
+TEST(UnpackBitsTest, similar_expressions_are_not_optimized) {
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7-x%7))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7-x%9))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/7)},7-x%8))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/9)},7-x%8))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},x%8-7))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(8/x)},7-x%8))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7+x%8))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x*8)},7-x%8))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},(7-x)%8))");
+}
+
+//-----------------------------------------------------------------------------
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
index 51cf58f3a12..64acbceff04 100644
--- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
@@ -19,6 +19,7 @@
 #include <vespa/eval/instruction/dense_single_reduce_function.h>
 #include <vespa/eval/instruction/remove_trivial_dimension_optimizer.h>
 #include <vespa/eval/instruction/dense_lambda_peek_optimizer.h>
+#include <vespa/eval/instruction/unpack_bits_function.h>
 #include <vespa/eval/instruction/dense_simple_expand_function.h>
 #include <vespa/eval/instruction/mixed_simple_join_function.h>
 #include <vespa/eval/instruction/join_with_number_function.h>
@@ -69,6 +70,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const Te
             child.set(DenseTensorCreateFunction::optimize(child.get(), stash));
             child.set(DenseTensorPeekFunction::optimize(child.get(), stash));
             child.set(DenseLambdaPeekOptimizer::optimize(child.get(), stash));
+            child.set(UnpackBitsFunction::optimize(child.get(), stash));
             child.set(FastRenameOptimizer::optimize(child.get(), stash));
             child.set(PowAsMapOptimizer::optimize(child.get(), stash));
             child.set(InplaceMapFunction::optimize(child.get(), stash));
diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt
index 5dc5a0db171..88e3272bb7c 100644
--- a/eval/src/vespa/eval/instruction/CMakeLists.txt
+++ b/eval/src/vespa/eval/instruction/CMakeLists.txt
@@ -26,9 +26,9 @@ vespa_add_library(eval_instruction OBJECT
     generic_reduce.cpp
     generic_rename.cpp
     index_lookup_table.cpp
+    inplace_map_function.cpp
     join_with_number_function.cpp
     mixed_inner_product_function.cpp
-    inplace_map_function.cpp
     mixed_simple_join_function.cpp
     pow_as_map_optimizer.cpp
     remove_trivial_dimension_optimizer.cpp
@@ -38,5 +38,6 @@ vespa_add_library(eval_instruction OBJECT
     sparse_merge_function.cpp
     sparse_no_overlap_join_function.cpp
     sum_max_dot_product_function.cpp
+    unpack_bits_function.cpp
     vector_from_doubles_function.cpp
 )
diff --git a/eval/src/vespa/eval/instruction/unpack_bits_function.cpp b/eval/src/vespa/eval/instruction/unpack_bits_function.cpp
new file mode 100644
index 00000000000..d77ead79a37
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/unpack_bits_function.cpp
@@ -0,0 +1,134 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "unpack_bits_function.h"
+#include <vespa/eval/eval/operation.h>
+#include <vespa/eval/eval/value.h>
+#include <vespa/eval/eval/node_tools.h>
+#include <vespa/eval/eval/basic_nodes.h>
+#include <vespa/eval/eval/operator_nodes.h>
+#include <vespa/eval/eval/call_nodes.h>
+#include <vespa/eval/eval/tensor_nodes.h>
+#include <vespa/eval/eval/wrap_param.h>
+
+namespace vespalib::eval {
+
+using namespace vespalib::eval::nodes;
+using tensor_function::Lambda;
+using tensor_function::wrap_param;
+using tensor_function::unwrap_param;
+using tensor_function::inject;
+
+namespace {
+
+void my_unpack_bits_op(InterpretedFunction::State &state, uint64_t param) {
+    const ValueType &res_type = unwrap_param<ValueType>(param);
+    auto packed_cells = state.peek(0).cells().typify<Int8Float>();
+    auto unpacked_cells = state.stash.create_uninitialized_array<Int8Float>(packed_cells.size() * 8);
+    int8_t *dst = reinterpret_cast<int8_t*>(unpacked_cells.begin());
+    for (Int8Float cell: packed_cells) {
+        for (int n = 7; n >= 0; --n) {
+            *dst++ = bool(cell.get_bits() & (1 << n));
+        }
+    }
+    Value &result_ref = state.stash.create<DenseValueView>(res_type, TypedCells(unpacked_cells));
+    state.pop_push(result_ref);
+}
+
+bool valid_lambda_params(const Lambda &lambda) {
+    return ((lambda.lambda().num_params() == 2) &&
+            (lambda.bindings().size() == 1));
+}
+
+bool valid_type(const ValueType &type) {
+    return ((type.is_dense()) &&
+            (type.dimensions().size() == 1) &&
+            (type.cell_type() == CellType::INT8));
+}
+
+bool compatible_types(const ValueType &packed, const ValueType &unpacked) {
+    return (valid_type(packed) && valid_type(unpacked) &&
+            (unpacked.dimensions()[0].size == (packed.dimensions()[0].size * 8)));
+}
+
+bool is_bit_expr(const Node &node) {
+    // '7-(x%8)'
+    if (auto sub = as<Sub>(node)) {
+        if (auto seven = as<Number>(sub->lhs())) {
+            if (auto mod = as<Mod>(sub->rhs())) {
+                if (auto param = as<Symbol>(mod->lhs())) {
+                    if (auto eight = as<Number>(mod->rhs())) {
+                        return ((seven->value() == 7.0) &&
+                                (eight->value() == 8.0) &&
+                                (param->id() == 0));
+                    }
+                }
+            }
+        }
+    }
+    return false;
+}
+
+bool is_byte_expr(const Node &node) {
+    // 'x/8'
+    if (auto div = as<Div>(node)) {
+        if (auto param = as<Symbol>(div->lhs())) {
+            if (auto eight = as<Number>(div->rhs())) {
+                return ((eight->value() == 8.0) &&
+                        (param->id() == 0));
+            }
+        }
+    }
+    return false;
+}
+
+bool is_byte_peek(const TensorPeek &peek) {
+    if (auto param = as<Symbol>(peek.param())) {
+        if ((param->id() == 1) &&
+            (peek.dim_list().size() == 1) &&
+            (peek.num_children() == 2))
+        {
+            return is_byte_expr(peek.get_child(1));
+        }
+    }
+    return false;
+}
+
+} // namespace <unnamed>
+
+UnpackBitsFunction::UnpackBitsFunction(const ValueType &res_type_in,
+                                       const TensorFunction &packed)
+    : Op1(res_type_in, packed)
+{
+}
+
+InterpretedFunction::Instruction
+UnpackBitsFunction::compile_self(const ValueBuilderFactory &, Stash &) const
+{
+    return InterpretedFunction::Instruction(my_unpack_bits_op, wrap_param<ValueType>(result_type()));
+}
+
+const TensorFunction &
+UnpackBitsFunction::optimize(const TensorFunction &expr, Stash &stash)
+{
+    if (auto lambda = as<Lambda>(expr)) {
+        // 'tensor<int8>(x[64])(bit(packed{x:(x/8)},7-(x%8)))'
+        const ValueType &dst_type = lambda->result_type();
+        if (auto bit = as<Bit>(lambda->lambda().root())) {
+            if (auto peek = as<TensorPeek>(bit->get_child(0))) {
+                const ValueType &src_type = lambda->types().get_type(peek->param());
+                if (valid_lambda_params(*lambda) &&
+                    compatible_types(src_type, dst_type) &&
+                    is_bit_expr(bit->get_child(1)) &&
+                    is_byte_peek(*peek))
+                {
+                    size_t param_idx = lambda->bindings()[0];
+                    const auto &packed_param = inject(src_type, param_idx, stash);
+                    return stash.create<UnpackBitsFunction>(dst_type, packed_param);
+                }
+            }
+        }
+    }
+    return expr;
+}
+
+} // namespace
diff --git a/eval/src/vespa/eval/instruction/unpack_bits_function.h b/eval/src/vespa/eval/instruction/unpack_bits_function.h
new file mode 100644
index 00000000000..5e24746508d
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/unpack_bits_function.h
@@ -0,0 +1,37 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_function.h>
+
+namespace vespalib::eval {
+
+/**
+ * Tensor function unpacking bits into separate values.
+ *
+ * Both the tensor containing the packed bits and the result tensor
+ * must have cell type 'int8'. The bits must be unpacked in canonical
+ * order; bytes are unpacked with increasing index, bits within a byte
+ * are unpacked from most to least significant.
+ *
+ * The baseline expression looks like this:
+ *
+ *   tensor<int8>(x[64])(bit(packed{x:(x/8)},7-(x%8)))
+ *
+ * in this case 'packed' must be a tensor with type
+ * 'tensor<int8>(x[8])' (the inner result dimension is always 8 times
+ * larger than the inner input dimension).
+ *
+ * Unpacking of bits from multi-dimensional tensors will currently not
+ * be optimized.
+ **/
+class UnpackBitsFunction : public tensor_function::Op1
+{
+public:
+    UnpackBitsFunction(const ValueType &res_type_in, const TensorFunction &packed);
+    InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override;
+    bool result_is_mutable() const override { return true; }
+    static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash);
+};
+
+} // namespace
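As a worked example of the canonical order described in the header comment: result cell x reads bit (7 - x%8) of input byte x/8, so result cell 10 is bit 5 of byte 1. For a hypothetical parameter 'packed' of type tensor<int8>(x[2]) holding the cells [-128, 3] (values chosen only for illustration), the baseline expression, sized to match, would give:

    tensor<int8>(x[16])(bit(packed{x:(x/8)},7-(x%8)))
        -> [1,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,1]

since -128 is 0b10000000 and 3 is 0b00000011, with each byte expanded most significant bit first.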