author     Håvard Pettersen <havardpe@oath.com>    2021-06-21 13:46:26 +0000
committer  Håvard Pettersen <havardpe@oath.com>    2021-06-28 17:57:57 +0000
commit     74f8c30232f86a5ff9ef70748258e6f8a39238b8 (patch)
tree       c024704e927b3bce036d328b46d5b74f3e0ee27a /eval/src
parent     9b7582100d7752185dd94a1dceea5b625c26044c (diff)
unpack bits function
Diffstat (limited to 'eval/src')
6 files changed, 271 insertions, 1 deletions
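The new optimization recognizes a tensor lambda that unpacks the bits of an int8 tensor and replaces it with a single dedicated instruction. The core semantics (bytes visited in increasing index order, bits within a byte emitted from most to least significant) can be sketched in plain C++ roughly as follows; this is a minimal illustration of the same loop used in my_unpack_bits_op below, not the Vespa code itself, and the helper name unpack_bits is made up:

    #include <cstdint>
    #include <vector>

    // Unpack each byte into 8 values (0 or 1), most significant bit first.
    std::vector<int8_t> unpack_bits(const std::vector<int8_t> &packed) {
        std::vector<int8_t> unpacked;
        unpacked.reserve(packed.size() * 8);
        for (int8_t byte : packed) {
            for (int n = 7; n >= 0; --n) {
                unpacked.push_back((uint8_t(byte) >> n) & 1);
            }
        }
        return unpacked;
    }

For example, unpack_bits({-128}) yields {1,0,0,0,0,0,0,0}, since -128 is 0b10000000 as an int8.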
diff --git a/eval/src/tests/instruction/unpack_bits_function/CMakeLists.txt b/eval/src/tests/instruction/unpack_bits_function/CMakeLists.txt
new file mode 100644
index 00000000000..9416ce1fa4c
--- /dev/null
+++ b/eval/src/tests/instruction/unpack_bits_function/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_unpack_bits_function_test_app TEST
+    SOURCES
+    unpack_bits_function_test.cpp
+    DEPENDS
+    vespaeval
+    GTest::GTest
+)
+vespa_add_test(NAME eval_unpack_bits_function_test_app COMMAND eval_unpack_bits_function_test_app)
diff --git a/eval/src/tests/instruction/unpack_bits_function/unpack_bits_function_test.cpp b/eval/src/tests/instruction/unpack_bits_function/unpack_bits_function_test.cpp
new file mode 100644
index 00000000000..8250893225a
--- /dev/null
+++ b/eval/src/tests/instruction/unpack_bits_function/unpack_bits_function_test.cpp
@@ -0,0 +1,87 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/instruction/unpack_bits_function.h>
+#include <vespa/eval/eval/test/eval_fixture.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+const ValueBuilderFactory &prod_factory = FastValueBuilderFactory::get();
+const ValueBuilderFactory &test_factory = SimpleValueBuilderFactory::get();
+
+//-----------------------------------------------------------------------------
+
+auto my_seq = Seq({-128, -43, 85, 127});
+
+EvalFixture::ParamRepo make_params() {
+    return EvalFixture::ParamRepo()
+        .add("full", GenSpec(-128).idx("x", 256).cells(CellType::INT8))
+        .add("vx8", GenSpec().seq(my_seq).idx("x", 8).cells(CellType::INT8))
+        .add("vy8", GenSpec().seq(my_seq).idx("y", 8).cells(CellType::INT8))
+        .add("vxf", GenSpec().seq(my_seq).idx("x", 8).cells(CellType::FLOAT));
+}
+EvalFixture::ParamRepo param_repo = make_params();
+
+void assert_optimized(const vespalib::string &expr) {
+    EvalFixture fast_fixture(prod_factory, expr, param_repo, true);
+    EvalFixture test_fixture(test_factory, expr, param_repo, true);
+    EvalFixture slow_fixture(prod_factory, expr, param_repo, false);
+    auto expect = EvalFixture::ref(expr, param_repo);
+    EXPECT_EQ(fast_fixture.result(), expect);
+    EXPECT_EQ(test_fixture.result(), expect);
+    EXPECT_EQ(slow_fixture.result(), expect);
+    EXPECT_EQ(fast_fixture.find_all<UnpackBitsFunction>().size(), 1u);
+    EXPECT_EQ(test_fixture.find_all<UnpackBitsFunction>().size(), 1u);
+    EXPECT_EQ(slow_fixture.find_all<UnpackBitsFunction>().size(), 0u);
+}
+
+void assert_not_optimized(const vespalib::string &expr) {
+    EvalFixture fast_fixture(prod_factory, expr, param_repo, true);
+    EXPECT_EQ(fast_fixture.result(), EvalFixture::ref(expr, param_repo));
+    EXPECT_EQ(fast_fixture.find_all<UnpackBitsFunction>().size(), 0u);
+}
+
+//-----------------------------------------------------------------------------
+
+TEST(UnpackBitsTest, expression_can_be_optimized) {
+    assert_optimized("tensor<int8>(x[2048])(bit(full{x:(x/8)},7-x%8))");
+    assert_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7-x%8))");
+}
+
+TEST(UnpackBitsTest, unpack_bits_can_rename_dimension) {
+    assert_optimized("tensor<int8>(x[64])(bit(vy8{y:(x/8)},7-x%8))");
+}
+
+//-----------------------------------------------------------------------------
+
+TEST(UnpackBitsTest, dimension_sizes_must_be_appropriate) {
+    assert_not_optimized("tensor<int8>(x[60])(bit(vx8{x:(x/8)},7-x%8))");
+    assert_not_optimized("tensor<int8>(x[68])(bit(vx8{x:(x/8)},7-x%8))");
+}
+
+TEST(UnpackBitsTest, source_must_be_int8) {
+    assert_not_optimized("tensor<int8>(x[64])(bit(vxf{x:(x/8)},7-x%8))");
+}
+
+TEST(UnpackBitsTest, result_must_be_int8) {
+    assert_not_optimized("tensor<float>(x[64])(bit(vx8{x:(x/8)},7-x%8))");
+}
+
+TEST(UnpackBitsTest, similar_expressions_are_not_optimized) {
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7-x%7))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7-x%9))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/7)},7-x%8))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/9)},7-x%8))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},x%8-7))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(8/x)},7-x%8))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7+x%8))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x*8)},7-x%8))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},(7-x)%8))");
+}
+
+//-----------------------------------------------------------------------------
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
index 51cf58f3a12..64acbceff04 100644
--- a/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/optimize_tensor_function.cpp
@@ -19,6 +19,7 @@
 #include <vespa/eval/instruction/dense_single_reduce_function.h>
 #include <vespa/eval/instruction/remove_trivial_dimension_optimizer.h>
 #include <vespa/eval/instruction/dense_lambda_peek_optimizer.h>
+#include <vespa/eval/instruction/unpack_bits_function.h>
 #include <vespa/eval/instruction/dense_simple_expand_function.h>
 #include <vespa/eval/instruction/mixed_simple_join_function.h>
 #include <vespa/eval/instruction/join_with_number_function.h>
@@ -69,6 +70,7 @@ const TensorFunction &optimize_for_factory(const ValueBuilderFactory &, const Te
             child.set(DenseTensorCreateFunction::optimize(child.get(), stash));
             child.set(DenseTensorPeekFunction::optimize(child.get(), stash));
             child.set(DenseLambdaPeekOptimizer::optimize(child.get(), stash));
+            child.set(UnpackBitsFunction::optimize(child.get(), stash));
             child.set(FastRenameOptimizer::optimize(child.get(), stash));
             child.set(PowAsMapOptimizer::optimize(child.get(), stash));
             child.set(InplaceMapFunction::optimize(child.get(), stash));
diff --git a/eval/src/vespa/eval/instruction/CMakeLists.txt b/eval/src/vespa/eval/instruction/CMakeLists.txt
index 5dc5a0db171..88e3272bb7c 100644
--- a/eval/src/vespa/eval/instruction/CMakeLists.txt
+++ b/eval/src/vespa/eval/instruction/CMakeLists.txt
@@ -26,9 +26,9 @@ vespa_add_library(eval_instruction OBJECT
     generic_reduce.cpp
     generic_rename.cpp
     index_lookup_table.cpp
+    inplace_map_function.cpp
     join_with_number_function.cpp
     mixed_inner_product_function.cpp
-    inplace_map_function.cpp
     mixed_simple_join_function.cpp
     pow_as_map_optimizer.cpp
     remove_trivial_dimension_optimizer.cpp
@@ -38,5 +38,6 @@ vespa_add_library(eval_instruction OBJECT
     sparse_merge_function.cpp
     sparse_no_overlap_join_function.cpp
     sum_max_dot_product_function.cpp
+    unpack_bits_function.cpp
     vector_from_doubles_function.cpp
 )
diff --git a/eval/src/vespa/eval/instruction/unpack_bits_function.cpp b/eval/src/vespa/eval/instruction/unpack_bits_function.cpp
new file mode 100644
index 00000000000..d77ead79a37
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/unpack_bits_function.cpp
@@ -0,0 +1,134 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "unpack_bits_function.h"
+#include <vespa/eval/eval/operation.h>
+#include <vespa/eval/eval/value.h>
+#include <vespa/eval/eval/node_tools.h>
+#include <vespa/eval/eval/basic_nodes.h>
+#include <vespa/eval/eval/operator_nodes.h>
+#include <vespa/eval/eval/call_nodes.h>
+#include <vespa/eval/eval/tensor_nodes.h>
+#include <vespa/eval/eval/wrap_param.h>
+
+namespace vespalib::eval {
+
+using namespace vespalib::eval::nodes;
+using tensor_function::Lambda;
+using tensor_function::wrap_param;
+using tensor_function::unwrap_param;
+using tensor_function::inject;
+
+namespace {
+
+void my_unpack_bits_op(InterpretedFunction::State &state, uint64_t param) {
+    const ValueType &res_type = unwrap_param<ValueType>(param);
+    auto packed_cells = state.peek(0).cells().typify<Int8Float>();
+    auto unpacked_cells = state.stash.create_uninitialized_array<Int8Float>(packed_cells.size() * 8);
+    int8_t *dst = reinterpret_cast<int8_t*>(unpacked_cells.begin());
+    for (Int8Float cell: packed_cells) {
+        for (int n = 7; n >= 0; --n) {
+            *dst++ = bool(cell.get_bits() & (1 << n));
+        }
+    }
+    Value &result_ref = state.stash.create<DenseValueView>(res_type, TypedCells(unpacked_cells));
+    state.pop_push(result_ref);
+}
+
+bool valid_lambda_params(const Lambda &lambda) {
+    return ((lambda.lambda().num_params() == 2) &&
+            (lambda.bindings().size() == 1));
+}
+
+bool valid_type(const ValueType &type) {
+    return ((type.is_dense()) &&
+            (type.dimensions().size() == 1) &&
+            (type.cell_type() == CellType::INT8));
+}
+
+bool compatible_types(const ValueType &packed, const ValueType &unpacked) {
+    return (valid_type(packed) && valid_type(unpacked) &&
+            (unpacked.dimensions()[0].size == (packed.dimensions()[0].size * 8)));
+}
+
+bool is_bit_expr(const Node &node) {
+    // '7-(x%8)'
+    if (auto sub = as<Sub>(node)) {
+        if (auto seven = as<Number>(sub->lhs())) {
+            if (auto mod = as<Mod>(sub->rhs())) {
+                if (auto param = as<Symbol>(mod->lhs())) {
+                    if (auto eight = as<Number>(mod->rhs())) {
+                        return ((seven->value() == 7.0) &&
+                                (eight->value() == 8.0) &&
+                                (param->id() == 0));
+                    }
+                }
+            }
+        }
+    }
+    return false;
+}
+
+bool is_byte_expr(const Node &node) {
+    // 'x/8'
+    if (auto div = as<Div>(node)) {
+        if (auto param = as<Symbol>(div->lhs())) {
+            if (auto eight = as<Number>(div->rhs())) {
+                return ((eight->value() == 8.0) &&
+                        (param->id() == 0));
+            }
+        }
+    }
+    return false;
+}
+
+bool is_byte_peek(const TensorPeek &peek) {
+    if (auto param = as<Symbol>(peek.param())) {
+        if ((param->id() == 1) &&
+            (peek.dim_list().size() == 1) &&
+            (peek.num_children() == 2))
+        {
+            return is_byte_expr(peek.get_child(1));
+        }
+    }
+    return false;
+}
+
+} // namespace <unnamed>
+
+UnpackBitsFunction::UnpackBitsFunction(const ValueType &res_type_in,
+                                       const TensorFunction &packed)
+    : Op1(res_type_in, packed)
+{
+}
+
+InterpretedFunction::Instruction
+UnpackBitsFunction::compile_self(const ValueBuilderFactory &, Stash &) const
+{
+    return InterpretedFunction::Instruction(my_unpack_bits_op, wrap_param<ValueType>(result_type()));
+}
+
+const TensorFunction &
+UnpackBitsFunction::optimize(const TensorFunction &expr, Stash &stash)
+{
+    if (auto lambda = as<Lambda>(expr)) {
+        // 'tensor<int8>(x[64])(bit(packed{x:(x/8)},7-(x%8)))'
+        const ValueType &dst_type = lambda->result_type();
+        if (auto bit = as<Bit>(lambda->lambda().root())) {
+            if (auto peek = as<TensorPeek>(bit->get_child(0))) {
+                const ValueType &src_type = lambda->types().get_type(peek->param());
+                if (valid_lambda_params(*lambda) &&
+                    compatible_types(src_type, dst_type) &&
+                    is_bit_expr(bit->get_child(1)) &&
+                    is_byte_peek(*peek))
+                {
+                    size_t param_idx = lambda->bindings()[0];
+                    const auto &packed_param = inject(src_type, param_idx, stash);
+                    return stash.create<UnpackBitsFunction>(dst_type, packed_param);
+                }
+            }
+        }
+    }
+    return expr;
+}
+
+} // namespace
diff --git a/eval/src/vespa/eval/instruction/unpack_bits_function.h b/eval/src/vespa/eval/instruction/unpack_bits_function.h
new file mode 100644
index 00000000000..5e24746508d
--- /dev/null
+++ b/eval/src/vespa/eval/instruction/unpack_bits_function.h
@@ -0,0 +1,37 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_function.h>
+
+namespace vespalib::eval {
+
+/**
+ * Tensor function unpacking bits into separate values.
+ *
+ * Both the tensor containing the packed bits and the result tensor
+ * must have cell type 'int8'. The bits must be unpacked in canonical
+ * order; bytes are unpacked with increasing index, bits within a byte
+ * are unpacked from most to least significant.
+ *
+ * The baseline expression looks like this:
+ *
+ *   tensor<int8>(x[64])(bit(packed{x:(x/8)},7-(x%8)))
+ *
+ * in this case 'packed' must be a tensor with type
+ * 'tensor<int8>(x[8])' (the inner result dimension is always 8 times
+ * larger than the inner input dimension).
+ *
+ * Unpacking of bits from multi-dimensional tensors will currently not
+ * be optimized.
+ **/
+class UnpackBitsFunction : public tensor_function::Op1
+{
+public:
+    UnpackBitsFunction(const ValueType &res_type_in, const TensorFunction &packed);
+    InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override;
+    bool result_is_mutable() const override { return true; }
+    static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash);
+};
+
+} // namespace
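As a worked example of the canonical order described in the header comment: result cell x reads bit (7 - x%8) of input byte x/8, so result cell 10 is bit 5 of byte 1. For a hypothetical parameter 'packed' of type tensor<int8>(x[2]) holding the cells [-128, 3] (values chosen only for illustration), the baseline expression, sized to match, would give:

    tensor<int8>(x[16])(bit(packed{x:(x/8)},7-(x%8)))
        -> [1,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,1]

since -128 is 0b10000000 and 3 is 0b00000011, with each byte expanded most significant bit first.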