author    | Håvard Pettersen <havardpe@oath.com> | 2021-06-29 10:30:38 +0000
committer | Håvard Pettersen <havardpe@oath.com> | 2021-06-29 10:55:10 +0000
commit    | 77b57d25819751b082cfcf746537d254b08ccdfa (patch)
tree      | 1a5de60e5dd354a1cc06319ae45618126f94261c /eval/src
parent    | e928ee61e47fe9c1cd15585df6dc553eaffa4370 (diff)
optimize additional variants
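This commit extends the existing unpack_bits optimization in two directions: the 'little' bitorder expression `bit(packed{x:(x/8)},x%8)` is now recognized in addition to the 'big' bitorder expression `bit(packed{x:(x/8)},7-x%8)`, and the result tensor may now have any cell type, not just int8. A minimal standalone sketch (plain C++, not Vespa code; the byte value is made up) of how the two bit orders differ:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    uint8_t byte = 0xB1; // 0b10110001, one packed int8 cell
    printf("big:    ");
    for (int n = 7; n >= 0; --n) {   // bit index 7-x%8 -> MSB first
        printf("%d", (byte >> n) & 1);
    }
    printf("\nlittle: ");
    for (int n = 0; n <= 7; ++n) {   // bit index x%8 -> LSB first
        printf("%d", (byte >> n) & 1);
    }
    printf("\n"); // prints: big:    10110001
                  //         little: 10001101
    return 0;
}
```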
Diffstat (limited to 'eval/src')
 eval/src/tests/instruction/unpack_bits_function/unpack_bits_function_test.cpp
 eval/src/vespa/eval/instruction/unpack_bits_function.cpp
 eval/src/vespa/eval/instruction/unpack_bits_function.h
3 files changed, 97 insertions, 56 deletions
diff --git a/eval/src/tests/instruction/unpack_bits_function/unpack_bits_function_test.cpp b/eval/src/tests/instruction/unpack_bits_function/unpack_bits_function_test.cpp
index 8250893225a..c0d7cdc43e7 100644
--- a/eval/src/tests/instruction/unpack_bits_function/unpack_bits_function_test.cpp
+++ b/eval/src/tests/instruction/unpack_bits_function/unpack_bits_function_test.cpp
@@ -46,40 +46,49 @@ void assert_not_optimized(const vespalib::string &expr) {
 
 //-----------------------------------------------------------------------------
 
-TEST(UnpackBitsTest, expression_can_be_optimized) {
+TEST(UnpackBitsTest, expression_can_be_optimized_with_big_bitorder) {
     assert_optimized("tensor<int8>(x[2048])(bit(full{x:(x/8)},7-x%8))");
     assert_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7-x%8))");
 }
 
+TEST(UnpackBitsTest, expression_can_be_optimized_with_small_bitorder) {
+    assert_optimized("tensor<int8>(x[2048])(bit(full{x:(x/8)},x%8))");
+    assert_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},x%8))");
+}
+
 TEST(UnpackBitsTest, unpack_bits_can_rename_dimension) {
     assert_optimized("tensor<int8>(x[64])(bit(vy8{y:(x/8)},7-x%8))");
+    assert_optimized("tensor<int8>(x[64])(bit(vy8{y:(x/8)},x%8))");
 }
 
-//-----------------------------------------------------------------------------
+TEST(UnpackBitsTest, result_may_have_other_cell_types_than_int8) {
+    assert_optimized("tensor<bfloat16>(x[64])(bit(vx8{x:(x/8)},7-x%8))");
+    assert_optimized("tensor<float>(x[64])(bit(vx8{x:(x/8)},7-x%8))");
+    assert_optimized("tensor<double>(x[64])(bit(vx8{x:(x/8)},7-x%8))");
 
-TEST(UnpackBitsTest, dimension_sizes_must_be_appropriate) {
-    assert_not_optimized("tensor<int8>(x[60])(bit(vx8{x:(x/8)},7-x%8))");
-    assert_not_optimized("tensor<int8>(x[68])(bit(vx8{x:(x/8)},7-x%8))");
+    assert_optimized("tensor<bfloat16>(x[64])(bit(vx8{x:(x/8)},x%8))");
+    assert_optimized("tensor<float>(x[64])(bit(vx8{x:(x/8)},x%8))");
+    assert_optimized("tensor<double>(x[64])(bit(vx8{x:(x/8)},x%8))");
 }
 
+//-----------------------------------------------------------------------------
+
 TEST(UnpackBitsTest, source_must_be_int8) {
     assert_not_optimized("tensor<int8>(x[64])(bit(vxf{x:(x/8)},7-x%8))");
 }
 
-TEST(UnpackBitsTest, result_must_be_int8) {
-    assert_not_optimized("tensor<float>(x[64])(bit(vx8{x:(x/8)},7-x%8))");
+TEST(UnpackBitsTest, dimension_sizes_must_be_appropriate) {
+    assert_not_optimized("tensor<int8>(x[60])(bit(vx8{x:(x/8)},7-x%8))");
+    assert_not_optimized("tensor<int8>(x[68])(bit(vx8{x:(x/8)},7-x%8))");
 }
 
 TEST(UnpackBitsTest, similar_expressions_are_not_optimized) {
-    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7-x%7))");
-    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7-x%9))");
-    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/7)},7-x%8))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x*8)},7-x%8))");
     assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/9)},7-x%8))");
-    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},x%8-7))");
-    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(8/x)},7-x%8))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},8-x%8))");
     assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7+x%8))");
-    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x*8)},7-x%8))");
-    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},(7-x)%8))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7-x/8))");
+    assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7-x%9))");
 }
 
 //-----------------------------------------------------------------------------
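For reference, a hedged sketch of the semantics the tests above pin down: output cell x is taken from input byte x/8, selecting bit 7-x%8 in 'big' order or bit x%8 in 'little' order. The helper `unpack_bits_ref` is hypothetical, not part of the patch or the test suite:

```cpp
#include <cstdint>
#include <vector>

// Reference unpack: one output cell (0 or 1) per input bit.
std::vector<int8_t> unpack_bits_ref(const std::vector<int8_t> &packed, bool big) {
    std::vector<int8_t> out(packed.size() * 8);
    for (size_t x = 0; x < out.size(); ++x) {
        int bit = big ? 7 - int(x % 8) : int(x % 8);   // '7-x%8' vs 'x%8'
        out[x] = int8_t((uint8_t(packed[x / 8]) >> bit) & 1);
    }
    return out;
}
```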
diff --git a/eval/src/vespa/eval/instruction/unpack_bits_function.cpp b/eval/src/vespa/eval/instruction/unpack_bits_function.cpp
index d77ead79a37..330982aa4b7 100644
--- a/eval/src/vespa/eval/instruction/unpack_bits_function.cpp
+++ b/eval/src/vespa/eval/instruction/unpack_bits_function.cpp
@@ -20,49 +20,74 @@ using tensor_function::inject;
 
 namespace {
 
+//-----------------------------------------------------------------------------
+
+template <typename OCT, bool big>
 void my_unpack_bits_op(InterpretedFunction::State &state, uint64_t param) {
     const ValueType &res_type = unwrap_param<ValueType>(param);
     auto packed_cells = state.peek(0).cells().typify<Int8Float>();
-    auto unpacked_cells = state.stash.create_uninitialized_array<Int8Float>(packed_cells.size() * 8);
-    int8_t *dst = reinterpret_cast<int8_t*>(unpacked_cells.begin());
+    auto unpacked_cells = state.stash.create_uninitialized_array<OCT>(packed_cells.size() * 8);
+    OCT *dst = unpacked_cells.begin();
     for (Int8Float cell: packed_cells) {
-        for (int n = 7; n >= 0; --n) {
-            *dst++ = bool(cell.get_bits() & (1 << n));
+        if constexpr (big) {
+            for (int n = 7; n >= 0; --n) {
+                *dst++ = (OCT) bool(cell.get_bits() & (1 << n));
+            }
+        } else {
+            for (int n = 0; n <= 7; ++n) {
+                *dst++ = (OCT) bool(cell.get_bits() & (1 << n));
+            }
         }
     }
     Value &result_ref = state.stash.create<DenseValueView>(res_type, TypedCells(unpacked_cells));
     state.pop_push(result_ref);
 }
 
+//-----------------------------------------------------------------------------
+
+struct MyGetFun {
+    template <typename OCT, typename BIG> static auto invoke() {
+        return my_unpack_bits_op<OCT, BIG::value>;
+    }
+};
+
+using MyTypify = TypifyValue<TypifyCellType,TypifyBool>;
+
+//-----------------------------------------------------------------------------
+
 bool valid_lambda_params(const Lambda &lambda) {
     return ((lambda.lambda().num_params() == 2) &&
             (lambda.bindings().size() == 1));
 }
 
-bool valid_type(const ValueType &type) {
+bool valid_type(const ValueType &type, bool must_be_int8) {
     return ((type.is_dense()) &&
             (type.dimensions().size() == 1) &&
-            (type.cell_type() == CellType::INT8));
+            (!must_be_int8 || (type.cell_type() == CellType::INT8)));
 }
 
 bool compatible_types(const ValueType &packed, const ValueType &unpacked) {
-    return (valid_type(packed) && valid_type(unpacked) &&
+    return (valid_type(packed, true) && valid_type(unpacked, false) &&
            (unpacked.dimensions()[0].size == (packed.dimensions()[0].size * 8)));
 }
 
-bool is_bit_expr(const Node &node) {
+bool is_little_bit_expr(const Node &node) {
+    // 'x%8'
+    if (auto mod = as<Mod>(node)) {
+        if (auto param = as<Symbol>(mod->lhs())) {
+            if (auto eight = as<Number>(mod->rhs())) {
+                return ((param->id() == 0) && (eight->value() == 8.0));
+            }
+        }
+    }
+    return false;
+}
+
+bool is_big_bit_expr(const Node &node) {
     // '7-(x%8)'
     if (auto sub = as<Sub>(node)) {
         if (auto seven = as<Number>(sub->lhs())) {
-            if (auto mod = as<Mod>(sub->rhs())) {
-                if (auto param = as<Symbol>(mod->lhs())) {
-                    if (auto eight = as<Number>(mod->rhs())) {
-                        return ((seven->value() == 7.0) &&
-                                (eight->value() == 8.0) &&
-                                (param->id() == 0));
-                    }
-                }
-            }
+            return ((seven->value() == 7.0) && is_little_bit_expr(sub->rhs()));
         }
     }
     return false;
@@ -73,8 +98,7 @@ bool is_byte_expr(const Node &node) {
     if (auto div = as<Div>(node)) {
         if (auto param = as<Symbol>(div->lhs())) {
            if (auto eight = as<Number>(div->rhs())) {
-                return ((eight->value() == 8.0) &&
-                        (param->id() == 0));
+                return ((param->id() == 0) && (eight->value() == 8.0));
            }
        }
    }
@@ -93,37 +117,44 @@ bool is_byte_peek(const TensorPeek &peek) {
     return false;
 }
 
+//-----------------------------------------------------------------------------
+
 } // namespace <unnamed>
 
 UnpackBitsFunction::UnpackBitsFunction(const ValueType &res_type_in,
-                                       const TensorFunction &packed)
-  : Op1(res_type_in, packed)
+                                       const TensorFunction &packed,
+                                       bool big_bitorder)
+  : Op1(res_type_in, packed),
+    _big_bitorder(big_bitorder)
 {
 }
 
 InterpretedFunction::Instruction
 UnpackBitsFunction::compile_self(const ValueBuilderFactory &, Stash &) const
 {
-    return InterpretedFunction::Instruction(my_unpack_bits_op, wrap_param<ValueType>(result_type()));
+    const ValueType &res_type = result_type();
+    auto op = typify_invoke<2,MyTypify,MyGetFun>(res_type.cell_type(), _big_bitorder);
+    return InterpretedFunction::Instruction(op, wrap_param<ValueType>(res_type));
 }
 
 const TensorFunction &
 UnpackBitsFunction::optimize(const TensorFunction &expr, Stash &stash)
 {
     if (auto lambda = as<Lambda>(expr)) {
-        // 'tensor<int8>(x[64])(bit(packed{x:(x/8)},7-(x%8)))'
         const ValueType &dst_type = lambda->result_type();
         if (auto bit = as<Bit>(lambda->lambda().root())) {
             if (auto peek = as<TensorPeek>(bit->get_child(0))) {
                 const ValueType &src_type = lambda->types().get_type(peek->param());
-                if (valid_lambda_params(*lambda) &&
-                    compatible_types(src_type, dst_type) &&
-                    is_bit_expr(bit->get_child(1)) &&
+                if (compatible_types(src_type, dst_type) &&
+                    valid_lambda_params(*lambda) &&
                     is_byte_peek(*peek))
                {
                     size_t param_idx = lambda->bindings()[0];
-                    const auto &packed_param = inject(src_type, param_idx, stash);
-                    return stash.create<UnpackBitsFunction>(dst_type, packed_param);
+                    if (is_big_bit_expr(bit->get_child(1))) {
+                        return stash.create<UnpackBitsFunction>(dst_type, inject(src_type, param_idx, stash), true);
+                    } else if (is_little_bit_expr(bit->get_child(1))) {
+                        return stash.create<UnpackBitsFunction>(dst_type, inject(src_type, param_idx, stash), false);
+                    }
                }
            }
        }
diff --git a/eval/src/vespa/eval/instruction/unpack_bits_function.h b/eval/src/vespa/eval/instruction/unpack_bits_function.h
index 5e24746508d..5b0da84072f 100644
--- a/eval/src/vespa/eval/instruction/unpack_bits_function.h
+++ b/eval/src/vespa/eval/instruction/unpack_bits_function.h
@@ -9,26 +9,27 @@ namespace vespalib::eval {
 
 /**
  * Tensor function unpacking bits into separate values.
  *
- * Both the tensor containing the packed bits and the result tensor
- * must have cell type 'int8'. The bits must be unpacked in canonical
- * order; bytes are unpacked with increasing index, bits within a byte
- * are unpacked from most to least significant.
+ * The tensor containing the packed bits must be a vector (dense
+ * tensor with 1 dimension) with cell type 'int8'. Bytes must be
+ * processed with increasing index. Bits may be unpacked in either
+ * 'big' or 'little' order. The result must be a vector (dense tensor
+ * with 1 dimension) where the dimension is 8 times larger than the
+ * input (since there are 8 bits packed into each int8 value).
  *
- * The baseline expression looks like this:
+ * Baseline expression for 'big' bitorder (most significant bit first):
+ * (Note: this is the default order used by numpy unpack_bits)
+ * 'tensor<int8>(x[64])(bit(packed{x:(x/8)},7-(x%8)))'
  *
- * tensor<int8>(x[64])(bit(packed{x:(x/8)},7-(x%8)))
- *
- * in this case 'packed' must be a tensor with type
- * 'tensor<int8>(x[8])' (the inner result dimension is always 8 times
- * larger than the inner input dimension).
- *
- * Unpacking of bits from multi-dimensional tensors will currently not
- * be optimized.
+ * Baseline expression for 'little' bitorder (least significant bit first):
+ * (Note: make sure this is the actual order of your bits)
+ * 'tensor<int8>(x[64])(bit(packed{x:(x/8)},x%8))'
 **/
 class UnpackBitsFunction : public tensor_function::Op1
 {
+private:
+    bool _big_bitorder;
 public:
-    UnpackBitsFunction(const ValueType &res_type_in, const TensorFunction &packed);
+    UnpackBitsFunction(const ValueType &res_type_in, const TensorFunction &packed, bool big);
     InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override;
     bool result_is_mutable() const override { return true; }
     static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash);
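The compile_self change above selects one template instantiation of my_unpack_bits_op per (result cell type, bit order) combination at runtime via typify_invoke. A simplified standalone sketch of that dispatch pattern (illustrative names only, not the actual Vespa typify machinery):

```cpp
#include <cstdint>
#include <cstddef>

// One instantiation per (output cell type, bit order) combination.
template <typename OCT, bool big>
void unpack(const int8_t *src, OCT *dst, size_t bytes) {
    for (size_t i = 0; i < bytes; ++i) {
        if constexpr (big) {
            for (int n = 7; n >= 0; --n) { *dst++ = OCT((uint8_t(src[i]) >> n) & 1); }
        } else {
            for (int n = 0; n <= 7; ++n) { *dst++ = OCT((uint8_t(src[i]) >> n) & 1); }
        }
    }
}

using unpack_fn = void (*)(const int8_t *, float *, size_t);

// A runtime flag picks the compile-time specialized function pointer,
// mirroring how typify_invoke maps (_big_bitorder, cell_type) to an op.
unpack_fn select(bool big_bitorder) {
    return big_bitorder ? unpack<float, true> : unpack<float, false>;
}
```

The design point this illustrates: the inner bit loop is branch-free at runtime because the bit order is a template parameter, so each instruction compiled by compile_self runs a loop specialized for exactly one order and one output cell type.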