author    Håvard Pettersen <havardpe@oath.com>  2021-06-29 10:30:38 +0000
committer Håvard Pettersen <havardpe@oath.com>  2021-06-29 10:55:10 +0000
commit    77b57d25819751b082cfcf746537d254b08ccdfa (patch)
tree      1a5de60e5dd354a1cc06319ae45618126f94261c /eval/src
parent    e928ee61e47fe9c1cd15585df6dc553eaffa4370 (diff)
optimize additional variants
Diffstat (limited to 'eval/src')
-rw-r--r--  eval/src/tests/instruction/unpack_bits_function/unpack_bits_function_test.cpp | 37
-rw-r--r--  eval/src/vespa/eval/instruction/unpack_bits_function.cpp                      | 87
-rw-r--r--  eval/src/vespa/eval/instruction/unpack_bits_function.h                        | 29
3 files changed, 97 insertions(+), 56 deletions(-)
diff --git a/eval/src/tests/instruction/unpack_bits_function/unpack_bits_function_test.cpp b/eval/src/tests/instruction/unpack_bits_function/unpack_bits_function_test.cpp
index 8250893225a..c0d7cdc43e7 100644
--- a/eval/src/tests/instruction/unpack_bits_function/unpack_bits_function_test.cpp
+++ b/eval/src/tests/instruction/unpack_bits_function/unpack_bits_function_test.cpp
@@ -46,40 +46,49 @@ void assert_not_optimized(const vespalib::string &expr) {
//-----------------------------------------------------------------------------
-TEST(UnpackBitsTest, expression_can_be_optimized) {
+TEST(UnpackBitsTest, expression_can_be_optimized_with_big_bitorder) {
assert_optimized("tensor<int8>(x[2048])(bit(full{x:(x/8)},7-x%8))");
assert_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7-x%8))");
}
+TEST(UnpackBitsTest, expression_can_be_optimized_with_small_bitorder) {
+ assert_optimized("tensor<int8>(x[2048])(bit(full{x:(x/8)},x%8))");
+ assert_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},x%8))");
+}
+
TEST(UnpackBitsTest, unpack_bits_can_rename_dimension) {
assert_optimized("tensor<int8>(x[64])(bit(vy8{y:(x/8)},7-x%8))");
+ assert_optimized("tensor<int8>(x[64])(bit(vy8{y:(x/8)},x%8))");
}
-//-----------------------------------------------------------------------------
+TEST(UnpackBitsTest, result_may_have_other_cell_types_than_int8) {
+ assert_optimized("tensor<bfloat16>(x[64])(bit(vx8{x:(x/8)},7-x%8))");
+ assert_optimized("tensor<float>(x[64])(bit(vx8{x:(x/8)},7-x%8))");
+ assert_optimized("tensor<double>(x[64])(bit(vx8{x:(x/8)},7-x%8))");
-TEST(UnpackBitsTest, dimension_sizes_must_be_appropriate) {
- assert_not_optimized("tensor<int8>(x[60])(bit(vx8{x:(x/8)},7-x%8))");
- assert_not_optimized("tensor<int8>(x[68])(bit(vx8{x:(x/8)},7-x%8))");
+ assert_optimized("tensor<bfloat16>(x[64])(bit(vx8{x:(x/8)},x%8))");
+ assert_optimized("tensor<float>(x[64])(bit(vx8{x:(x/8)},x%8))");
+ assert_optimized("tensor<double>(x[64])(bit(vx8{x:(x/8)},x%8))");
}
+//-----------------------------------------------------------------------------
+
TEST(UnpackBitsTest, source_must_be_int8) {
assert_not_optimized("tensor<int8>(x[64])(bit(vxf{x:(x/8)},7-x%8))");
}
-TEST(UnpackBitsTest, result_must_be_int8) {
- assert_not_optimized("tensor<float>(x[64])(bit(vx8{x:(x/8)},7-x%8))");
+TEST(UnpackBitsTest, dimension_sizes_must_be_appropriate) {
+ assert_not_optimized("tensor<int8>(x[60])(bit(vx8{x:(x/8)},7-x%8))");
+ assert_not_optimized("tensor<int8>(x[68])(bit(vx8{x:(x/8)},7-x%8))");
}
TEST(UnpackBitsTest, similar_expressions_are_not_optimized) {
- assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7-x%7))");
- assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7-x%9))");
- assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/7)},7-x%8))");
+ assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x*8)},7-x%8))");
assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/9)},7-x%8))");
- assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},x%8-7))");
- assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(8/x)},7-x%8))");
+ assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},8-x%8))");
assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7+x%8))");
- assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x*8)},7-x%8))");
- assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},(7-x)%8))");
+ assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7-x/8))");
+ assert_not_optimized("tensor<int8>(x[64])(bit(vx8{x:(x/8)},7-x%9))");
}
//-----------------------------------------------------------------------------
diff --git a/eval/src/vespa/eval/instruction/unpack_bits_function.cpp b/eval/src/vespa/eval/instruction/unpack_bits_function.cpp
index d77ead79a37..330982aa4b7 100644
--- a/eval/src/vespa/eval/instruction/unpack_bits_function.cpp
+++ b/eval/src/vespa/eval/instruction/unpack_bits_function.cpp
@@ -20,49 +20,74 @@ using tensor_function::inject;
namespace {
+//-----------------------------------------------------------------------------
+
+template <typename OCT, bool big>
void my_unpack_bits_op(InterpretedFunction::State &state, uint64_t param) {
const ValueType &res_type = unwrap_param<ValueType>(param);
auto packed_cells = state.peek(0).cells().typify<Int8Float>();
- auto unpacked_cells = state.stash.create_uninitialized_array<Int8Float>(packed_cells.size() * 8);
- int8_t *dst = reinterpret_cast<int8_t*>(unpacked_cells.begin());
+ auto unpacked_cells = state.stash.create_uninitialized_array<OCT>(packed_cells.size() * 8);
+ OCT *dst = unpacked_cells.begin();
for (Int8Float cell: packed_cells) {
- for (int n = 7; n >= 0; --n) {
- *dst++ = bool(cell.get_bits() & (1 << n));
+ if constexpr (big) {
+ for (int n = 7; n >= 0; --n) {
+ *dst++ = (OCT) bool(cell.get_bits() & (1 << n));
+ }
+ } else {
+ for (int n = 0; n <= 7; ++n) {
+ *dst++ = (OCT) bool(cell.get_bits() & (1 << n));
+ }
}
}
Value &result_ref = state.stash.create<DenseValueView>(res_type, TypedCells(unpacked_cells));
state.pop_push(result_ref);
}
+//-----------------------------------------------------------------------------
+
+struct MyGetFun {
+ template <typename OCT, typename BIG> static auto invoke() {
+ return my_unpack_bits_op<OCT, BIG::value>;
+ }
+};
+
+using MyTypify = TypifyValue<TypifyCellType,TypifyBool>;
+
+//-----------------------------------------------------------------------------
+
bool valid_lambda_params(const Lambda &lambda) {
return ((lambda.lambda().num_params() == 2) &&
(lambda.bindings().size() == 1));
}
-bool valid_type(const ValueType &type) {
+bool valid_type(const ValueType &type, bool must_be_int8) {
return ((type.is_dense()) &&
(type.dimensions().size() == 1) &&
- (type.cell_type() == CellType::INT8));
+ (!must_be_int8 || (type.cell_type() == CellType::INT8)));
}
bool compatible_types(const ValueType &packed, const ValueType &unpacked) {
- return (valid_type(packed) && valid_type(unpacked) &&
+ return (valid_type(packed, true) && valid_type(unpacked, false) &&
(unpacked.dimensions()[0].size == (packed.dimensions()[0].size * 8)));
}
-bool is_bit_expr(const Node &node) {
+bool is_little_bit_expr(const Node &node) {
+ // 'x%8'
+ if (auto mod = as<Mod>(node)) {
+ if (auto param = as<Symbol>(mod->lhs())) {
+ if (auto eight = as<Number>(mod->rhs())) {
+ return ((param->id() == 0) && (eight->value() == 8.0));
+ }
+ }
+ }
+ return false;
+}
+
+bool is_big_bit_expr(const Node &node) {
// '7-(x%8)'
if (auto sub = as<Sub>(node)) {
if (auto seven = as<Number>(sub->lhs())) {
- if (auto mod = as<Mod>(sub->rhs())) {
- if (auto param = as<Symbol>(mod->lhs())) {
- if (auto eight = as<Number>(mod->rhs())) {
- return ((seven->value() == 7.0) &&
- (eight->value() == 8.0) &&
- (param->id() == 0));
- }
- }
- }
+ return ((seven->value() == 7.0) && is_little_bit_expr(sub->rhs()));
}
}
return false;
@@ -73,8 +98,7 @@ bool is_byte_expr(const Node &node) {
if (auto div = as<Div>(node)) {
if (auto param = as<Symbol>(div->lhs())) {
if (auto eight = as<Number>(div->rhs())) {
- return ((eight->value() == 8.0) &&
- (param->id() == 0));
+ return ((param->id() == 0) && (eight->value() == 8.0));
}
}
}
@@ -93,37 +117,44 @@ bool is_byte_peek(const TensorPeek &peek) {
return false;
}
+//-----------------------------------------------------------------------------
+
} // namespace <unnamed>
UnpackBitsFunction::UnpackBitsFunction(const ValueType &res_type_in,
- const TensorFunction &packed)
- : Op1(res_type_in, packed)
+ const TensorFunction &packed,
+ bool big_bitorder)
+ : Op1(res_type_in, packed),
+ _big_bitorder(big_bitorder)
{
}
InterpretedFunction::Instruction
UnpackBitsFunction::compile_self(const ValueBuilderFactory &, Stash &) const
{
- return InterpretedFunction::Instruction(my_unpack_bits_op, wrap_param<ValueType>(result_type()));
+ const ValueType &res_type = result_type();
+ auto op = typify_invoke<2,MyTypify,MyGetFun>(res_type.cell_type(), _big_bitorder);
+ return InterpretedFunction::Instruction(op, wrap_param<ValueType>(res_type));
}
const TensorFunction &
UnpackBitsFunction::optimize(const TensorFunction &expr, Stash &stash)
{
if (auto lambda = as<Lambda>(expr)) {
- // 'tensor<int8>(x[64])(bit(packed{x:(x/8)},7-(x%8)))'
const ValueType &dst_type = lambda->result_type();
if (auto bit = as<Bit>(lambda->lambda().root())) {
if (auto peek = as<TensorPeek>(bit->get_child(0))) {
const ValueType &src_type = lambda->types().get_type(peek->param());
- if (valid_lambda_params(*lambda) &&
- compatible_types(src_type, dst_type) &&
- is_bit_expr(bit->get_child(1)) &&
+ if (compatible_types(src_type, dst_type) &&
+ valid_lambda_params(*lambda) &&
is_byte_peek(*peek))
{
size_t param_idx = lambda->bindings()[0];
- const auto &packed_param = inject(src_type, param_idx, stash);
- return stash.create<UnpackBitsFunction>(dst_type, packed_param);
+ if (is_big_bit_expr(bit->get_child(1))) {
+ return stash.create<UnpackBitsFunction>(dst_type, inject(src_type, param_idx, stash), true);
+ } else if (is_little_bit_expr(bit->get_child(1))) {
+ return stash.create<UnpackBitsFunction>(dst_type, inject(src_type, param_idx, stash), false);
+ }
}
}
}
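
For readers unfamiliar with the two bit orders handled by the templated kernel above, here is a minimal standalone sketch of the same per-byte loop (plain C++17; unpack_byte, the float cell type, and the sample byte 0xB2 are illustrative only and not part of the Vespa code):

    #include <cstdint>
    #include <cstdio>

    // Unpack one byte into 8 separate values.
    // big = true  -> most significant bit first  (the '7-x%8' expressions)
    // big = false -> least significant bit first (the 'x%8' expressions)
    template <typename OCT, bool big>
    void unpack_byte(uint8_t byte, OCT *dst) {
        if constexpr (big) {
            for (int n = 7; n >= 0; --n) {
                *dst++ = (OCT) bool(byte & (1 << n));
            }
        } else {
            for (int n = 0; n <= 7; ++n) {
                *dst++ = (OCT) bool(byte & (1 << n));
            }
        }
    }

    int main() {
        float big_order[8];
        float little_order[8];
        unpack_byte<float, true>(0xB2, big_order);      // 0xB2 = 10110010 -> 1 0 1 1 0 0 1 0
        unpack_byte<float, false>(0xB2, little_order);  //                 -> 0 1 0 0 1 1 0 1
        for (float v: big_order) { printf("%g ", v); }
        printf("\n");
        for (float v: little_order) { printf("%g ", v); }
        printf("\n");
        return 0;
    }

The production kernel does the same thing per input cell, writing into a stash-allocated array whose element type OCT is selected at dispatch time from the result tensor's cell type.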
diff --git a/eval/src/vespa/eval/instruction/unpack_bits_function.h b/eval/src/vespa/eval/instruction/unpack_bits_function.h
index 5e24746508d..5b0da84072f 100644
--- a/eval/src/vespa/eval/instruction/unpack_bits_function.h
+++ b/eval/src/vespa/eval/instruction/unpack_bits_function.h
@@ -9,26 +9,27 @@ namespace vespalib::eval {
/**
* Tensor function unpacking bits into separate values.
*
- * Both the tensor containing the packed bits and the result tensor
- * must have cell type 'int8'. The bits must be unpacked in canonical
- * order; bytes are unpacked with increasing index, bits within a byte
- * are unpacked from most to least significant.
+ * The tensor containing the packed bits must be a vector (dense
+ * tensor with 1 dimension) with cell type 'int8'. Bytes must be
+ * processed with increasing index. Bits may be unpacked in either
+ * 'big' or 'little' order. The result must be a vector (dense tensor
+ * with 1 dimension) where the dimension is 8 times larger than the
+ * input (since there are 8 bits packed into each int8 value).
*
- * The baseline expression looks like this:
+ * Baseline expression for 'big' bitorder (most significant bit first):
+ * (Note: this is the default order used by numpy's unpackbits)
+ * 'tensor<int8>(x[64])(bit(packed{x:(x/8)},7-(x%8)))'
*
- * tensor<int8>(x[64])(bit(packed{x:(x/8)},7-(x%8)))
- *
- * in this case 'packed' must be a tensor with type
- * 'tensor<int8>(x[8])' (the inner result dimension is always 8 times
- * larger than the inner input dimension).
- *
- * Unpacking of bits from multi-dimensional tensors will currently not
- * be optimized.
+ * Baseline expression for 'little' bitorder (least significant bit first):
+ * (Note: make sure this is the actual order of your bits)
+ * 'tensor<int8>(x[64])(bit(packed{x:(x/8)},x%8))'
**/
class UnpackBitsFunction : public tensor_function::Op1
{
+private:
+ bool _big_bitorder;
public:
- UnpackBitsFunction(const ValueType &res_type_in, const TensorFunction &packed);
+ UnpackBitsFunction(const ValueType &res_type_in, const TensorFunction &packed, bool big);
InterpretedFunction::Instruction compile_self(const ValueBuilderFactory &factory, Stash &stash) const override;
bool result_is_mutable() const override { return true; }
static const TensorFunction &optimize(const TensorFunction &expr, Stash &stash);
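
For a concrete reading of the two baselines documented above: result cell x is always taken from byte x/8 of the packed input; the 'big' form selects bit 7-x%8 within that byte, while the 'little' form selects bit x%8. For example, result cell x=10 comes from byte 1 (10/8) and bit 5 (7-10%8) in 'big' order, and from byte 1 and bit 2 (10%8) in 'little' order.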