diff options
author | Håvard Pettersen <havardpe@oath.com> | 2020-08-26 15:00:02 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2020-08-27 17:50:44 +0000 |
commit | 97388434444e962506f9d9fdf57de6de3a8c516d (patch) | |
tree | b0da0f6a53318082c6240a368b10ad5419e72acf /eval | |
parent | 84f53a3d79e4b2e47b88f5a1a06e401069257f44 (diff) |
infer unknown onnx dimension sizes
also pre-allocate output onnx tensors and generally try to resolve as
much as possible up-front to reduce per-eval overhead.
Diffstat (limited to 'eval')
-rw-r--r-- | eval/src/tests/tensor/onnx_wrapper/dynamic.onnx | 27 | ||||
-rwxr-xr-x | eval/src/tests/tensor/onnx_wrapper/dynamic.py | 39 | ||||
-rw-r--r-- | eval/src/tests/tensor/onnx_wrapper/onnx_wrapper_test.cpp | 223 | ||||
-rw-r--r-- | eval/src/tests/tensor/onnx_wrapper/simple.onnx | 23 | ||||
-rwxr-xr-x | eval/src/tests/tensor/onnx_wrapper/simple.py | 33 | ||||
-rw-r--r-- | eval/src/vespa/eval/tensor/dense/dense_tensor_view.h | 2 | ||||
-rw-r--r-- | eval/src/vespa/eval/tensor/dense/onnx_wrapper.cpp | 271 | ||||
-rw-r--r-- | eval/src/vespa/eval/tensor/dense/onnx_wrapper.h | 92 |
8 files changed, 563 insertions, 147 deletions
diff --git a/eval/src/tests/tensor/onnx_wrapper/dynamic.onnx b/eval/src/tests/tensor/onnx_wrapper/dynamic.onnx new file mode 100644 index 00000000000..95bbf36885a --- /dev/null +++ b/eval/src/tests/tensor/onnx_wrapper/dynamic.onnx @@ -0,0 +1,27 @@ + +dynamic.py:¦ +0 +query_tensor +attribute_tensormatmul"MatMul +- +bias_tensorreduce" ReduceSum* +axes@ + +matmul +reduceoutput"Adddynamic_scoringZ# +query_tensor +
+batch +Z" +attribute_tensor + + +Z+ +bias_tensor + +batch +ÿÿÿÿÿÿÿÿÿb +output +
+batch +B
\ No newline at end of file diff --git a/eval/src/tests/tensor/onnx_wrapper/dynamic.py b/eval/src/tests/tensor/onnx_wrapper/dynamic.py new file mode 100755 index 00000000000..d098324fae8 --- /dev/null +++ b/eval/src/tests/tensor/onnx_wrapper/dynamic.py @@ -0,0 +1,39 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +import onnx +from onnx import helper, TensorProto + +QUERY_TENSOR = helper.make_tensor_value_info('query_tensor', TensorProto.FLOAT, ['batch', 4]) +ATTRIBUTE_TENSOR = helper.make_tensor_value_info('attribute_tensor', TensorProto.FLOAT, [4, 1]) +BIAS_TENSOR = helper.make_tensor_value_info('bias_tensor', TensorProto.FLOAT, ['batch', -1]) +OUTPUT = helper.make_tensor_value_info('output', TensorProto.FLOAT, ['batch', 1]) + +nodes = [ + helper.make_node( + 'MatMul', + ['query_tensor', 'attribute_tensor'], + ['matmul'], + ), + helper.make_node( + 'ReduceSum', + ['bias_tensor'], + ['reduce'], + axes=[1] + ), + helper.make_node( + 'Add', + ['matmul', 'reduce'], + ['output'], + ), +] +graph_def = helper.make_graph( + nodes, + 'dynamic_scoring', + [ + QUERY_TENSOR, + ATTRIBUTE_TENSOR, + BIAS_TENSOR, + ], + [OUTPUT], +) +model_def = helper.make_model(graph_def, producer_name='dynamic.py') +onnx.save(model_def, 'dynamic.onnx') diff --git a/eval/src/tests/tensor/onnx_wrapper/onnx_wrapper_test.cpp b/eval/src/tests/tensor/onnx_wrapper/onnx_wrapper_test.cpp index 28a4a34b2e4..db2415e9969 100644 --- a/eval/src/tests/tensor/onnx_wrapper/onnx_wrapper_test.cpp +++ b/eval/src/tests/tensor/onnx_wrapper/onnx_wrapper_test.cpp @@ -10,83 +10,224 @@ using namespace vespalib::eval; using namespace vespalib::tensor; using vespalib::make_string_short::fmt; +using TensorInfo = Onnx::TensorInfo; +using DZ = Onnx::DimSize; std::string get_source_dir() { const char *dir = getenv("SOURCE_DIRECTORY"); return (dir ? 
dir : "."); } std::string source_dir = get_source_dir(); -std::string vespa_dir = source_dir + "/" + "../../../../.."; -std::string simple_model = vespa_dir + "/" + "model-integration/src/test/models/onnx/simple/simple.onnx"; +std::string simple_model = source_dir + "/simple.onnx"; +std::string dynamic_model = source_dir + "/dynamic.onnx"; -void dump_info(const char *ctx, const std::vector<OnnxWrapper::TensorInfo> &info) { +void dump_info(const char *ctx, const std::vector<TensorInfo> &info) { fprintf(stderr, "%s:\n", ctx); for (size_t i = 0; i < info.size(); ++i) { fprintf(stderr, " %s[%zu]: '%s' %s\n", ctx, i, info[i].name.c_str(), info[i].type_as_string().c_str()); } } -TEST(OnnxWrapperTest, onnx_model_can_be_inspected) +TEST(WirePlannerTest, element_types_must_match) { + Onnx::WirePlanner planner; + ValueType type1 = ValueType::from_spec("tensor<float>(a[5])"); + ValueType type2 = ValueType::from_spec("tensor<double>(a[5])"); + TensorInfo info1 = TensorInfo{"info", {DZ(5)}, TensorInfo::ElementType::FLOAT}; + TensorInfo info2 = TensorInfo{"info", {DZ(5)}, TensorInfo::ElementType::DOUBLE}; + EXPECT_TRUE(planner.bind_input_type(type1, info1)); + EXPECT_FALSE(planner.bind_input_type(type2, info1)); + EXPECT_FALSE(planner.bind_input_type(type1, info2)); + EXPECT_TRUE(planner.bind_input_type(type2, info2)); +} + +TEST(WirePlannerTest, known_dimension_sizes_must_match) { + Onnx::WirePlanner planner; + ValueType type1 = ValueType::from_spec("tensor<float>(a[5],b[10])"); + ValueType type2 = ValueType::from_spec("tensor<float>(a[10],b[5])"); + ValueType type3 = ValueType::from_spec("tensor<float>(a[5],b[5])"); + TensorInfo info = TensorInfo{"info", {DZ(5),DZ(5)}, TensorInfo::ElementType::FLOAT}; + EXPECT_FALSE(planner.bind_input_type(type1, info)); + EXPECT_FALSE(planner.bind_input_type(type2, info)); + EXPECT_TRUE(planner.bind_input_type(type3, info)); +} + +TEST(WirePlannerTest, symbolic_dimension_sizes_must_match) { + Onnx::WirePlanner planner; + ValueType type1 = 
ValueType::from_spec("tensor<float>(a[5])"); + ValueType type2 = ValueType::from_spec("tensor<float>(a[10])"); + TensorInfo info = TensorInfo{"info", {DZ("dim")}, TensorInfo::ElementType::FLOAT}; + EXPECT_TRUE(planner.bind_input_type(type1, info)); // binds 'dim' to 5 + EXPECT_FALSE(planner.bind_input_type(type2, info)); + EXPECT_TRUE(planner.bind_input_type(type1, info)); +} + +TEST(WirePlannerTest, unknown_dimension_sizes_match_anything) { + Onnx::WirePlanner planner; + ValueType type1 = ValueType::from_spec("tensor<float>(a[5])"); + ValueType type2 = ValueType::from_spec("tensor<float>(a[10])"); + TensorInfo info = TensorInfo{"info", {DZ()}, TensorInfo::ElementType::FLOAT}; + EXPECT_TRUE(planner.bind_input_type(type1, info)); + EXPECT_TRUE(planner.bind_input_type(type2, info)); +} + +TEST(WirePlannerTest, all_output_dimensions_must_be_bound) { + Onnx::WirePlanner planner; + ValueType type = ValueType::from_spec("tensor<float>(a[5],b[10])"); + TensorInfo info1 = TensorInfo{"info", {DZ()}, TensorInfo::ElementType::FLOAT}; + TensorInfo info2 = TensorInfo{"info", {DZ("dim")}, TensorInfo::ElementType::FLOAT}; + TensorInfo info3 = TensorInfo{"info", {DZ("dim"),DZ()}, TensorInfo::ElementType::FLOAT}; + EXPECT_TRUE(planner.make_output_type(info1).is_error()); + EXPECT_TRUE(planner.make_output_type(info2).is_error()); + EXPECT_TRUE(planner.make_output_type(info3).is_error()); + EXPECT_TRUE(planner.bind_input_type(type, info3)); // binds 'dim' to 5 + EXPECT_TRUE(planner.make_output_type(info1).is_error()); + EXPECT_EQ(planner.make_output_type(info2).to_spec(), "tensor<float>(d0[5])"); + EXPECT_TRUE(planner.make_output_type(info3).is_error()); +} + +TEST(WirePlannerTest, dimensions_resolve_left_to_right) { + Onnx::WirePlanner planner; + ValueType type1 = ValueType::from_spec("tensor<float>(a[5],b[10])"); + ValueType type2 = ValueType::from_spec("tensor<float>(a[10],b[10])"); + ValueType type3 = ValueType::from_spec("tensor<float>(a[5],b[5])"); + TensorInfo info = 
TensorInfo{"info", {DZ("dim"),DZ("dim")}, TensorInfo::ElementType::FLOAT}; + EXPECT_FALSE(planner.bind_input_type(type1, info)); // binds 'dim' to 5, then fails (5 != 10) + EXPECT_FALSE(planner.bind_input_type(type2, info)); + EXPECT_TRUE(planner.bind_input_type(type3, info)); +} + +TEST(OnnxTest, simple_onnx_model_can_be_inspected) { - OnnxWrapper wrapper(simple_model, OnnxWrapper::Optimize::DISABLE); - dump_info("inputs", wrapper.inputs()); - dump_info("outputs", wrapper.outputs()); - ASSERT_EQ(wrapper.inputs().size(), 3); - ASSERT_EQ(wrapper.outputs().size(), 1); + Onnx model(simple_model, Onnx::Optimize::DISABLE); + dump_info("inputs", model.inputs()); + dump_info("outputs", model.outputs()); + ASSERT_EQ(model.inputs().size(), 3); + ASSERT_EQ(model.outputs().size(), 1); //------------------------------------------------------------------------- - EXPECT_EQ(wrapper.inputs()[0].name, "query_tensor"); - EXPECT_EQ(wrapper.inputs()[0].type_as_string(), "float[1][4]"); + EXPECT_EQ(model.inputs()[0].name, "query_tensor"); + EXPECT_EQ(model.inputs()[0].type_as_string(), "float[1][4]"); //------------------------------------------------------------------------- - EXPECT_EQ(wrapper.inputs()[1].name, "attribute_tensor"); - EXPECT_EQ(wrapper.inputs()[1].type_as_string(), "float[4][1]"); + EXPECT_EQ(model.inputs()[1].name, "attribute_tensor"); + EXPECT_EQ(model.inputs()[1].type_as_string(), "float[4][1]"); //------------------------------------------------------------------------- - EXPECT_EQ(wrapper.inputs()[2].name, "bias_tensor"); - EXPECT_EQ(wrapper.inputs()[2].type_as_string(), "float[1][1]"); + EXPECT_EQ(model.inputs()[2].name, "bias_tensor"); + EXPECT_EQ(model.inputs()[2].type_as_string(), "float[1][1]"); //------------------------------------------------------------------------- - EXPECT_EQ(wrapper.outputs()[0].name, "output"); - EXPECT_EQ(wrapper.outputs()[0].type_as_string(), "float[1][1]"); + EXPECT_EQ(model.outputs()[0].name, "output"); + 
EXPECT_EQ(model.outputs()[0].type_as_string(), "float[1][1]"); } -TEST(OnnxWrapperTest, onnx_model_can_be_evaluated) +TEST(OnnxTest, dynamic_onnx_model_can_be_inspected) { - OnnxWrapper wrapper(simple_model, OnnxWrapper::Optimize::ENABLE); + Onnx model(dynamic_model, Onnx::Optimize::DISABLE); + dump_info("inputs", model.inputs()); + dump_info("outputs", model.outputs()); + ASSERT_EQ(model.inputs().size(), 3); + ASSERT_EQ(model.outputs().size(), 1); + //------------------------------------------------------------------------- + EXPECT_EQ(model.inputs()[0].name, "query_tensor"); + EXPECT_EQ(model.inputs()[0].type_as_string(), "float[batch][4]"); + //------------------------------------------------------------------------- + EXPECT_EQ(model.inputs()[1].name, "attribute_tensor"); + EXPECT_EQ(model.inputs()[1].type_as_string(), "float[4][1]"); + //------------------------------------------------------------------------- + EXPECT_EQ(model.inputs()[2].name, "bias_tensor"); + EXPECT_EQ(model.inputs()[2].type_as_string(), "float[batch][]"); + //------------------------------------------------------------------------- + EXPECT_EQ(model.outputs()[0].name, "output"); + EXPECT_EQ(model.outputs()[0].type_as_string(), "float[batch][1]"); +} + +TEST(OnnxTest, simple_onnx_model_can_be_evaluated) +{ + Onnx model(simple_model, Onnx::Optimize::ENABLE); + Onnx::WirePlanner planner; ValueType query_type = ValueType::from_spec("tensor<float>(a[1],b[4])"); std::vector<float> query_values({1.0, 2.0, 3.0, 4.0}); DenseTensorView query(query_type, TypedCells(query_values)); - EXPECT_TRUE(wrapper.inputs()[0].is_compatible(query_type)); - EXPECT_FALSE(wrapper.inputs()[1].is_compatible(query_type)); - EXPECT_FALSE(wrapper.inputs()[2].is_compatible(query_type)); + EXPECT_TRUE(planner.bind_input_type(query_type, model.inputs()[0])); ValueType attribute_type = ValueType::from_spec("tensor<float>(a[4],b[1])"); std::vector<float> attribute_values({5.0, 6.0, 7.0, 8.0}); DenseTensorView 
attribute(attribute_type, TypedCells(attribute_values)); - EXPECT_FALSE(wrapper.inputs()[0].is_compatible(attribute_type)); - EXPECT_TRUE(wrapper.inputs()[1].is_compatible(attribute_type)); - EXPECT_FALSE(wrapper.inputs()[2].is_compatible(attribute_type)); + EXPECT_TRUE(planner.bind_input_type(attribute_type, model.inputs()[1])); ValueType bias_type = ValueType::from_spec("tensor<float>(a[1],b[1])"); std::vector<float> bias_values({9.0}); DenseTensorView bias(bias_type, TypedCells(bias_values)); - EXPECT_FALSE(wrapper.inputs()[0].is_compatible(bias_type)); - EXPECT_FALSE(wrapper.inputs()[1].is_compatible(bias_type)); - EXPECT_TRUE(wrapper.inputs()[2].is_compatible(bias_type)); - - MutableDenseTensorView output(wrapper.outputs()[0].make_compatible_type()); - EXPECT_EQ(output.fast_type().to_spec(), "tensor<float>(d0[1],d1[1])"); - - OnnxWrapper::Params params; - params.bind(0, query); - params.bind(1, attribute); - params.bind(2, bias); - auto result = wrapper.eval(params); - - EXPECT_EQ(result.num_values(), 1); - result.get(0, output); - auto cells = output.cellsRef(); + EXPECT_TRUE(planner.bind_input_type(bias_type, model.inputs()[2])); + + EXPECT_EQ(planner.make_output_type(model.outputs()[0]).to_spec(), + "tensor<float>(d0[1],d1[1])"); + + Onnx::WireInfo wire_info = planner.get_wire_info(model); + Onnx::EvalContext ctx(model, wire_info); + + const Value &output = ctx.get_result(0); + EXPECT_EQ(output.type().to_spec(), "tensor<float>(d0[1],d1[1])"); + //------------------------------------------------------------------------- + ctx.bind_param(0, query); + ctx.bind_param(1, attribute); + ctx.bind_param(2, bias); + ctx.eval(); + auto cells = static_cast<const DenseTensorView&>(output).cellsRef(); EXPECT_EQ(cells.type, ValueType::CellType::FLOAT); EXPECT_EQ(cells.size, 1); EXPECT_EQ(cells.get(0), 79.0); + //------------------------------------------------------------------------- + std::vector<float> new_bias_values({10.0}); + DenseTensorView new_bias(bias_type, 
TypedCells(new_bias_values)); + ctx.bind_param(2, new_bias); + ctx.eval(); + EXPECT_EQ(static_cast<const DenseTensorView&>(output).cellsRef().get(0), 80.0); + //------------------------------------------------------------------------- +} + +TEST(OnnxTest, dynamic_onnx_model_can_be_evaluated) +{ + Onnx model(dynamic_model, Onnx::Optimize::ENABLE); + Onnx::WirePlanner planner; + + ValueType query_type = ValueType::from_spec("tensor<float>(a[1],b[4])"); + std::vector<float> query_values({1.0, 2.0, 3.0, 4.0}); + DenseTensorView query(query_type, TypedCells(query_values)); + EXPECT_TRUE(planner.bind_input_type(query_type, model.inputs()[0])); + + ValueType attribute_type = ValueType::from_spec("tensor<float>(a[4],b[1])"); + std::vector<float> attribute_values({5.0, 6.0, 7.0, 8.0}); + DenseTensorView attribute(attribute_type, TypedCells(attribute_values)); + EXPECT_TRUE(planner.bind_input_type(attribute_type, model.inputs()[1])); + + ValueType bias_type = ValueType::from_spec("tensor<float>(a[1],b[2])"); + std::vector<float> bias_values({4.0, 5.0}); + DenseTensorView bias(bias_type, TypedCells(bias_values)); + EXPECT_TRUE(planner.bind_input_type(bias_type, model.inputs()[2])); + + EXPECT_EQ(planner.make_output_type(model.outputs()[0]).to_spec(), + "tensor<float>(d0[1],d1[1])"); + + Onnx::WireInfo wire_info = planner.get_wire_info(model); + Onnx::EvalContext ctx(model, wire_info); + + const Value &output = ctx.get_result(0); + EXPECT_EQ(output.type().to_spec(), "tensor<float>(d0[1],d1[1])"); + //------------------------------------------------------------------------- + ctx.bind_param(0, query); + ctx.bind_param(1, attribute); + ctx.bind_param(2, bias); + ctx.eval(); + auto cells = static_cast<const DenseTensorView&>(output).cellsRef(); + EXPECT_EQ(cells.type, ValueType::CellType::FLOAT); + EXPECT_EQ(cells.size, 1); + EXPECT_EQ(cells.get(0), 79.0); + //------------------------------------------------------------------------- + std::vector<float> 
new_bias_values({5.0,6.0}); + DenseTensorView new_bias(bias_type, TypedCells(new_bias_values)); + ctx.bind_param(2, new_bias); + ctx.eval(); + EXPECT_EQ(static_cast<const DenseTensorView&>(output).cellsRef().get(0), 81.0); + //------------------------------------------------------------------------- } GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/tests/tensor/onnx_wrapper/simple.onnx b/eval/src/tests/tensor/onnx_wrapper/simple.onnx new file mode 100644 index 00000000000..88ed0ef23f0 --- /dev/null +++ b/eval/src/tests/tensor/onnx_wrapper/simple.onnx @@ -0,0 +1,23 @@ + simple.py:ã +0 +query_tensor +attribute_tensormatmul"MatMul +" +matmul +bias_tensoroutput"Addsimple_scoringZ +query_tensor + + +Z" +attribute_tensor + + +Z +bias_tensor + + +b +output + + +B
\ No newline at end of file diff --git a/eval/src/tests/tensor/onnx_wrapper/simple.py b/eval/src/tests/tensor/onnx_wrapper/simple.py new file mode 100755 index 00000000000..a3cd2425d58 --- /dev/null +++ b/eval/src/tests/tensor/onnx_wrapper/simple.py @@ -0,0 +1,33 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +import onnx +from onnx import helper, TensorProto + +QUERY_TENSOR = helper.make_tensor_value_info('query_tensor', TensorProto.FLOAT, [1, 4]) +ATTRIBUTE_TENSOR = helper.make_tensor_value_info('attribute_tensor', TensorProto.FLOAT, [4, 1]) +BIAS_TENSOR = helper.make_tensor_value_info('bias_tensor', TensorProto.FLOAT, [1, 1]) +OUTPUT = helper.make_tensor_value_info('output', TensorProto.FLOAT, [1, 1]) + +nodes = [ + helper.make_node( + 'MatMul', + ['query_tensor', 'attribute_tensor'], + ['matmul'], + ), + helper.make_node( + 'Add', + ['matmul', 'bias_tensor'], + ['output'], + ), +] +graph_def = helper.make_graph( + nodes, + 'simple_scoring', + [ + QUERY_TENSOR, + ATTRIBUTE_TENSOR, + BIAS_TENSOR, + ], + [OUTPUT], +) +model_def = helper.make_model(graph_def, producer_name='simple.py') +onnx.save(model_def, 'simple.onnx') diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_view.h b/eval/src/vespa/eval/tensor/dense/dense_tensor_view.h index 93dd2dbedeb..bf2464791f6 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_tensor_view.h +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_view.h @@ -18,6 +18,7 @@ public: using CellsIterator = DenseTensorCellsIterator; using Address = std::vector<eval::ValueType::Dimension::size_type>; + DenseTensorView(const DenseTensorView &rhs) : DenseTensorView(rhs._typeRef, rhs._cellsRef) {} DenseTensorView(const eval::ValueType &type_in, TypedCells cells_in) : _typeRef(type_in), _cellsRef(cells_in) @@ -55,7 +56,6 @@ protected: : _typeRef(type_in), _cellsRef() {} - DenseTensorView(const DenseTensorView &rhs) : DenseTensorView(rhs._typeRef, rhs._cellsRef) {} void 
initCellsRef(TypedCells cells_in) { assert(_typeRef.cell_type() == cells_in.type); diff --git a/eval/src/vespa/eval/tensor/dense/onnx_wrapper.cpp b/eval/src/vespa/eval/tensor/dense/onnx_wrapper.cpp index 125095ff23e..88346213901 100644 --- a/eval/src/vespa/eval/tensor/dense/onnx_wrapper.cpp +++ b/eval/src/vespa/eval/tensor/dense/onnx_wrapper.cpp @@ -18,31 +18,31 @@ namespace vespalib::tensor { namespace { -vespalib::string to_str(OnnxWrapper::TensorInfo::ElementType element_type) { - if (element_type == OnnxWrapper::TensorInfo::ElementType::FLOAT) { +vespalib::string to_str(Onnx::TensorInfo::ElementType element_type) { + if (element_type == Onnx::TensorInfo::ElementType::FLOAT) { return "float"; } - if (element_type == OnnxWrapper::TensorInfo::ElementType::DOUBLE) { + if (element_type == Onnx::TensorInfo::ElementType::DOUBLE) { return "double"; } return "???"; } -ValueType::CellType as_cell_type(OnnxWrapper::TensorInfo::ElementType type) { - if (type == OnnxWrapper::TensorInfo::ElementType::FLOAT) { +ValueType::CellType as_cell_type(Onnx::TensorInfo::ElementType type) { + if (type == Onnx::TensorInfo::ElementType::FLOAT) { return ValueType::CellType::FLOAT; } - if (type == OnnxWrapper::TensorInfo::ElementType::DOUBLE) { + if (type == Onnx::TensorInfo::ElementType::DOUBLE) { return ValueType::CellType::DOUBLE; } abort(); } -auto convert_optimize(OnnxWrapper::Optimize optimize) { - if (optimize == OnnxWrapper::Optimize::ENABLE) { +auto convert_optimize(Onnx::Optimize optimize) { + if (optimize == Onnx::Optimize::ENABLE) { return ORT_ENABLE_ALL; } else { - assert(optimize == OnnxWrapper::Optimize::DISABLE); + assert(optimize == Onnx::Optimize::DISABLE); return ORT_DISABLE_ALL; } } @@ -81,37 +81,77 @@ public: }; Ort::AllocatorWithDefaultOptions OnnxString::_alloc; -std::vector<size_t> make_dimensions(const std::vector<int64_t> &shape) { - std::vector<size_t> result; - for (int64_t size: shape) { - result.push_back(std::max(size, 0L)); - } +std::vector<Onnx::DimSize> 
make_dimensions(const Ort::TensorTypeAndShapeInfo &tensor_info) { + std::vector<const char *> symbolic_sizes(tensor_info.GetDimensionsCount(), nullptr); + tensor_info.GetSymbolicDimensions(symbolic_sizes.data(), symbolic_sizes.size()); + auto shape = tensor_info.GetShape(); + std::vector<Onnx::DimSize> result; + for (size_t i = 0; i < shape.size(); ++i) { + if (shape[i] > 0) { + result.emplace_back(shape[i]); + } else if (symbolic_sizes[i] != nullptr) { + result.emplace_back(vespalib::string(symbolic_sizes[i])); + } else { + result.emplace_back(); + } + } return result; } -OnnxWrapper::TensorInfo::ElementType make_element_type(ONNXTensorElementDataType element_type) { +Onnx::TensorInfo::ElementType make_element_type(ONNXTensorElementDataType element_type) { if (element_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) { - return OnnxWrapper::TensorInfo::ElementType::FLOAT; + return Onnx::TensorInfo::ElementType::FLOAT; } else if (element_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) { - return OnnxWrapper::TensorInfo::ElementType::DOUBLE; + return Onnx::TensorInfo::ElementType::DOUBLE; } else { - return OnnxWrapper::TensorInfo::ElementType::UNKNOWN; + return Onnx::TensorInfo::ElementType::UNKNOWN; } } -OnnxWrapper::TensorInfo make_tensor_info(const OnnxString &name, const Ort::TypeInfo &type_info) { +Onnx::TensorInfo make_tensor_info(const OnnxString &name, const Ort::TypeInfo &type_info) { auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); - auto shape = tensor_info.GetShape(); auto element_type = tensor_info.GetElementType(); - return OnnxWrapper::TensorInfo{vespalib::string(name.get()), make_dimensions(shape), make_element_type(element_type)}; + return Onnx::TensorInfo{vespalib::string(name.get()), make_dimensions(tensor_info), make_element_type(element_type)}; } } +vespalib::string +Onnx::DimSize::as_string() const +{ + if (is_known()) { + return fmt("[%zu]", value); + } else if (is_symbolic()) { + return fmt("[%s]", name.c_str()); + } else { + return "[]"; + 
} +} + +vespalib::string +Onnx::TensorInfo::type_as_string() const +{ + vespalib::string res = to_str(elements); + for (const auto &dim: dimensions) { + res += dim.as_string(); + } + return res; +} + +Onnx::TensorInfo::~TensorInfo() = default; + +//----------------------------------------------------------------------------- + +Onnx::WirePlanner::~WirePlanner() = default; + bool -OnnxWrapper::TensorInfo::is_compatible(const eval::ValueType &type) const +Onnx::WirePlanner::bind_input_type(const eval::ValueType &vespa_in, const TensorInfo &onnx_in) { - if ((elements == ElementType::UNKNOWN) || dimensions.empty()) { + const auto &type = vespa_in; + const auto &name = onnx_in.name; + const auto &dimensions = onnx_in.dimensions; + const auto &elements = onnx_in.elements; + if ((elements == TensorInfo::ElementType::UNKNOWN) || dimensions.empty()) { return false; } if (type.cell_type() != as_cell_type(elements)) { @@ -121,21 +161,41 @@ OnnxWrapper::TensorInfo::is_compatible(const eval::ValueType &type) const return false; } for (size_t i = 0; i < dimensions.size(); ++i) { - if (type.dimensions()[i].size != dimensions[i]) { - return false; + if (dimensions[i].is_known()) { + if (dimensions[i].value != type.dimensions()[i].size) { + return false; + } + } else if (dimensions[i].is_symbolic()) { + auto &bound_size = _symbolic_sizes[dimensions[i].name]; + if (bound_size == 0) { + bound_size = type.dimensions()[i].size; + } else if (bound_size != type.dimensions()[i].size) { + return false; + } + } else { + _unknown_sizes[std::make_pair(name,i)] = type.dimensions()[i].size; } } return true; } eval::ValueType -OnnxWrapper::TensorInfo::make_compatible_type() const +Onnx::WirePlanner::make_output_type(const TensorInfo &onnx_out) const { - if ((elements == ElementType::UNKNOWN) || dimensions.empty()) { + const auto &dimensions = onnx_out.dimensions; + const auto &elements = onnx_out.elements; + if ((elements == TensorInfo::ElementType::UNKNOWN) || dimensions.empty()) { return 
ValueType::error_type(); } std::vector<ValueType::Dimension> dim_list; - for (size_t dim_size: dimensions) { + for (const auto &dim: dimensions) { + size_t dim_size = dim.value; + if (dim.is_symbolic()) { + auto pos = _symbolic_sizes.find(dim.name); + if (pos != _symbolic_sizes.end()) { + dim_size = pos->second; + } + } if ((dim_size == 0) || (dim_list.size() > 9)) { return ValueType::error_type(); } @@ -144,71 +204,131 @@ OnnxWrapper::TensorInfo::make_compatible_type() const return ValueType::tensor_type(std::move(dim_list), as_cell_type(elements)); } -vespalib::string -OnnxWrapper::TensorInfo::type_as_string() const +Onnx::WireInfo +Onnx::WirePlanner::get_wire_info(const Onnx &model) const { - vespalib::string res = to_str(elements); - for (size_t dim_size: dimensions) { - if (dim_size == 0) { - res += "[]"; - } else { - res += fmt("[%zu]", dim_size); + WireInfo info; + for (const auto &input: model.inputs()) { + size_t input_idx = 0; + std::vector<int64_t> sizes; + for (const auto &dim: input.dimensions) { + if (dim.is_known()) { + sizes.push_back(dim.value); + } else if (dim.is_symbolic()) { + const auto &pos = _symbolic_sizes.find(dim.name); + assert(pos != _symbolic_sizes.end()); + sizes.push_back(pos->second); + } else { + const auto &pos = _unknown_sizes.find(std::make_pair(input.name, input_idx)); + assert(pos != _unknown_sizes.end()); + sizes.push_back(pos->second); + } + ++input_idx; } + info.input_sizes.push_back(sizes); } - return res; + for (const auto &output: model.outputs()) { + info.output_types.push_back(make_output_type(output)); + } + return info; } -OnnxWrapper::TensorInfo::~TensorInfo() = default; +//----------------------------------------------------------------------------- -OnnxWrapper::Shared::Shared() - : _env(ORT_LOGGING_LEVEL_WARNING, "vespa-onnx-wrapper") +Ort::AllocatorWithDefaultOptions Onnx::EvalContext::_alloc; + +Onnx::EvalContext::EvalContext(const Onnx &model, const WireInfo &wire_info) + : _model(model), + 
_wire_info(wire_info), + _cpu_memory(Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault)), + _param_values(), + _result_values(), + _result_views() { + assert(_wire_info.input_sizes.size() == _model.inputs().size()); + assert(_wire_info.output_types.size() == _model.outputs().size()); + for (const auto &input: _wire_info.input_sizes) { + (void) input; + _param_values.push_back(Ort::Value(nullptr)); + } + std::vector<int64_t> dim_sizes; + size_t num_cells; + dim_sizes.reserve(16); + // NB: output type must be reference inside vector since the view does not copy it + for (const auto &output: _wire_info.output_types) { + num_cells = 1; + dim_sizes.clear(); + for (const auto &dim: output.dimensions()) { + dim_sizes.push_back(dim.size); + num_cells *= dim.size; + } + if (output.cell_type() == ValueType::CellType::FLOAT) { + _result_values.push_back(Ort::Value::CreateTensor<float>(_alloc, dim_sizes.data(), dim_sizes.size())); + ConstArrayRef<float> cells(_result_values.back().GetTensorMutableData<float>(), num_cells); + _result_views.emplace_back(output, TypedCells(cells)); + } else { + assert(output.cell_type() == ValueType::CellType::DOUBLE); + _result_values.push_back(Ort::Value::CreateTensor<double>(_alloc, dim_sizes.data(), dim_sizes.size())); + ConstArrayRef<double> cells(_result_values.back().GetTensorMutableData<double>(), num_cells); + _result_views.emplace_back(output, TypedCells(cells)); + } + } } +Onnx::EvalContext::~EvalContext() = default; + void -OnnxWrapper::Params::bind(size_t idx, const DenseTensorView &src) +Onnx::EvalContext::bind_param(size_t i, const eval::Value ¶m) { - assert(idx == values.size()); - std::vector<int64_t> dim_sizes; - for (const auto &dim: src.fast_type().dimensions()) { - dim_sizes.push_back(dim.size); - } - auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); - if (src.fast_type().cell_type() == ValueType::CellType::FLOAT) { + // NB: dense tensors are always (sub)classes of 
DenseTensorView + const auto &cells_ref = static_cast<const DenseTensorView &>(param).cellsRef(); + const auto &input_sizes = _wire_info.input_sizes; + if (cells_ref.type == ValueType::CellType::FLOAT) { // NB: create requires non-const input - auto cells = unconstify(src.cellsRef().typify<float>()); - values.push_back(Ort::Value::CreateTensor<float>(memory_info, cells.begin(), cells.size(), dim_sizes.data(), dim_sizes.size())); - } else if (src.fast_type().cell_type() == ValueType::CellType::DOUBLE) { + auto cells = unconstify(cells_ref.typify<float>()); + _param_values[i] = Ort::Value::CreateTensor<float>(_cpu_memory, cells.begin(), cells.size(), input_sizes[i].data(), input_sizes[i].size()); + } else { + assert(cells_ref.type == ValueType::CellType::DOUBLE); // NB: create requires non-const input - auto cells = unconstify(src.cellsRef().typify<double>()); - values.push_back(Ort::Value::CreateTensor<double>(memory_info, cells.begin(), cells.size(), dim_sizes.data(), dim_sizes.size())); + auto cells = unconstify(cells_ref.typify<double>()); + _param_values[i] = Ort::Value::CreateTensor<double>(_cpu_memory, cells.begin(), cells.size(), input_sizes[i].data(), input_sizes[i].size()); } } void -OnnxWrapper::Result::get(size_t idx, MutableDenseTensorView &dst) +Onnx::EvalContext::eval() { - assert(values[idx].IsTensor()); - auto meta = values[idx].GetTensorTypeAndShapeInfo(); - if (dst.fast_type().cell_type() == ValueType::CellType::FLOAT) { - assert(meta.GetElementType() == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT); - ConstArrayRef<float> cells(values[idx].GetTensorMutableData<float>(), meta.GetElementCount()); - dst.setCells(TypedCells(cells)); - } else if (dst.fast_type().cell_type() == ValueType::CellType::DOUBLE) { - assert(meta.GetElementType() == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE); - ConstArrayRef<double> cells(values[idx].GetTensorMutableData<double>(), meta.GetElementCount()); - dst.setCells(TypedCells(cells)); - } + // NB: Run requires non-const session + 
Ort::Session &session = const_cast<Ort::Session&>(_model._session); + Ort::RunOptions run_opts(nullptr); + session.Run(run_opts, + _model._input_name_refs.data(), _param_values.data(), _param_values.size(), + _model._output_name_refs.data(), _result_values.data(), _result_values.size()); } -OnnxWrapper::Shared & -OnnxWrapper::Shared::get() { +const eval::Value & +Onnx::EvalContext::get_result(size_t i) const +{ + return _result_views[i]; +} + +//----------------------------------------------------------------------------- + +Onnx::Shared::Shared() + : _env(ORT_LOGGING_LEVEL_WARNING, "vespa-onnx-wrapper") +{ +} + +Onnx::Shared & +Onnx::Shared::get() { static Shared shared; return shared; } +//----------------------------------------------------------------------------- + void -OnnxWrapper::extract_meta_data() +Onnx::extract_meta_data() { Ort::AllocatorWithDefaultOptions allocator; size_t num_inputs = _session.GetInputCount(); @@ -227,7 +347,7 @@ OnnxWrapper::extract_meta_data() } } -OnnxWrapper::OnnxWrapper(const vespalib::string &model_file, Optimize optimize) +Onnx::Onnx(const vespalib::string &model_file, Optimize optimize) : _shared(Shared::get()), _options(), _session(nullptr), @@ -243,17 +363,6 @@ OnnxWrapper::OnnxWrapper(const vespalib::string &model_file, Optimize optimize) extract_meta_data(); } -OnnxWrapper::~OnnxWrapper() = default; - -OnnxWrapper::Result -OnnxWrapper::eval(const Params ¶ms) const -{ - assert(params.values.size() == _inputs.size()); - Ort::RunOptions run_opts(nullptr); - // NB: Run requires non-const session - Ort::Session &session = const_cast<Ort::Session&>(_session); - return Result(session.Run(run_opts, _input_name_refs.data(), params.values.data(), _inputs.size(), - _output_name_refs.data(), _outputs.size())); -} +Onnx::~Onnx() = default; } diff --git a/eval/src/vespa/eval/tensor/dense/onnx_wrapper.h b/eval/src/vespa/eval/tensor/dense/onnx_wrapper.h index abe1da252c7..6e2802df526 100644 --- 
a/eval/src/vespa/eval/tensor/dense/onnx_wrapper.h
+++ b/eval/src/vespa/eval/tensor/dense/onnx_wrapper.h
@@ -2,56 +2,101 @@
 
 #pragma once
 
+#include "dense_tensor_view.h"
 #include <onnxruntime/onnxruntime_cxx_api.h>
 #include <vespa/vespalib/stllike/string.h>
 #include <vespa/eval/eval/value_type.h>
 #include <vector>
+#include <map>
 
-namespace vespalib::tensor {
+namespace vespalib::eval { class Value; }
 
-class DenseTensorView;
-class MutableDenseTensorView;
+namespace vespalib::tensor {
 
 /**
  * Wrapper around an ONNX model handled by onnxruntime.
+ *
+ * Create an Onnx object that will load your model and extract
+ * information about inputs and outputs. Use an Onnx::WirePlanner to
+ * bind vespa value types to each of the onnx model inputs. Ask the
+ * wire planner about the vespa value types corresponding to each of
+ * the model outputs for external wiring. Use the wire planner to make
+ * a WireInfo object which is a simple struct indicating the concrete
+ * onnx and vespa types to be used when converting inputs and
+ * outputs. Create an Onnx::EvalContext based on the model and the wire
+ * plan. Bind actual vespa values to the model inputs, invoke eval and
+ * inspect the results. See the unit test (tests/tensor/onnx_wrapper)
+ * for some examples.
**/ -class OnnxWrapper { +class Onnx { public: // model optimization enum class Optimize { ENABLE, DISABLE }; + // the size of a dimension + struct DimSize { + size_t value; + vespalib::string name; + DimSize() : value(0), name() {} + DimSize(size_t size) : value(size), name() {} + DimSize(const vespalib::string &symbol) : value(0), name(symbol) {} + bool is_known() const { return (value > 0); } + bool is_symbolic() const { return !name.empty(); } + vespalib::string as_string() const; + }; + // information about a single input or output tensor struct TensorInfo { enum class ElementType { FLOAT, DOUBLE, UNKNOWN }; vespalib::string name; - std::vector<size_t> dimensions; + std::vector<DimSize> dimensions; ElementType elements; - bool is_compatible(const eval::ValueType &type) const; - eval::ValueType make_compatible_type() const; vespalib::string type_as_string() const; ~TensorInfo(); }; - // used to build model parameters - class Params { - friend class OnnxWrapper; + // how the model should be wired with inputs/outputs + struct WireInfo { + std::vector<std::vector<int64_t>> input_sizes; + std::vector<eval::ValueType> output_types; + WireInfo() : input_sizes(), output_types() {} + }; + + // planning how we should wire the model based on input types + class WirePlanner { private: - std::vector<Ort::Value> values; + std::map<vespalib::string,size_t> _symbolic_sizes; + std::map<std::pair<vespalib::string,size_t>,size_t> _unknown_sizes; public: - Params() : values() {} - void bind(size_t idx, const DenseTensorView &src); + WirePlanner() : _symbolic_sizes(), _unknown_sizes() {} + ~WirePlanner(); + bool bind_input_type(const eval::ValueType &vespa_in, const TensorInfo &onnx_in); + eval::ValueType make_output_type(const TensorInfo &onnx_out) const; + WireInfo get_wire_info(const Onnx &model) const; }; - // used to inspect model results - class Result { - friend class OnnxWrapper; + // evaluation context; use one per thread and keep model/wire_info alive + // all parameter 
values are expected to be bound per evaluation
+    // output values are pre-allocated and will not change
+    class EvalContext {
     private:
-        std::vector<Ort::Value> values;
-        Result(std::vector<Ort::Value> values_in) : values(std::move(values_in)) {}
+        static Ort::AllocatorWithDefaultOptions _alloc;
+
+        const Onnx                    &_model;
+        const WireInfo                &_wire_info;
+        Ort::MemoryInfo                _cpu_memory;
+        std::vector<Ort::Value>        _param_values;
+        std::vector<Ort::Value>        _result_values;
+        std::vector<DenseTensorView>   _result_views;
+
     public:
-        static Result make_empty() { return Result({}); }
-        size_t num_values() const { return values.size(); }
-        void get(size_t idx, MutableDenseTensorView &dst);
+        EvalContext(const Onnx &model, const WireInfo &wire_info);
+        ~EvalContext();
+        size_t num_params() const { return _param_values.size(); }
+        size_t num_results() const { return _result_values.size(); }
+        void bind_param(size_t i, const eval::Value &param);
+        void eval();
+        const eval::Value &get_result(size_t i) const;
     };
 
 private:
@@ -76,11 +121,10 @@ private:
     void extract_meta_data();
 
 public:
-    OnnxWrapper(const vespalib::string &model_file, Optimize optimize);
-    ~OnnxWrapper();
+    Onnx(const vespalib::string &model_file, Optimize optimize);
+    ~Onnx();
     const std::vector<TensorInfo> &inputs() const { return _inputs; }
    const std::vector<TensorInfo> &outputs() const { return _outputs; }
-    Result eval(const Params &params) const;
 };
 
 }
|