| field     | value                                            | timestamp                 |
|-----------|--------------------------------------------------|---------------------------|
| author    | Henning Baldersheim <balder@yahoo-inc.com>       | 2021-08-11 16:29:48 +0200 |
| committer | GitHub <noreply@github.com>                      | 2021-08-11 16:29:48 +0200 |
| commit    | 35e7cbbfdde4e544ce72b968e9c79e552fa0d3a2 (patch) |                           |
| tree      | 4bddabdc8de40261c72aaf4a5aeefca43fd678c7         |                           |
| parent    | 99cb5741270e2a0f00c5ea857ff5f012cf265375 (diff)  |                           |
| parent    | da4e0c4a0d22c614b028c83b9106328996eca36a (diff)  |                           |
Merge pull request #18716 from vespa-engine/havardpe/avoid-crash-on-runtime-onnx-errors
avoid crash on run-time onnx errors
7 files changed, 75 insertions, 11 deletions
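This merge makes a run-time ONNX evaluation failure non-fatal: `Onnx::EvalContext` gains a `clear_results()` that zero-fills the output tensors, and the onnx ranking feature wraps `eval()` in a try/catch so a throwing model logs a warning and produces all-zero outputs instead of crashing the process. The following is a minimal standalone sketch of that fallback pattern; `FakeEvalContext` and its members are illustrative stand-ins, not the Vespa `Onnx::EvalContext` API from the diff below:

```cpp
#include <algorithm>
#include <cstdio>
#include <stdexcept>
#include <vector>

// Illustrative stand-in for an ONNX evaluation context; names are not from the diff.
struct FakeEvalContext {
    std::vector<std::vector<float>> results{{1.0f, 2.0f}, {3.0f}};
    void eval() { throw std::runtime_error("invalid reshape at run time"); } // simulate a run-time onnx error
    void clear_results() {
        for (auto &r : results) {
            std::fill(r.begin(), r.end(), 0.0f); // zero-fill every output tensor
        }
    }
};

int main() {
    FakeEvalContext ctx;
    try {
        ctx.eval();                               // may throw on run-time onnx errors
    } catch (const std::exception &ex) {
        std::fprintf(stderr, "onnx model evaluation failed: %s\n", ex.what());
        ctx.clear_results();                      // degrade to all-zero outputs instead of crashing
    }
    // ctx.results is now {{0, 0}, {0}} and the caller keeps running.
    return 0;
}
```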
diff --git a/eval/src/tests/tensor/onnx_wrapper/onnx_wrapper_test.cpp b/eval/src/tests/tensor/onnx_wrapper/onnx_wrapper_test.cpp
index 6b45172ef80..2a336ecf099 100644
--- a/eval/src/tests/tensor/onnx_wrapper/onnx_wrapper_test.cpp
+++ b/eval/src/tests/tensor/onnx_wrapper/onnx_wrapper_test.cpp
@@ -178,6 +178,9 @@ TEST(OnnxTest, simple_onnx_model_can_be_evaluated)
     ctx.eval();
     EXPECT_EQ(output.cells().typify<float>()[0], 80.0);
     //-------------------------------------------------------------------------
+    ctx.clear_results();
+    EXPECT_EQ(output.cells().typify<float>()[0], 0.0);
+    //-------------------------------------------------------------------------
 }
 
 TEST(OnnxTest, dynamic_onnx_model_can_be_evaluated)
@@ -224,6 +227,9 @@ TEST(OnnxTest, dynamic_onnx_model_can_be_evaluated)
     ctx.eval();
     EXPECT_EQ(output.cells().typify<float>()[0], 81.0);
     //-------------------------------------------------------------------------
+    ctx.clear_results();
+    EXPECT_EQ(output.cells().typify<float>()[0], 0.0);
+    //-------------------------------------------------------------------------
 }
 
 TEST(OnnxTest, int_types_onnx_model_can_be_evaluated)
@@ -270,6 +276,9 @@ TEST(OnnxTest, int_types_onnx_model_can_be_evaluated)
     ctx.eval();
     EXPECT_EQ(output.cells().typify<double>()[0], 80.0);
     //-------------------------------------------------------------------------
+    ctx.clear_results();
+    EXPECT_EQ(output.cells().typify<double>()[0], 0.0);
+    //-------------------------------------------------------------------------
 }
 
 TEST(OnnxTest, we_guess_batch_dimension_size_when_inference_fails) {
@@ -311,6 +320,15 @@ TEST(OnnxTest, we_guess_batch_dimension_size_when_inference_fails) {
     EXPECT_EQ(out_3, expect_3);
     EXPECT_EQ(out_4, expect_4);
     //-------------------------------------------------------------------------
+    auto zero_3 = TensorSpec::from_expr("tensor<float>(d0[3]):[0,0,0]");
+    auto zero_4 = TensorSpec::from_expr("tensor<float>(d0[4]):[0,0,0,0]");
+    ctx_3.clear_results();
+    EXPECT_EQ(TensorSpec::from_value(ctx_3.get_result(0)), zero_3);
+    EXPECT_EQ(TensorSpec::from_value(ctx_4.get_result(0)), expect_4);
+    ctx_4.clear_results();
+    EXPECT_EQ(TensorSpec::from_value(ctx_3.get_result(0)), zero_3);
+    EXPECT_EQ(TensorSpec::from_value(ctx_4.get_result(0)), zero_4);
+    //-------------------------------------------------------------------------
 }
 
 TEST(OnnxTest, zero_copy_unstable_types) {
@@ -356,6 +374,14 @@ TEST(OnnxTest, zero_copy_unstable_types) {
     EXPECT_EQ(cells16.typify<BFloat16>()[1], 2.0);
     EXPECT_EQ(cells16.typify<BFloat16>()[2], 3.0);
     //-------------------------------------------------------------------------
+    ctx.clear_results();
+    EXPECT_EQ(cells8.typify<Int8Float>()[0], 0.0);
+    EXPECT_EQ(cells8.typify<Int8Float>()[1], 0.0);
+    EXPECT_EQ(cells8.typify<Int8Float>()[2], 0.0);
+    EXPECT_EQ(cells16.typify<BFloat16>()[0], 0.0);
+    EXPECT_EQ(cells16.typify<BFloat16>()[1], 0.0);
+    EXPECT_EQ(cells16.typify<BFloat16>()[2], 0.0);
+    //-------------------------------------------------------------------------
 }
 
 TEST(OnnxTest, converted_unstable_types) {
@@ -401,6 +427,14 @@ TEST(OnnxTest, converted_unstable_types) {
     EXPECT_EQ(cells16.typify<BFloat16>()[1], 2.0);
     EXPECT_EQ(cells16.typify<BFloat16>()[2], 3.0);
     //-------------------------------------------------------------------------
+    ctx.clear_results();
+    EXPECT_EQ(cells8.typify<Int8Float>()[0], 0.0);
+    EXPECT_EQ(cells8.typify<Int8Float>()[1], 0.0);
+    EXPECT_EQ(cells8.typify<Int8Float>()[2], 0.0);
+    EXPECT_EQ(cells16.typify<BFloat16>()[0], 0.0);
+    EXPECT_EQ(cells16.typify<BFloat16>()[1], 0.0);
+    EXPECT_EQ(cells16.typify<BFloat16>()[2], 0.0);
+    //-------------------------------------------------------------------------
 }
 
 TEST(OnnxTest, inspect_float_to_int8_conversion) {
diff --git a/eval/src/vespa/eval/onnx/onnx_wrapper.cpp b/eval/src/vespa/eval/onnx/onnx_wrapper.cpp
index e2528fcb1c3..6e857c51b2b 100644
--- a/eval/src/vespa/eval/onnx/onnx_wrapper.cpp
+++ b/eval/src/vespa/eval/onnx/onnx_wrapper.cpp
@@ -97,6 +97,17 @@ struct CreateVespaTensor {
     }
 };
 
+struct ClearVespaTensor {
+    template <typename CT> static void invoke(const Value &value) {
+        auto cells = unconstify(value.cells().typify<CT>());
+        std::fill(cells.begin(), cells.end(), CT{});
+    }
+    void operator()(const Value &value) {
+        return typify_invoke<1,TypifyCellType,ClearVespaTensor>(value.type().cell_type(), value);
+    }
+};
+ClearVespaTensor clear_vespa_tensor;
+
 //-----------------------------------------------------------------------------
 
 template <typename E> vespalib::string type_name(E enum_value) {
@@ -202,7 +213,7 @@ std::vector<int64_t> extract_sizes(const ValueType &type) {
     return sizes;
 }
 
-}
+} // <unnamed>
 
 vespalib::string
 Onnx::DimSize::as_string() const
@@ -488,6 +499,14 @@ Onnx::EvalContext::eval()
     }
 }
 
+void
+Onnx::EvalContext::clear_results()
+{
+    for (const Value::UP &result: _results) {
+        clear_vespa_tensor(*result);
+    }
+}
+
 const Value &
 Onnx::EvalContext::get_result(size_t i) const
 {
diff --git a/eval/src/vespa/eval/onnx/onnx_wrapper.h b/eval/src/vespa/eval/onnx/onnx_wrapper.h
index 9392536eae7..7084efab207 100644
--- a/eval/src/vespa/eval/onnx/onnx_wrapper.h
+++ b/eval/src/vespa/eval/onnx/onnx_wrapper.h
@@ -134,6 +134,7 @@ public:
         size_t num_results() const { return _result_values.size(); }
         void bind_param(size_t i, const Value &param);
         void eval();
+        void clear_results();
         const Value &get_result(size_t i) const;
     };
 
diff --git a/searchlib/src/tests/features/onnx_feature/fragile.onnx b/searchlib/src/tests/features/onnx_feature/fragile.onnx
index 2a05500e95b..dc650154f83 100644
--- a/searchlib/src/tests/features/onnx_feature/fragile.onnx
+++ b/searchlib/src/tests/features/onnx_feature/fragile.onnx
@@ -1,5 +1,5 @@
-fragile.py:b
+fragile.py:]
 
 in1
 
 in2out"AddfragileZ
@@ -9,7 +9,8 @@ fragile.py:b
 Z
 in2
 
-batchb
-out
-
-batchB
\ No newline at end of file
+batchb
+out
+
+
+B
\ No newline at end of file
diff --git a/searchlib/src/tests/features/onnx_feature/fragile.py b/searchlib/src/tests/features/onnx_feature/fragile.py
index e4eaf168e14..fe5851f5a63 100755
--- a/searchlib/src/tests/features/onnx_feature/fragile.py
+++ b/searchlib/src/tests/features/onnx_feature/fragile.py
@@ -6,7 +6,7 @@ from onnx import helper, TensorProto
 
 INPUT1 = helper.make_tensor_value_info('in1', TensorProto.FLOAT, [2])
 INPUT2 = helper.make_tensor_value_info('in2', TensorProto.FLOAT, ['batch'])
-OUTPUT = helper.make_tensor_value_info('out', TensorProto.FLOAT, ['batch'])
+OUTPUT = helper.make_tensor_value_info('out', TensorProto.FLOAT, [2])
 
 nodes = [
     helper.make_node(
diff --git a/searchlib/src/tests/features/onnx_feature/onnx_feature_test.cpp b/searchlib/src/tests/features/onnx_feature/onnx_feature_test.cpp
index c07ebc48604..7e80d9fc335 100644
--- a/searchlib/src/tests/features/onnx_feature/onnx_feature_test.cpp
+++ b/searchlib/src/tests/features/onnx_feature/onnx_feature_test.cpp
@@ -147,20 +147,22 @@ TEST_F(OnnxFeatureTest, input_features_and_output_names_can_be_specified) {
 TEST_F(OnnxFeatureTest, fragile_model_can_be_evaluated) {
     add_expr("in1", "tensor<float>(x[2]):[docid,5]");
     add_expr("in2", "tensor<float>(x[2]):[docid,10]");
-    add_onnx(OnnxModel("fragile", fragile_model));
+    add_onnx(OnnxModel("fragile", fragile_model).dry_run_on_setup(true));
     EXPECT_TRUE(try_compile(onnx_feature("fragile")));
     EXPECT_EQ(get(1), TensorSpec::from_expr("tensor<float>(d0[2]):[2,15]"));
     EXPECT_EQ(get(3), TensorSpec::from_expr("tensor<float>(d0[2]):[6,15]"));
 }
 
-TEST_F(OnnxFeatureTest, runtime_broken_model_can_be_set_up_without_dry_run) {
+TEST_F(OnnxFeatureTest, broken_model_evaluates_to_all_zeros) {
     add_expr("in1", "tensor<float>(x[2]):[docid,5]");
     add_expr("in2", "tensor<float>(x[3]):[docid,10,31515]");
     add_onnx(OnnxModel("fragile", fragile_model).dry_run_on_setup(false));
     EXPECT_TRUE(try_compile(onnx_feature("fragile")));
+    EXPECT_EQ(get(1), TensorSpec::from_expr("tensor<float>(d0[2]):[0,0]"));
+    EXPECT_EQ(get(3), TensorSpec::from_expr("tensor<float>(d0[2]):[0,0]"));
 }
 
-TEST_F(OnnxFeatureTest, runtime_broken_model_fails_with_dry_run) {
+TEST_F(OnnxFeatureTest, broken_model_fails_with_dry_run) {
     add_expr("in1", "tensor<float>(x[2]):[docid,5]");
     add_expr("in2", "tensor<float>(x[3]):[docid,10,31515]");
     add_onnx(OnnxModel("fragile", fragile_model).dry_run_on_setup(true));
diff --git a/searchlib/src/vespa/searchlib/features/onnx_feature.cpp b/searchlib/src/vespa/searchlib/features/onnx_feature.cpp
index 0af03f3aa86..d4756a2f8a7 100644
--- a/searchlib/src/vespa/searchlib/features/onnx_feature.cpp
+++ b/searchlib/src/vespa/searchlib/features/onnx_feature.cpp
@@ -89,7 +89,12 @@ public:
         for (size_t i = 0; i < _eval_context.num_params(); ++i) {
             _eval_context.bind_param(i, inputs().get_object(i).get());
         }
-        _eval_context.eval();
+        try {
+            _eval_context.eval();
+        } catch (const Ort::Exception &ex) {
+            LOG(warning, "onnx model evaluation failed: %s", ex.what());
+            _eval_context.clear_results();
+        }
     }
 };
 
@@ -162,6 +167,8 @@ OnnxBlueprint::setup(const IIndexEnvironment &env,
         if (!error_msg.empty()) {
            return fail("onnx model dry-run failed: %s", error_msg.c_str());
         }
+    } else {
+        LOG(warning, "dry-run disabled for onnx model '%s'", model_cfg->name().c_str());
     }
     return true;
 }
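The `ClearVespaTensor` helper added in `onnx_wrapper.cpp` dispatches on the tensor's cell type via `typify_invoke`/`TypifyCellType` and fills the cells with a value-initialized `CT{}`. Below is a rough sketch of the same idea using `std::variant` in place of Vespa's typify machinery; the `Cells` alias and `clear_cells` function are assumptions for illustration, not part of the patch:

```cpp
#include <algorithm>
#include <cstdint>
#include <span>
#include <variant>
#include <vector>

// One alternative per supported cell type; Vespa derives the set from the value's cell_type().
using Cells = std::variant<std::span<float>, std::span<double>, std::span<std::int8_t>>;

// Zero-fill the cells with a value-initialized element of the matching type,
// mirroring std::fill(cells.begin(), cells.end(), CT{}) in ClearVespaTensor::invoke.
void clear_cells(Cells cells) {
    std::visit([](auto span) {
        using CT = typename decltype(span)::value_type;
        std::fill(span.begin(), span.end(), CT{});
    }, cells);
}

int main() {
    std::vector<float> floats{1.0f, 2.0f, 3.0f};
    clear_cells(Cells{std::span<float>(floats)}); // floats is now {0, 0, 0}
    return 0;
}
```

With `dry_run_on_setup(false)`, a model that only breaks at run time is still accepted during setup; the updated feature test then expects its outputs to come back as all zeros, while the dry-run variant keeps failing setup as before.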