author    Henning Baldersheim <balder@yahoo-inc.com>  2021-08-11 16:29:48 +0200
committer GitHub <noreply@github.com>                 2021-08-11 16:29:48 +0200
commit    35e7cbbfdde4e544ce72b968e9c79e552fa0d3a2 (patch)
tree      4bddabdc8de40261c72aaf4a5aeefca43fd678c7
parent    99cb5741270e2a0f00c5ea857ff5f012cf265375 (diff)
parent    da4e0c4a0d22c614b028c83b9106328996eca36a (diff)
Merge pull request #18716 from vespa-engine/havardpe/avoid-crash-on-runtime-onnx-errors
avoid crash on run-time onnx errors
-rw-r--r--  eval/src/tests/tensor/onnx_wrapper/onnx_wrapper_test.cpp        | 34
-rw-r--r--  eval/src/vespa/eval/onnx/onnx_wrapper.cpp                       | 21
-rw-r--r--  eval/src/vespa/eval/onnx/onnx_wrapper.h                         |  1
-rw-r--r--  searchlib/src/tests/features/onnx_feature/fragile.onnx          | 11
-rwxr-xr-x  searchlib/src/tests/features/onnx_feature/fragile.py            |  2
-rw-r--r--  searchlib/src/tests/features/onnx_feature/onnx_feature_test.cpp |  8
-rw-r--r--  searchlib/src/vespa/searchlib/features/onnx_feature.cpp         |  9
7 files changed, 75 insertions(+), 11 deletions(-)
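
Taken together, the change makes run-time ONNX evaluation failures non-fatal: eval() is wrapped in a try/catch for Ort::Exception, and a new EvalContext::clear_results() zero-fills the output tensors so a broken model scores as all zeros instead of crashing the process. A minimal standalone sketch of that guard pattern (the EvalContext below is a stand-in for illustration, not Vespa's class):

#include <algorithm>
#include <cstdio>
#include <stdexcept>
#include <vector>

// Stand-in for Vespa's Onnx::EvalContext; only the shape of the
// pattern matters here, not the real API.
struct EvalContext {
    std::vector<float> result;
    void eval() {                        // pretend inference fails at run time
        throw std::runtime_error("Non-zero status code returned while running node");
    }
    void clear_results() {               // zero-fill outputs, mirroring the patch
        std::fill(result.begin(), result.end(), 0.0f);
    }
};

int main() {
    EvalContext ctx;
    ctx.result = {2.0f, 15.0f};
    try {
        ctx.eval();
    } catch (const std::exception &ex) { // the patch catches Ort::Exception here
        std::fprintf(stderr, "onnx model evaluation failed: %s\n", ex.what());
        ctx.clear_results();             // ranking sees well-defined zeros, no crash
    }
    // ctx.result is now {0, 0}
}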
diff --git a/eval/src/tests/tensor/onnx_wrapper/onnx_wrapper_test.cpp b/eval/src/tests/tensor/onnx_wrapper/onnx_wrapper_test.cpp
index 6b45172ef80..2a336ecf099 100644
--- a/eval/src/tests/tensor/onnx_wrapper/onnx_wrapper_test.cpp
+++ b/eval/src/tests/tensor/onnx_wrapper/onnx_wrapper_test.cpp
@@ -178,6 +178,9 @@ TEST(OnnxTest, simple_onnx_model_can_be_evaluated)
ctx.eval();
EXPECT_EQ(output.cells().typify<float>()[0], 80.0);
//-------------------------------------------------------------------------
+ ctx.clear_results();
+ EXPECT_EQ(output.cells().typify<float>()[0], 0.0);
+ //-------------------------------------------------------------------------
}
TEST(OnnxTest, dynamic_onnx_model_can_be_evaluated)
@@ -224,6 +227,9 @@ TEST(OnnxTest, dynamic_onnx_model_can_be_evaluated)
ctx.eval();
EXPECT_EQ(output.cells().typify<float>()[0], 81.0);
//-------------------------------------------------------------------------
+ ctx.clear_results();
+ EXPECT_EQ(output.cells().typify<float>()[0], 0.0);
+ //-------------------------------------------------------------------------
}
TEST(OnnxTest, int_types_onnx_model_can_be_evaluated)
@@ -270,6 +276,9 @@ TEST(OnnxTest, int_types_onnx_model_can_be_evaluated)
ctx.eval();
EXPECT_EQ(output.cells().typify<double>()[0], 80.0);
//-------------------------------------------------------------------------
+ ctx.clear_results();
+ EXPECT_EQ(output.cells().typify<double>()[0], 0.0);
+ //-------------------------------------------------------------------------
}
TEST(OnnxTest, we_guess_batch_dimension_size_when_inference_fails) {
@@ -311,6 +320,15 @@ TEST(OnnxTest, we_guess_batch_dimension_size_when_inference_fails) {
EXPECT_EQ(out_3, expect_3);
EXPECT_EQ(out_4, expect_4);
//-------------------------------------------------------------------------
+ auto zero_3 = TensorSpec::from_expr("tensor<float>(d0[3]):[0,0,0]");
+ auto zero_4 = TensorSpec::from_expr("tensor<float>(d0[4]):[0,0,0,0]");
+ ctx_3.clear_results();
+ EXPECT_EQ(TensorSpec::from_value(ctx_3.get_result(0)), zero_3);
+ EXPECT_EQ(TensorSpec::from_value(ctx_4.get_result(0)), expect_4);
+ ctx_4.clear_results();
+ EXPECT_EQ(TensorSpec::from_value(ctx_3.get_result(0)), zero_3);
+ EXPECT_EQ(TensorSpec::from_value(ctx_4.get_result(0)), zero_4);
+ //-------------------------------------------------------------------------
}
TEST(OnnxTest, zero_copy_unstable_types) {
@@ -356,6 +374,14 @@ TEST(OnnxTest, zero_copy_unstable_types) {
EXPECT_EQ(cells16.typify<BFloat16>()[1], 2.0);
EXPECT_EQ(cells16.typify<BFloat16>()[2], 3.0);
//-------------------------------------------------------------------------
+ ctx.clear_results();
+ EXPECT_EQ(cells8.typify<Int8Float>()[0], 0.0);
+ EXPECT_EQ(cells8.typify<Int8Float>()[1], 0.0);
+ EXPECT_EQ(cells8.typify<Int8Float>()[2], 0.0);
+ EXPECT_EQ(cells16.typify<BFloat16>()[0], 0.0);
+ EXPECT_EQ(cells16.typify<BFloat16>()[1], 0.0);
+ EXPECT_EQ(cells16.typify<BFloat16>()[2], 0.0);
+ //-------------------------------------------------------------------------
}
TEST(OnnxTest, converted_unstable_types) {
@@ -401,6 +427,14 @@ TEST(OnnxTest, converted_unstable_types) {
EXPECT_EQ(cells16.typify<BFloat16>()[1], 2.0);
EXPECT_EQ(cells16.typify<BFloat16>()[2], 3.0);
//-------------------------------------------------------------------------
+ ctx.clear_results();
+ EXPECT_EQ(cells8.typify<Int8Float>()[0], 0.0);
+ EXPECT_EQ(cells8.typify<Int8Float>()[1], 0.0);
+ EXPECT_EQ(cells8.typify<Int8Float>()[2], 0.0);
+ EXPECT_EQ(cells16.typify<BFloat16>()[0], 0.0);
+ EXPECT_EQ(cells16.typify<BFloat16>()[1], 0.0);
+ EXPECT_EQ(cells16.typify<BFloat16>()[2], 0.0);
+ //-------------------------------------------------------------------------
}
TEST(OnnxTest, inspect_float_to_int8_conversion) {
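
The test additions all follow one shape: evaluate, assert on the produced cells, then call clear_results() and assert that the very same cell views now read zero. That last point also pins down that clearing happens in place rather than by reallocating the result buffers. Roughly, with plain asserts standing in for the GTest macros above:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
    std::vector<float> cells = {1.0f, 2.0f, 3.0f};  // stand-in for output.cells()
    float *view = cells.data();                     // view taken before clearing
    std::fill(cells.begin(), cells.end(), 0.0f);    // what clear_results() does
    assert(view[0] == 0.0f && view[1] == 0.0f && view[2] == 0.0f); // same view, zeros
    return 0;
}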
diff --git a/eval/src/vespa/eval/onnx/onnx_wrapper.cpp b/eval/src/vespa/eval/onnx/onnx_wrapper.cpp
index e2528fcb1c3..6e857c51b2b 100644
--- a/eval/src/vespa/eval/onnx/onnx_wrapper.cpp
+++ b/eval/src/vespa/eval/onnx/onnx_wrapper.cpp
@@ -97,6 +97,17 @@ struct CreateVespaTensor {
}
};
+struct ClearVespaTensor {
+ template <typename CT> static void invoke(const Value &value) {
+ auto cells = unconstify(value.cells().typify<CT>());
+ std::fill(cells.begin(), cells.end(), CT{});
+ }
+ void operator()(const Value &value) {
+ return typify_invoke<1,TypifyCellType,ClearVespaTensor>(value.type().cell_type(), value);
+ }
+};
+ClearVespaTensor clear_vespa_tensor;
+
//-----------------------------------------------------------------------------
template <typename E> vespalib::string type_name(E enum_value) {
@@ -202,7 +213,7 @@ std::vector<int64_t> extract_sizes(const ValueType &type) {
return sizes;
}
-}
+} // <unnamed>
vespalib::string
Onnx::DimSize::as_string() const
@@ -488,6 +499,14 @@ Onnx::EvalContext::eval()
}
}
+void
+Onnx::EvalContext::clear_results()
+{
+ for (const Value::UP &result: _results) {
+ clear_vespa_tensor(*result);
+ }
+}
+
const Value &
Onnx::EvalContext::get_result(size_t i) const
{
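
ClearVespaTensor uses Vespa's typify_invoke machinery to dispatch on the tensor's run-time cell type and zero-fill through a correctly typed span. A simplified stand-in for that dispatch, with a plain enum switch replacing TypifyCellType:

#include <algorithm>
#include <cstddef>

enum class CellType { FLOAT, DOUBLE, INT8 };

// Zero-fill a typed view of the raw cell storage.
template <typename CT>
void clear_cells(void *data, size_t count) {
    CT *cells = static_cast<CT *>(data);
    std::fill(cells, cells + count, CT{});
}

// Run-time type dispatch; Vespa does this with typify_invoke<1,TypifyCellType,...>.
void clear_tensor(CellType type, void *data, size_t count) {
    switch (type) {
        case CellType::FLOAT:  clear_cells<float>(data, count);       break;
        case CellType::DOUBLE: clear_cells<double>(data, count);      break;
        case CellType::INT8:   clear_cells<signed char>(data, count); break;
    }
}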
diff --git a/eval/src/vespa/eval/onnx/onnx_wrapper.h b/eval/src/vespa/eval/onnx/onnx_wrapper.h
index 9392536eae7..7084efab207 100644
--- a/eval/src/vespa/eval/onnx/onnx_wrapper.h
+++ b/eval/src/vespa/eval/onnx/onnx_wrapper.h
@@ -134,6 +134,7 @@ public:
size_t num_results() const { return _result_values.size(); }
void bind_param(size_t i, const Value &param);
void eval();
+ void clear_results();
const Value &get_result(size_t i) const;
};
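
For reference, the call order the new method slots into, sketched against the patched header (namespaces and wiring here are assumptions for illustration, not verbatim Vespa code):

#include <vespa/eval/onnx/onnx_wrapper.h>  // the header patched above
#include <cstddef>
#include <exception>
#include <vector>

using vespalib::eval::Onnx;   // assumed namespace
using vespalib::eval::Value;

void eval_or_zero(Onnx::EvalContext &ctx, const std::vector<const Value *> &inputs) {
    for (size_t i = 0; i < ctx.num_params(); ++i) {
        ctx.bind_param(i, *inputs[i]);    // bind this document's inputs
    }
    try {
        ctx.eval();                       // may throw Ort::Exception at run time
    } catch (const std::exception &) {
        ctx.clear_results();              // fallback: results read as all zeros
    }
    const Value &out = ctx.get_result(0); // valid either way
    (void) out;
}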
diff --git a/searchlib/src/tests/features/onnx_feature/fragile.onnx b/searchlib/src/tests/features/onnx_feature/fragile.onnx
index 2a05500e95b..dc650154f83 100644
--- a/searchlib/src/tests/features/onnx_feature/fragile.onnx
+++ b/searchlib/src/tests/features/onnx_feature/fragile.onnx
(binary ONNX protobuf; the textual diff is not reproducible here — the regenerated
model re-declares the 'out' tensor with a fixed size [2] instead of the symbolic
'batch' dimension, matching the fragile.py change below)
diff --git a/searchlib/src/tests/features/onnx_feature/fragile.py b/searchlib/src/tests/features/onnx_feature/fragile.py
index e4eaf168e14..fe5851f5a63 100755
--- a/searchlib/src/tests/features/onnx_feature/fragile.py
+++ b/searchlib/src/tests/features/onnx_feature/fragile.py
@@ -6,7 +6,7 @@ from onnx import helper, TensorProto
INPUT1 = helper.make_tensor_value_info('in1', TensorProto.FLOAT, [2])
INPUT2 = helper.make_tensor_value_info('in2', TensorProto.FLOAT, ['batch'])
-OUTPUT = helper.make_tensor_value_info('out', TensorProto.FLOAT, ['batch'])
+OUTPUT = helper.make_tensor_value_info('out', TensorProto.FLOAT, [2])
nodes = [
helper.make_node(
diff --git a/searchlib/src/tests/features/onnx_feature/onnx_feature_test.cpp b/searchlib/src/tests/features/onnx_feature/onnx_feature_test.cpp
index c07ebc48604..7e80d9fc335 100644
--- a/searchlib/src/tests/features/onnx_feature/onnx_feature_test.cpp
+++ b/searchlib/src/tests/features/onnx_feature/onnx_feature_test.cpp
@@ -147,20 +147,22 @@ TEST_F(OnnxFeatureTest, input_features_and_output_names_can_be_specified) {
TEST_F(OnnxFeatureTest, fragile_model_can_be_evaluated) {
add_expr("in1", "tensor<float>(x[2]):[docid,5]");
add_expr("in2", "tensor<float>(x[2]):[docid,10]");
- add_onnx(OnnxModel("fragile", fragile_model));
+ add_onnx(OnnxModel("fragile", fragile_model).dry_run_on_setup(true));
EXPECT_TRUE(try_compile(onnx_feature("fragile")));
EXPECT_EQ(get(1), TensorSpec::from_expr("tensor<float>(d0[2]):[2,15]"));
EXPECT_EQ(get(3), TensorSpec::from_expr("tensor<float>(d0[2]):[6,15]"));
}
-TEST_F(OnnxFeatureTest, runtime_broken_model_can_be_set_up_without_dry_run) {
+TEST_F(OnnxFeatureTest, broken_model_evaluates_to_all_zeros) {
add_expr("in1", "tensor<float>(x[2]):[docid,5]");
add_expr("in2", "tensor<float>(x[3]):[docid,10,31515]");
add_onnx(OnnxModel("fragile", fragile_model).dry_run_on_setup(false));
EXPECT_TRUE(try_compile(onnx_feature("fragile")));
+ EXPECT_EQ(get(1), TensorSpec::from_expr("tensor<float>(d0[2]):[0,0]"));
+ EXPECT_EQ(get(3), TensorSpec::from_expr("tensor<float>(d0[2]):[0,0]"));
}
-TEST_F(OnnxFeatureTest, runtime_broken_model_fails_with_dry_run) {
+TEST_F(OnnxFeatureTest, broken_model_fails_with_dry_run) {
add_expr("in1", "tensor<float>(x[2]):[docid,5]");
add_expr("in2", "tensor<float>(x[3]):[docid,10,31515]");
add_onnx(OnnxModel("fragile", fragile_model).dry_run_on_setup(true));
diff --git a/searchlib/src/vespa/searchlib/features/onnx_feature.cpp b/searchlib/src/vespa/searchlib/features/onnx_feature.cpp
index 0af03f3aa86..d4756a2f8a7 100644
--- a/searchlib/src/vespa/searchlib/features/onnx_feature.cpp
+++ b/searchlib/src/vespa/searchlib/features/onnx_feature.cpp
@@ -89,7 +89,12 @@ public:
for (size_t i = 0; i < _eval_context.num_params(); ++i) {
_eval_context.bind_param(i, inputs().get_object(i).get());
}
- _eval_context.eval();
+ try {
+ _eval_context.eval();
+ } catch (const Ort::Exception &ex) {
+ LOG(warning, "onnx model evaluation failed: %s", ex.what());
+ _eval_context.clear_results();
+ }
}
};
@@ -162,6 +167,8 @@ OnnxBlueprint::setup(const IIndexEnvironment &env,
if (!error_msg.empty()) {
return fail("onnx model dry-run failed: %s", error_msg.c_str());
}
+ } else {
+ LOG(warning, "dry-run disabled for onnx model '%s'", model_cfg->name().c_str());
}
return true;
}