From 0f5b53e3022f3e1af647bf3bcba10c6fde00371d Mon Sep 17 00:00:00 2001
From: Håvard Pettersen
Date: Mon, 14 Jun 2021 14:32:46 +0000
Subject: estimate time and space needed for onnx model evaluation

---
 .../apps/analyze_onnx_model/analyze_onnx_model.cpp | 155 ++++++++++++++++++++-
 eval/src/vespa/eval/onnx/onnx_wrapper.cpp          |  48 ++++---
 eval/src/vespa/eval/onnx/onnx_wrapper.h            |   2 +
 3 files changed, 184 insertions(+), 21 deletions(-)

diff --git a/eval/src/apps/analyze_onnx_model/analyze_onnx_model.cpp b/eval/src/apps/analyze_onnx_model/analyze_onnx_model.cpp
index f1cc3b28751..3f56610dcaa 100644
--- a/eval/src/apps/analyze_onnx_model/analyze_onnx_model.cpp
+++ b/eval/src/apps/analyze_onnx_model/analyze_onnx_model.cpp
@@ -1,7 +1,15 @@
 // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

 #include <vespa/eval/onnx/onnx_wrapper.h>
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/tensor_spec.h>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/vespalib/util/benchmark_timer.h>
+#include <vespa/vespalib/util/require.h>
 #include <vespa/vespalib/util/guard.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+using vespalib::make_string_short::fmt;

 using vespalib::FilePointer;
 using namespace vespalib::eval;
@@ -42,18 +50,159 @@ void report_memory_usage(const vespalib::string &desc) {
     fprintf(stderr, "vm_size: %s, vm_rss: %s (%s)\n", vm_size.c_str(), vm_rss.c_str(), desc.c_str());
 }

+struct Options {
+    size_t pos = 0;
+    std::vector<vespalib::string> opt_list;
+    void add_option(const vespalib::string &opt) {
+        opt_list.push_back(opt);
+    }
+    vespalib::string get_option(const vespalib::string &desc, const vespalib::string &fallback) {
+        vespalib::string opt;
+        if (pos < opt_list.size()) {
+            opt = opt_list[pos];
+            fprintf(stderr, "option[%zu](%s): %s\n",
+                    pos, desc.c_str(), opt.c_str());
+        } else {
+            opt = fallback;
+            fprintf(stderr, "unspecified option[%zu](%s), fallback: %s\n",
+                    pos, desc.c_str(), fallback.c_str());
+        }
+        ++pos;
+        return opt;
+    }
+    bool get_bool_opt(const vespalib::string &desc, const vespalib::string &fallback) {
+        auto opt = get_option(desc, fallback);
+        REQUIRE((opt == "true") || (opt == "false"));
+        return (opt == "true");
+    }
+    size_t get_size_opt(const vespalib::string &desc, const vespalib::string &fallback) {
+        auto opt = get_option(desc, fallback);
+        size_t value = atoi(opt.c_str());
+        REQUIRE(value > 0);
+        return value;
+    }
+};
+
+void dump_model_info(const Onnx &model) {
+    fprintf(stderr, "model meta-data:\n");
+    for (size_t i = 0; i < model.inputs().size(); ++i) {
+        fprintf(stderr, "  input[%zu]: '%s' %s\n", i, model.inputs()[i].name.c_str(), model.inputs()[i].type_as_string().c_str());
+    }
+    for (size_t i = 0; i < model.outputs().size(); ++i) {
+        fprintf(stderr, "  output[%zu]: '%s' %s\n", i, model.outputs()[i].name.c_str(), model.outputs()[i].type_as_string().c_str());
+    }
+}
+
+void dump_wire_info(const Onnx::WireInfo &wire) {
+    fprintf(stderr, "test setup:\n");
+    REQUIRE_EQ(wire.vespa_inputs.size(), wire.onnx_inputs.size());
+    for (size_t i = 0; i < wire.vespa_inputs.size(); ++i) {
+        fprintf(stderr, "  input[%zu]: %s -> %s\n", i, wire.vespa_inputs[i].to_spec().c_str(), wire.onnx_inputs[i].type_as_string().c_str());
+    }
+    REQUIRE_EQ(wire.onnx_outputs.size(), wire.vespa_outputs.size());
+    for (size_t i = 0; i < wire.onnx_outputs.size(); ++i) {
+        fprintf(stderr, "  output[%zu]: %s -> %s\n", i, wire.onnx_outputs[i].type_as_string().c_str(), wire.vespa_outputs[i].to_spec().c_str());
+    }
+}
+
+struct MakeInputType {
+    Options &opts;
+    std::map<vespalib::string,size_t> symbolic_sizes;
+    MakeInputType(Options &opts_in) : opts(opts_in), symbolic_sizes() {}
+    ValueType operator()(const Onnx::TensorInfo &info) {
+        int d = 0;
+        std::vector<ValueType::Dimension> dim_list;
+        for (const auto &dim: info.dimensions) {
+            REQUIRE(d <= 9);
+            size_t size = 0;
+            if (dim.is_known()) {
+                size = dim.value;
+            } else if (dim.is_symbolic()) {
+                size = symbolic_sizes[dim.name];
+                if (size == 0) {
+                    size = opts.get_size_opt(fmt("symbolic size '%s'", dim.name.c_str()), "1");
+                    symbolic_sizes[dim.name] = size;
+                }
+            } else {
+                size = opts.get_size_opt(fmt("size of input '%s' dimension %d", info.name.c_str(), d), "1");
+            }
+            dim_list.emplace_back(fmt("d%d", d), size);
+            ++d;
+        }
+        return ValueType::make_type(Onnx::WirePlanner::best_cell_type(info.elements), std::move(dim_list));
+    }
+};
+
+Onnx::WireInfo make_plan(Options &opts, const Onnx &model) {
+    Onnx::WirePlanner planner;
+    MakeInputType make_input_type(opts);
+    for (const auto &input: model.inputs()) {
+        auto type = make_input_type(input);
+        REQUIRE(planner.bind_input_type(type, input));
+    }
+    for (const auto &output: model.outputs()) {
+        REQUIRE(!planner.make_output_type(output).is_error());
+    }
+    return planner.get_wire_info(model);
+}
+
+struct MyEval {
+    Onnx::EvalContext context;
+    std::vector<Value::UP> inputs;
+    MyEval(const Onnx &model, const Onnx::WireInfo &wire) : context(model, wire), inputs() {
+        for (const auto &input_type: wire.vespa_inputs) {
+            TensorSpec spec(input_type.to_spec());
+            inputs.push_back(value_from_spec(spec, FastValueBuilderFactory::get()));
+        }
+    }
+    void eval() {
+        for (size_t i = 0; i < inputs.size(); ++i) {
+            context.bind_param(i, *inputs[i]);
+        }
+        context.eval();
+    }
+};
+
 int usage(const char *self) {
-    fprintf(stderr, "usage: %s <model>\n", self);
+    fprintf(stderr, "usage: %s <model> [options...]\n", self);
     fprintf(stderr, "  load onnx model and report memory usage\n");
+    fprintf(stderr, "  options are used to specify unknown values, like dimension sizes\n");
+    fprintf(stderr, "  options are accepted in the order in which they are needed\n");
+    fprintf(stderr, "  tip: run without options first, to see which you need\n");
     return 1;
 }

 int main(int argc, char **argv) {
-    if (argc != 2) {
+    if (argc < 2) {
         return usage(argv[0]);
     }
+    Options opts;
+    for (int i = 2; i < argc; ++i) {
+        opts.add_option(argv[i]);
+    }
+    Onnx::Optimize optimize = opts.get_bool_opt("optimize model", "true")
+                              ? Onnx::Optimize::ENABLE : Onnx::Optimize::DISABLE;
     report_memory_usage("before loading model");
-    Onnx onnx(argv[1], Onnx::Optimize::ENABLE);
+    Onnx model(argv[1], optimize);
     report_memory_usage("after loading model");
+    dump_model_info(model);
+    auto wire_info = make_plan(opts, model);
+    dump_wire_info(wire_info);
+    std::vector<std::unique_ptr<MyEval>> eval_list;
+    size_t max_concurrent = opts.get_size_opt("max concurrent evaluations", "1");
+    report_memory_usage("no evaluations yet");
+    for (size_t i = 1; i <= max_concurrent; ++i) {
+        eval_list.push_back(std::make_unique<MyEval>(model, wire_info));
+        eval_list.back()->eval();
+        if ((i % 8) == 0) {
+            report_memory_usage(fmt("concurrent evaluations: %zu", i));
+        }
+    }
+    if ((max_concurrent % 8) != 0) {
+        report_memory_usage(fmt("concurrent evaluations: %zu", max_concurrent));
+    }
+    eval_list.resize(1);
+    double min_time_s = vespalib::BenchmarkTimer::benchmark([&e = *eval_list.back()](){ e.eval(); }, 10.0);
+    fprintf(stderr, "estimated model evaluation time: %g ms\n", min_time_s * 1000.0);
     return 0;
 }
diff --git a/eval/src/vespa/eval/onnx/onnx_wrapper.cpp b/eval/src/vespa/eval/onnx/onnx_wrapper.cpp
index f848c421c9d..e2528fcb1c3 100644
--- a/eval/src/vespa/eval/onnx/onnx_wrapper.cpp
+++ b/eval/src/vespa/eval/onnx/onnx_wrapper.cpp
@@ -117,23 +117,6 @@ auto convert_optimize(Onnx::Optimize optimize) {
     abort();
 }

-CellType to_cell_type(Onnx::ElementType type) {
-    switch (type) {
-    case Onnx::ElementType::INT8: return CellType::INT8;
-    case Onnx::ElementType::BFLOAT16: return CellType::BFLOAT16;
-    case Onnx::ElementType::UINT8: [[fallthrough]];
-    case Onnx::ElementType::INT16: [[fallthrough]];
-    case Onnx::ElementType::UINT16: [[fallthrough]];
-    case Onnx::ElementType::FLOAT: return CellType::FLOAT;
-    case Onnx::ElementType::INT32: [[fallthrough]];
-    case Onnx::ElementType::INT64: [[fallthrough]];
-    case Onnx::ElementType::UINT32: [[fallthrough]];
-    case Onnx::ElementType::UINT64: [[fallthrough]];
-    case Onnx::ElementType::DOUBLE: return CellType::DOUBLE;
-    }
-    abort();
-}
-
 Onnx::ElementType make_element_type(ONNXTensorElementDataType element_type) {
     switch (element_type) {
     case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8: return Onnx::ElementType::INT8;
@@ -245,12 +228,41 @@ Onnx::TensorInfo::type_as_string() const

 Onnx::TensorInfo::~TensorInfo() = default;

+vespalib::string
+Onnx::TensorType::type_as_string() const
+{
+    vespalib::string res = type_name(elements);
+    for (const auto &size: dimensions) {
+        res += DimSize(size).as_string();
+    }
+    return res;
+}
+
 //-----------------------------------------------------------------------------

 Onnx::WireInfo::~WireInfo() = default;

 Onnx::WirePlanner::~WirePlanner() = default;

+CellType
+Onnx::WirePlanner::best_cell_type(Onnx::ElementType type)
+{
+    switch (type) {
+    case Onnx::ElementType::INT8: return CellType::INT8;
+    case Onnx::ElementType::BFLOAT16: return CellType::BFLOAT16;
+    case Onnx::ElementType::UINT8: [[fallthrough]];
+    case Onnx::ElementType::INT16: [[fallthrough]];
+    case Onnx::ElementType::UINT16: [[fallthrough]];
+    case Onnx::ElementType::FLOAT: return CellType::FLOAT;
+    case Onnx::ElementType::INT32: [[fallthrough]];
+    case Onnx::ElementType::INT64: [[fallthrough]];
+    case Onnx::ElementType::UINT32: [[fallthrough]];
+    case Onnx::ElementType::UINT64: [[fallthrough]];
+    case Onnx::ElementType::DOUBLE: return CellType::DOUBLE;
+    }
+    abort();
+}
+
 bool
 Onnx::WirePlanner::bind_input_type(const ValueType &vespa_in, const TensorInfo &onnx_in)
 {
@@ -309,7 +321,7 @@ Onnx::WirePlanner::make_output_type(const TensorInfo &onnx_out) const
         }
         dim_list.emplace_back(fmt("d%zu", dim_list.size()), dim_size);
     }
-    return ValueType::make_type(to_cell_type(elements), std::move(dim_list));
+    return ValueType::make_type(best_cell_type(elements), std::move(dim_list));
 }

 Onnx::WireInfo
diff --git a/eval/src/vespa/eval/onnx/onnx_wrapper.h b/eval/src/vespa/eval/onnx/onnx_wrapper.h
index 507d75efbd9..9392536eae7 100644
--- a/eval/src/vespa/eval/onnx/onnx_wrapper.h
+++ b/eval/src/vespa/eval/onnx/onnx_wrapper.h
@@ -68,6 +68,7 @@ public:
         std::vector<int64_t> dimensions;
         TensorType(ElementType elements_in, std::vector<int64_t> dimensions_in) noexcept
             : elements(elements_in), dimensions(std::move(dimensions_in)) {}
+        vespalib::string type_as_string() const;
     };

     // how the model should be wired with inputs/outputs
@@ -88,6 +89,7 @@ public:
     public:
         WirePlanner() : _input_types(), _symbolic_sizes(), _bound_unknown_sizes() {}
         ~WirePlanner();
+        static CellType best_cell_type(Onnx::ElementType type);
         bool bind_input_type(const ValueType &vespa_in, const TensorInfo &onnx_in);
         ValueType make_output_type(const TensorInfo &onnx_out) const;
         WireInfo get_wire_info(const Onnx &model) const;
--
cgit v1.2.3
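
For reference (not part of the commit itself): the option handling added above consumes positional option values in the order the tool asks for them: first whether to optimize the model, then a size for each symbolic or unknown input dimension, and finally the maximum number of concurrent evaluations. A run might therefore look roughly like the sketch below; the binary name, model path, symbolic dimension, and chosen values are illustrative assumptions, not taken from the patch.

    # hypothetical invocation: optimize=true, use 1 for a symbolic 'batch'
    # dimension, keep up to 16 evaluation contexts alive to observe memory growth
    $ analyze_onnx_model my_model.onnx true 1 16

As the new usage text suggests, running the tool with only the model path first makes it print which options it expects and the fallback values it used, so the right option order can be read off before adding values.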