From 0f5b53e3022f3e1af647bf3bcba10c6fde00371d Mon Sep 17 00:00:00 2001
From: Håvard Pettersen
Date: Mon, 14 Jun 2021 14:32:46 +0000
Subject: estimate time and space needed for onnx model evaluation

---
 .../apps/analyze_onnx_model/analyze_onnx_model.cpp | 155 ++++++++++++++++++++-
 eval/src/vespa/eval/onnx/onnx_wrapper.cpp          |  48 ++++---
 eval/src/vespa/eval/onnx/onnx_wrapper.h            |   2 +
 3 files changed, 184 insertions(+), 21 deletions(-)

diff --git a/eval/src/apps/analyze_onnx_model/analyze_onnx_model.cpp b/eval/src/apps/analyze_onnx_model/analyze_onnx_model.cpp
index f1cc3b28751..3f56610dcaa 100644
--- a/eval/src/apps/analyze_onnx_model/analyze_onnx_model.cpp
+++ b/eval/src/apps/analyze_onnx_model/analyze_onnx_model.cpp
@@ -1,7 +1,15 @@
 // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

 #include <vespa/eval/onnx/onnx_wrapper.h>
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/tensor_spec.h>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/vespalib/util/benchmark_timer.h>
+#include <vespa/vespalib/util/require.h>
 #include <vespa/vespalib/util/guard.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+using vespalib::make_string_short::fmt;

 using vespalib::FilePointer;
 using namespace vespalib::eval;
@@ -42,18 +50,159 @@ void report_memory_usage(const vespalib::string &desc) {
     fprintf(stderr, "vm_size: %s, vm_rss: %s (%s)\n", vm_size.c_str(), vm_rss.c_str(), desc.c_str());
 }

+struct Options {
+    size_t pos = 0;
+    std::vector<vespalib::string> opt_list;
+    void add_option(const vespalib::string &opt) {
+        opt_list.push_back(opt);
+    }
+    vespalib::string get_option(const vespalib::string &desc, const vespalib::string &fallback) {
+        vespalib::string opt;
+        if (pos < opt_list.size()) {
+            opt = opt_list[pos];
+            fprintf(stderr, "option[%zu](%s): %s\n",
+                    pos, desc.c_str(), opt.c_str());
+        } else {
+            opt = fallback;
+            fprintf(stderr, "unspecified option[%zu](%s), fallback: %s\n",
+                    pos, desc.c_str(), fallback.c_str());
+        }
+        ++pos;
+        return opt;
+    }
+    bool get_bool_opt(const vespalib::string &desc, const vespalib::string &fallback) {
+        auto opt = get_option(desc, fallback);
+        REQUIRE((opt == "true") || (opt == "false"));
+        return (opt == "true");
+    }
+    size_t get_size_opt(const vespalib::string &desc, const vespalib::string &fallback) {
+        auto opt = get_option(desc, fallback);
+        size_t value = atoi(opt.c_str());
+        REQUIRE(value > 0);
+        return value;
+    }
+};
+
+void dump_model_info(const Onnx &model) {
+    fprintf(stderr, "model meta-data:\n");
+    for (size_t i = 0; i < model.inputs().size(); ++i) {
+        fprintf(stderr, "  input[%zu]: '%s' %s\n", i, model.inputs()[i].name.c_str(), model.inputs()[i].type_as_string().c_str());
+    }
+    for (size_t i = 0; i < model.outputs().size(); ++i) {
+        fprintf(stderr, "  output[%zu]: '%s' %s\n", i, model.outputs()[i].name.c_str(), model.outputs()[i].type_as_string().c_str());
+    }
+}
+
+void dump_wire_info(const Onnx::WireInfo &wire) {
+    fprintf(stderr, "test setup:\n");
+    REQUIRE_EQ(wire.vespa_inputs.size(), wire.onnx_inputs.size());
+    for (size_t i = 0; i < wire.vespa_inputs.size(); ++i) {
+        fprintf(stderr, "  input[%zu]: %s -> %s\n", i, wire.vespa_inputs[i].to_spec().c_str(), wire.onnx_inputs[i].type_as_string().c_str());
+    }
+    REQUIRE_EQ(wire.onnx_outputs.size(), wire.vespa_outputs.size());
+    for (size_t i = 0; i < wire.onnx_outputs.size(); ++i) {
+        fprintf(stderr, "  output[%zu]: %s -> %s\n", i, wire.onnx_outputs[i].type_as_string().c_str(), wire.vespa_outputs[i].to_spec().c_str());
+    }
+}
+
+struct MakeInputType {
+    Options &opts;
+    std::map<vespalib::string,size_t> symbolic_sizes;
+    MakeInputType(Options &opts_in) : opts(opts_in), symbolic_sizes() {}
+    ValueType operator()(const Onnx::TensorInfo &info) {
+        int d = 0;
+        std::vector<ValueType::Dimension> dim_list;
+        for (const auto &dim: info.dimensions) {
+            REQUIRE(d <= 9);
+            size_t size = 0;
+            if (dim.is_known()) {
+                size = dim.value;
+            } else if (dim.is_symbolic()) {
+                size = symbolic_sizes[dim.name];
+                if (size == 0) {
+                    size = opts.get_size_opt(fmt("symbolic size '%s'", dim.name.c_str()), "1");
+                    symbolic_sizes[dim.name] = size;
+                }
+            } else {
+                size = opts.get_size_opt(fmt("size of input '%s' dimension %d", info.name.c_str(), d), "1");
+            }
+            dim_list.emplace_back(fmt("d%d", d), size);
+            ++d;
+        }
+        return ValueType::make_type(Onnx::WirePlanner::best_cell_type(info.elements), std::move(dim_list));
+    }
+};
+
+Onnx::WireInfo make_plan(Options &opts, const Onnx &model) {
+    Onnx::WirePlanner planner;
+    MakeInputType make_input_type(opts);
+    for (const auto &input: model.inputs()) {
+        auto type = make_input_type(input);
+        REQUIRE(planner.bind_input_type(type, input));
+    }
+    for (const auto &output: model.outputs()) {
+        REQUIRE(!planner.make_output_type(output).is_error());
+    }
+    return planner.get_wire_info(model);
+}
+
+struct MyEval {
+    Onnx::EvalContext context;
+    std::vector<Value::UP> inputs;
+    MyEval(const Onnx &model, const Onnx::WireInfo &wire) : context(model, wire), inputs() {
+        for (const auto &input_type: wire.vespa_inputs) {
+            TensorSpec spec(input_type.to_spec());
+            inputs.push_back(value_from_spec(spec, FastValueBuilderFactory::get()));
+        }
+    }
+    void eval() {
+        for (size_t i = 0; i < inputs.size(); ++i) {
+            context.bind_param(i, *inputs[i]);
+        }
+        context.eval();
+    }
+};
+
 int usage(const char *self) {
-    fprintf(stderr, "usage: %s <model>\n", self);
+    fprintf(stderr, "usage: %s <model> [options...]\n", self);
     fprintf(stderr, "  load onnx model and report memory usage\n");
+    fprintf(stderr, "  options are used to specify unknown values, like dimension sizes\n");
+    fprintf(stderr, "  options are accepted in the order in which they are needed\n");
+    fprintf(stderr, "  tip: run without options first, to see which you need\n");
     return 1;
 }

 int main(int argc, char **argv) {
-    if (argc != 2) {
+    if (argc < 2) {
         return usage(argv[0]);
     }
+    Options opts;
+    for (int i = 2; i < argc; ++i) {
+        opts.add_option(argv[i]);
+    }
+    Onnx::Optimize optimize = opts.get_bool_opt("optimize model", "true")
+                              ? Onnx::Optimize::ENABLE : Onnx::Optimize::DISABLE;
     report_memory_usage("before loading model");
-    Onnx onnx(argv[1], Onnx::Optimize::ENABLE);
+    Onnx model(argv[1], optimize);
     report_memory_usage("after loading model");
+    dump_model_info(model);
+    auto wire_info = make_plan(opts, model);
+    dump_wire_info(wire_info);
+    std::vector<std::unique_ptr<MyEval>> eval_list;
+    size_t max_concurrent = opts.get_size_opt("max concurrent evaluations", "1");
+    report_memory_usage("no evaluations yet");
+    for (size_t i = 1; i <= max_concurrent; ++i) {
+        eval_list.push_back(std::make_unique<MyEval>(model, wire_info));
+        eval_list.back()->eval();
+        if ((i % 8) == 0) {
+            report_memory_usage(fmt("concurrent evaluations: %zu", i));
+        }
+    }
+    if ((max_concurrent % 8) != 0) {
+        report_memory_usage(fmt("concurrent evaluations: %zu", max_concurrent));
+    }
+    eval_list.resize(1);
+    double min_time_s = vespalib::BenchmarkTimer::benchmark([&e = *eval_list.back()](){ e.eval(); }, 10.0);
+    fprintf(stderr, "estimated model evaluation time: %g ms\n", min_time_s * 1000.0);
     return 0;
 }
diff --git a/eval/src/vespa/eval/onnx/onnx_wrapper.cpp b/eval/src/vespa/eval/onnx/onnx_wrapper.cpp
index f848c421c9d..e2528fcb1c3 100644
--- a/eval/src/vespa/eval/onnx/onnx_wrapper.cpp
+++ b/eval/src/vespa/eval/onnx/onnx_wrapper.cpp
@@ -117,23 +117,6 @@ auto convert_optimize(Onnx::Optimize optimize) {
     abort();
 }

-CellType to_cell_type(Onnx::ElementType type) {
-    switch (type) {
-    case Onnx::ElementType::INT8: return CellType::INT8;
-    case Onnx::ElementType::BFLOAT16: return CellType::BFLOAT16;
-    case Onnx::ElementType::UINT8: [[fallthrough]];
-    case Onnx::ElementType::INT16: [[fallthrough]];
-    case Onnx::ElementType::UINT16: [[fallthrough]];
-    case Onnx::ElementType::FLOAT: return CellType::FLOAT;
-    case Onnx::ElementType::INT32: [[fallthrough]];
-    case Onnx::ElementType::INT64: [[fallthrough]];
-    case Onnx::ElementType::UINT32: [[fallthrough]];
-    case Onnx::ElementType::UINT64: [[fallthrough]];
-    case Onnx::ElementType::DOUBLE: return CellType::DOUBLE;
-    }
-    abort();
-}
-
 Onnx::ElementType make_element_type(ONNXTensorElementDataType element_type) {
     switch (element_type) {
     case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8: return Onnx::ElementType::INT8;
@@ -245,12 +228,41 @@ Onnx::TensorInfo::type_as_string() const

 Onnx::TensorInfo::~TensorInfo() = default;

+vespalib::string
+Onnx::TensorType::type_as_string() const
+{
+    vespalib::string res = type_name(elements);
+    for (const auto &size: dimensions) {
+        res += DimSize(size).as_string();
+    }
+    return res;
+}
+
 //-----------------------------------------------------------------------------

 Onnx::WireInfo::~WireInfo() = default;

 Onnx::WirePlanner::~WirePlanner() = default;

+CellType
+Onnx::WirePlanner::best_cell_type(Onnx::ElementType type)
+{
+    switch (type) {
+    case Onnx::ElementType::INT8: return CellType::INT8;
+    case Onnx::ElementType::BFLOAT16: return CellType::BFLOAT16;
+    case Onnx::ElementType::UINT8: [[fallthrough]];
+    case Onnx::ElementType::INT16: [[fallthrough]];
+    case Onnx::ElementType::UINT16: [[fallthrough]];
+    case Onnx::ElementType::FLOAT: return CellType::FLOAT;
+    case Onnx::ElementType::INT32: [[fallthrough]];
+    case Onnx::ElementType::INT64: [[fallthrough]];
+    case Onnx::ElementType::UINT32: [[fallthrough]];
+    case Onnx::ElementType::UINT64: [[fallthrough]];
+    case Onnx::ElementType::DOUBLE: return CellType::DOUBLE;
+    }
+    abort();
+}
+
 bool
 Onnx::WirePlanner::bind_input_type(const ValueType &vespa_in, const TensorInfo &onnx_in)
 {
@@ -309,7 +321,7 @@ Onnx::WirePlanner::make_output_type(const TensorInfo &onnx_out) const
         }
         dim_list.emplace_back(fmt("d%zu", dim_list.size()), dim_size);
     }
-    return ValueType::make_type(to_cell_type(elements), std::move(dim_list));
+    return ValueType::make_type(best_cell_type(elements), std::move(dim_list));
 }

 Onnx::WireInfo
diff --git a/eval/src/vespa/eval/onnx/onnx_wrapper.h b/eval/src/vespa/eval/onnx/onnx_wrapper.h
index 507d75efbd9..9392536eae7 100644
--- a/eval/src/vespa/eval/onnx/onnx_wrapper.h
+++ b/eval/src/vespa/eval/onnx/onnx_wrapper.h
@@ -68,6 +68,7 @@ public:
         std::vector<int64_t> dimensions;
         TensorType(ElementType elements_in, std::vector<int64_t> dimensions_in) noexcept
             : elements(elements_in), dimensions(std::move(dimensions_in)) {}
+        vespalib::string type_as_string() const;
     };

     // how the model should be wired with inputs/outputs
@@ -88,6 +89,7 @@ public:
     public:
         WirePlanner() : _input_types(), _symbolic_sizes(), _bound_unknown_sizes() {}
         ~WirePlanner();
+        static CellType best_cell_type(Onnx::ElementType type);
         bool bind_input_type(const ValueType &vespa_in, const TensorInfo &onnx_in);
         ValueType make_output_type(const TensorInfo &onnx_out) const;
         WireInfo get_wire_info(const Onnx &model) const;
--
cgit v1.2.3
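
For reference (not part of the commit itself): the option handling added above consumes positional option values in the order the tool asks for them: first whether to optimize the model, then a size for each symbolic or unknown input dimension, and finally the maximum number of concurrent evaluations. A run might therefore look roughly like the sketch below; the binary name, model path, symbolic dimension, and chosen values are illustrative assumptions, not taken from the patch.

    # hypothetical invocation: optimize=true, use 1 for a symbolic 'batch'
    # dimension, keep up to 16 evaluation contexts alive to observe memory growth
    $ analyze_onnx_model my_model.onnx true 1 16

As the new usage text suggests, running the tool with only the model path first makes it print which options it expects and the fallback values it used, so the right option order can be read off before adding values.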