diff options
author | Håvard Pettersen <3535158+havardpe@users.noreply.github.com> | 2021-08-25 10:28:55 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-08-25 10:28:55 +0200 |
commit | 25a715b5479f1a8d001dbc2e6d4993652f82ba67 (patch) | |
tree | c3709aa42fb15ea21301cece8e94d2826cbca3de /eval | |
parent | 44eda57b93fed75b04532311b81503650454a0ca (diff) | |
parent | 68551790e11aff76990be4a554ba00a7cb6e30ca (diff) |
Merge pull request #18835 from vespa-engine/havardpe/improve-vespa-eval-expr
improve vespa-eval-expr
Diffstat (limited to 'eval')
5 files changed, 127 insertions, 33 deletions
diff --git a/eval/src/apps/eval_expr/eval_expr.cpp b/eval/src/apps/eval_expr/eval_expr.cpp index 12c94c6e68e..d6c95772498 100644 --- a/eval/src/apps/eval_expr/eval_expr.cpp +++ b/eval/src/apps/eval_expr/eval_expr.cpp @@ -1,31 +1,132 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/eval/eval/function.h> -#include <vespa/eval/eval/interpreted_function.h> #include <vespa/eval/eval/tensor_spec.h> +#include <vespa/eval/eval/value_type.h> +#include <vespa/eval/eval/value.h> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/eval/eval/fast_value.h> +#include <vespa/eval/eval/lazy_params.h> +#include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/feature_name_extractor.h> +#include <vespa/eval/eval/tensor_function.h> +#include <vespa/eval/eval/make_tensor_function.h> +#include <vespa/eval/eval/optimize_tensor_function.h> +#include <vespa/eval/eval/compile_tensor_function.h> +#include <vespa/eval/eval/test/test_io.h> +#include <vespa/vespalib/util/stringfmt.h> + +using vespalib::make_string_short::fmt; using namespace vespalib::eval; +const auto &factory = FastValueBuilderFactory::get(); + +int usage(const char *self) { + fprintf(stderr, "usage: %s [--verbose] <expr> [expr ...]\n", self); + fprintf(stderr, " Evaluate a sequence of expressions. The first expression must be\n"); + fprintf(stderr, " self-contained (no external values). Later expressions may use the\n"); + fprintf(stderr, " results of earlier expressions. Expressions are automatically named\n"); + fprintf(stderr, " using single letter symbols ('a' through 'z'). 
Quote expressions to\n"); + fprintf(stderr, " make sure they become separate parameters.\n"); + fprintf(stderr, " The --verbose option may be specified to get more detailed informaion\n"); + fprintf(stderr, " about how the various expressions are optimized.\n"); + fprintf(stderr, "example: %s \"2+2\" \"a+2\" \"a+b\"\n", self); + fprintf(stderr, " (a=4, b=6, c=10)\n\n"); + return 1; +} + +int overflow(int cnt, int max) { + fprintf(stderr, "error: too many expressions: %d (max is %d)\n", cnt, max); + return 2; +} + +struct Context { + std::vector<vespalib::string> param_names; + std::vector<ValueType> param_types; + std::vector<Value::UP> param_values; + std::vector<Value::CREF> param_refs; + bool collect_meta; + CTFMetaData meta; + + Context(bool collect_meta_in) : param_names(), param_types(), param_values(), param_refs(), collect_meta(collect_meta_in), meta() {} + ~Context(); + + bool eval_next(const vespalib::string &name, const vespalib::string &expr) { + meta = CTFMetaData(); + SimpleObjectParams params(param_refs); + auto fun = Function::parse(param_names, expr, FeatureNameExtractor()); + if (fun->has_error()) { + fprintf(stderr, "error: expression parse error (%s): %s\n", name.c_str(), fun->get_error().c_str()); + return false; + } + NodeTypes types = NodeTypes(*fun, param_types); + ValueType res_type = types.get_type(fun->root()); + if (res_type.is_error() || !types.errors().empty()) { + fprintf(stderr, "error: expression type issues (%s)\n", name.c_str()); + for (const auto &issue: types.errors()) { + fprintf(stderr, " issue: %s\n", issue.c_str()); + } + return false; + } + vespalib::Stash stash; + const TensorFunction &plain_fun = make_tensor_function(factory, fun->root(), types, stash); + const TensorFunction &optimized = optimize_tensor_function(factory, plain_fun, stash); + InterpretedFunction ifun(factory, optimized, collect_meta ? 
&meta : nullptr); + InterpretedFunction::Context ctx(ifun); + Value::UP result = factory.copy(ifun.eval(ctx, params)); + assert(result->type() == res_type); + param_names.push_back(name); + param_types.push_back(res_type); + param_values.push_back(std::move(result)); + param_refs.emplace_back(*param_values.back()); + return true; + } + + void print_last(bool with_name) { + auto spec = spec_from_value(param_refs.back().get()); + if (!meta.steps.empty()) { + if (with_name) { + fprintf(stderr, "meta-data(%s):\n", param_names.back().c_str()); + } else { + fprintf(stderr, "meta-data:\n"); + } + for (const auto &step: meta.steps) { + fprintf(stderr, " class: %s\n", step.class_name.c_str()); + fprintf(stderr, " symbol: %s\n", step.symbol_name.c_str()); + } + } + if (with_name) { + fprintf(stdout, "%s: ", param_names.back().c_str()); + } + if (param_types.back().is_double()) { + fprintf(stdout, "%.32g\n", spec.as_double()); + } else { + fprintf(stdout, "%s\n", spec.to_string().c_str()); + } + } +}; +Context::~Context() = default; + int main(int argc, char **argv) { - if (argc != 2) { - fprintf(stderr, "usage: %s <expr>\n", argv[0]); - fprintf(stderr, " the expression must be self-contained (no arguments)\n"); - fprintf(stderr, " quote the expression to make it a single parameter\n"); - return 1; + bool verbose = ((argc > 1) && (vespalib::string(argv[1]) == "--verbose")); + int expr_idx = verbose ? 
2 : 1; + int expr_cnt = (argc - expr_idx); + int expr_max = ('z' - 'a') + 1; + if (expr_cnt == 0) { + return usage(argv[0]); } - auto function = Function::parse({}, argv[1]); - if (function->has_error()) { - fprintf(stderr, "expression error: %s\n", function->get_error().c_str()); - return 1; + if (expr_cnt > expr_max) { + return overflow(expr_cnt, expr_max); } - auto result = TensorSpec::from_expr(argv[1]); - auto type = ValueType::from_spec(result.type()); - if (type.is_error()) { - fprintf(stdout, "error\n"); - } else if (type.is_double()) { - fprintf(stdout, "%.32g\n", result.as_double()); - } else { - fprintf(stdout, "%s\n", result.to_string().c_str()); + Context ctx(verbose); + vespalib::string name("a"); + for (int i = expr_idx; i < argc; ++i) { + if (!ctx.eval_next(name, argv[i])) { + return 3; + } + ctx.print_last(expr_cnt > 1); + ++name[0]; } return 0; } diff --git a/eval/src/tests/eval/interpreted_function/interpreted_function_test.cpp b/eval/src/tests/eval/interpreted_function/interpreted_function_test.cpp index bcb2e29472c..a5d87c6cf0b 100644 --- a/eval/src/tests/eval/interpreted_function/interpreted_function_test.cpp +++ b/eval/src/tests/eval/interpreted_function/interpreted_function_test.cpp @@ -180,7 +180,7 @@ TEST("require that compilation meta-data can be collected") { const auto &flag = tensor_function::inject(ValueType::from_spec("double"), 0, stash); const auto &root = tensor_function::if_node(flag, concat_x5, mapped_x5, stash); CTFMetaData meta; - InterpretedFunction ifun(FastValueBuilderFactory::get(), root, meta); + InterpretedFunction ifun(FastValueBuilderFactory::get(), root, &meta); fprintf(stderr, "compilation meta-data:\n"); for (const auto &step: meta.steps) { fprintf(stderr, " %s -> %s\n", step.class_name.c_str(), step.symbol_name.c_str()); diff --git a/eval/src/vespa/eval/eval/compile_tensor_function.h b/eval/src/vespa/eval/eval/compile_tensor_function.h index 15241b914f9..c98ae3846ed 100644 --- 
a/eval/src/vespa/eval/eval/compile_tensor_function.h +++ b/eval/src/vespa/eval/eval/compile_tensor_function.h @@ -39,6 +39,6 @@ struct CTFMetaData { }; std::vector<InterpretedFunction::Instruction> compile_tensor_function(const ValueBuilderFactory &factory, const TensorFunction &function, Stash &stash, - CTFMetaData *meta = nullptr); + CTFMetaData *meta); } // namespace vespalib::eval diff --git a/eval/src/vespa/eval/eval/interpreted_function.cpp b/eval/src/vespa/eval/eval/interpreted_function.cpp index a5368566aa4..16dbb2ec95c 100644 --- a/eval/src/vespa/eval/eval/interpreted_function.cpp +++ b/eval/src/vespa/eval/eval/interpreted_function.cpp @@ -76,20 +76,12 @@ InterpretedFunction::Instruction::nop() return Instruction(my_nop); } -InterpretedFunction::InterpretedFunction(const ValueBuilderFactory &factory, const TensorFunction &function) +InterpretedFunction::InterpretedFunction(const ValueBuilderFactory &factory, const TensorFunction &function, CTFMetaData *meta) : _program(), _stash(), _factory(factory) { - _program = compile_tensor_function(factory, function, _stash); -} - -InterpretedFunction::InterpretedFunction(const ValueBuilderFactory &factory, const TensorFunction &function, CTFMetaData &meta) - : _program(), - _stash(), - _factory(factory) -{ - _program = compile_tensor_function(factory, function, _stash, &meta); + _program = compile_tensor_function(factory, function, _stash, meta); } InterpretedFunction::InterpretedFunction(const ValueBuilderFactory &factory, const nodes::Node &root, const NodeTypes &types) @@ -99,7 +91,7 @@ InterpretedFunction::InterpretedFunction(const ValueBuilderFactory &factory, con { const TensorFunction &plain_fun = make_tensor_function(factory, root, types, _stash); const TensorFunction &optimized = optimize_tensor_function(factory, plain_fun, _stash); - _program = compile_tensor_function(factory, optimized, _stash); + _program = compile_tensor_function(factory, optimized, _stash, nullptr); } 
InterpretedFunction::~InterpretedFunction() = default; diff --git a/eval/src/vespa/eval/eval/interpreted_function.h b/eval/src/vespa/eval/eval/interpreted_function.h index 4e88920b45e..7ad4cbf81d4 100644 --- a/eval/src/vespa/eval/eval/interpreted_function.h +++ b/eval/src/vespa/eval/eval/interpreted_function.h @@ -95,8 +95,9 @@ private: public: typedef std::unique_ptr<InterpretedFunction> UP; // for testing; use with care; the tensor function must be kept alive - InterpretedFunction(const ValueBuilderFactory &factory, const TensorFunction &function); - InterpretedFunction(const ValueBuilderFactory &factory, const TensorFunction &function, CTFMetaData &meta); + InterpretedFunction(const ValueBuilderFactory &factory, const TensorFunction &function, CTFMetaData *meta); + InterpretedFunction(const ValueBuilderFactory &factory, const TensorFunction &function) + : InterpretedFunction(factory, function, nullptr) {} InterpretedFunction(const ValueBuilderFactory &factory, const nodes::Node &root, const NodeTypes &types); InterpretedFunction(const ValueBuilderFactory &factory, const Function &function, const NodeTypes &types) : InterpretedFunction(factory, function.root(), types) {} |