author    Håvard Pettersen <havardpe@oath.com>  2017-09-06 10:45:52 +0000
committer Håvard Pettersen <havardpe@oath.com>  2017-09-28 12:08:35 +0000
commit    7b4fe2ac8ca997907b4955c7518c834c3e605096 (patch)
tree      3e86f5645b303f4122bf035cc856d41aae0147fd /eval
parent    6796195b37f351f843eea4992d2d45b7ba4eb771 (diff)
application to facilitate cross-language tensor conformance testing - WIP
with a few smoke tests to show the format
Diffstat (limited to 'eval')
-rw-r--r--  eval/CMakeLists.txt | 1
-rw-r--r--  eval/src/apps/make_tensor_binary_format_test_spec/make_tensor_binary_format_test_spec.cpp | 9
-rw-r--r--  eval/src/apps/tensor_conformance/.gitignore | 1
-rw-r--r--  eval/src/apps/tensor_conformance/CMakeLists.txt | 8
-rw-r--r--  eval/src/apps/tensor_conformance/generate.cpp | 18
-rw-r--r--  eval/src/apps/tensor_conformance/generate.h | 22
-rw-r--r--  eval/src/apps/tensor_conformance/tensor_conformance.cpp | 318
-rw-r--r--  eval/src/apps/tensor_conformance/test_spec.json | 39
8 files changed, 413 insertions, 3 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index 19a614464a1..89e8a72e330 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -6,6 +6,7 @@ vespa_define_module(
APPS
src/apps/eval_expr
src/apps/make_tensor_binary_format_test_spec
+ src/apps/tensor_conformance
TESTS
src/tests/eval/aggr
diff --git a/eval/src/apps/make_tensor_binary_format_test_spec/make_tensor_binary_format_test_spec.cpp b/eval/src/apps/make_tensor_binary_format_test_spec/make_tensor_binary_format_test_spec.cpp
index 5040ae35ff9..a7695408a85 100644
--- a/eval/src/apps/make_tensor_binary_format_test_spec/make_tensor_binary_format_test_spec.cpp
+++ b/eval/src/apps/make_tensor_binary_format_test_spec/make_tensor_binary_format_test_spec.cpp
@@ -188,7 +188,6 @@ void make_matrix_test(Cursor &test, size_t x_size, size_t y_size) {
//-----------------------------------------------------------------------------
void make_map_test(Cursor &test, const Dict &x_dict_in) {
- TensorSpec spec("tensor(x{})");
nbostream sparse_base = make_sparse();
sparse_base.putInt1_4Bytes(1);
sparse_base.writeSmallString("x");
@@ -200,6 +199,7 @@ void make_map_test(Cursor &test, const Dict &x_dict_in) {
mixed_base.putInt1_4Bytes(x_dict_in.size());
auto x_perm = make_permutations(x_dict_in);
for (const Dict &x_dict: x_perm) {
+ TensorSpec spec("tensor(x{})");
nbostream sparse = sparse_base;
nbostream mixed = mixed_base;
for (vespalib::string x: x_dict) {
@@ -214,13 +214,13 @@ void make_map_test(Cursor &test, const Dict &x_dict_in) {
add_binary(test, {sparse, mixed});
}
if (x_dict_in.empty()) {
+ TensorSpec spec("tensor(x{})");
set_tensor(test, spec);
add_binary(test, {sparse_base, mixed_base});
}
}
void make_mesh_test(Cursor &test, const Dict &x_dict_in, const vespalib::string &y) {
- TensorSpec spec("tensor(x{},y{})");
nbostream sparse_base = make_sparse();
sparse_base.putInt1_4Bytes(2);
sparse_base.writeSmallString("x");
@@ -234,6 +234,7 @@ void make_mesh_test(Cursor &test, const Dict &x_dict_in, const vespalib::string
mixed_base.putInt1_4Bytes(x_dict_in.size() * 1);
auto x_perm = make_permutations(x_dict_in);
for (const Dict &x_dict: x_perm) {
+ TensorSpec spec("tensor(x{},y{})");
nbostream sparse = sparse_base;
nbostream mixed = mixed_base;
for (vespalib::string x: x_dict) {
@@ -250,6 +251,7 @@ void make_mesh_test(Cursor &test, const Dict &x_dict_in, const vespalib::string
add_binary(test, {sparse, mixed});
}
if (x_dict_in.empty()) {
+ TensorSpec spec("tensor(x{},y{})");
set_tensor(test, spec);
add_binary(test, {sparse_base, mixed_base});
}
@@ -264,7 +266,6 @@ void make_vector_map_test(Cursor &test,
auto type_str = vespalib::make_string("tensor(%s{},%s[%zu])",
mapped_name.c_str(), indexed_name.c_str(), indexed_size);
ValueType type = ValueType::from_spec(type_str);
- TensorSpec spec(type.to_spec()); // ensures type string is normalized
nbostream mixed_base = make_mixed();
mixed_base.putInt1_4Bytes(1);
mixed_base.writeSmallString(mapped_name);
@@ -274,6 +275,7 @@ void make_vector_map_test(Cursor &test,
mixed_base.putInt1_4Bytes(mapped_dict.size());
auto mapped_perm = make_permutations(mapped_dict);
for (const Dict &dict: mapped_perm) {
+ TensorSpec spec(type.to_spec()); // ensures type string is normalized
nbostream mixed = mixed_base;
for (vespalib::string label: dict) {
mixed.writeSmallString(label);
@@ -287,6 +289,7 @@ void make_vector_map_test(Cursor &test,
add_binary(test, mixed);
}
if (mapped_dict.empty()) {
+ TensorSpec spec(type.to_spec()); // ensures type string is normalized
set_tensor(test, spec);
add_binary(test, mixed_base);
}
diff --git a/eval/src/apps/tensor_conformance/.gitignore b/eval/src/apps/tensor_conformance/.gitignore
new file mode 100644
index 00000000000..3e87a05826c
--- /dev/null
+++ b/eval/src/apps/tensor_conformance/.gitignore
@@ -0,0 +1 @@
+/vespa-tensor-conformance
diff --git a/eval/src/apps/tensor_conformance/CMakeLists.txt b/eval/src/apps/tensor_conformance/CMakeLists.txt
new file mode 100644
index 00000000000..76ababd9f5e
--- /dev/null
+++ b/eval/src/apps/tensor_conformance/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vespa-tensor-conformance
+ SOURCES
+ generate.cpp
+ tensor_conformance.cpp
+ DEPENDS
+ vespaeval
+)
diff --git a/eval/src/apps/tensor_conformance/generate.cpp b/eval/src/apps/tensor_conformance/generate.cpp
new file mode 100644
index 00000000000..45ff6243d81
--- /dev/null
+++ b/eval/src/apps/tensor_conformance/generate.cpp
@@ -0,0 +1,18 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "generate.h"
+
+using TensorSpec = vespalib::eval::TensorSpec;
+
+TensorSpec spec(double value) { return TensorSpec("double").add({}, value); }
+
+void
+Generator::generate(TestBuilder &dst)
+{
+ // smoke tests with expected result
+ dst.add("a+a", {{"a", spec(2.0)}}, spec(4.0));
+ dst.add("a*b", {{"a", spec(2.0)}, {"b", spec(3.0)}}, spec(6.0));
+ dst.add("(a+b)*(a-b)", {{"a", spec(5.0)}, {"b", spec(2.0)}}, spec(21.0));
+ // smoke test without expected result
+ dst.add("(a-b)/(a+b)", {{"a", spec(5.0)}, {"b", spec(2.0)}});
+}
diff --git a/eval/src/apps/tensor_conformance/generate.h b/eval/src/apps/tensor_conformance/generate.h
new file mode 100644
index 00000000000..d20d085f00c
--- /dev/null
+++ b/eval/src/apps/tensor_conformance/generate.h
@@ -0,0 +1,22 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_spec.h>
+#include <map>
+
+struct TestBuilder {
+ using TensorSpec = vespalib::eval::TensorSpec;
+ // add test with undefined expected result
+ virtual void add(const vespalib::string &expression,
+ const std::map<vespalib::string,TensorSpec> &inputs) = 0;
+ // add test with pre-defined expected result
+ virtual void add(const vespalib::string &expression,
+ const std::map<vespalib::string,TensorSpec> &inputs,
+ const TensorSpec &expect) = 0;
+ virtual ~TestBuilder() {}
+};
+
+struct Generator {
+ static void generate(TestBuilder &out);
+};
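
The TestBuilder interface above decouples test generation from what is done with each case; MyTestBuilder in tensor_conformance.cpp (next file) serializes every case to JSON, but any consumer can plug in. A minimal, hypothetical implementation, not part of this commit, could look like this:

// hypothetical example -- not part of this commit
#include <cstdio>
#include "generate.h"

struct PrintingTestBuilder : TestBuilder {
    size_t count = 0;
    void add(const vespalib::string &expression,
             const std::map<vespalib::string, TensorSpec> &inputs) override
    {
        fprintf(stderr, "#%zu: %s (%zu inputs, result left undefined)\n",
                ++count, expression.c_str(), inputs.size());
    }
    void add(const vespalib::string &expression,
             const std::map<vespalib::string, TensorSpec> &inputs,
             const TensorSpec &) override
    {
        fprintf(stderr, "#%zu: %s (%zu inputs, expected result supplied)\n",
                ++count, expression.c_str(), inputs.size());
    }
};

int main() {
    PrintingTestBuilder builder;
    Generator::generate(builder);   // emits the smoke tests defined in generate.cpp
    return 0;
}
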
diff --git a/eval/src/apps/tensor_conformance/tensor_conformance.cpp b/eval/src/apps/tensor_conformance/tensor_conformance.cpp
new file mode 100644
index 00000000000..593e4439b0a
--- /dev/null
+++ b/eval/src/apps/tensor_conformance/tensor_conformance.cpp
@@ -0,0 +1,318 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/vespalib/data/slime/slime.h>
+#include <vespa/vespalib/data/slime/json_format.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/eval/eval/tensor_spec.h>
+#include <vespa/eval/eval/tensor.h>
+#include <vespa/eval/eval/function.h>
+#include <vespa/eval/eval/interpreted_function.h>
+#include <vespa/eval/eval/tensor_engine.h>
+#include <vespa/eval/eval/simple_tensor_engine.h>
+#include <vespa/eval/tensor/default_tensor_engine.h>
+#include <vespa/eval/eval/value_type.h>
+#include <vespa/eval/eval/value.h>
+#include <unistd.h>
+
+#include "generate.h"
+
+using namespace vespalib;
+using namespace vespalib::eval;
+using namespace vespalib::slime::convenience;
+using slime::JsonFormat;
+using tensor::DefaultTensorEngine;
+
+constexpr size_t CHUNK_SIZE = 16384;
+constexpr bool not_compact = false;
+
+//-----------------------------------------------------------------------------
+
+size_t num_tests = 0;
+std::map<vespalib::string,size_t> result_map;
+
+vespalib::string result_stats() {
+ vespalib::string stats;
+ for (const auto &entry: result_map) {
+ if (!stats.empty()) {
+ stats += ", ";
+ }
+ stats += make_string("%s: %zu", entry.first.c_str(), entry.second);
+ }
+ return stats;
+}
+
+//-----------------------------------------------------------------------------
+
+class StdIn : public Input {
+private:
+ bool _eof = false;
+ SimpleBuffer _input;
+public:
+ ~StdIn() {}
+ Memory obtain() override {
+ if ((_input.get().size == 0) && !_eof) {
+ WritableMemory buf = _input.reserve(CHUNK_SIZE);
+ ssize_t res = read(STDIN_FILENO, buf.data, buf.size);
+ _eof = (res == 0);
+ assert(res >= 0); // fail on stdio read errors
+ _input.commit(res);
+ }
+ return _input.obtain();
+ }
+ Input &evict(size_t bytes) override {
+ _input.evict(bytes);
+ return *this;
+ }
+};
+
+class StdOut : public Output {
+private:
+ SimpleBuffer _output;
+public:
+ ~StdOut() {}
+ WritableMemory reserve(size_t bytes) override {
+ return _output.reserve(bytes);
+ }
+ Output &commit(size_t bytes) override {
+ _output.commit(bytes);
+ Memory buf = _output.obtain();
+ ssize_t res = write(STDOUT_FILENO, buf.data, buf.size);
+ assert(res == ssize_t(buf.size)); // fail on stdout write failures
+ _output.evict(res);
+ return *this;
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+uint8_t unhex(char c) {
+ if (c >= '0' && c <= '9') {
+ return (c - '0');
+ }
+ if (c >= 'A' && c <= 'F') {
+ return ((c - 'A') + 10);
+ }
+ TEST_ERROR("bad hex char");
+ return 0;
+}
+
+void extract_data_from_string(Memory hex_dump, nbostream &data) {
+ if ((hex_dump.size > 2) && (hex_dump.data[0] == '0') && (hex_dump.data[1] == 'x')) {
+ for (size_t i = 2; i < (hex_dump.size - 1); i += 2) {
+ data << uint8_t((unhex(hex_dump.data[i]) << 4) | unhex(hex_dump.data[i + 1]));
+ }
+ }
+}
+
+nbostream extract_data(const Inspector &value) {
+ nbostream data;
+ if (value.asString().size > 0) {
+ extract_data_from_string(value.asString(), data);
+ } else {
+ Memory buf = value.asData();
+ data.write(buf.data, buf.size);
+ }
+ return data;
+}
+
+//-----------------------------------------------------------------------------
+
+TensorSpec to_spec(const Value &value) {
+ if (value.is_error()) {
+ return TensorSpec("error");
+ } else if (value.is_double()) {
+ return TensorSpec("double").add({}, value.as_double());
+ } else {
+ ASSERT_TRUE(value.is_tensor());
+ auto tensor = value.as_tensor();
+ return tensor->engine().to_spec(*tensor);
+ }
+}
+
+const Value &to_value(const TensorSpec &spec, const TensorEngine &engine, Stash &stash) {
+ if (spec.type() == "error") {
+ return stash.create<ErrorValue>();
+ } else if (spec.type() == "double") {
+ double value = 0.0;
+ for (const auto &cell: spec.cells()) {
+ value += cell.second;
+ }
+ return stash.create<DoubleValue>(value);
+ } else {
+ ASSERT_TRUE(starts_with(spec.type(), "tensor("));
+ return stash.create<TensorValue>(engine.create(spec));
+ }
+}
+
+void insert_value(Cursor &cursor, const vespalib::string &name, const TensorSpec &spec) {
+ Stash stash;
+ nbostream data;
+ const Value &value = to_value(spec, SimpleTensorEngine::ref(), stash);
+ SimpleTensorEngine::ref().encode(value, data, stash);
+ cursor.setData(name, Memory(data.peek(), data.size()));
+}
+
+TensorSpec extract_value(const Inspector &inspector) {
+ Stash stash;
+ nbostream data = extract_data(inspector);
+ return to_spec(SimpleTensorEngine::ref().decode(data, stash));
+}
+
+//-----------------------------------------------------------------------------
+
+TensorSpec eval_expr(const Inspector &test, const TensorEngine &engine) {
+ Stash stash;
+ Function fun = Function::parse(test["expression"].asString().make_string());
+ std::vector<Value::CREF> param_values;
+ std::vector<ValueType> param_types;
+ for (size_t i = 0; i < fun.num_params(); ++i) {
+ param_values.emplace_back(to_value(extract_value(test["inputs"][fun.param_name(i)]), engine, stash));
+ }
+ for (size_t i = 0; i < fun.num_params(); ++i) {
+ param_types.emplace_back(param_values[i].get().type());
+ }
+ NodeTypes types(fun, param_types);
+ InterpretedFunction ifun(engine, fun, types);
+ InterpretedFunction::Context ctx(ifun);
+ InterpretedFunction::SimpleObjectParams params(param_values);
+ return to_spec(ifun.eval(ctx, params));
+}
+
+//-----------------------------------------------------------------------------
+
+std::vector<vespalib::string> extract_fields(const Inspector &object) {
+ struct FieldExtractor : slime::ObjectTraverser {
+ std::vector<vespalib::string> result;
+ void field(const Memory &symbol, const Inspector &) override {
+ result.push_back(symbol.make_string());
+ }
+ } extractor;
+ object.traverse(extractor);
+ return std::move(extractor.result);
+};
+
+void dump_test(const Inspector &test) {
+ fprintf(stderr, "expression: '%s'\n", test["expression"].asString().make_string().c_str());
+ for (const auto &input: extract_fields(test["inputs"])) {
+ auto value = extract_value(test["inputs"][input]);
+ fprintf(stderr, "input '%s': %s\n", input.c_str(), value.to_string().c_str());
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+class MyTestBuilder : public TestBuilder {
+private:
+ Output &_out;
+ void build_test(Cursor &test, const vespalib::string &expression,
+ const std::map<vespalib::string,TensorSpec> &input_map)
+ {
+ test.setString("expression", expression);
+ Cursor &inputs = test.setObject("inputs");
+ for (const auto &input: input_map) {
+ insert_value(inputs, input.first, input.second);
+ }
+ }
+public:
+ MyTestBuilder(Output &out) : _out(out) {}
+ void add(const vespalib::string &expression,
+ const std::map<vespalib::string,TensorSpec> &inputs) override
+ {
+ Slime slime;
+ build_test(slime.setObject(), expression, inputs);
+ insert_value(slime.get().setObject("result"), "expect",
+ eval_expr(slime.get(), SimpleTensorEngine::ref()));
+ JsonFormat::encode(slime, _out, not_compact);
+ ++num_tests;
+ }
+ void add(const vespalib::string &expression,
+ const std::map<vespalib::string,TensorSpec> &inputs,
+ const TensorSpec &expect) override
+ {
+ Slime slime;
+ build_test(slime.setObject(), expression, inputs);
+ insert_value(slime.get().setObject("result"), "expect", expect);
+ if (!EXPECT_EQUAL(eval_expr(slime.get(), SimpleTensorEngine::ref()), expect)) {
+ dump_test(slime.get());
+ }
+ JsonFormat::encode(slime, _out, not_compact);
+ ++num_tests;
+ }
+};
+
+void generate(Output &out) {
+ MyTestBuilder my_test_builder(out);
+ Generator::generate(my_test_builder);
+}
+
+//-----------------------------------------------------------------------------
+
+void evaluate(Input &in, Output &out) {
+ while (in.obtain().size > 0) {
+ Slime slime;
+ if (JsonFormat::decode(in, slime)) {
+ ++num_tests;
+ insert_value(slime.get()["result"], "prod_cpp",
+ eval_expr(slime.get(), DefaultTensorEngine::ref()));
+ JsonFormat::encode(slime, out, not_compact);
+ }
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+void verify(Input &in) {
+ while (in.obtain().size > 0) {
+ Slime slime;
+ if (JsonFormat::decode(in, slime)) {
+ ++num_tests;
+ TensorSpec reference_result = eval_expr(slime.get(), SimpleTensorEngine::ref());
+ for (const auto &result: extract_fields(slime.get()["result"])) {
+ ++result_map[result];
+ TEST_STATE(make_string("verifying result: '%s'", result.c_str()).c_str());
+ if (!EXPECT_EQUAL(reference_result, extract_value(slime.get()["result"][result]))) {
+ dump_test(slime.get());
+ }
+ }
+ }
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+int usage(const char *self) {
+ fprintf(stderr, "usage: %s <mode>\n", self);
+ fprintf(stderr, " <mode>: which mode to activate\n");
+ fprintf(stderr, " 'generate': write test cases to stdout\n");
+ fprintf(stderr, " 'evaluate': read test cases from stdin, annotate them with\n");
+ fprintf(stderr, " results from various implementations and write\n");
+ fprintf(stderr, " them to stdout\n");
+ fprintf(stderr, " 'verify': read annotated test cases from stdin and verify\n");
+ fprintf(stderr, " that all results are as expected\n");
+ return 1;
+}
+
+int main(int argc, char **argv) {
+ StdIn std_in;
+ StdOut std_out;
+ if (argc != 2) {
+ return usage(argv[0]);
+ }
+ vespalib::string mode = argv[1];
+ TEST_MASTER.init(make_string("vespa-tensor-conformance-%s", mode.c_str()).c_str());
+ if (mode == "generate") {
+ generate(std_out);
+ fprintf(stderr, "generated %zu test cases\n", num_tests);
+ } else if (mode == "evaluate") {
+ evaluate(std_in, std_out);
+ fprintf(stderr, "evaluated %zu test cases\n", num_tests);
+ } else if (mode == "verify") {
+ verify(std_in);
+ fprintf(stderr, "verified %zu test cases (%s)\n", num_tests, result_stats().c_str());
+ } else {
+ TEST_ERROR(make_string("unknown mode: %s", mode.c_str()).c_str());
+ }
+ return (TEST_MASTER.fini() ? 0 : 1);
+}
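
Generate writes to stdout while evaluate and verify read from stdin, so the three modes are meant to be chained; one plausible end-to-end run (not spelled out in this commit) is vespa-tensor-conformance generate | vespa-tensor-conformance evaluate | vespa-tensor-conformance verify. In that pipeline, generate emits cases whose result.expect field is either supplied by the generator or computed with the reference SimpleTensorEngine, evaluate annotates each case with a result.prod_cpp field computed with DefaultTensorEngine, and verify recomputes the reference result and compares it against every field found under result.
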
diff --git a/eval/src/apps/tensor_conformance/test_spec.json b/eval/src/apps/tensor_conformance/test_spec.json
new file mode 100644
index 00000000000..a7c906cfb85
--- /dev/null
+++ b/eval/src/apps/tensor_conformance/test_spec.json
@@ -0,0 +1,39 @@
+{
+ "expression": "a+a",
+ "inputs": {
+ "a": "0x02004000000000000000"
+ },
+ "result": {
+ "expect": "0x02004010000000000000"
+ }
+}
+{
+ "expression": "a*b",
+ "inputs": {
+ "a": "0x02004000000000000000",
+ "b": "0x02004008000000000000"
+ },
+ "result": {
+ "expect": "0x02004018000000000000"
+ }
+}
+{
+ "expression": "(a+b)*(a-b)",
+ "inputs": {
+ "a": "0x02004014000000000000",
+ "b": "0x02004000000000000000"
+ },
+ "result": {
+ "expect": "0x02004035000000000000"
+ }
+}
+{
+ "expression": "(a-b)/(a+b)",
+ "inputs": {
+ "a": "0x02004014000000000000",
+ "b": "0x02004000000000000000"
+ },
+ "result": {
+ "expect": "0x02003FDB6DB6DB6DB6DB"
+ }
+}
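
The inputs and result.expect strings above are hex dumps of the binary encoding produced by SimpleTensorEngine::encode. For these scalar ("double") values the layout appears to be one format-tag byte (0x02), one byte giving the number of dimensions (0x00), and the cell value as a big-endian IEEE 754 double: 0x4000000000000000 is 2.0 and 0x4010000000000000 is 4.0, matching the "a+a" case with a=2, and 0x3FDB6DB6DB6DB6DB is 3/7, matching (5-2)/(5+2). A small, self-contained decoder sketch under that assumed layout (hypothetical, not part of the commit):

// hypothetical decoder -- assumes [format byte][#dims byte][big-endian IEEE 754 double]
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>

double decode_scalar(const std::string &hex) {
    auto nib = [](char c) -> uint8_t {
        return (c <= '9') ? uint8_t(c - '0') : uint8_t((c - 'A') + 10);
    };
    uint8_t bytes[10];
    for (size_t i = 0; i < 10; ++i) {              // skip the leading "0x"
        bytes[i] = uint8_t((nib(hex[2 + 2 * i]) << 4) | nib(hex[3 + 2 * i]));
    }
    uint64_t bits = 0;                             // bytes[0]=format, bytes[1]=#dims
    for (size_t i = 2; i < 10; ++i) {
        bits = (bits << 8) | bytes[i];
    }
    double value;
    memcpy(&value, &bits, sizeof(value));          // reinterpret the assembled bit pattern
    return value;
}

int main() {
    printf("a           = %g\n", decode_scalar("0x02004000000000000000")); // 2
    printf("a+a         = %g\n", decode_scalar("0x02004010000000000000")); // 4
    printf("(5-2)/(5+2) = %g\n", decode_scalar("0x02003FDB6DB6DB6DB6DB")); // ~0.428571
    return 0;
}
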