diff options
author | Håvard Pettersen <havardpe@oath.com> | 2017-09-06 10:45:52 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2017-09-28 12:08:35 +0000 |
commit | 7b4fe2ac8ca997907b4955c7518c834c3e605096 (patch) | |
tree | 3e86f5645b303f4122bf035cc856d41aae0147fd /eval/src/apps | |
parent | 6796195b37f351f843eea4992d2d45b7ba4eb771 (diff) |
application to facilitate cross-language tensor conformance testing - WIP
with a few smoke tests to show the format
Diffstat (limited to 'eval/src/apps')
7 files changed, 412 insertions, 3 deletions
diff --git a/eval/src/apps/make_tensor_binary_format_test_spec/make_tensor_binary_format_test_spec.cpp b/eval/src/apps/make_tensor_binary_format_test_spec/make_tensor_binary_format_test_spec.cpp index 5040ae35ff9..a7695408a85 100644 --- a/eval/src/apps/make_tensor_binary_format_test_spec/make_tensor_binary_format_test_spec.cpp +++ b/eval/src/apps/make_tensor_binary_format_test_spec/make_tensor_binary_format_test_spec.cpp @@ -188,7 +188,6 @@ void make_matrix_test(Cursor &test, size_t x_size, size_t y_size) { //----------------------------------------------------------------------------- void make_map_test(Cursor &test, const Dict &x_dict_in) { - TensorSpec spec("tensor(x{})"); nbostream sparse_base = make_sparse(); sparse_base.putInt1_4Bytes(1); sparse_base.writeSmallString("x"); @@ -200,6 +199,7 @@ void make_map_test(Cursor &test, const Dict &x_dict_in) { mixed_base.putInt1_4Bytes(x_dict_in.size()); auto x_perm = make_permutations(x_dict_in); for (const Dict &x_dict: x_perm) { + TensorSpec spec("tensor(x{})"); nbostream sparse = sparse_base; nbostream mixed = mixed_base; for (vespalib::string x: x_dict) { @@ -214,13 +214,13 @@ void make_map_test(Cursor &test, const Dict &x_dict_in) { add_binary(test, {sparse, mixed}); } if (x_dict_in.empty()) { + TensorSpec spec("tensor(x{})"); set_tensor(test, spec); add_binary(test, {sparse_base, mixed_base}); } } void make_mesh_test(Cursor &test, const Dict &x_dict_in, const vespalib::string &y) { - TensorSpec spec("tensor(x{},y{})"); nbostream sparse_base = make_sparse(); sparse_base.putInt1_4Bytes(2); sparse_base.writeSmallString("x"); @@ -234,6 +234,7 @@ void make_mesh_test(Cursor &test, const Dict &x_dict_in, const vespalib::string mixed_base.putInt1_4Bytes(x_dict_in.size() * 1); auto x_perm = make_permutations(x_dict_in); for (const Dict &x_dict: x_perm) { + TensorSpec spec("tensor(x{},y{})"); nbostream sparse = sparse_base; nbostream mixed = mixed_base; for (vespalib::string x: x_dict) { @@ -250,6 +251,7 @@ void make_mesh_test(Cursor &test, const Dict &x_dict_in, const vespalib::string add_binary(test, {sparse, mixed}); } if (x_dict_in.empty()) { + TensorSpec spec("tensor(x{},y{})"); set_tensor(test, spec); add_binary(test, {sparse_base, mixed_base}); } @@ -264,7 +266,6 @@ void make_vector_map_test(Cursor &test, auto type_str = vespalib::make_string("tensor(%s{},%s[%zu])", mapped_name.c_str(), indexed_name.c_str(), indexed_size); ValueType type = ValueType::from_spec(type_str); - TensorSpec spec(type.to_spec()); // ensures type string is normalized nbostream mixed_base = make_mixed(); mixed_base.putInt1_4Bytes(1); mixed_base.writeSmallString(mapped_name); @@ -274,6 +275,7 @@ void make_vector_map_test(Cursor &test, mixed_base.putInt1_4Bytes(mapped_dict.size()); auto mapped_perm = make_permutations(mapped_dict); for (const Dict &dict: mapped_perm) { + TensorSpec spec(type.to_spec()); // ensures type string is normalized nbostream mixed = mixed_base; for (vespalib::string label: dict) { mixed.writeSmallString(label); @@ -287,6 +289,7 @@ void make_vector_map_test(Cursor &test, add_binary(test, mixed); } if (mapped_dict.empty()) { + TensorSpec spec(type.to_spec()); // ensures type string is normalized set_tensor(test, spec); add_binary(test, mixed_base); } diff --git a/eval/src/apps/tensor_conformance/.gitignore b/eval/src/apps/tensor_conformance/.gitignore new file mode 100644 index 00000000000..3e87a05826c --- /dev/null +++ b/eval/src/apps/tensor_conformance/.gitignore @@ -0,0 +1 @@ +/vespa-tensor-conformance diff --git a/eval/src/apps/tensor_conformance/CMakeLists.txt b/eval/src/apps/tensor_conformance/CMakeLists.txt new file mode 100644 index 00000000000..76ababd9f5e --- /dev/null +++ b/eval/src/apps/tensor_conformance/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespa-tensor-conformance + SOURCES + generate.cpp + tensor_conformance.cpp + DEPENDS + vespaeval +) diff --git a/eval/src/apps/tensor_conformance/generate.cpp b/eval/src/apps/tensor_conformance/generate.cpp new file mode 100644 index 00000000000..45ff6243d81 --- /dev/null +++ b/eval/src/apps/tensor_conformance/generate.cpp @@ -0,0 +1,18 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "generate.h" + +using TensorSpec = vespalib::eval::TensorSpec; + +TensorSpec spec(double value) { return TensorSpec("double").add({}, value); } + +void +Generator::generate(TestBuilder &dst) +{ + // smoke tests with expected result + dst.add("a+a", {{"a", spec(2.0)}}, spec(4.0)); + dst.add("a*b", {{"a", spec(2.0)}, {"b", spec(3.0)}}, spec(6.0)); + dst.add("(a+b)*(a-b)", {{"a", spec(5.0)}, {"b", spec(2.0)}}, spec(21.0)); + // smoke test without expected result + dst.add("(a-b)/(a+b)", {{"a", spec(5.0)}, {"b", spec(2.0)}}); +} diff --git a/eval/src/apps/tensor_conformance/generate.h b/eval/src/apps/tensor_conformance/generate.h new file mode 100644 index 00000000000..d20d085f00c --- /dev/null +++ b/eval/src/apps/tensor_conformance/generate.h @@ -0,0 +1,22 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/tensor_spec.h> +#include <map> + +struct TestBuilder { + using TensorSpec = vespalib::eval::TensorSpec; + // add test with undefined expected result + virtual void add(const vespalib::string &expression, + const std::map<vespalib::string,TensorSpec> &inputs) = 0; + // add test with pre-defined expected result + virtual void add(const vespalib::string &expression, + const std::map<vespalib::string,TensorSpec> &inputs, + const TensorSpec &expect) = 0; + virtual ~TestBuilder() {} +}; + +struct Generator { + static void generate(TestBuilder &out); +}; diff --git a/eval/src/apps/tensor_conformance/tensor_conformance.cpp b/eval/src/apps/tensor_conformance/tensor_conformance.cpp new file mode 100644 index 00000000000..593e4439b0a --- /dev/null +++ b/eval/src/apps/tensor_conformance/tensor_conformance.cpp @@ -0,0 +1,318 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/data/slime/slime.h> +#include <vespa/vespalib/data/slime/json_format.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/eval/eval/tensor_spec.h> +#include <vespa/eval/eval/tensor.h> +#include <vespa/eval/eval/function.h> +#include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/tensor_engine.h> +#include <vespa/eval/eval/simple_tensor_engine.h> +#include <vespa/eval/tensor/default_tensor_engine.h> +#include <vespa/eval/eval/value_type.h> +#include <vespa/eval/eval/value.h> +#include <unistd.h> + +#include "generate.h" + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::slime::convenience; +using slime::JsonFormat; +using tensor::DefaultTensorEngine; + +constexpr size_t CHUNK_SIZE = 16384; +constexpr bool not_compact = false; + +//----------------------------------------------------------------------------- + +size_t num_tests = 0; +std::map<vespalib::string,size_t> result_map; + +vespalib::string result_stats() { + vespalib::string stats; + for (const auto &entry: result_map) { + if (!stats.empty()) { + stats += ", "; + } + stats += make_string("%s: %zu", entry.first.c_str(), entry.second); + } + return stats; +} + +//----------------------------------------------------------------------------- + +class StdIn : public Input { +private: + bool _eof = false; + SimpleBuffer _input; +public: + ~StdIn() {} + Memory obtain() override { + if ((_input.get().size == 0) && !_eof) { + WritableMemory buf = _input.reserve(CHUNK_SIZE); + ssize_t res = read(STDIN_FILENO, buf.data, buf.size); + _eof = (res == 0); + assert(res >= 0); // fail on stdio read errors + _input.commit(res); + } + return _input.obtain(); + } + Input &evict(size_t bytes) override { + _input.evict(bytes); + return *this; + } +}; + +class StdOut : public Output { +private: + SimpleBuffer _output; +public: + ~StdOut() {} + WritableMemory reserve(size_t bytes) override { + return _output.reserve(bytes); + } + Output &commit(size_t bytes) override { + _output.commit(bytes); + Memory buf = _output.obtain(); + ssize_t res = write(STDOUT_FILENO, buf.data, buf.size); + assert(res == ssize_t(buf.size)); // fail on stdout write failures + _output.evict(res); + return *this; + } +}; + +//----------------------------------------------------------------------------- + +uint8_t unhex(char c) { + if (c >= '0' && c <= '9') { + return (c - '0'); + } + if (c >= 'A' && c <= 'F') { + return ((c - 'A') + 10); + } + TEST_ERROR("bad hex char"); + return 0; +} + +void extract_data_from_string(Memory hex_dump, nbostream &data) { + if ((hex_dump.size > 2) && (hex_dump.data[0] == '0') && (hex_dump.data[1] == 'x')) { + for (size_t i = 2; i < (hex_dump.size - 1); i += 2) { + data << uint8_t((unhex(hex_dump.data[i]) << 4) | unhex(hex_dump.data[i + 1])); + } + } +} + +nbostream extract_data(const Inspector &value) { + nbostream data; + if (value.asString().size > 0) { + extract_data_from_string(value.asString(), data); + } else { + Memory buf = value.asData(); + data.write(buf.data, buf.size); + } + return data; +} + +//----------------------------------------------------------------------------- + +TensorSpec to_spec(const Value &value) { + if (value.is_error()) { + return TensorSpec("error"); + } else if (value.is_double()) { + return TensorSpec("double").add({}, value.as_double()); + } else { + ASSERT_TRUE(value.is_tensor()); + auto tensor = value.as_tensor(); + return tensor->engine().to_spec(*tensor); + } +} + +const Value &to_value(const TensorSpec &spec, const TensorEngine &engine, Stash &stash) { + if (spec.type() == "error") { + return stash.create<ErrorValue>(); + } else if (spec.type() == "double") { + double value = 0.0; + for (const auto &cell: spec.cells()) { + value += cell.second; + } + return stash.create<DoubleValue>(value); + } else { + ASSERT_TRUE(starts_with(spec.type(), "tensor(")); + return stash.create<TensorValue>(engine.create(spec)); + } +} + +void insert_value(Cursor &cursor, const vespalib::string &name, const TensorSpec &spec) { + Stash stash; + nbostream data; + const Value &value = to_value(spec, SimpleTensorEngine::ref(), stash); + SimpleTensorEngine::ref().encode(value, data, stash); + cursor.setData(name, Memory(data.peek(), data.size())); +} + +TensorSpec extract_value(const Inspector &inspector) { + Stash stash; + nbostream data = extract_data(inspector); + return to_spec(SimpleTensorEngine::ref().decode(data, stash)); +} + +//----------------------------------------------------------------------------- + +TensorSpec eval_expr(const Inspector &test, const TensorEngine &engine) { + Stash stash; + Function fun = Function::parse(test["expression"].asString().make_string()); + std::vector<Value::CREF> param_values; + std::vector<ValueType> param_types; + for (size_t i = 0; i < fun.num_params(); ++i) { + param_values.emplace_back(to_value(extract_value(test["inputs"][fun.param_name(i)]), engine, stash)); + } + for (size_t i = 0; i < fun.num_params(); ++i) { + param_types.emplace_back(param_values[i].get().type()); + } + NodeTypes types(fun, param_types); + InterpretedFunction ifun(engine, fun, types); + InterpretedFunction::Context ctx(ifun); + InterpretedFunction::SimpleObjectParams params(param_values); + return to_spec(ifun.eval(ctx, params)); +} + +//----------------------------------------------------------------------------- + +std::vector<vespalib::string> extract_fields(const Inspector &object) { + struct FieldExtractor : slime::ObjectTraverser { + std::vector<vespalib::string> result; + void field(const Memory &symbol, const Inspector &) override { + result.push_back(symbol.make_string()); + } + } extractor; + object.traverse(extractor); + return std::move(extractor.result); +}; + +void dump_test(const Inspector &test) { + fprintf(stderr, "expression: '%s'\n", test["expression"].asString().make_string().c_str()); + for (const auto &input: extract_fields(test["inputs"])) { + auto value = extract_value(test["inputs"][input]); + fprintf(stderr, "input '%s': %s\n", input.c_str(), value.to_string().c_str()); + } +} + +//----------------------------------------------------------------------------- + +class MyTestBuilder : public TestBuilder { +private: + Output &_out; + void build_test(Cursor &test, const vespalib::string &expression, + const std::map<vespalib::string,TensorSpec> &input_map) + { + test.setString("expression", expression); + Cursor &inputs = test.setObject("inputs"); + for (const auto &input: input_map) { + insert_value(inputs, input.first, input.second); + } + } +public: + MyTestBuilder(Output &out) : _out(out) {} + void add(const vespalib::string &expression, + const std::map<vespalib::string,TensorSpec> &inputs) override + { + Slime slime; + build_test(slime.setObject(), expression, inputs); + insert_value(slime.get().setObject("result"), "expect", + eval_expr(slime.get(), SimpleTensorEngine::ref())); + JsonFormat::encode(slime, _out, not_compact); + ++num_tests; + } + void add(const vespalib::string &expression, + const std::map<vespalib::string,TensorSpec> &inputs, + const TensorSpec &expect) override + { + Slime slime; + build_test(slime.setObject(), expression, inputs); + insert_value(slime.get().setObject("result"), "expect", expect); + if (!EXPECT_EQUAL(eval_expr(slime.get(), SimpleTensorEngine::ref()), expect)) { + dump_test(slime.get()); + } + JsonFormat::encode(slime, _out, not_compact); + ++num_tests; + } +}; + +void generate(Output &out) { + MyTestBuilder my_test_builder(out); + Generator::generate(my_test_builder); +} + +//----------------------------------------------------------------------------- + +void evaluate(Input &in, Output &out) { + while (in.obtain().size > 0) { + Slime slime; + if (JsonFormat::decode(in, slime)) { + ++num_tests; + insert_value(slime.get()["result"], "prod_cpp", + eval_expr(slime.get(), DefaultTensorEngine::ref())); + JsonFormat::encode(slime, out, not_compact); + } + } +} + +//----------------------------------------------------------------------------- + +void verify(Input &in) { + while (in.obtain().size > 0) { + Slime slime; + if (JsonFormat::decode(in, slime)) { + ++num_tests; + TensorSpec reference_result = eval_expr(slime.get(), SimpleTensorEngine::ref()); + for (const auto &result: extract_fields(slime.get()["result"])) { + ++result_map[result]; + TEST_STATE(make_string("verifying result: '%s'", result.c_str()).c_str()); + if (!EXPECT_EQUAL(reference_result, extract_value(slime.get()["result"][result]))) { + dump_test(slime.get()); + } + } + } + } +} + +//----------------------------------------------------------------------------- + +int usage(const char *self) { + fprintf(stderr, "usage: %s <mode>\n", self); + fprintf(stderr, " <mode>: which mode to activate\n"); + fprintf(stderr, " 'generate': write test cases to stdout\n"); + fprintf(stderr, " 'evaluate': read test cases from stdin, annotate them with\n"); + fprintf(stderr, " results from various implementations and write\n"); + fprintf(stderr, " them to stdout\n"); + fprintf(stderr, " 'verify': read annotated test cases from stdin and verify\n"); + fprintf(stderr, " that all results are as expected\n"); + return 1; +} + +int main(int argc, char **argv) { + StdIn std_in; + StdOut std_out; + if (argc != 2) { + return usage(argv[0]); + } + vespalib::string mode = argv[1]; + TEST_MASTER.init(make_string("vespa-tensor-conformance-%s", mode.c_str()).c_str()); + if (mode == "generate") { + generate(std_out); + fprintf(stderr, "generated %zu test cases\n", num_tests); + } else if (mode == "evaluate") { + evaluate(std_in, std_out); + fprintf(stderr, "evaluated %zu test cases\n", num_tests); + } else if (mode == "verify") { + verify(std_in); + fprintf(stderr, "verified %zu test cases (%s)\n", num_tests, result_stats().c_str()); + } else { + TEST_ERROR(make_string("unknown mode: %s", mode.c_str()).c_str()); + } + return (TEST_MASTER.fini() ? 0 : 1); +} diff --git a/eval/src/apps/tensor_conformance/test_spec.json b/eval/src/apps/tensor_conformance/test_spec.json new file mode 100644 index 00000000000..a7c906cfb85 --- /dev/null +++ b/eval/src/apps/tensor_conformance/test_spec.json @@ -0,0 +1,39 @@ +{ + "expression": "a+a", + "inputs": { + "a": "0x02004000000000000000" + }, + "result": { + "expect": "0x02004010000000000000" + } +} +{ + "expression": "a*b", + "inputs": { + "a": "0x02004000000000000000", + "b": "0x02004008000000000000" + }, + "result": { + "expect": "0x02004018000000000000" + } +} +{ + "expression": "(a+b)*(a-b)", + "inputs": { + "a": "0x02004014000000000000", + "b": "0x02004000000000000000" + }, + "result": { + "expect": "0x02004035000000000000" + } +} +{ + "expression": "(a-b)/(a+b)", + "inputs": { + "a": "0x02004014000000000000", + "b": "0x02004000000000000000" + }, + "result": { + "expect": "0x02003FDB6DB6DB6DB6DB" + } +} |