From 587e20015727468b6a1208217f5e5f33baa10cf9 Mon Sep 17 00:00:00 2001
From: Håvard Pettersen
Date: Thu, 28 Jan 2021 14:06:58 +0000
Subject: added GenSpec used to generate TensorSpec

---
 eval/CMakeLists.txt                            |   1 +
 eval/src/tests/eval/gen_spec/CMakeLists.txt    |   9 ++
 eval/src/tests/eval/gen_spec/gen_spec_test.cpp | 198 +++++++++++++++++++++++++
 eval/src/vespa/eval/eval/test/CMakeLists.txt   |   1 +
 eval/src/vespa/eval/eval/test/gen_spec.cpp     |  58 ++++++++
 eval/src/vespa/eval/eval/test/gen_spec.h       | 104 +++++++++++++
 6 files changed, 371 insertions(+)
 create mode 100644 eval/src/tests/eval/gen_spec/CMakeLists.txt
 create mode 100644 eval/src/tests/eval/gen_spec/gen_spec_test.cpp
 create mode 100644 eval/src/vespa/eval/eval/test/gen_spec.cpp
 create mode 100644 eval/src/vespa/eval/eval/test/gen_spec.h

diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index 239cf8f0f23..fe621f6e9f0 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -19,6 +19,7 @@ vespa_define_module(
     src/tests/eval/function
     src/tests/eval/function_speed
     src/tests/eval/gbdt
+    src/tests/eval/gen_spec
     src/tests/eval/inline_operation
     src/tests/eval/interpreted_function
     src/tests/eval/multiply_add
diff --git a/eval/src/tests/eval/gen_spec/CMakeLists.txt b/eval/src/tests/eval/gen_spec/CMakeLists.txt
new file mode 100644
index 00000000000..3613554f0a0
--- /dev/null
+++ b/eval/src/tests/eval/gen_spec/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_gen_spec_test_app TEST
+    SOURCES
+    gen_spec_test.cpp
+    DEPENDS
+    vespaeval
+    GTest::GTest
+)
+vespa_add_test(NAME eval_gen_spec_test_app COMMAND eval_gen_spec_test_app)
diff --git a/eval/src/tests/eval/gen_spec/gen_spec_test.cpp b/eval/src/tests/eval/gen_spec/gen_spec_test.cpp
new file mode 100644
index 00000000000..bf4f7006058
--- /dev/null
+++ b/eval/src/tests/eval/gen_spec/gen_spec_test.cpp
@@ -0,0 +1,198 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/test/gen_spec.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+//-----------------------------------------------------------------------------
+
+TEST(DimSpec, indexed_dimension) {
+    ValueType::Dimension ref("foo", 10);
+    DimSpec idx("foo", 10);
+    EXPECT_EQ(idx.type(), ref);
+    EXPECT_TRUE(ref.is_indexed());
+    EXPECT_EQ(idx.name(), "foo");
+    EXPECT_EQ(idx.size(), 10);
+    EXPECT_EQ(idx.label(3), TensorSpec::Label(size_t(3)));
+}
+
+TEST(DimSpec, mapped_dimension) {
+    ValueType::Dimension ref("foo");
+    DimSpec map("foo", {"a", "b", "c", "d"});
+    EXPECT_EQ(map.type(), ref);
+    EXPECT_TRUE(ref.is_mapped());
+    EXPECT_EQ(map.name(), "foo");
+    EXPECT_EQ(map.size(), 4);
+    EXPECT_EQ(map.label(2), TensorSpec::Label("c"));
+}
+
+TEST(DimSpec, simple_dictionary_creation) {
+    auto dict = DimSpec::make_dict(5, 1, "");
+    std::vector<vespalib::string> expect = {"0", "1", "2", "3", "4"};
+    EXPECT_EQ(dict, expect);
+}
+
+TEST(DimSpec, advanced_dictionary_creation) {
+    auto dict = DimSpec::make_dict(5, 3, "str_");
+    std::vector<vespalib::string> expect = {"str_0", "str_3", "str_6", "str_9", "str_12"};
+    EXPECT_EQ(dict, expect);
+}
+
+//-----------------------------------------------------------------------------
+
+TEST(GenSpec, default_spec) {
+    GenSpec spec;
+    EXPECT_TRUE(spec.dims().empty());
+    EXPECT_EQ(spec.cells(), CellType::DOUBLE);
+    auto seq = spec.seq();
+    for (size_t i = 0; i < 4096; ++i) {
+        EXPECT_EQ(seq(i), (i + 1.0));
+    }
+}
+
+//-----------------------------------------------------------------------------
+
+TensorSpec scalar_1   = TensorSpec("double").add({}, 1.0);
+TensorSpec scalar_1_f = TensorSpec("float").add({}, 1.0);
+TensorSpec scalar_5   = TensorSpec("double").add({}, 5.0);
+TensorSpec scalar_5_f = TensorSpec("float").add({}, 5.0);
+
+TEST(GenSpec, scalar_double) {
+    EXPECT_EQ(GenSpec().gen(), scalar_1);
+    EXPECT_EQ(GenSpec().seq_bias(5.0).gen(), scalar_5);
+}
+
+TEST(GenSpec, scalar_float) {
+    EXPECT_EQ(GenSpec().cells_float().gen(), scalar_1_f);
+    EXPECT_EQ(GenSpec().cells_float().seq_bias(5.0).gen(), scalar_5_f);
+}
+
+//-----------------------------------------------------------------------------
+
+TEST(Seq, seq_n) {
+    GenSpec::seq_t seq = GenSpec().seq_n().seq();
+    for (size_t i = 0; i < 4096; ++i) {
+        EXPECT_EQ(seq(i), (i + 1.0));
+    }
+}
+
+TEST(Seq, seq_bias) {
+    GenSpec::seq_t seq = GenSpec().seq_bias(13.0).seq();
+    for (size_t i = 0; i < 4096; ++i) {
+        EXPECT_EQ(seq(i), (i + 13.0));
+    }
+}
+
+//-----------------------------------------------------------------------------
+
+GenSpec flt() { return GenSpec().cells_float(); }
+GenSpec dbl() { return GenSpec().cells_double(); }
+
+TEST(GenSpec, value_type) {
+    EXPECT_EQ(dbl().type().to_spec(), "double");
+    EXPECT_EQ(flt().type().to_spec(), "float");
+    EXPECT_EQ(dbl().idx("x", 10).type().to_spec(), "tensor(x[10])");
+    EXPECT_EQ(flt().idx("x", 10).type().to_spec(), "tensor<float>(x[10])");
+    EXPECT_EQ(dbl().map("y", {}).type().to_spec(), "tensor(y{})");
+    EXPECT_EQ(flt().map("y", {}).type().to_spec(), "tensor<float>(y{})");
+    EXPECT_EQ(dbl().idx("x", 10).map("y", {}).type().to_spec(), "tensor(x[10],y{})");
+    EXPECT_EQ(flt().idx("x", 10).map("y", {}).type().to_spec(), "tensor<float>(x[10],y{})");
+    EXPECT_EQ(dbl().map("y", 3, 1).idx("x", 10).type().to_spec(), "tensor(x[10],y{})");
+    EXPECT_EQ(flt().map("y", 3, 1, "str").idx("x", 10).type().to_spec(), "tensor<float>(x[10],y{})");
+}
+
+//-----------------------------------------------------------------------------
+
+TensorSpec basic_vector = TensorSpec("tensor(a[5])")
+    .add({{"a", 0}}, 1.0)
+    .add({{"a", 1}}, 2.0)
+    .add({{"a", 2}}, 3.0)
+    .add({{"a", 3}}, 4.0)
+    .add({{"a", 4}}, 5.0);
+
+TensorSpec float_vector = TensorSpec("tensor<float>(a[5])")
+    .add({{"a", 0}}, 1.0)
+    .add({{"a", 1}}, 2.0)
+    .add({{"a", 2}}, 3.0)
+    .add({{"a", 3}}, 4.0)
+    .add({{"a", 4}}, 5.0);
+
+TensorSpec custom_vector = TensorSpec("tensor(a[5])")
+    .add({{"a", 0}}, 5.0)
+    .add({{"a", 1}}, 4.0)
+    .add({{"a", 2}}, 3.0)
+    .add({{"a", 3}}, 2.0)
+    .add({{"a", 4}}, 1.0);
+
+TEST(GenSpec, generating_basic_vector) {
+    EXPECT_EQ(GenSpec().idx("a", 5).gen(), basic_vector);
+}
+
+TEST(GenSpec, generating_float_vector) {
+    EXPECT_EQ(GenSpec().idx("a", 5).cells_float().gen(), float_vector);
+}
+
+TEST(GenSpec, generating_custom_vector) {
+    GenSpec::seq_t my_seq = [](size_t idx){ return (5.0 - idx); };
+    EXPECT_EQ(GenSpec().idx("a", 5).seq(my_seq).gen(), custom_vector);
+}
+
+//-----------------------------------------------------------------------------
+
+TensorSpec basic_map = TensorSpec("tensor(a{})")
+    .add({{"a", "0"}}, 1.0)
+    .add({{"a", "1"}}, 2.0)
+    .add({{"a", "2"}}, 3.0);
+
+TensorSpec custom_map = TensorSpec("tensor(a{})")
+    .add({{"a", "s0"}}, 1.0)
+    .add({{"a", "s5"}}, 2.0)
+    .add({{"a", "s10"}}, 3.0);
+
+TEST(GenSpec, generating_basic_map) {
+    EXPECT_EQ(GenSpec().map("a", 3).gen(), basic_map);
+    EXPECT_EQ(GenSpec().map("a", 3, 1).gen(), basic_map);
+    EXPECT_EQ(GenSpec().map("a", 3, 1, "").gen(), basic_map);
+    EXPECT_EQ(GenSpec().map("a", {"0", "1", "2"}).gen(), basic_map);
+}
+
+TEST(GenSpec, generating_custom_map) {
+    EXPECT_EQ(GenSpec().map("a", 3, 5, "s").gen(), custom_map);
+    EXPECT_EQ(GenSpec().map("a", {"s0", "s5", "s10"}).gen(), custom_map);
+}
+
+//-----------------------------------------------------------------------------
+
+TensorSpec basic_mixed = TensorSpec("tensor(a{},b[1],c{},d[3])")
+    .add({{"a", "0"},{"b", 0},{"c", "0"},{"d", 0}}, 1.0)
+    .add({{"a", "0"},{"b", 0},{"c", "0"},{"d", 1}}, 2.0)
+    .add({{"a", "0"},{"b", 0},{"c", "0"},{"d", 2}}, 3.0)
+    .add({{"a", "1"},{"b", 0},{"c", "0"},{"d", 0}}, 4.0)
+    .add({{"a", "1"},{"b", 0},{"c", "0"},{"d", 1}}, 5.0)
+    .add({{"a", "1"},{"b", 0},{"c", "0"},{"d", 2}}, 6.0)
+    .add({{"a", "2"},{"b", 0},{"c", "0"},{"d", 0}}, 7.0)
+    .add({{"a", "2"},{"b", 0},{"c", "0"},{"d", 1}}, 8.0)
+    .add({{"a", "2"},{"b", 0},{"c", "0"},{"d", 2}}, 9.0);
+
+TensorSpec inverted_mixed = TensorSpec("tensor(a{},b[1],c{},d[3])")
+    .add({{"a", "0"},{"b", 0},{"c", "0"},{"d", 0}}, 1.0)
+    .add({{"a", "1"},{"b", 0},{"c", "0"},{"d", 0}}, 2.0)
+    .add({{"a", "2"},{"b", 0},{"c", "0"},{"d", 0}}, 3.0)
+    .add({{"a", "0"},{"b", 0},{"c", "0"},{"d", 1}}, 4.0)
+    .add({{"a", "1"},{"b", 0},{"c", "0"},{"d", 1}}, 5.0)
+    .add({{"a", "2"},{"b", 0},{"c", "0"},{"d", 1}}, 6.0)
+    .add({{"a", "0"},{"b", 0},{"c", "0"},{"d", 2}}, 7.0)
+    .add({{"a", "1"},{"b", 0},{"c", "0"},{"d", 2}}, 8.0)
+    .add({{"a", "2"},{"b", 0},{"c", "0"},{"d", 2}}, 9.0);
+
+TEST(GenSpec, generating_basic_mixed) {
+    EXPECT_EQ(GenSpec().map("a", 3).idx("b", 1).map("c", 1).idx("d", 3).gen(), basic_mixed);
+}
+
+TEST(GenSpec, generating_inverted_mixed) {
+    EXPECT_EQ(GenSpec().idx("d", 3).map("c", 1).idx("b", 1).map("a", 3).gen(), inverted_mixed);
+}
+
+//-----------------------------------------------------------------------------
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/vespa/eval/eval/test/CMakeLists.txt b/eval/src/vespa/eval/eval/test/CMakeLists.txt
index 2e9b50da5e6..e82b85d1890 100644
--- a/eval/src/vespa/eval/eval/test/CMakeLists.txt
+++ b/eval/src/vespa/eval/eval/test/CMakeLists.txt
@@ -3,6 +3,7 @@ vespa_add_library(eval_eval_test OBJECT
     SOURCES
     eval_fixture.cpp
     eval_spec.cpp
+    gen_spec.cpp
     reference_evaluation.cpp
     reference_operations.cpp
     tensor_conformance.cpp
diff --git a/eval/src/vespa/eval/eval/test/gen_spec.cpp b/eval/src/vespa/eval/eval/test/gen_spec.cpp
new file mode 100644
index 00000000000..c86b7a5836f
--- /dev/null
+++ b/eval/src/vespa/eval/eval/test/gen_spec.cpp
@@ -0,0 +1,58 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "gen_spec.h"
+#include <vespa/vespalib/util/stringfmt.h>
+#include <cassert>
+
+using vespalib::make_string_short::fmt;
+
+namespace vespalib::eval::test {
+
+DimSpec::~DimSpec() = default;
+
+std::vector<vespalib::string>
+DimSpec::make_dict(size_t size, size_t stride, const vespalib::string &prefix)
+{
+    std::vector<vespalib::string> dict;
+    for (size_t i = 0; i < size; ++i) {
+        dict.push_back(fmt("%s%zu", prefix.c_str(), i * stride));
+    }
+    return dict;
+}
+
+GenSpec::~GenSpec() = default;
+
+ValueType
+GenSpec::type() const
+{
+    std::vector<ValueType::Dimension> dim_types;
+    for (const auto &dim: _dims) {
+        dim_types.push_back(dim.type());
+    }
+    auto type = ValueType::make_type(_cells, dim_types);
+    assert(!type.is_error());
+    return type;
+}
+
+TensorSpec
+GenSpec::gen() const
+{
+    size_t idx = 0;
+    TensorSpec::Address addr;
+    TensorSpec result(type().to_spec());
+    std::function<void(size_t)> add_cells = [&](size_t dim_idx) {
+        if (dim_idx == _dims.size()) {
+            result.add(addr, _seq(idx++));
+        } else {
+            const auto &dim = _dims[dim_idx];
+            for (size_t i = 0; i < dim.size(); ++i) {
+                addr.insert_or_assign(dim.name(), dim.label(i));
+                add_cells(dim_idx + 1);
+            }
+        }
+    };
+    add_cells(0);
+    return result;
+}
+
+} // namespace
diff --git a/eval/src/vespa/eval/eval/test/gen_spec.h b/eval/src/vespa/eval/eval/test/gen_spec.h
new file mode 100644
index 00000000000..81843156fd9
--- /dev/null
+++ b/eval/src/vespa/eval/eval/test/gen_spec.h
@@ -0,0 +1,104 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_spec.h>
+#include <vespa/eval/eval/value_type.h>
+#include <functional>
+#include <cassert>
+
+namespace vespalib::eval::test {
+
+/**
+ * Type and labels for a single dimension of a TensorSpec to be
+ * generated. Dimensions are specified independently of each other for
+ * simplicity. All dense subspaces will be padded during conversion to
+ * actual values, which means that indexed dimensions are inherently
+ * independent already. Using different labels for the same mapped
+ * dimension for different tensors should enable us to exhibit
+ * sufficient levels of partial overlap.
+ **/
+class DimSpec
+{
+private:
+    vespalib::string _name;
+    size_t _size;
+    std::vector<vespalib::string> _dict;
+public:
+    DimSpec(const vespalib::string &name, size_t size) noexcept
+        : _name(name), _size(size), _dict()
+    {
+        assert(_size);
+    }
+    DimSpec(const vespalib::string &name, std::vector<vespalib::string> dict) noexcept
+        : _name(name), _size(), _dict(std::move(dict))
+    {
+        assert(!_size);
+    }
+    ~DimSpec();
+    static std::vector<vespalib::string> make_dict(size_t size, size_t stride, const vespalib::string &prefix);
+    ValueType::Dimension type() const {
+        return _size ? ValueType::Dimension{_name, uint32_t(_size)} : ValueType::Dimension{_name};
+    }
+    const vespalib::string &name() const { return _name; }
+    size_t size() const {
+        return _size ? _size : _dict.size();
+    }
+    TensorSpec::Label label(size_t idx) const {
+        assert(idx < size());
+        return _size ? TensorSpec::Label{idx} : TensorSpec::Label{_dict[idx]};
+    }
+};
+
+/**
+ * Specification defining how to generate a TensorSpec. Typically used
+ * to generate complex values for testing and benchmarking.
+ **/
+class GenSpec
+{
+public:
+    using seq_t = std::function<double(size_t)>;
+private:
+    std::vector<DimSpec> _dims;
+    CellType _cells;
+    seq_t _seq;
+
+    static double default_seq(size_t idx) { return (idx + 1.0); }
+public:
+    GenSpec() : _dims(), _cells(CellType::DOUBLE), _seq(default_seq) {}
+    ~GenSpec();
+    std::vector<DimSpec> dims() const { return _dims; }
+    CellType cells() const { return _cells; }
+    seq_t seq() const { return _seq; }
+    GenSpec &idx(const vespalib::string &name, size_t size) {
+        _dims.emplace_back(name, size);
+        return *this;
+    }
+    GenSpec &map(const vespalib::string &name, size_t size, size_t stride = 1, const vespalib::string &prefix = "") {
+        _dims.emplace_back(name, DimSpec::make_dict(size, stride, prefix));
+        return *this;
+    }
+    GenSpec &map(const vespalib::string &name, std::vector<vespalib::string> dict) {
+        _dims.emplace_back(name, std::move(dict));
+        return *this;
+    }
+    GenSpec &cells(CellType cell_type) {
+        _cells = cell_type;
+        return *this;
+    }
+    GenSpec &cells_double() { return cells(CellType::DOUBLE); }
+    GenSpec &cells_float() { return cells(CellType::FLOAT); }
+    GenSpec &seq(seq_t seq_in) {
+        _seq = seq_in;
+        return *this;
+    }
+    GenSpec &seq_n() { return seq(default_seq); }
+    GenSpec &seq_bias(double bias) {
+        seq_t fun = [bias](size_t idx) { return (idx + bias); };
+        return seq(fun);
+    }
+    ValueType type() const;
+    TensorSpec gen() const;
+};
+
+} // namespace
-- 
cgit v1.2.3

From 2b10d02c9a931050faa8c3aab303949fcbe3fed0 Mon Sep 17 00:00:00 2001
From: Håvard Pettersen
Date: Fri, 29 Jan 2021 13:04:51 +0000
Subject: do not allow generating free floats

---
 eval/src/tests/eval/gen_spec/gen_spec_test.cpp | 14 ++++++--------
 eval/src/vespa/eval/eval/test/gen_spec.cpp     |  2 +-
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/eval/src/tests/eval/gen_spec/gen_spec_test.cpp b/eval/src/tests/eval/gen_spec/gen_spec_test.cpp
index bf4f7006058..bae25a68760 100644
--- a/eval/src/tests/eval/gen_spec/gen_spec_test.cpp
+++ b/eval/src/tests/eval/gen_spec/gen_spec_test.cpp
@@ -52,19 +52,17 @@
 
 //-----------------------------------------------------------------------------
 
-TensorSpec scalar_1   = TensorSpec("double").add({}, 1.0);
-TensorSpec scalar_1_f = TensorSpec("float").add({}, 1.0);
-TensorSpec scalar_5   = TensorSpec("double").add({}, 5.0);
-TensorSpec scalar_5_f = TensorSpec("float").add({}, 5.0);
+TensorSpec scalar_1 = TensorSpec("double").add({}, 1.0);
+TensorSpec scalar_5 = TensorSpec("double").add({}, 5.0);
 
 TEST(GenSpec, scalar_double) {
     EXPECT_EQ(GenSpec().gen(), scalar_1);
     EXPECT_EQ(GenSpec().seq_bias(5.0).gen(), scalar_5);
 }
 
-TEST(GenSpec, scalar_float) {
-    EXPECT_EQ(GenSpec().cells_float().gen(), scalar_1_f);
-    EXPECT_EQ(GenSpec().cells_float().seq_bias(5.0).gen(), scalar_5_f);
+TEST(GenSpec, not_scalar_float_just_yet) {
+    EXPECT_EQ(GenSpec().cells_float().gen(), scalar_1);
+    EXPECT_EQ(GenSpec().cells_float().seq_bias(5.0).gen(), scalar_5);
 }
 
 //-----------------------------------------------------------------------------
@@ -90,7 +90,7 @@ GenSpec dbl() { return GenSpec().cells_double(); }
 
 TEST(GenSpec, value_type) {
     EXPECT_EQ(dbl().type().to_spec(), "double");
-    EXPECT_EQ(flt().type().to_spec(), "float");
+    EXPECT_EQ(flt().type().to_spec(), "double"); // NB
     EXPECT_EQ(dbl().idx("x", 10).type().to_spec(), "tensor(x[10])");
     EXPECT_EQ(flt().idx("x", 10).type().to_spec(), "tensor<float>(x[10])");
     EXPECT_EQ(dbl().map("y", {}).type().to_spec(), "tensor(y{})");
diff --git a/eval/src/vespa/eval/eval/test/gen_spec.cpp b/eval/src/vespa/eval/eval/test/gen_spec.cpp
index c86b7a5836f..9c40c65620e 100644
--- a/eval/src/vespa/eval/eval/test/gen_spec.cpp
+++ b/eval/src/vespa/eval/eval/test/gen_spec.cpp
@@ -29,7 +29,7 @@ GenSpec::type() const
     for (const auto &dim: _dims) {
         dim_types.push_back(dim.type());
     }
-    auto type = ValueType::make_type(_cells, dim_types);
+    auto type = ValueType::tensor_type(dim_types, _cells);
     assert(!type.is_error());
    return type;
 }
-- 
cgit v1.2.3
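
Usage note (illustrative, not part of the commits above): with both patches
applied, GenSpec gives tests a compact way to build the kind of TensorSpec
values that gen_spec_test.cpp otherwise spells out by hand. A minimal sketch;
the helper function name is made up for the example:

    #include <vespa/eval/eval/test/gen_spec.h>

    using vespalib::eval::TensorSpec;
    using vespalib::eval::test::GenSpec;

    // Builds a tensor<float>(x[3],y{}) spec with y-labels "0", "2", "4" and
    // cell values 10.0, 11.0, ... assigned in generation order (x outer, y inner).
    TensorSpec example_spec() {
        return GenSpec()
            .idx("x", 3)       // indexed dimension x[3]
            .map("y", 3, 2)    // mapped dimension y, dictionary {"0", "2", "4"}
            .cells_float()     // FLOAT cell type (a spec without dimensions still generates "double")
            .seq_bias(10.0)    // cell value sequence 10, 11, 12, ...
            .gen();
    }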