diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2021-01-30 16:07:51 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-30 16:07:51 +0100 |
commit | 11eef4bf87bba8100a5a605b312e21802432020e (patch) | |
tree | 4761da97cfe2bd2644cf2d9bda1fbacdab84bccf | |
parent | 527035ccb63501f3e0b3f23157c2cd902eef551a (diff) | |
parent | 2b10d02c9a931050faa8c3aab303949fcbe3fed0 (diff) |
Merge pull request #16291 from vespa-engine/havardpe/gen-spec
Havardpe/gen spec
-rw-r--r-- | eval/CMakeLists.txt | 1 | ||||
-rw-r--r-- | eval/src/tests/eval/gen_spec/CMakeLists.txt | 9 | ||||
-rw-r--r-- | eval/src/tests/eval/gen_spec/gen_spec_test.cpp | 196 | ||||
-rw-r--r-- | eval/src/vespa/eval/eval/test/CMakeLists.txt | 1 | ||||
-rw-r--r-- | eval/src/vespa/eval/eval/test/gen_spec.cpp | 58 | ||||
-rw-r--r-- | eval/src/vespa/eval/eval/test/gen_spec.h | 104 |
6 files changed, 369 insertions, 0 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 239cf8f0f23..fe621f6e9f0 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -19,6 +19,7 @@ vespa_define_module( src/tests/eval/function src/tests/eval/function_speed src/tests/eval/gbdt + src/tests/eval/gen_spec src/tests/eval/inline_operation src/tests/eval/interpreted_function src/tests/eval/multiply_add diff --git a/eval/src/tests/eval/gen_spec/CMakeLists.txt b/eval/src/tests/eval/gen_spec/CMakeLists.txt new file mode 100644 index 00000000000..3613554f0a0 --- /dev/null +++ b/eval/src/tests/eval/gen_spec/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_gen_spec_test_app TEST + SOURCES + gen_spec_test.cpp + DEPENDS + vespaeval + GTest::GTest +) +vespa_add_test(NAME eval_gen_spec_test_app COMMAND eval_gen_spec_test_app) diff --git a/eval/src/tests/eval/gen_spec/gen_spec_test.cpp b/eval/src/tests/eval/gen_spec/gen_spec_test.cpp new file mode 100644 index 00000000000..bae25a68760 --- /dev/null +++ b/eval/src/tests/eval/gen_spec/gen_spec_test.cpp @@ -0,0 +1,196 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#include <vespa/eval/eval/test/gen_spec.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib::eval;
+using namespace vespalib::eval::test;
+
+//-----------------------------------------------------------------------------
+
+// An indexed DimSpec reports the same dimension type as the equivalent
+// ValueType::Dimension, and its labels are numeric indexes.
+TEST(DimSpec, indexed_dimension) {
+    ValueType::Dimension ref("foo", 10);
+    DimSpec idx("foo", 10);
+    EXPECT_EQ(idx.type(), ref);
+    EXPECT_TRUE(ref.is_indexed());
+    EXPECT_EQ(idx.name(), "foo");
+    EXPECT_EQ(idx.size(), 10);
+    EXPECT_EQ(idx.label(3), TensorSpec::Label(size_t(3)));
+}
+
+// A mapped DimSpec takes its size from the dictionary and its labels are
+// the dictionary entries, in the order given.
+TEST(DimSpec, mapped_dimension) {
+    ValueType::Dimension ref("foo");
+    DimSpec map("foo", {"a", "b", "c", "d"});
+    EXPECT_EQ(map.type(), ref);
+    EXPECT_TRUE(ref.is_mapped());
+    EXPECT_EQ(map.name(), "foo");
+    EXPECT_EQ(map.size(), 4);
+    EXPECT_EQ(map.label(2), TensorSpec::Label("c"));
+}
+
+// With stride 1 and an empty prefix, make_dict yields "0".."size-1".
+TEST(DimSpec, simple_dictionary_creation) {
+    auto dict = DimSpec::make_dict(5, 1, "");
+    std::vector<vespalib::string> expect = {"0", "1", "2", "3", "4"};
+    // Without this comparison the test passes no matter what make_dict returns.
+    EXPECT_EQ(dict, expect);
+}
+
+// Each label is the prefix followed by (index * stride).
+TEST(DimSpec, advanced_dictionary_creation) {
+    auto dict = DimSpec::make_dict(5, 3, "str_");
+    std::vector<vespalib::string> expect = {"str_0", "str_3", "str_6", "str_9", "str_12"};
+    // Without this comparison the test passes no matter what make_dict returns.
+    EXPECT_EQ(dict, expect);
+}
+
+//-----------------------------------------------------------------------------
+
+// A default-constructed GenSpec has no dimensions, double cells and the
+// sequence 1, 2, 3, ...
+TEST(GenSpec, default_spec) {
+    GenSpec spec;
+    EXPECT_TRUE(spec.dims().empty());
+    EXPECT_EQ(spec.cells(), CellType::DOUBLE);
+    auto seq = spec.seq();
+    for (size_t i = 0; i < 4096; ++i) {
+        EXPECT_EQ(seq(i), (i + 1.0));
+    }
+}
+
+//-----------------------------------------------------------------------------
+
+// Expected results for specs without dimensions: a single cell with an
+// empty address.
+TensorSpec scalar_1 = TensorSpec("double").add({}, 1.0);
+TensorSpec scalar_5 = TensorSpec("double").add({}, 5.0);
+
+// The single generated cell holds the first sequence value.
+TEST(GenSpec, scalar_double) {
+    EXPECT_EQ(GenSpec().gen(), scalar_1);
+    EXPECT_EQ(GenSpec().seq_bias(5.0).gen(), scalar_5);
+}
+
+// Requesting float cells without any dimensions still generates a spec
+// of type "double" (same expectations as scalar_double).
+TEST(GenSpec, not_scalar_float_just_yet) {
+    EXPECT_EQ(GenSpec().cells_float().gen(), scalar_1);
+    EXPECT_EQ(GenSpec().cells_float().seq_bias(5.0).gen(), scalar_5);
+}
+
+//-----------------------------------------------------------------------------
+
+// seq_n() selects the default sequence: value at index i is i + 1.
+TEST(Seq, seq_n) {
+    GenSpec::seq_t seq = GenSpec().seq_n().seq();
+    for (size_t i = 0; i < 4096; ++i) {
+        EXPECT_EQ(seq(i), (i + 1.0));
+    }
+}
+
+// seq_bias(b) selects the sequence whose value at index i is i + b.
+TEST(Seq, seq_bias) {
+    GenSpec::seq_t seq = GenSpec().seq_bias(13.0).seq();
+    for (size_t i = 0; i < 4096; ++i) {
+        EXPECT_EQ(seq(i), (i + 13.0));
+    }
+}
+
+//-----------------------------------------------------------------------------
+
+// Shorthands for a fresh GenSpec with float / double cells.
+GenSpec flt() { return GenSpec().cells_float(); }
+GenSpec dbl() { return GenSpec().cells_double(); }
+
+// The resulting type lists dimensions sorted by name regardless of the
+// order in which they were added to the GenSpec.
+TEST(GenSpec, value_type) {
+    EXPECT_EQ(dbl().type().to_spec(), "double");
+    EXPECT_EQ(flt().type().to_spec(), "double"); // NB: float cells without dimensions still give "double"
+    EXPECT_EQ(dbl().idx("x", 10).type().to_spec(), "tensor(x[10])");
+    EXPECT_EQ(flt().idx("x", 10).type().to_spec(), "tensor<float>(x[10])");
+    EXPECT_EQ(dbl().map("y", {}).type().to_spec(), "tensor(y{})");
+    EXPECT_EQ(flt().map("y", {}).type().to_spec(), "tensor<float>(y{})");
+    EXPECT_EQ(dbl().idx("x", 10).map("y", {}).type().to_spec(), "tensor(x[10],y{})");
+    EXPECT_EQ(flt().idx("x", 10).map("y", {}).type().to_spec(), "tensor<float>(x[10],y{})");
+    EXPECT_EQ(dbl().map("y", 3, 1).idx("x", 10).type().to_spec(), "tensor(x[10],y{})");
+    EXPECT_EQ(flt().map("y", 3, 1, "str").idx("x", 10).type().to_spec(), "tensor<float>(x[10],y{})");
+}
+
+//-----------------------------------------------------------------------------
+
+// Expected result for a 5-element indexed dimension with the default sequence.
+TensorSpec basic_vector = TensorSpec("tensor(a[5])")
+    .add({{"a", 0}}, 1.0)
+    .add({{"a", 1}}, 2.0)
+    .add({{"a", 2}}, 3.0)
+    .add({{"a", 3}}, 4.0)
+    .add({{"a", 4}}, 5.0);
+
+// Same cells as basic_vector, but with float cell type.
+TensorSpec float_vector = TensorSpec("tensor<float>(a[5])")
+    .add({{"a", 0}}, 1.0)
+    .add({{"a", 1}}, 2.0)
+    .add({{"a", 2}}, 3.0)
+    .add({{"a", 3}}, 4.0)
+    .add({{"a", 4}}, 5.0);
+
+// Expected result for the custom sequence (5 - idx) used below.
+TensorSpec custom_vector = TensorSpec("tensor(a[5])")
+    .add({{"a", 0}}, 5.0)
+    .add({{"a", 1}}, 4.0)
+    .add({{"a", 2}}, 3.0)
+    .add({{"a", 3}}, 2.0)
+    .add({{"a", 4}}, 1.0);
+
+TEST(GenSpec, generating_basic_vector) {
+    EXPECT_EQ(GenSpec().idx("a", 5).gen(), basic_vector);
+}
+
+TEST(GenSpec, generating_float_vector) {
+    EXPECT_EQ(GenSpec().idx("a", 5).cells_float().gen(), float_vector);
+}
+
+// A user-supplied sequence function replaces the default one.
+TEST(GenSpec, generating_custom_vector) {
+    GenSpec::seq_t my_seq = [](size_t idx){ return (5.0 - idx); };
+    EXPECT_EQ(GenSpec().idx("a", 5).seq(my_seq).gen(), custom_vector);
+}
+
+//-----------------------------------------------------------------------------
+
+// Expected result for a mapped dimension with generated labels "0", "1", "2".
+TensorSpec basic_map = TensorSpec("tensor(a{})")
+    .add({{"a", "0"}}, 1.0)
+    .add({{"a", "1"}}, 2.0)
+    .add({{"a", "2"}}, 3.0);
+
+// Expected result for labels generated with stride 5 and prefix "s".
+TensorSpec custom_map = TensorSpec("tensor(a{})")
+    .add({{"a", "s0"}}, 1.0)
+    .add({{"a", "s5"}}, 2.0)
+    .add({{"a", "s10"}}, 3.0);
+
+// The defaulted stride/prefix arguments and an explicit dictionary all
+// produce the same spec.
+TEST(GenSpec, generating_basic_map) {
+    EXPECT_EQ(GenSpec().map("a", 3).gen(), basic_map);
+    EXPECT_EQ(GenSpec().map("a", 3, 1).gen(), basic_map);
+    EXPECT_EQ(GenSpec().map("a", 3, 1, "").gen(), basic_map);
+    EXPECT_EQ(GenSpec().map("a", {"0", "1", "2"}).gen(), basic_map);
+}
+
+TEST(GenSpec, generating_custom_map) {
+    EXPECT_EQ(GenSpec().map("a", 3, 5, "s").gen(), custom_map);
+    EXPECT_EQ(GenSpec().map("a", {"s0", "s5", "s10"}).gen(), custom_map);
+}
+
+//-----------------------------------------------------------------------------
+
+// Dimensions added in the order a, b, c, d: the last one added ('d')
+// varies fastest as sequence values are assigned.
+TensorSpec basic_mixed = TensorSpec("tensor(a{},b[1],c{},d[3])")
+    .add({{"a", "0"},{"b", 0},{"c", "0"},{"d", 0}}, 1.0)
+    .add({{"a", "0"},{"b", 0},{"c", "0"},{"d", 1}}, 2.0)
+    .add({{"a", "0"},{"b", 0},{"c", "0"},{"d", 2}}, 3.0)
+    .add({{"a", "1"},{"b", 0},{"c", "0"},{"d", 0}}, 4.0)
+    .add({{"a", "1"},{"b", 0},{"c", "0"},{"d", 1}}, 5.0)
+    .add({{"a", "1"},{"b", 0},{"c", "0"},{"d", 2}}, 6.0)
+    .add({{"a", "2"},{"b", 0},{"c", "0"},{"d", 0}}, 7.0)
+    .add({{"a", "2"},{"b", 0},{"c", "0"},{"d", 1}}, 8.0)
+    .add({{"a", "2"},{"b", 0},{"c", "0"},{"d", 2}}, 9.0);
+
+// Same type, but dimensions added in the order d, c, b, a: now 'a'
+// varies fastest.
+TensorSpec inverted_mixed = TensorSpec("tensor(a{},b[1],c{},d[3])")
+    .add({{"a", "0"},{"b", 0},{"c", "0"},{"d", 0}}, 1.0)
+    .add({{"a", "1"},{"b", 0},{"c", "0"},{"d", 0}}, 2.0)
+    .add({{"a", "2"},{"b", 0},{"c", "0"},{"d", 0}}, 3.0)
+    .add({{"a", "0"},{"b", 0},{"c", "0"},{"d", 1}}, 4.0)
+    .add({{"a", "1"},{"b", 0},{"c", "0"},{"d", 1}}, 5.0)
+    .add({{"a", "2"},{"b", 0},{"c", "0"},{"d", 1}}, 6.0)
+    .add({{"a", "0"},{"b", 0},{"c", "0"},{"d", 2}}, 7.0)
+    .add({{"a", "1"},{"b", 0},{"c", "0"},{"d", 2}}, 8.0)
+    .add({{"a", "2"},{"b", 0},{"c", "0"},{"d", 2}}, 9.0);
+
+TEST(GenSpec, generating_basic_mixed) {
+    EXPECT_EQ(GenSpec().map("a", 3).idx("b", 1).map("c", 1).idx("d", 3).gen(), basic_mixed);
+}
+
+TEST(GenSpec, generating_inverted_mixed) {
+    EXPECT_EQ(GenSpec().idx("d", 3).map("c", 1).idx("b", 1).map("a", 3).gen(), inverted_mixed);
+}
+
+//-----------------------------------------------------------------------------
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/vespa/eval/eval/test/CMakeLists.txt b/eval/src/vespa/eval/eval/test/CMakeLists.txt
index 2e9b50da5e6..e82b85d1890 100644
--- a/eval/src/vespa/eval/eval/test/CMakeLists.txt
+++ b/eval/src/vespa/eval/eval/test/CMakeLists.txt
@@ -3,6 +3,7 @@ vespa_add_library(eval_eval_test OBJECT
     SOURCES
     eval_fixture.cpp
     eval_spec.cpp
+    gen_spec.cpp
     reference_evaluation.cpp
     reference_operations.cpp
     tensor_conformance.cpp
diff --git a/eval/src/vespa/eval/eval/test/gen_spec.cpp b/eval/src/vespa/eval/eval/test/gen_spec.cpp
new file mode 100644
index 00000000000..9c40c65620e
--- /dev/null
+++ b/eval/src/vespa/eval/eval/test/gen_spec.cpp
@@ -0,0 +1,58 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "gen_spec.h"
+#include <vespa/eval/eval/string_stuff.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+using vespalib::make_string_short::fmt;
+
+namespace vespalib::eval::test {
+
+DimSpec::~DimSpec() = default;
+
+/**
+ * Create 'size' labels where label i is 'prefix' followed by the
+ * decimal value of (i * stride).
+ **/
+std::vector<vespalib::string>
+DimSpec::make_dict(size_t size, size_t stride, const vespalib::string &prefix)
+{
+    std::vector<vespalib::string> dict;
+    dict.reserve(size); // final size is known up front
+    for (size_t i = 0; i < size; ++i) {
+        dict.push_back(fmt("%s%zu", prefix.c_str(), i * stride));
+    }
+    return dict;
+}
+
+GenSpec::~GenSpec() = default;
+
+/**
+ * Combine the types of all dimensions with the selected cell type.
+ * Asserts that the combination is not an error type.
+ **/
+ValueType
+GenSpec::type() const
+{
+    std::vector<ValueType::Dimension> dim_types;
+    dim_types.reserve(_dims.size()); // one entry per dimension
+    for (const auto &dim: _dims) {
+        dim_types.push_back(dim.type());
+    }
+    auto type = ValueType::tensor_type(dim_types, _cells);
+    assert(!type.is_error());
+    return type;
+}
+
+/**
+ * Generate a TensorSpec with one cell per combination of labels
+ * across all dimensions. Cell values are taken from the sequence
+ * function, called with 0, 1, 2, ... in generation order. Dimensions
+ * are traversed in the order they were added, so the dimension added
+ * last varies fastest. Without dimensions, a single cell with an
+ * empty address is generated.
+ **/
+TensorSpec
+GenSpec::gen() const
+{
+    size_t idx = 0;
+    TensorSpec::Address addr;
+    TensorSpec result(type().to_spec());
+    // Recursive helper: binds a label for dimension 'dim_idx' and
+    // recurses; emits a cell once all dimensions have a label.
+    std::function<void(size_t)> add_cells = [&](size_t dim_idx) {
+        if (dim_idx == _dims.size()) {
+            result.add(addr, _seq(idx++));
+        } else {
+            const auto &dim = _dims[dim_idx];
+            for (size_t i = 0; i < dim.size(); ++i) {
+                // overwrite the label left behind by the previous iteration
+                addr.insert_or_assign(dim.name(), dim.label(i));
+                add_cells(dim_idx + 1);
+            }
+        }
+    };
+    add_cells(0);
+    return result;
+}
+
+} // namespace
diff --git a/eval/src/vespa/eval/eval/test/gen_spec.h b/eval/src/vespa/eval/eval/test/gen_spec.h
new file mode 100644
index 00000000000..81843156fd9
--- /dev/null
+++ b/eval/src/vespa/eval/eval/test/gen_spec.h
@@ -0,0 +1,104 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/tensor_spec.h>
+#include <vespa/eval/eval/value_type.h>
+#include <functional>
+#include <cassert>
+#include <utility>
+#include <vector>
+
+namespace vespalib::eval::test {
+
+/**
+ * Type and labels for a single dimension of a TensorSpec to be
+ * generated. Dimensions are specified independent of each other for
+ * simplicity. All dense subspaces will be padded during conversion to
+ * actual values, which means that indexed dimensions are inherently
+ * independent already. Using different labels for the same mapped
+ * dimension for different tensors should enable us to exhibit
+ * sufficient levels of partial overlap.
+ *
+ * A non-zero _size denotes an indexed dimension; a zero _size denotes
+ * a mapped dimension whose labels are stored in _dict.
+ **/
+class DimSpec
+{
+private:
+    vespalib::string _name;
+    size_t _size;
+    std::vector<vespalib::string> _dict;
+public:
+    // Indexed dimension with the given (non-zero) size.
+    // NOTE(review): declared noexcept although copying 'name' may allocate.
+    DimSpec(const vespalib::string &name, size_t size) noexcept
+        : _name(name), _size(size), _dict()
+    {
+        assert(_size);
+    }
+    // Mapped dimension with the given labels (may be empty).
+    // NOTE(review): declared noexcept although copying 'name' may allocate.
+    DimSpec(const vespalib::string &name, std::vector<vespalib::string> dict) noexcept
+        : _name(name), _size(), _dict(std::move(dict))
+    {
+        assert(!_size);
+    }
+    ~DimSpec();
+    static std::vector<vespalib::string> make_dict(size_t size, size_t stride, const vespalib::string &prefix);
+    // Dimension type: indexed with _size entries, or mapped.
+    // NOTE(review): _size is narrowed to uint32_t here without a range check.
+    ValueType::Dimension type() const {
+        return _size ? ValueType::Dimension{_name, uint32_t(_size)} : ValueType::Dimension{_name};
+    }
+    const vespalib::string &name() const { return _name; }
+    // Number of labels: the index range if indexed, the dictionary size if mapped.
+    size_t size() const {
+        return _size ? _size : _dict.size();
+    }
+    // Label number 'idx': the index itself if indexed, the dictionary entry if mapped.
+    TensorSpec::Label label(size_t idx) const {
+        assert(idx < size());
+        return _size ? TensorSpec::Label{idx} : TensorSpec::Label{_dict[idx]};
+    }
+};
+
+/**
+ * Specification defining how to generate a TensorSpec. Typically used
+ * to generate complex values for testing and benchmarking.
+ *
+ * Setters return *this so that calls can be chained. Defaults are no
+ * dimensions, double cells and the sequence 1, 2, 3, ...
+ **/
+class GenSpec
+{
+public:
+    // Maps the running cell number (starting at 0) to a cell value.
+    using seq_t = std::function<double(size_t)>;
+private:
+    std::vector<DimSpec> _dims;
+    CellType _cells;
+    seq_t _seq;
+
+    static double default_seq(size_t idx) { return (idx + 1.0); }
+public:
+    GenSpec() : _dims(), _cells(CellType::DOUBLE), _seq(default_seq) {}
+    ~GenSpec();
+    // NOTE(review): returns a copy of the dimension list on every call.
+    std::vector<DimSpec> dims() const { return _dims; }
+    CellType cells() const { return _cells; }
+    seq_t seq() const { return _seq; }
+    // Append an indexed dimension.
+    GenSpec &idx(const vespalib::string &name, size_t size) {
+        _dims.emplace_back(name, size);
+        return *this;
+    }
+    // Append a mapped dimension with labels generated by DimSpec::make_dict.
+    GenSpec &map(const vespalib::string &name, size_t size, size_t stride = 1, const vespalib::string &prefix = "") {
+        _dims.emplace_back(name, DimSpec::make_dict(size, stride, prefix));
+        return *this;
+    }
+    // Append a mapped dimension with explicit labels.
+    GenSpec &map(const vespalib::string &name, std::vector<vespalib::string> dict) {
+        _dims.emplace_back(name, std::move(dict));
+        return *this;
+    }
+    GenSpec &cells(CellType cell_type) {
+        _cells = cell_type;
+        return *this;
+    }
+    GenSpec &cells_double() { return cells(CellType::DOUBLE); }
+    GenSpec &cells_float() { return cells(CellType::FLOAT); }
+    // Replace the sequence function used to produce cell values.
+    GenSpec &seq(seq_t seq_in) {
+        _seq = std::move(seq_in); // by-value sink parameter: move, don't copy
+        return *this;
+    }
+    // Use the default sequence: value at index i is i + 1.
+    GenSpec &seq_n() { return seq(default_seq); }
+    // Use the sequence whose value at index i is i + bias.
+    GenSpec &seq_bias(double bias) {
+        return seq([bias](size_t idx) { return (idx + bias); });
+    }
+    ValueType type() const;
+    TensorSpec gen() const;
+};
+
+} // namespace |