diff options
author | Arne Juul <arnej@yahooinc.com> | 2023-06-29 13:50:34 +0000 |
---|---|---|
committer | Arne Juul <arnej@yahooinc.com> | 2023-07-03 13:09:11 +0000 |
commit | 619aa88c3a8b49a1c2ea84afce9e59dc90ed75a9 (patch) | |
tree | 26732c1ee47b6b99c1934fbc7b32113b37895c7a /eval | |
parent | e02a187ae0951b3ee506ea930080b7b67a857df2 (diff) |
handle even more JSON formats for tensors
Diffstat (limited to 'eval')
6 files changed, 145 insertions, 22 deletions
diff --git a/eval/src/tests/eval/value_cache/mixed-blocks-11.json b/eval/src/tests/eval/value_cache/mixed-blocks-11.json new file mode 100644 index 00000000000..1a045491b53 --- /dev/null +++ b/eval/src/tests/eval/value_cache/mixed-blocks-11.json @@ -0,0 +1,5 @@ +{ + "blocks": { + "foo": [1.0, 2] + } +} diff --git a/eval/src/tests/eval/value_cache/mixed-blocks-21.json b/eval/src/tests/eval/value_cache/mixed-blocks-21.json new file mode 100644 index 00000000000..51258612bd3 --- /dev/null +++ b/eval/src/tests/eval/value_cache/mixed-blocks-21.json @@ -0,0 +1,17 @@ +{ + "blocks": [ + { + "address": { "brand": "shiny", "category": "foo" }, + "values": [ 1, 2, 3 ] + }, { + "address": { "brand": "shiny", "category": "bar" }, + "values": [ 1.25, 2.25, 3.25 ] + }, { + "address": { "brand": "stylish", "category": "bar" }, + "values": [ 1.5, 2.5, 3.5 ] + }, { + "address": { "brand": "stylish", "category": "foo" }, + "values": [ 1.75, 2.75, 3.75 ] + } + ] +}, diff --git a/eval/src/tests/eval/value_cache/sparse-short1.json b/eval/src/tests/eval/value_cache/sparse-short1.json index 949b7b2b8bd..741a2160898 100644 --- a/eval/src/tests/eval/value_cache/sparse-short1.json +++ b/eval/src/tests/eval/value_cache/sparse-short1.json @@ -1,5 +1,6 @@ { - "foo": 1.0, - "bar": 2.0, - "three": 3.0 + "foo": 1.0, + "cells": 2.0, + "values": 0.5, + "blocks": 1.5 } diff --git a/eval/src/tests/eval/value_cache/sparse-short2.json b/eval/src/tests/eval/value_cache/sparse-short2.json index f10b1b6f9fb..7eb377968e4 100644 --- a/eval/src/tests/eval/value_cache/sparse-short2.json +++ b/eval/src/tests/eval/value_cache/sparse-short2.json @@ -1,7 +1,8 @@ { - "cells": { - "foo": 1.0, - "bar": 2.0, - "three": 3.0 + "cells": { + "foo": 1.0, + "cells": 2.0, + "values": 0.5, + "blocks": 1.5 } } diff --git a/eval/src/tests/eval/value_cache/tensor_loader_test.cpp b/eval/src/tests/eval/value_cache/tensor_loader_test.cpp index 4b4ba3fc0d3..c10da861c83 100644 --- a/eval/src/tests/eval/value_cache/tensor_loader_test.cpp +++ b/eval/src/tests/eval/value_cache/tensor_loader_test.cpp @@ -35,8 +35,9 @@ TensorSpec make_sparse_tensor() { TensorSpec make_simple_sparse_tensor() { return TensorSpec("tensor(mydim{})") .add({{"mydim", "foo"}}, 1.0) - .add({{"mydim", "three"}}, 3.0) - .add({{"mydim", "bar"}}, 2.0); + .add({{"mydim", "cells"}}, 2.0) + .add({{"mydim", "values"}}, 0.5) + .add({{"mydim", "blocks"}}, 1.5); } TensorSpec make_mixed_tensor() { @@ -99,6 +100,27 @@ TEST_F("require that dense tensor short form can be loaded", ConstantTensorLoade TEST_DO(verify_tensor(make_simple_dense_tensor(), f1.create(TEST_PATH("dense-short2.json"), "tensor(z[3])"))); } +TensorSpec make_mix21_tensor() { + return TensorSpec("tensor<float>(brand{},category{},v[3])") + .add({{"brand", "shiny"}, {"category", "foo"}, {"v", 0}}, 1.0) + .add({{"brand", "shiny"}, {"category", "foo"}, {"v", 1}}, 2.0) + .add({{"brand", "shiny"}, {"category", "foo"}, {"v", 2}}, 3.0) + .add({{"brand", "shiny"}, {"category", "bar"}, {"v", 0}}, 1.25) + .add({{"brand", "shiny"}, {"category", "bar"}, {"v", 1}}, 2.25) + .add({{"brand", "shiny"}, {"category", "bar"}, {"v", 2}}, 3.25) + .add({{"brand", "stylish"}, {"category", "bar"}, {"v", 0}}, 1.5) + .add({{"brand", "stylish"}, {"category", "bar"}, {"v", 1}}, 2.5) + .add({{"brand", "stylish"}, {"category", "bar"}, {"v", 2}}, 3.5) + .add({{"brand", "stylish"}, {"category", "foo"}, {"v", 0}}, 1.75) + .add({{"brand", "stylish"}, {"category", "foo"}, {"v", 1}}, 2.75) + .add({{"brand", "stylish"}, {"category", "foo"}, {"v", 2}}, 3.75); +} + +TEST_F("require that mixed tensor blocks form can be loaded", ConstantTensorLoader(factory)) { + TEST_DO(verify_tensor(make_mixed_tensor(), f1.create(TEST_PATH("mixed-blocks-11.json"), "tensor(x{},y[2])"))); + TEST_DO(verify_tensor(make_mix21_tensor(), f1.create(TEST_PATH("mixed-blocks-21.json"), "tensor<float>(brand{},category{},v[3])"))); +} + TEST_F("require that bad lz4 file fails to load creating empty result", ConstantTensorLoader(factory)) { TEST_DO(verify_tensor(sparse_tensor_nocells(), f1.create(TEST_PATH("bad_lz4.json.lz4"), "tensor(x{},y{})"))); } diff --git a/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp b/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp index 5654a3abcbe..f22e4cbae0f 100644 --- a/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp +++ b/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp @@ -72,6 +72,72 @@ void decodeSingleDenseForm(const Inspector &values, const ValueType &value_type, } } +struct DenseValuesDecoder { + const std::vector<ValueType::Dimension> _idims; + TensorSpec &_target; + void decode(const Inspector &input, const TensorSpec::Address &address, size_t dim_idx) { + if (dim_idx == _idims.size()) { + _target.add(address, input.asDouble()); + } else { + const auto &dimension = _idims[dim_idx]; + if (input.entries() != dimension.size) { + return; // TODO: handle mismatch better + } + for (size_t i = 0; i < input.entries(); ++i) { + TensorSpec::Address sub_address = address; + sub_address.emplace(dimension.name, TensorSpec::Label(i)); + decode(input[i], sub_address, dim_idx + 1); + } + } + } +}; + +void decodeDenseValues(const Inspector &values, const ValueType &value_type, TensorSpec &spec) { + TensorSpec::Address address; + DenseValuesDecoder decoder(value_type.indexed_dimensions(), spec); + decoder.decode(values, address, 0); +} + + +template<typename F> +struct TraverserCallback : ObjectTraverser { + F _f; + TraverserCallback(F f) : _f(std::move(f)) {} + void field(const Memory &name, const Inspector &inspector) override { + _f(name.make_string(), inspector); + } +}; + +void decodeSingleMappedBlocks(const Inspector &blocks, const ValueType &value_type, TensorSpec &spec) { + if (value_type.count_mapped_dimensions() != 1) { + return; // TODO handle mismatch + } + vespalib::string dim_name = value_type.mapped_dimensions()[0].name; + DenseValuesDecoder decoder(value_type.indexed_dimensions(), spec); + auto lambda = [&](vespalib::string label, const Inspector &input) { + TensorSpec::Address address; + address.emplace(dim_name, std::move(label)); + decoder.decode(input, std::move(address), 0); + }; + TraverserCallback cb(lambda); + blocks.traverse(cb); +} + +void decodeAddressedBlocks(const Inspector &blocks, const ValueType &value_type, TensorSpec &spec) { + const auto & idims = value_type.indexed_dimensions(); + std::set<vespalib::string> indexed; + for (const auto &dimension: idims) { + indexed.insert(dimension.name); + } + DenseValuesDecoder decoder(value_type.indexed_dimensions(), spec); + for (size_t i = 0; i < blocks.entries(); ++i) { + TensorSpec::Address address; + AddressExtractor extractor(indexed, address); + blocks[i]["address"].traverse(extractor); + decoder.decode(blocks[i]["values"], address, 0); + } +} + void decodeLiteralForm(const Inspector &cells, const ValueType &value_type, TensorSpec &spec) { std::set<vespalib::string> indexed; for (const auto &dimension: value_type.dimensions()) { @@ -140,19 +206,30 @@ ConstantTensorLoader::create(const vespalib::string &path, const vespalib::strin bool isSingleDenseType = value_type.is_dense() && (value_type.count_indexed_dimensions() == 1); bool isSingleMappedType = value_type.is_sparse() && (value_type.count_mapped_dimensions() == 1); const Inspector &root = slime.get(); - const Inspector &cells = root["cells"]; - const Inspector &values = root["values"]; - if (cells.type().getId() == vespalib::slime::ARRAY::ID) { - decodeLiteralForm(cells, value_type, spec); - } - else if (cells.type().getId() == vespalib::slime::OBJECT::ID && isSingleMappedType) { - decodeSingleMappedForm(cells, value_type, spec); - } - else if (values.type().getId() == vespalib::slime::ARRAY::ID && isSingleDenseType) { - decodeSingleDenseForm(values, value_type, spec); - } - else if (root.type().getId() == vespalib::slime::OBJECT::ID && isSingleMappedType) { - decodeSingleMappedForm(root, value_type, spec); + if (root.type().getId() == vespalib::slime::OBJECT::ID) { + const Inspector &cells = root["cells"]; + const Inspector &values = root["values"]; + const Inspector &blocks = root["blocks"]; + if (cells.type().getId() == vespalib::slime::ARRAY::ID) { + decodeLiteralForm(cells, value_type, spec); + } + else if (cells.type().getId() == vespalib::slime::OBJECT::ID) { + if (isSingleMappedType) { + decodeSingleMappedForm(cells, value_type, spec); + } + } + else if (values.type().getId() == vespalib::slime::ARRAY::ID) { + decodeDenseValues(values, value_type, spec); + } + else if (blocks.type().getId() == vespalib::slime::OBJECT::ID) { + decodeSingleMappedBlocks(blocks, value_type, spec); + } + else if (blocks.type().getId() == vespalib::slime::ARRAY::ID) { + decodeAddressedBlocks(blocks, value_type, spec); + } + else if (isSingleMappedType) { + decodeSingleMappedForm(root, value_type, spec); + } } else if (root.type().getId() == vespalib::slime::ARRAY::ID && isSingleDenseType) { decodeSingleDenseForm(root, value_type, spec); |