summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Arne Juul <arnej@yahooinc.com> 2023-06-29 13:50:34 +0000
committer: Arne Juul <arnej@yahooinc.com> 2023-07-03 13:09:11 +0000
commit: 619aa88c3a8b49a1c2ea84afce9e59dc90ed75a9 (patch)
tree: 26732c1ee47b6b99c1934fbc7b32113b37895c7a
parent: e02a187ae0951b3ee506ea930080b7b67a857df2 (diff)
handle even more JSON formats for tensors
-rw-r--r-- eval/src/tests/eval/value_cache/mixed-blocks-11.json | 5
-rw-r--r-- eval/src/tests/eval/value_cache/mixed-blocks-21.json | 17
-rw-r--r-- eval/src/tests/eval/value_cache/sparse-short1.json | 7
-rw-r--r-- eval/src/tests/eval/value_cache/sparse-short2.json | 9
-rw-r--r-- eval/src/tests/eval/value_cache/tensor_loader_test.cpp | 26
-rw-r--r-- eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp | 103
6 files changed, 145 insertions, 22 deletions
diff --git a/eval/src/tests/eval/value_cache/mixed-blocks-11.json b/eval/src/tests/eval/value_cache/mixed-blocks-11.json
new file mode 100644
index 00000000000..1a045491b53
--- /dev/null
+++ b/eval/src/tests/eval/value_cache/mixed-blocks-11.json
@@ -0,0 +1,5 @@
+{
+ "blocks": {
+ "foo": [1.0, 2]
+ }
+}
diff --git a/eval/src/tests/eval/value_cache/mixed-blocks-21.json b/eval/src/tests/eval/value_cache/mixed-blocks-21.json
new file mode 100644
index 00000000000..51258612bd3
--- /dev/null
+++ b/eval/src/tests/eval/value_cache/mixed-blocks-21.json
@@ -0,0 +1,17 @@
+{
+ "blocks": [
+ {
+ "address": { "brand": "shiny", "category": "foo" },
+ "values": [ 1, 2, 3 ]
+ }, {
+ "address": { "brand": "shiny", "category": "bar" },
+ "values": [ 1.25, 2.25, 3.25 ]
+ }, {
+ "address": { "brand": "stylish", "category": "bar" },
+ "values": [ 1.5, 2.5, 3.5 ]
+ }, {
+ "address": { "brand": "stylish", "category": "foo" },
+ "values": [ 1.75, 2.75, 3.75 ]
+ }
+ ]
+}
diff --git a/eval/src/tests/eval/value_cache/sparse-short1.json b/eval/src/tests/eval/value_cache/sparse-short1.json
index 949b7b2b8bd..741a2160898 100644
--- a/eval/src/tests/eval/value_cache/sparse-short1.json
+++ b/eval/src/tests/eval/value_cache/sparse-short1.json
@@ -1,5 +1,6 @@
{
- "foo": 1.0,
- "bar": 2.0,
- "three": 3.0
+ "foo": 1.0,
+ "cells": 2.0,
+ "values": 0.5,
+ "blocks": 1.5
}
diff --git a/eval/src/tests/eval/value_cache/sparse-short2.json b/eval/src/tests/eval/value_cache/sparse-short2.json
index f10b1b6f9fb..7eb377968e4 100644
--- a/eval/src/tests/eval/value_cache/sparse-short2.json
+++ b/eval/src/tests/eval/value_cache/sparse-short2.json
@@ -1,7 +1,8 @@
{
- "cells": {
- "foo": 1.0,
- "bar": 2.0,
- "three": 3.0
+ "cells": {
+ "foo": 1.0,
+ "cells": 2.0,
+ "values": 0.5,
+ "blocks": 1.5
}
}
diff --git a/eval/src/tests/eval/value_cache/tensor_loader_test.cpp b/eval/src/tests/eval/value_cache/tensor_loader_test.cpp
index 4b4ba3fc0d3..c10da861c83 100644
--- a/eval/src/tests/eval/value_cache/tensor_loader_test.cpp
+++ b/eval/src/tests/eval/value_cache/tensor_loader_test.cpp
@@ -35,8 +35,9 @@ TensorSpec make_sparse_tensor() {
TensorSpec make_simple_sparse_tensor() {
return TensorSpec("tensor(mydim{})")
.add({{"mydim", "foo"}}, 1.0)
- .add({{"mydim", "three"}}, 3.0)
- .add({{"mydim", "bar"}}, 2.0);
+ .add({{"mydim", "cells"}}, 2.0)
+ .add({{"mydim", "values"}}, 0.5)
+ .add({{"mydim", "blocks"}}, 1.5);
}
TensorSpec make_mixed_tensor() {
@@ -99,6 +100,27 @@ TEST_F("require that dense tensor short form can be loaded", ConstantTensorLoade
TEST_DO(verify_tensor(make_simple_dense_tensor(), f1.create(TEST_PATH("dense-short2.json"), "tensor(z[3])")));
}
+TensorSpec make_mix21_tensor() {
+ return TensorSpec("tensor<float>(brand{},category{},v[3])")
+ .add({{"brand", "shiny"}, {"category", "foo"}, {"v", 0}}, 1.0)
+ .add({{"brand", "shiny"}, {"category", "foo"}, {"v", 1}}, 2.0)
+ .add({{"brand", "shiny"}, {"category", "foo"}, {"v", 2}}, 3.0)
+ .add({{"brand", "shiny"}, {"category", "bar"}, {"v", 0}}, 1.25)
+ .add({{"brand", "shiny"}, {"category", "bar"}, {"v", 1}}, 2.25)
+ .add({{"brand", "shiny"}, {"category", "bar"}, {"v", 2}}, 3.25)
+ .add({{"brand", "stylish"}, {"category", "bar"}, {"v", 0}}, 1.5)
+ .add({{"brand", "stylish"}, {"category", "bar"}, {"v", 1}}, 2.5)
+ .add({{"brand", "stylish"}, {"category", "bar"}, {"v", 2}}, 3.5)
+ .add({{"brand", "stylish"}, {"category", "foo"}, {"v", 0}}, 1.75)
+ .add({{"brand", "stylish"}, {"category", "foo"}, {"v", 1}}, 2.75)
+ .add({{"brand", "stylish"}, {"category", "foo"}, {"v", 2}}, 3.75);
+}
+
+TEST_F("require that mixed tensor blocks form can be loaded", ConstantTensorLoader(factory)) {
+ TEST_DO(verify_tensor(make_mixed_tensor(), f1.create(TEST_PATH("mixed-blocks-11.json"), "tensor(x{},y[2])")));
+ TEST_DO(verify_tensor(make_mix21_tensor(), f1.create(TEST_PATH("mixed-blocks-21.json"), "tensor<float>(brand{},category{},v[3])")));
+}
+
TEST_F("require that bad lz4 file fails to load creating empty result", ConstantTensorLoader(factory)) {
TEST_DO(verify_tensor(sparse_tensor_nocells(), f1.create(TEST_PATH("bad_lz4.json.lz4"), "tensor(x{},y{})")));
}
diff --git a/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp b/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp
index 5654a3abcbe..f22e4cbae0f 100644
--- a/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp
+++ b/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp
@@ -72,6 +72,72 @@ void decodeSingleDenseForm(const Inspector &values, const ValueType &value_type,
}
}
+struct DenseValuesDecoder {
+ const std::vector<ValueType::Dimension> _idims;
+ TensorSpec &_target;
+ void decode(const Inspector &input, const TensorSpec::Address &address, size_t dim_idx) {
+ if (dim_idx == _idims.size()) {
+ _target.add(address, input.asDouble());
+ } else {
+ const auto &dimension = _idims[dim_idx];
+ if (input.entries() != dimension.size) {
+ return; // TODO: handle mismatch better
+ }
+ for (size_t i = 0; i < input.entries(); ++i) {
+ TensorSpec::Address sub_address = address;
+ sub_address.emplace(dimension.name, TensorSpec::Label(i));
+ decode(input[i], sub_address, dim_idx + 1);
+ }
+ }
+ }
+};
+
+void decodeDenseValues(const Inspector &values, const ValueType &value_type, TensorSpec &spec) {
+ TensorSpec::Address address;
+ DenseValuesDecoder decoder(value_type.indexed_dimensions(), spec);
+ decoder.decode(values, address, 0);
+}
+
+
+template<typename F>
+struct TraverserCallback : ObjectTraverser {
+ F _f;
+ TraverserCallback(F f) : _f(std::move(f)) {}
+ void field(const Memory &name, const Inspector &inspector) override {
+ _f(name.make_string(), inspector);
+ }
+};
+
+void decodeSingleMappedBlocks(const Inspector &blocks, const ValueType &value_type, TensorSpec &spec) {
+ if (value_type.count_mapped_dimensions() != 1) {
+ return; // TODO handle mismatch
+ }
+ vespalib::string dim_name = value_type.mapped_dimensions()[0].name;
+ DenseValuesDecoder decoder(value_type.indexed_dimensions(), spec);
+ auto lambda = [&](vespalib::string label, const Inspector &input) {
+ TensorSpec::Address address;
+ address.emplace(dim_name, std::move(label));
+ decoder.decode(input, std::move(address), 0);
+ };
+ TraverserCallback cb(lambda);
+ blocks.traverse(cb);
+}
+
+void decodeAddressedBlocks(const Inspector &blocks, const ValueType &value_type, TensorSpec &spec) {
+ const auto & idims = value_type.indexed_dimensions();
+ std::set<vespalib::string> indexed;
+ for (const auto &dimension: idims) {
+ indexed.insert(dimension.name);
+ }
+ DenseValuesDecoder decoder(value_type.indexed_dimensions(), spec);
+ for (size_t i = 0; i < blocks.entries(); ++i) {
+ TensorSpec::Address address;
+ AddressExtractor extractor(indexed, address);
+ blocks[i]["address"].traverse(extractor);
+ decoder.decode(blocks[i]["values"], address, 0);
+ }
+}
+
void decodeLiteralForm(const Inspector &cells, const ValueType &value_type, TensorSpec &spec) {
std::set<vespalib::string> indexed;
for (const auto &dimension: value_type.dimensions()) {
@@ -140,19 +206,30 @@ ConstantTensorLoader::create(const vespalib::string &path, const vespalib::strin
bool isSingleDenseType = value_type.is_dense() && (value_type.count_indexed_dimensions() == 1);
bool isSingleMappedType = value_type.is_sparse() && (value_type.count_mapped_dimensions() == 1);
const Inspector &root = slime.get();
- const Inspector &cells = root["cells"];
- const Inspector &values = root["values"];
- if (cells.type().getId() == vespalib::slime::ARRAY::ID) {
- decodeLiteralForm(cells, value_type, spec);
- }
- else if (cells.type().getId() == vespalib::slime::OBJECT::ID && isSingleMappedType) {
- decodeSingleMappedForm(cells, value_type, spec);
- }
- else if (values.type().getId() == vespalib::slime::ARRAY::ID && isSingleDenseType) {
- decodeSingleDenseForm(values, value_type, spec);
- }
- else if (root.type().getId() == vespalib::slime::OBJECT::ID && isSingleMappedType) {
- decodeSingleMappedForm(root, value_type, spec);
+ if (root.type().getId() == vespalib::slime::OBJECT::ID) {
+ const Inspector &cells = root["cells"];
+ const Inspector &values = root["values"];
+ const Inspector &blocks = root["blocks"];
+ if (cells.type().getId() == vespalib::slime::ARRAY::ID) {
+ decodeLiteralForm(cells, value_type, spec);
+ }
+ else if (cells.type().getId() == vespalib::slime::OBJECT::ID) {
+ if (isSingleMappedType) {
+ decodeSingleMappedForm(cells, value_type, spec);
+ }
+ }
+ else if (values.type().getId() == vespalib::slime::ARRAY::ID) {
+ decodeDenseValues(values, value_type, spec);
+ }
+ else if (blocks.type().getId() == vespalib::slime::OBJECT::ID) {
+ decodeSingleMappedBlocks(blocks, value_type, spec);
+ }
+ else if (blocks.type().getId() == vespalib::slime::ARRAY::ID) {
+ decodeAddressedBlocks(blocks, value_type, spec);
+ }
+ else if (isSingleMappedType) {
+ decodeSingleMappedForm(root, value_type, spec);
+ }
}
else if (root.type().getId() == vespalib::slime::ARRAY::ID && isSingleDenseType) {
decodeSingleDenseForm(root, value_type, spec);