summaryrefslogtreecommitdiffstats
path: root/eval
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@yahooinc.com>2023-07-10 15:28:52 +0200
committerGitHub <noreply@github.com>2023-07-10 15:28:52 +0200
commit7f0e4c1d88d068d09b3c4d0100f5a9206bfafbaf (patch)
tree58d9ff127e008589be29b54d04ba8b1ac4eb28f5 /eval
parentc67b3bb2f3da442c0e4645157319c06f37761916 (diff)
parent4e5814778ac43143d42e6ce0336df8976a0ba48b (diff)
Merge pull request #27655 from vespa-engine/arnej/defensive-constant-tensor-loader
Arnej/defensive constant tensor loader
Diffstat (limited to 'eval')
-rw-r--r--eval/src/tests/eval/value_cache/dense.json6
-rw-r--r--eval/src/tests/eval/value_cache/sparse-short1.json2
-rw-r--r--eval/src/tests/eval/value_cache/sparse-short2.json2
-rw-r--r--eval/src/tests/eval/value_cache/sparse.json1
-rw-r--r--eval/src/tests/eval/value_cache/sparse.json.lz4bin153 -> 170 bytes
-rw-r--r--eval/src/tests/eval/value_cache/tensor_loader_test.cpp5
-rw-r--r--eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp120
7 files changed, 101 insertions, 35 deletions
diff --git a/eval/src/tests/eval/value_cache/dense.json b/eval/src/tests/eval/value_cache/dense.json
index 2263053f01f..f310ee9dc32 100644
--- a/eval/src/tests/eval/value_cache/dense.json
+++ b/eval/src/tests/eval/value_cache/dense.json
@@ -1,8 +1,8 @@
{
"dimensions": ["x","y"],
"cells": [
- { "address": { "x": "0", "y": "0" }, "value": 1.0 },
- { "address": { "x": "0", "y": "1" }, "value": 2.0 },
- { "address": { "x": "1", "y": "0" }, "value": 3.0 },
+ { "address": { "x": 0, "y": 0 }, "value": 1.0 },
+ { "address": { "y": 1, "x": 0 }, "value": 2.0 },
+ { "address": { "x": "1", "y": 0 }, "value": 3.0 },
{ "address": { "x": "1", "y": "1" }, "value": 4.0 }]
}
diff --git a/eval/src/tests/eval/value_cache/sparse-short1.json b/eval/src/tests/eval/value_cache/sparse-short1.json
index 741a2160898..5b6aa6d6104 100644
--- a/eval/src/tests/eval/value_cache/sparse-short1.json
+++ b/eval/src/tests/eval/value_cache/sparse-short1.json
@@ -1,5 +1,5 @@
{
- "foo": 1.0,
+ "foo": 1,
"cells": 2.0,
"values": 0.5,
"blocks": 1.5
diff --git a/eval/src/tests/eval/value_cache/sparse-short2.json b/eval/src/tests/eval/value_cache/sparse-short2.json
index 7eb377968e4..552fec39bcc 100644
--- a/eval/src/tests/eval/value_cache/sparse-short2.json
+++ b/eval/src/tests/eval/value_cache/sparse-short2.json
@@ -1,6 +1,6 @@
{
"cells": {
- "foo": 1.0,
+ "foo": 1,
"cells": 2.0,
"values": 0.5,
"blocks": 1.5
diff --git a/eval/src/tests/eval/value_cache/sparse.json b/eval/src/tests/eval/value_cache/sparse.json
index a80e7906286..f52ad888c61 100644
--- a/eval/src/tests/eval/value_cache/sparse.json
+++ b/eval/src/tests/eval/value_cache/sparse.json
@@ -2,5 +2,6 @@
"dimensions": ["x","y"],
"cells": [
{ "address": { "x": "foo", "y": "bar" }, "value": 1.0 },
+ { "address": { "x": 17, "y": 42 }, "value": 1742.0 },
{ "address": { "x": "bar", "y": "foo" }, "value": 2.0 }]
}
diff --git a/eval/src/tests/eval/value_cache/sparse.json.lz4 b/eval/src/tests/eval/value_cache/sparse.json.lz4
index 0de6fae56e1..4064222d403 100644
--- a/eval/src/tests/eval/value_cache/sparse.json.lz4
+++ b/eval/src/tests/eval/value_cache/sparse.json.lz4
Binary files differ
diff --git a/eval/src/tests/eval/value_cache/tensor_loader_test.cpp b/eval/src/tests/eval/value_cache/tensor_loader_test.cpp
index c10da861c83..ba2412d6f70 100644
--- a/eval/src/tests/eval/value_cache/tensor_loader_test.cpp
+++ b/eval/src/tests/eval/value_cache/tensor_loader_test.cpp
@@ -28,6 +28,7 @@ TensorSpec make_simple_dense_tensor() {
TensorSpec make_sparse_tensor() {
return TensorSpec("tensor(x{},y{})")
+ .add({{"x", "17"}, {"y", "42"}}, 1742.0)
.add({{"x", "foo"}, {"y", "bar"}}, 1.0)
.add({{"x", "bar"}, {"y", "foo"}}, 2.0);
}
@@ -74,6 +75,10 @@ TEST_F("require that dense tensors can be loaded", ConstantTensorLoader(factory)
TEST_DO(verify_tensor(make_dense_tensor(), f1.create(TEST_PATH("dense.json"), "tensor(x[2],y[2])")));
}
+TEST_F("require that sparse tensors can be loaded", ConstantTensorLoader(factory)) {
+ TEST_DO(verify_tensor(make_sparse_tensor(), f1.create(TEST_PATH("sparse.json"), "tensor(x{},y{})")));
+}
+
TEST_F("require that mixed tensors can be loaded", ConstantTensorLoader(factory)) {
TEST_DO(verify_tensor(make_mixed_tensor(), f1.create(TEST_PATH("mixed.json"), "tensor(x{},y[2])")));
}
diff --git a/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp b/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp
index f22e4cbae0f..74965b79bbc 100644
--- a/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp
+++ b/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp
@@ -20,6 +20,42 @@ using ObjectTraverser = slime::ObjectTraverser;
namespace {
+struct Target {
+ const ValueType tensor_type;
+ TensorSpec spec;
+ void check_add(TensorSpec::Address address, double value) {
+ for (const auto &dim : tensor_type.dimensions()) {
+ const auto & it = address.find(dim.name);
+ if (it == address.end()) {
+ LOG(error, "Missing dimension '%s' in address for constant tensor", dim.name.c_str());
+ throw std::exception();
+ }
+ if (it->second.is_mapped() != dim.is_mapped()) {
+ LOG(error, "Mismatch mapped/indexed for '%s' in address", dim.name.c_str());
+ throw std::exception();
+ }
+ if (dim.is_indexed()) {
+ if (it->second.index >= dim.size) {
+ LOG(error, "Index %zu out of range for dimension %s[%u]",
+ it->second.index, dim.name.c_str(), dim.size);
+ throw std::exception();
+ }
+ }
+ }
+ if (address.size() != tensor_type.dimensions().size()) {
+ for (const auto & [name, label] : address) {
+ if (tensor_type.dimension_index(name) == ValueType::Dimension::npos) {
+ LOG(error, "Extra dimension '%s' in address for constant tensor", name.c_str());
+ }
+ }
+ LOG(error, "Wrong number %zu of dimensions in address for constant tensor, wanted %zu",
+ address.size(), tensor_type.dimensions().size());
+ throw std::exception();
+ }
+ spec.add(address, value);
+ }
+};
+
struct AddressExtractor : ObjectTraverser {
const std::set<vespalib::string> &indexed;
TensorSpec::Address &address;
@@ -28,14 +64,38 @@ struct AddressExtractor : ObjectTraverser {
: indexed(indexed_in), address(address_out) {}
void field(const Memory &symbol, const Inspector &inspector) override {
vespalib::string dimension = symbol.make_string();
- vespalib::string label = inspector.asString().make_string();
- if (dimension.empty() || label.empty()) {
+ if (dimension.empty()) {
+ LOG(warning, "missing 'dimension' in address");
+ throw std::exception();
+ }
+ if (inspector.type().getId() == vespalib::slime::LONG::ID) {
+ size_t index = inspector.asLong();
+ if (indexed.contains(dimension)) {
+ address.emplace(dimension, TensorSpec::Label(index));
+ } else {
+ auto label = std::to_string(index);
+ address.emplace(dimension, TensorSpec::Label(label));
+ }
return;
}
+ vespalib::string label = inspector.asString().make_string();
+ if (label.empty()) {
+ auto got = inspector.toString();
+ int sz = got.size();
+ if (sz > 0) --sz;
+ LOG(error, "missing 'label' in address, got '%.*s'", sz, got.c_str());
+ throw std::exception();
+ }
if (indexed.find(dimension) == indexed.end()) {
address.emplace(dimension, TensorSpec::Label(label));
} else {
- size_t index = strtoull(label.c_str(), nullptr, 10);
+ const char *str_beg = label.c_str();
+ char *str_end = const_cast<char *>(str_beg);
+ size_t index = strtoull(str_beg, &str_end, 10);
+ if (str_end == str_beg || *str_end != '\0') {
+ LOG(error, "bad index: '%s' cannot be parsed as an unsigned integer", str_beg);
+ throw std::exception();
+ }
address.emplace(dimension, TensorSpec::Label(index));
}
}
@@ -43,41 +103,41 @@ struct AddressExtractor : ObjectTraverser {
struct SingleMappedExtractor : ObjectTraverser {
const vespalib::string &dimension;
- TensorSpec &spec;
- SingleMappedExtractor(const vespalib::string &dimension_in, TensorSpec &spec_in)
+ Target &target;
+ SingleMappedExtractor(const vespalib::string &dimension_in, Target &target_in)
: dimension(dimension_in),
- spec(spec_in)
+ target(target_in)
{}
void field(const Memory &symbol, const Inspector &inspector) override {
vespalib::string label = symbol.make_string();
double value = inspector.asDouble();
TensorSpec::Address address;
address.emplace(dimension, label);
- spec.add(address, value);
+ target.check_add(address, value);
}
};
-void decodeSingleMappedForm(const Inspector &root, const ValueType &value_type, TensorSpec &spec) {
- auto extractor = SingleMappedExtractor(value_type.dimensions()[0].name, spec);
+void decodeSingleMappedForm(const Inspector &root, const ValueType &value_type, Target &target) {
+ auto extractor = SingleMappedExtractor(value_type.dimensions()[0].name, target);
root.traverse(extractor);
}
-void decodeSingleDenseForm(const Inspector &values, const ValueType &value_type, TensorSpec &spec) {
+void decodeSingleDenseForm(const Inspector &values, const ValueType &value_type, Target &target) {
const auto &dimension = value_type.dimensions()[0].name;
for (size_t i = 0; i < values.entries(); ++i) {
TensorSpec::Address address;
address.emplace(dimension, TensorSpec::Label(i));
- spec.add(address, values[i].asDouble());
+ target.check_add(address, values[i].asDouble());
}
}
struct DenseValuesDecoder {
const std::vector<ValueType::Dimension> _idims;
- TensorSpec &_target;
+ Target &_target;
void decode(const Inspector &input, const TensorSpec::Address &address, size_t dim_idx) {
if (dim_idx == _idims.size()) {
- _target.add(address, input.asDouble());
+ _target.check_add(address, input.asDouble());
} else {
const auto &dimension = _idims[dim_idx];
if (input.entries() != dimension.size) {
@@ -92,9 +152,9 @@ struct DenseValuesDecoder {
}
};
-void decodeDenseValues(const Inspector &values, const ValueType &value_type, TensorSpec &spec) {
+void decodeDenseValues(const Inspector &values, const ValueType &value_type, Target &target) {
TensorSpec::Address address;
- DenseValuesDecoder decoder(value_type.indexed_dimensions(), spec);
+ DenseValuesDecoder decoder{value_type.indexed_dimensions(), target};
decoder.decode(values, address, 0);
}
@@ -108,12 +168,12 @@ struct TraverserCallback : ObjectTraverser {
}
};
-void decodeSingleMappedBlocks(const Inspector &blocks, const ValueType &value_type, TensorSpec &spec) {
+void decodeSingleMappedBlocks(const Inspector &blocks, const ValueType &value_type, Target &target) {
if (value_type.count_mapped_dimensions() != 1) {
return; // TODO handle mismatch
}
vespalib::string dim_name = value_type.mapped_dimensions()[0].name;
- DenseValuesDecoder decoder(value_type.indexed_dimensions(), spec);
+ DenseValuesDecoder decoder{value_type.indexed_dimensions(), target};
auto lambda = [&](vespalib::string label, const Inspector &input) {
TensorSpec::Address address;
address.emplace(dim_name, std::move(label));
@@ -123,13 +183,13 @@ void decodeSingleMappedBlocks(const Inspector &blocks, const ValueType &value_ty
blocks.traverse(cb);
}
-void decodeAddressedBlocks(const Inspector &blocks, const ValueType &value_type, TensorSpec &spec) {
+void decodeAddressedBlocks(const Inspector &blocks, const ValueType &value_type, Target &target) {
const auto & idims = value_type.indexed_dimensions();
std::set<vespalib::string> indexed;
for (const auto &dimension: idims) {
indexed.insert(dimension.name);
}
- DenseValuesDecoder decoder(value_type.indexed_dimensions(), spec);
+ DenseValuesDecoder decoder{value_type.indexed_dimensions(), target};
for (size_t i = 0; i < blocks.entries(); ++i) {
TensorSpec::Address address;
AddressExtractor extractor(indexed, address);
@@ -138,7 +198,7 @@ void decodeAddressedBlocks(const Inspector &blocks, const ValueType &value_type,
}
}
-void decodeLiteralForm(const Inspector &cells, const ValueType &value_type, TensorSpec &spec) {
+void decodeLiteralForm(const Inspector &cells, const ValueType &value_type, Target &target) {
std::set<vespalib::string> indexed;
for (const auto &dimension: value_type.dimensions()) {
if (dimension.is_indexed()) {
@@ -149,7 +209,7 @@ void decodeLiteralForm(const Inspector &cells, const ValueType &value_type, Tens
TensorSpec::Address address;
AddressExtractor extractor(indexed, address);
cells[i]["address"].traverse(extractor);
- spec.add(address, cells[i]["value"].asDouble());
+ target.check_add(address, cells[i]["value"].asDouble());
}
}
@@ -202,7 +262,7 @@ ConstantTensorLoader::create(const vespalib::string &path, const vespalib::strin
}
Slime slime;
decode_json(path, slime);
- TensorSpec spec(type);
+ Target target{value_type, TensorSpec(type)};
bool isSingleDenseType = value_type.is_dense() && (value_type.count_indexed_dimensions() == 1);
bool isSingleMappedType = value_type.is_sparse() && (value_type.count_mapped_dimensions() == 1);
const Inspector &root = slime.get();
@@ -211,31 +271,31 @@ ConstantTensorLoader::create(const vespalib::string &path, const vespalib::strin
const Inspector &values = root["values"];
const Inspector &blocks = root["blocks"];
if (cells.type().getId() == vespalib::slime::ARRAY::ID) {
- decodeLiteralForm(cells, value_type, spec);
+ decodeLiteralForm(cells, value_type, target);
}
else if (cells.type().getId() == vespalib::slime::OBJECT::ID) {
if (isSingleMappedType) {
- decodeSingleMappedForm(cells, value_type, spec);
+ decodeSingleMappedForm(cells, value_type, target);
}
}
else if (values.type().getId() == vespalib::slime::ARRAY::ID) {
- decodeDenseValues(values, value_type, spec);
+ decodeDenseValues(values, value_type, target);
}
else if (blocks.type().getId() == vespalib::slime::OBJECT::ID) {
- decodeSingleMappedBlocks(blocks, value_type, spec);
+ decodeSingleMappedBlocks(blocks, value_type, target);
}
else if (blocks.type().getId() == vespalib::slime::ARRAY::ID) {
- decodeAddressedBlocks(blocks, value_type, spec);
+ decodeAddressedBlocks(blocks, value_type, target);
}
else if (isSingleMappedType) {
- decodeSingleMappedForm(root, value_type, spec);
+ decodeSingleMappedForm(root, value_type, target);
}
}
else if (root.type().getId() == vespalib::slime::ARRAY::ID && isSingleDenseType) {
- decodeSingleDenseForm(root, value_type, spec);
+ decodeSingleDenseForm(root, value_type, target);
}
try {
- return std::make_unique<SimpleConstantValue>(value_from_spec(spec, _factory));
+ return std::make_unique<SimpleConstantValue>(value_from_spec(target.spec, _factory));
} catch (std::exception &) {
return std::make_unique<BadConstantValue>();
}