1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "constant_tensor_loader.h"
#include <vespa/eval/eval/tensor_spec.h>
#include <vespa/eval/eval/value_codec.h>
#include <vespa/vespalib/objects/nbostream.h>
#include <vespa/vespalib/io/mapped_file_input.h>
#include <vespa/vespalib/data/lz4_input_decoder.h>
#include <vespa/vespalib/data/slime/slime.h>
#include <vespa/vespalib/util/size_literals.h>
#include <set>
#include <vespa/log/log.h>
LOG_SETUP(".vespalib.eval.value_cache.constant_tensor_loader");
namespace vespalib::eval {
using Inspector = slime::Inspector;
using ObjectTraverser = slime::ObjectTraverser;
namespace {
struct AddressExtractor : ObjectTraverser {
const std::set<vespalib::string> &indexed;
TensorSpec::Address &address;
AddressExtractor(const std::set<vespalib::string> &indexed_in,
TensorSpec::Address &address_out)
: indexed(indexed_in), address(address_out) {}
void field(const Memory &symbol, const Inspector &inspector) override {
vespalib::string dimension = symbol.make_string();
vespalib::string label = inspector.asString().make_string();
if (dimension.empty() || label.empty()) {
return;
}
if (indexed.find(dimension) == indexed.end()) {
address.emplace(dimension, TensorSpec::Label(label));
} else {
size_t index = strtoull(label.c_str(), nullptr, 10);
address.emplace(dimension, TensorSpec::Label(index));
}
}
};
// Decode json from 'input' into 'slime'. A decode result of 0 is treated
// as invalid json and reported with a warning naming 'path'; 'path' is
// only used for that log message.
void decode_json(const vespalib::string &path, Input &input, Slime &slime) {
    const auto decode_result = slime::JsonFormat::decode(input, slime);
    if (decode_result == 0) {
        LOG(warning, "file contains invalid json: %s", path.c_str());
    }
}
// Decode the json file at 'path' into 'slime'. Files whose name ends
// with ".lz4" are passed through an lz4 decoder first. Problems are
// reported as warnings only; nothing is returned to the caller.
void decode_json(const vespalib::string &path, Slime &slime) {
    MappedFileInput mapped(path);
    if (!mapped.valid()) {
        LOG(warning, "could not read file: %s", path.c_str());
        return;
    }
    if (!ends_with(path, ".lz4")) {
        // plain json: decode directly from the mapped file
        decode_json(path, mapped, slime);
        return;
    }
    // compressed json: decode through the lz4 decoder using a 64_Ki buffer
    Lz4InputDecoder decompressor(mapped, 64_Ki);
    decode_json(path, decompressor, slime);
    if (decompressor.failed()) {
        LOG(warning, "file contains lz4 errors (%s): %s",
            decompressor.reason().c_str(), path.c_str());
    }
}
} // namespace vespalib::eval::<unnamed>
// Destructor defined out-of-line here with the compiler-generated default behavior.
ConstantTensorLoader::~ConstantTensorLoader() = default;
// Create a constant value of the given 'type' from the file at 'path'.
//
// - An invalid type specification gives a BadConstantValue.
// - A path ending in ".tbf" is decoded as a binary value with
//   decode_value; an unreadable file or a decode failure gives a
//   BadConstantValue.
// - Any other path is decoded as json (optionally lz4 compressed). Each
//   entry of the top-level "cells" array contributes one cell, using its
//   "address" object and its "value" number. Problems while reading or
//   parsing the json are only logged by decode_json; whatever cells were
//   decoded are still used. A failure to build the value from the
//   resulting spec gives a BadConstantValue.
//
// Every failure path logs a warning so that a bad constant can be traced
// back to its file.
ConstantValue::UP
ConstantTensorLoader::create(const vespalib::string &path, const vespalib::string &type) const
{
    ValueType value_type = ValueType::from_spec(type);
    if (value_type.is_error()) {
        LOG(warning, "invalid type specification: %s", type.c_str());
        return std::make_unique<BadConstantValue>();
    }
    if (ends_with(path, ".tbf")) {
        vespalib::MappedFileInput file(path);
        if (!file.valid()) {
            // report unreadable files the same way the json path does
            LOG(warning, "could not read file: %s", path.c_str());
            return std::make_unique<BadConstantValue>();
        }
        vespalib::Memory content = file.get();
        vespalib::nbostream stream(content.data, content.size);
        try {
            return std::make_unique<SimpleConstantValue>(decode_value(stream, _factory));
        } catch (const std::exception &e) {
            LOG(warning, "could not decode binary tensor (%s): %s", e.what(), path.c_str());
            return std::make_unique<BadConstantValue>();
        }
    }
    Slime slime;
    decode_json(path, slime);
    // collect the indexed dimensions; their labels are parsed as numbers
    std::set<vespalib::string> indexed;
    for (const auto &dimension: value_type.dimensions()) {
        if (dimension.is_indexed()) {
            indexed.insert(dimension.name);
        }
    }
    TensorSpec spec(type);
    const Inspector &cells = slime.get()["cells"];
    for (size_t i = 0; i < cells.entries(); ++i) {
        TensorSpec::Address address;
        AddressExtractor extractor(indexed, address);
        cells[i]["address"].traverse(extractor);
        spec.add(address, cells[i]["value"].asDouble());
    }
    try {
        return std::make_unique<SimpleConstantValue>(value_from_spec(spec, _factory));
    } catch (const std::exception &e) {
        LOG(warning, "could not create tensor from json (%s): %s", e.what(), path.c_str());
        return std::make_unique<BadConstantValue>();
    }
}
}
|