summaryrefslogtreecommitdiffstats
path: root/eval
diff options
context:
space:
mode:
author: Arne Juul <arnej@vespa.ai> 2023-12-14 08:26:50 +0000
committer: Arne Juul <arnej@vespa.ai> 2023-12-14 08:26:50 +0000
commit: 990aab34103e545f17f3e05b02a6b2f7bcdca05c (patch)
tree: ebde890e7fd9f58a129cafc36a2fa75846fd55e7 /eval
parent: 1ad5ec5fa814a92fdbf98db14121197023f434f0 (diff)
Reapply "add parsing of special strings for inf/nan cell values"
This reverts commit d976f82207c09b3215661e1d034ae9a42f28a63d.
Diffstat (limited to 'eval')
-rw-r--r-- eval/src/tests/eval/value_cache/dense-special.json | 8
-rw-r--r-- eval/src/tests/eval/value_cache/tensor_loader_test.cpp | 24
-rw-r--r-- eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp | 41
3 files changed, 67 insertions, 6 deletions
diff --git a/eval/src/tests/eval/value_cache/dense-special.json b/eval/src/tests/eval/value_cache/dense-special.json
new file mode 100644
index 00000000000..a3bee19a573
--- /dev/null
+++ b/eval/src/tests/eval/value_cache/dense-special.json
@@ -0,0 +1,8 @@
+[
+ "Infinity", "+Infinity",
+ "INF", "+INF",
+ "-Infinity", "-INF",
+ "NAN", "+NAN",
+ null,
+ "-nan", "-NAN"
+]
diff --git a/eval/src/tests/eval/value_cache/tensor_loader_test.cpp b/eval/src/tests/eval/value_cache/tensor_loader_test.cpp
index 22847a1d08e..82d3e2b0a55 100644
--- a/eval/src/tests/eval/value_cache/tensor_loader_test.cpp
+++ b/eval/src/tests/eval/value_cache/tensor_loader_test.cpp
@@ -130,4 +130,28 @@ TEST_F("require that bad lz4 file fails to load creating empty result", Constant
TEST_DO(verify_tensor(sparse_tensor_nocells(), f1.create(TEST_PATH("bad_lz4.json.lz4"), "tensor(x{},y{})")));
}
+// Expects that 'a' and 'b' have identical bit patterns. Unlike comparing with
+// operator==, this accepts two NaN values with the same encoding and tells
+// NaN apart from -NaN (they differ in the sign bit).
+void checkBitEq(double a, double b) {
+    // size_t is not guaranteed to be as wide as double; with a narrower size_t
+    // only part of each value would be copied and compared. Use an integer
+    // type whose width is verified at compile time instead.
+    static_assert(sizeof(unsigned long long) == sizeof(double),
+                  "need an integer type exactly as wide as double");
+    unsigned long long aa = 0;
+    unsigned long long bb = 0;
+    memcpy(&aa, &a, sizeof(aa));
+    memcpy(&bb, &b, sizeof(bb));
+    EXPECT_EQUAL(aa, bb);
+}
+
+// Loads the dense-special.json fixture into an 11-cell float tensor and checks
+// that every special spelling decodes to the expected cell value:
+//   cells 0..3  -> +infinity, cells 4..5 -> -infinity,
+//   cells 6..8  -> NaN (cell 8 is a JSON null), cells 9..10 -> -NaN.
+TEST_F("require that special string-encoded values work", ConstantTensorLoader(factory)) {
+    auto loaded = f1.create(TEST_PATH("dense-special.json"), "tensor<float>(z[11])");
+    auto cells = loaded->value().cells().template typify<float>();
+    const float pos_inf = std::numeric_limits<float>::infinity();
+    const float pos_nan = std::numeric_limits<float>::quiet_NaN();
+    for (size_t i = 0; i < 4; ++i) {
+        EXPECT_EQUAL(pos_inf, cells[i]);
+    }
+    for (size_t i = 4; i < 6; ++i) {
+        EXPECT_EQUAL(-pos_inf, cells[i]);
+    }
+    // NaN never compares equal to itself, so NaN cells are compared bitwise.
+    for (size_t i = 6; i < 9; ++i) {
+        TEST_DO(checkBitEq(pos_nan, cells[i]));
+    }
+    for (size_t i = 9; i < 11; ++i) {
+        TEST_DO(checkBitEq(-pos_nan, cells[i]));
+    }
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp b/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp
index 189f7aa14ce..7cdc55fc37f 100644
--- a/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp
+++ b/eval/src/vespa/eval/eval/value_cache/constant_tensor_loader.cpp
@@ -3,10 +3,11 @@
#include "constant_tensor_loader.h"
#include <vespa/eval/eval/tensor_spec.h>
#include <vespa/eval/eval/value_codec.h>
-#include <vespa/vespalib/objects/nbostream.h>
-#include <vespa/vespalib/io/mapped_file_input.h>
#include <vespa/vespalib/data/lz4_input_decoder.h>
#include <vespa/vespalib/data/slime/slime.h>
+#include <vespa/vespalib/io/mapped_file_input.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/text/lowercase.h>
#include <vespa/vespalib/util/size_literals.h>
#include <set>
@@ -20,6 +21,34 @@ using ObjectTraverser = slime::ObjectTraverser;
namespace {
+/**
+ * Decode a single cell value from its slime node.
+ *
+ * - String nodes are matched case-insensitively against the special spellings
+ *   "inf"/"infinity" and "nan", each with an optional leading '+' or a
+ *   leading '-', and yield the corresponding (signed) infinity or quiet NaN.
+ *   Any other string is reported with a warning and then read through
+ *   asDouble() like a regular node.
+ * - A valid nix node (e.g. a JSON null) yields quiet NaN.
+ * - Everything else is whatever asDouble() returns for the node.
+ **/
+double decodeDouble(const Inspector &inspector) {
+    using Limits = std::numeric_limits<double>;
+    const auto type_id = inspector.type().getId();
+    if (type_id != vespalib::slime::STRING::ID) {
+        // valid() is checked as well, so only a node that is actually present maps to NaN
+        const bool explicit_nix = inspector.valid() && (type_id == vespalib::slime::NIX::ID);
+        return explicit_nix ? Limits::quiet_NaN() : inspector.asDouble();
+    }
+    const auto text = inspector.asString().make_stringref();
+    const auto folded = vespalib::LowerCase::convert(text);
+    if (folded == "inf" || folded == "+inf" || folded == "infinity" || folded == "+infinity") {
+        return Limits::infinity();
+    }
+    if (folded == "-inf" || folded == "-infinity") {
+        return -Limits::infinity();
+    }
+    if (folded == "nan" || folded == "+nan") {
+        return Limits::quiet_NaN();
+    }
+    if (folded == "-nan") {
+        return -Limits::quiet_NaN();
+    }
+    // Unrecognized string: warn with the original (not lowercased) text, then
+    // fall back to the generic numeric conversion of the node.
+    LOG(warning, "bad string-encoded numeric value '%.*s'", static_cast<int>(text.size()), text.data());
+    return inspector.asDouble();
+}
+
struct Target {
const ValueType tensor_type;
TensorSpec spec;
@@ -110,7 +139,7 @@ struct SingleMappedExtractor : ObjectTraverser {
{}
void field(const Memory &symbol, const Inspector &inspector) override {
vespalib::string label = symbol.make_string();
- double value = inspector.asDouble();
+ double value = decodeDouble(inspector);
TensorSpec::Address address;
address.emplace(dimension, label);
target.check_add(address, value);
@@ -128,7 +157,7 @@ void decodeSingleDenseForm(const Inspector &values, const ValueType &value_type,
for (size_t i = 0; i < values.entries(); ++i) {
TensorSpec::Address address;
address.emplace(dimension, TensorSpec::Label(i));
- target.check_add(address, values[i].asDouble());
+ target.check_add(address, decodeDouble(values[i]));
}
}
@@ -137,7 +166,7 @@ struct DenseValuesDecoder {
Target &_target;
void decode(const Inspector &input, const TensorSpec::Address &address, size_t dim_idx) {
if (dim_idx == _idims.size()) {
- _target.check_add(address, input.asDouble());
+ _target.check_add(address, decodeDouble(input));
} else {
const auto &dimension = _idims[dim_idx];
if (input.entries() != dimension.size) {
@@ -209,7 +238,7 @@ void decodeLiteralForm(const Inspector &cells, const ValueType &value_type, Targ
TensorSpec::Address address;
AddressExtractor extractor(indexed, address);
cells[i]["address"].traverse(extractor);
- target.check_add(address, cells[i]["value"].asDouble());
+ target.check_add(address, decodeDouble(cells[i]["value"]));
}
}