From f9fff4feb28350dafc400daaf6049ea7d1527f47 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Tue, 8 Oct 2019 15:09:40 +0200 Subject: Single sparse dimension short form --- .../yahoo/document/json/readers/TensorReader.java | 49 ++++++++++++++++------ .../yahoo/document/json/JsonReaderTestCase.java | 31 ++++++++++++++ .../com/yahoo/tensor/serialization/JsonFormat.java | 44 ++++++++++++++----- .../tensor/serialization/JsonFormatTestCase.java | 33 +++++++++++++++ 4 files changed, 134 insertions(+), 23 deletions(-) diff --git a/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java b/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java index 7b5fcfed0db..497c717a6ad 100644 --- a/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java +++ b/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.document.json.readers; +import com.fasterxml.jackson.core.JsonToken; import com.yahoo.document.datatypes.TensorFieldValue; import com.yahoo.document.json.TokenBuffer; import com.yahoo.tensor.IndexedTensor; @@ -47,10 +48,19 @@ public class TensorReader { } static void readTensorCells(TokenBuffer buffer, Tensor.Builder builder) { - expectArrayStart(buffer.currentToken()); - int initNesting = buffer.nesting(); - for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) - readTensorCell(buffer, builder); + if (buffer.currentToken() == JsonToken.START_ARRAY) { + int initNesting = buffer.nesting(); + for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) + readTensorCell(buffer, builder); + } + else if (buffer.currentToken() == JsonToken.START_OBJECT) { // single dimension short form + int initNesting = buffer.nesting(); + for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) + builder.cell(asAddress(buffer.currentName(), builder.type()), readDouble(buffer)); + } + else { + throw new IllegalArgumentException("Expected 'cells' to contain an array or an object, but got " + buffer.currentToken()); + } expectCompositeEnd(buffer.currentToken()); } @@ -80,8 +90,6 @@ public class TensorReader { if ( ! (builder instanceof IndexedTensor.BoundBuilder)) throw new IllegalArgumentException("The 'values' field can only be used with dense tensors. " + "Use 'cells' or 'blocks' instead"); - expectArrayStart(buffer.currentToken()); - IndexedTensor.BoundBuilder indexedBuilder = (IndexedTensor.BoundBuilder)builder; int index = 0; int initNesting = buffer.nesting(); @@ -94,12 +102,23 @@ public class TensorReader { if ( ! (builder instanceof MixedTensor.BoundBuilder)) throw new IllegalArgumentException("The 'blocks' field can only be used with mixed tensors with bound dimensions. 
" + "Use 'cells' or 'values' instead"); - expectArrayStart(buffer.currentToken()); MixedTensor.BoundBuilder mixedBuilder = (MixedTensor.BoundBuilder) builder; - int initNesting = buffer.nesting(); - for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) - readTensorBlock(buffer, mixedBuilder); + if (buffer.currentToken() == JsonToken.START_ARRAY) { + int initNesting = buffer.nesting(); + for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) + readTensorBlock(buffer, mixedBuilder); + } + else if (buffer.currentToken() == JsonToken.START_OBJECT) { + int initNesting = buffer.nesting(); + for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) + mixedBuilder.block(asAddress(buffer.currentName(), builder.type().mappedSubtype()), + readValues(buffer, (int)mixedBuilder.denseSubspaceSize())); + } + else { + throw new IllegalArgumentException("Expected 'blocks' to contain an array or an object, but got " + buffer.currentToken()); + } + expectCompositeEnd(buffer.currentToken()); } @@ -127,8 +146,8 @@ public class TensorReader { private static TensorAddress readAddress(TokenBuffer buffer, TensorType type) { expectObjectStart(buffer.currentToken()); - int initNesting = buffer.nesting(); TensorAddress.Builder builder = new TensorAddress.Builder(type); + int initNesting = buffer.nesting(); for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) builder.add(buffer.currentName(), buffer.currentText()); expectObjectEnd(buffer.currentToken()); @@ -149,11 +168,17 @@ public class TensorReader { private static double readDouble(TokenBuffer buffer) { try { - return Double.valueOf(buffer.currentText()); + return Double.parseDouble(buffer.currentText()); } catch (NumberFormatException e) { throw new IllegalArgumentException("Expected a number but got '" + buffer.currentText()); } } + private static TensorAddress asAddress(String label, TensorType type) { + if (type.dimensions().size() != 1) + throw new IllegalArgumentException("Expected a 
tensor with a single dimension but got " + type); + return new TensorAddress.Builder(type).add(type.dimensions().get(0).name(), label).build(); + } + } diff --git a/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java b/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java index 91998dedbb8..2af740147ed 100644 --- a/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java +++ b/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java @@ -153,6 +153,8 @@ public class JsonReaderTestCase { } { DocumentType x = new DocumentType("testtensor"); + x.addField(new Field("sparse_single_dimension_tensor", + new TensorDataType(new TensorType.Builder().mapped("x").build()))); x.addField(new Field("sparse_tensor", new TensorDataType(new TensorType.Builder().mapped("x").mapped("y").build()))); x.addField(new Field("dense_tensor", @@ -1334,6 +1336,26 @@ public class JsonReaderTestCase { assertTrue(tensor instanceof MixedTensor); // this matters for performance } + @Test + public void testMixedTensorInMixedFormWithSingleSparseDimensionShortForm() { + Tensor.Builder builder = Tensor.Builder.of(TensorType.fromSpec("tensor(x{},y[3])")); + builder.cell().label("x", 0).label("y", 0).value(2.0); + builder.cell().label("x", 0).label("y", 1).value(3.0); + builder.cell().label("x", 0).label("y", 2).value(4.0); + builder.cell().label("x", 1).label("y", 0).value(5.0); + builder.cell().label("x", 1).label("y", 1).value(6.0); + builder.cell().label("x", 1).label("y", 2).value(7.0); + Tensor expected = builder.build(); + + String mixedJson = "{\"blocks\":{" + + "\"0\":[2.0,3.0,4.0]," + + "\"1\":[5.0,6.0,7.0]" + + "}}"; + Tensor tensor = assertTensorField(expected, + createPutWithTensor(inputJson(mixedJson), "mixed_tensor"), "mixed_tensor"); + assertTrue(tensor instanceof MixedTensor); // this matters for performance + } + @Test public void testParsingOfTensorWithSingleCellInDifferentJsonOrder() { 
assertSparseTensorField("{{x:a,y:b}:2.0}", @@ -1538,6 +1560,15 @@ public class JsonReaderTestCase { " { 'address': { 'x': 'c', 'y': 'd' }, 'value': 3.0 } ]}")); } + @Test + public void tensor_add_update_on_sparse_tensor_with_single_dimension_short_form() { + assertTensorAddUpdate("{{x:a}:2.0, {x:c}: 3.0}", "sparse_single_dimension_tensor", + inputJson("{", + " 'cells': {", + " 'a': 2.0,", + " 'c': 3.0 }}")); + } + @Test public void tensor_add_update_on_mixed_tensor() { assertTensorAddUpdate("{{x:a,y:0}:2.0, {x:a,y:1}:3.0, {x:a,y:2}:0.0}", "mixed_tensor", diff --git a/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java b/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java index 75690e45e15..e1b38264661 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java @@ -76,9 +76,12 @@ public class JsonFormat { } private static void decodeCells(Inspector cells, Tensor.Builder builder) { - if ( cells.type() != Type.ARRAY) - throw new IllegalArgumentException("Excepted 'cells' to contain an array, not " + cells.type()); - cells.traverse((ArrayTraverser) (__, cell) -> decodeCell(cell, builder)); + if ( cells.type() == Type.ARRAY) + cells.traverse((ArrayTraverser) (__, cell) -> decodeCell(cell, builder)); + else if (cells.type() == Type.OBJECT) + cells.traverse((ObjectTraverser) (key, value) -> decodeSingleDimensionCell(key, value, builder)); + else + throw new IllegalArgumentException("Expected 'cells' to contain an array or object, not " + cells.type()); } private static void decodeCell(Inspector cell, Tensor.Builder builder) { @@ -91,6 +94,10 @@ public class JsonFormat { builder.cell(address, value.asDouble()); } + private static void decodeSingleDimensionCell(String key, Inspector value, Tensor.Builder builder) { + builder.cell(asAddress(key, builder.type()), decodeNumeric(value)); + } + private static void decodeValues(Inspector 
values, Tensor.Builder builder) { if ( ! (builder instanceof IndexedTensor.BoundBuilder)) throw new IllegalArgumentException("The 'values' field can only be used with dense tensors. " + @@ -111,27 +118,36 @@ public class JsonFormat { if ( ! (builder instanceof MixedTensor.BoundBuilder)) throw new IllegalArgumentException("The 'blocks' field can only be used with mixed tensors with bound dimensions. " + "Use 'cells' or 'values' instead"); - if (values.type() != Type.ARRAY) - throw new IllegalArgumentException("Excepted 'blocks' to contain an array, not " + values.type()); - MixedTensor.BoundBuilder mixedBuilder = (MixedTensor.BoundBuilder) builder; - values.traverse((ArrayTraverser) (__, value) -> decodeBlock(value, mixedBuilder)); + if (values.type() == Type.ARRAY) + values.traverse((ArrayTraverser) (__, value) -> decodeBlock(value, mixedBuilder)); + else if (values.type() == Type.OBJECT) + values.traverse((ObjectTraverser) (key, value) -> decodeSingleDimensionBlock(key, value, mixedBuilder)); + else + throw new IllegalArgumentException("Expected 'blocks' to contain an array or object, not " + values.type()); } private static void decodeBlock(Inspector block, MixedTensor.BoundBuilder mixedBuilder) { if (block.type() != Type.OBJECT) throw new IllegalArgumentException("Expected an item in a 'blocks' array to be an object, not " + block.type()); + mixedBuilder.block(decodeAddress(block.field("address"), mixedBuilder.type().mappedSubtype()), + decodeValues(block.field("values"), mixedBuilder)); + } - TensorAddress mappedAddress = decodeAddress(block.field("address"), mixedBuilder.type().mappedSubtype()); + private static void decodeSingleDimensionBlock(String key, Inspector value, MixedTensor.BoundBuilder mixedBuilder) { + if (value.type() != Type.ARRAY) + throw new IllegalArgumentException("Expected an item in a 'blocks' object to be an array, not " + value.type()); + mixedBuilder.block(asAddress(key, mixedBuilder.type().mappedSubtype()), + decodeValues(value, 
mixedBuilder)); + } - Inspector valuesField = block.field("values"); + private static double[] decodeValues(Inspector valuesField, MixedTensor.BoundBuilder mixedBuilder) { if (valuesField.type() != Type.ARRAY) throw new IllegalArgumentException("Expected a block to contain a 'values' array"); double[] values = new double[(int)mixedBuilder.denseSubspaceSize()]; valuesField.traverse((ArrayTraverser) (index, value) -> values[index] = decodeNumeric(value)); - - mixedBuilder.block(mappedAddress, values); + return values; } private static TensorAddress decodeAddress(Inspector addressField, TensorType type) { @@ -142,6 +158,12 @@ public class JsonFormat { return builder.build(); } + private static TensorAddress asAddress(String label, TensorType type) { + if (type.dimensions().size() != 1) + throw new IllegalArgumentException("Expected a tensor with a single dimension but got " + type); + return new TensorAddress.Builder(type).add(type.dimensions().get(0).name(), label).build(); + } + private static double decodeNumeric(Inspector numericField) { if (numericField.type() != Type.LONG && numericField.type() != Type.DOUBLE) throw new IllegalArgumentException("Excepted a number, not " + numericField.type()); diff --git a/vespajlib/src/test/java/com/yahoo/tensor/serialization/JsonFormatTestCase.java b/vespajlib/src/test/java/com/yahoo/tensor/serialization/JsonFormatTestCase.java index 16f92289504..81de8a9db4c 100644 --- a/vespajlib/src/test/java/com/yahoo/tensor/serialization/JsonFormatTestCase.java +++ b/vespajlib/src/test/java/com/yahoo/tensor/serialization/JsonFormatTestCase.java @@ -31,6 +31,21 @@ public class JsonFormatTestCase { assertEquals(tensor, decoded); } + @Test + public void testSingleSparseDimensionShortForm() { + Tensor.Builder builder = Tensor.Builder.of(TensorType.fromSpec("tensor(x{})")); + builder.cell().label("x", "a").value(2.0); + builder.cell().label("x", "c").value(3.0); + Tensor expected = builder.build(); + + String json= "{\"cells\":{" + + 
"\"a\":2.0," + + "\"c\":3.0" + + "}}"; + Tensor decoded = JsonFormat.decode(expected.type(), json.getBytes(StandardCharsets.UTF_8)); + assertEquals(expected, decoded); + } + @Test public void testDenseTensor() { Tensor.Builder builder = Tensor.Builder.of(TensorType.fromSpec("tensor(x[2],y[2])")); @@ -84,6 +99,24 @@ public class JsonFormatTestCase { assertEquals(expected, decoded); } + @Test + public void testMixedTensorInMixedFormWithSingleSparseDimensionShortForm() { + Tensor.Builder builder = Tensor.Builder.of(TensorType.fromSpec("tensor(x{},y[3])")); + builder.cell().label("x", 0).label("y", 0).value(2.0); + builder.cell().label("x", 0).label("y", 1).value(3.0); + builder.cell().label("x", 0).label("y", 2).value(4.0); + builder.cell().label("x", 1).label("y", 0).value(5.0); + builder.cell().label("x", 1).label("y", 1).value(6.0); + builder.cell().label("x", 1).label("y", 2).value(7.0); + Tensor expected = builder.build(); + String mixedJson = "{\"blocks\":{" + + "\"0\":[2.0,3.0,4.0]," + + "\"1\":[5.0,6.0,7.0]" + + "}}"; + Tensor decoded = JsonFormat.decode(expected.type(), mixedJson.getBytes(StandardCharsets.UTF_8)); + assertEquals(expected, decoded); + } + @Test public void testTooManyCells() { TensorType x2 = TensorType.fromSpec("tensor(x[2])"); -- cgit v1.2.3