From ac28a2c925e90d0b1c651d8019e113ae4aa5cad9 Mon Sep 17 00:00:00 2001 From: Lester Solbakken Date: Tue, 28 Sep 2021 14:39:17 +0200 Subject: Use values/cells/blocks format for short form --- .../handler/ModelsEvaluationHandlerTest.java | 12 +-- .../com/yahoo/tensor/serialization/JsonFormat.java | 85 +++++++++++++++------- .../tensor/serialization/JsonFormatTestCase.java | 53 ++++++++------ 3 files changed, 94 insertions(+), 56 deletions(-) diff --git a/model-evaluation/src/test/java/ai/vespa/models/handler/ModelsEvaluationHandlerTest.java b/model-evaluation/src/test/java/ai/vespa/models/handler/ModelsEvaluationHandlerTest.java index 3a900b0e815..7029be24a60 100644 --- a/model-evaluation/src/test/java/ai/vespa/models/handler/ModelsEvaluationHandlerTest.java +++ b/model-evaluation/src/test/java/ai/vespa/models/handler/ModelsEvaluationHandlerTest.java @@ -188,7 +188,7 @@ public class ModelsEvaluationHandlerTest { properties.put("Placeholder", inputTensorShortForm()); properties.put("format", "short"); String url = "http://localhost/model-evaluation/v1/mnist_softmax/default.add/eval"; - String expected = "{\"type\":\"tensor(d0[],d1[10])\",\"value\":[[-0.3546536862850189,0.3759574592113495,0.06054411828517914,-0.251544713973999,0.017951013520359993,1.2899067401885986,-0.10389615595340729,0.6367976665496826,-1.4136744737625122,-0.2573896050453186]]}"; + String expected = "{\"type\":\"tensor(d0[],d1[10])\",\"values\":[[-0.3546536862850189,0.3759574592113495,0.06054411828517914,-0.251544713973999,0.017951013520359993,1.2899067401885986,-0.10389615595340729,0.6367976665496826,-1.4136744737625122,-0.2573896050453186]]}"; handler.assertResponse(url, properties, 200, expected); } @@ -219,19 +219,19 @@ public class ModelsEvaluationHandlerTest { properties.put("format", "short"); String url = "http://localhost/model-evaluation/v1/vespa_model/"; handler.assertResponse(url + "test_mapped/eval", properties, 200, - "{\"type\":\"tensor(d0{})\",\"value\":{\"a\":1.0,\"b\":2.0}}"); + "{\"type\":\"tensor(d0{})\",\"cells\":{\"a\":1.0,\"b\":2.0}}"); handler.assertResponse(url + "test_indexed/eval", properties, 200, - "{\"type\":\"tensor(d0[2],d1[3])\",\"value\":[[1.0,2.0,3.0],[4.0,5.0,6.0]]}"); + "{\"type\":\"tensor(d0[2],d1[3])\",\"values\":[[1.0,2.0,3.0],[4.0,5.0,6.0]]}"); handler.assertResponse(url + "test_mixed/eval", properties, 200, - "{\"type\":\"tensor(x{},y[3])\",\"value\":{\"a\":[1.0,2.0,3.0],\"b\":[4.0,5.0,6.0]}}"); + "{\"type\":\"tensor(x{},y[3])\",\"blocks\":{\"a\":[1.0,2.0,3.0],\"b\":[4.0,5.0,6.0]}}"); handler.assertResponse(url + "test_mixed_2/eval", properties, 200, - "{\"type\":\"tensor(a[2],b[2],c{},d[2])\",\"value\":{\"a\":[[[1.0,2.0],[3.0,4.0]],[[5.0,6.0],[7.0,8.0]]],\"b\":[[[1.0,2.0],[3.0,4.0]],[[5.0,6.0],[7.0,8.0]]]}}"); + "{\"type\":\"tensor(a[2],b[2],c{},d[2])\",\"blocks\":{\"a\":[[[1.0,2.0],[3.0,4.0]],[[5.0,6.0],[7.0,8.0]]],\"b\":[[[1.0,2.0],[3.0,4.0]],[[5.0,6.0],[7.0,8.0]]]}}"); } @Test public void testVespaModelLiteralOutput() { Map properties = new HashMap<>(); - properties.put("format", "literal"); + properties.put("format", "string"); String url = "http://localhost/model-evaluation/v1/vespa_model/"; handler.assertResponse(url + "test_mapped/eval", properties, 200, "tensor(d0{}):{a:1.0,b:2.0}"); diff --git a/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java b/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java index bebd706f815..87157495485 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java @@ -16,9 +16,11 @@ import com.yahoo.tensor.MixedTensor; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorAddress; import com.yahoo.tensor.TensorType; +import com.yahoo.tensor.evaluation.Name; import com.yahoo.tensor.functions.ConstantTensor; import com.yahoo.tensor.functions.Slice; +import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -61,23 +63,23 @@ public class JsonFormat { // Encode as nested lists if indexed tensor if (tensor instanceof IndexedTensor) { IndexedTensor denseTensor = (IndexedTensor) tensor; - encodeList(denseTensor, root.setArray("value"), new long[denseTensor.dimensionSizes().dimensions()], 0); + encodeValues(denseTensor, root.setArray("values"), new long[denseTensor.dimensionSizes().dimensions()], 0); } // Short form for a single mapped dimension else if (tensor instanceof MappedTensor && tensor.type().dimensions().size() == 1) { - encodeMap((MappedTensor) tensor, root.setObject("value")); + encodeSingleDimensionCells((MappedTensor) tensor, root); } - // Short form for a mixed tensor with a single mapped dimension + // Short form for a mixed tensor else if (tensor instanceof MixedTensor && - tensor.type().dimensions().stream().filter(TensorType.Dimension::isMapped).count() == 1) { - encodeMapBlocks((MixedTensor) tensor, root.setObject("value")); + tensor.type().dimensions().stream().filter(TensorType.Dimension::isMapped).count() >= 1) { + encodeBlocks((MixedTensor) tensor, root); } // No other short forms exist: default to standard cell address output else { - encodeCells(tensor, root.setObject("value")); + encodeCells(tensor, root); } return com.yahoo.slime.JsonFormat.toJsonBytes(slime); @@ -93,47 +95,76 @@ public class JsonFormat { } } + private static void encodeSingleDimensionCells(MappedTensor tensor, Cursor cursor) { + Cursor cells = cursor.setObject("cells"); + if (tensor.type().dimensions().size() > 1) + throw new IllegalStateException("JSON encode of mapped tensor can only contain a single dimension"); + tensor.cells().forEach((k,v) -> cells.setDouble(k.label(0), v)); + } + private static void encodeAddress(TensorType type, TensorAddress address, Cursor addressObject) { for (int i = 0; i < address.size(); i++) addressObject.setString(type.dimensions().get(i).name(), address.label(i)); } - private static void encodeList(IndexedTensor tensor, Cursor cursor, long[] indexes, int dimension) { + private static void encodeValues(IndexedTensor tensor, Cursor cursor, long[] indexes, int dimension) { DimensionSizes sizes = tensor.dimensionSizes(); for (indexes[dimension] = 0; indexes[dimension] < sizes.size(dimension); ++indexes[dimension]) { if (dimension < (sizes.dimensions() - 1)) { - encodeList(tensor, cursor.addArray(), indexes, dimension + 1); + encodeValues(tensor, cursor.addArray(), indexes, dimension + 1); } else { cursor.addDouble(tensor.get(indexes)); } } } - private static void encodeMap(MappedTensor tensor, Cursor cursor) { - if (tensor.type().dimensions().size() > 1) - throw new IllegalStateException("JSON encode of mapped tensor can only contain a single dimension"); - tensor.cells().forEach((k,v) -> cursor.setDouble(k.label(0), v)); - } - - private static void encodeMapBlocks(MixedTensor tensor, Cursor cursor) { - var mappedDimensions = tensor.type().dimensions().stream().filter(d -> !d.isIndexed()).collect(Collectors.toList()); - if (mappedDimensions.size() != 1) { + private static void encodeBlocks(MixedTensor tensor, Cursor cursor) { + var mappedDimensions = tensor.type().dimensions().stream().filter(d -> d.isMapped()) + .map(d -> TensorType.Dimension.mapped(d.name())).collect(Collectors.toList()); + if (mappedDimensions.size() < 1) { throw new IllegalArgumentException("Should be ensured by caller"); } - String mappedDimensionName = mappedDimensions.get(0).name(); - int mappedDimensionIndex = tensor.type().indexOfDimension(mappedDimensionName). - orElseThrow(() -> new IllegalStateException("Could not find mapped dimension index")); + cursor = (mappedDimensions.size() == 1) ? cursor.setObject("blocks") : cursor.setArray("blocks"); - // Find all unique indices for the mapped dimension - Set mappedIndices = new HashSet<>(); - tensor.cellIterator().forEachRemaining((cell) -> mappedIndices.add(cell.getKey().label(mappedDimensionIndex))); + // Create tensor type for mapped dimensions subtype + TensorType mappedSubType = new TensorType.Builder(mappedDimensions).build(); + + // Find all unique indices for the mapped dimensions + Set denseSubSpaceAddresses = new HashSet<>(); + tensor.cellIterator().forEachRemaining((cell) -> { + denseSubSpaceAddresses.add(subAddress(cell.getKey(), mappedSubType, tensor.type())); + }); // Slice out dense subspace of each and encode dense subspace as a list - for (String mappedIndex : mappedIndices) { - IndexedTensor denseSubspace = (IndexedTensor) new Slice<>(new ConstantTensor<>(tensor), - List.of(new Slice.DimensionValue<>(mappedDimensionName, mappedIndex))).evaluate(); - encodeList(denseSubspace, cursor.setArray(mappedIndex), new long[denseSubspace.dimensionSizes().dimensions()], 0); + for (TensorAddress denseSubSpaceAddress : denseSubSpaceAddresses) { + IndexedTensor denseSubspace = (IndexedTensor) sliceSubAddress(tensor, denseSubSpaceAddress, mappedSubType); + + if (mappedDimensions.size() == 1) { + encodeValues(denseSubspace, cursor.setArray(denseSubSpaceAddress.label(0)), new long[denseSubspace.dimensionSizes().dimensions()], 0); + } else { + Cursor block = cursor.addObject(); + encodeAddress(mappedSubType, denseSubSpaceAddress, block.setObject("address")); + encodeValues(denseSubspace, block.setArray("values"), new long[denseSubspace.dimensionSizes().dimensions()], 0); + } + + } + } + + private static TensorAddress subAddress(TensorAddress address, TensorType subType, TensorType origType) { + TensorAddress.Builder builder = new TensorAddress.Builder(subType); + for (TensorType.Dimension dim : subType.dimensions()) { + builder.add(dim.name(), address.label(origType.indexOfDimension(dim.name()). + orElseThrow(() -> new IllegalStateException("Could not find mapped dimension index")))); + } + return builder.build(); + } + + private static Tensor sliceSubAddress(Tensor tensor, TensorAddress subAddress, TensorType subType) { + List> sliceDims = new ArrayList<>(subAddress.size()); + for (int i = 0; i < subAddress.size(); ++i) { + sliceDims.add(new Slice.DimensionValue<>(subType.dimensions().get(i).name(), subAddress.label(i))); } + return new Slice<>(new ConstantTensor<>(tensor), sliceDims).evaluate(); } /** Deserializes the given tensor from JSON format */ diff --git a/vespajlib/src/test/java/com/yahoo/tensor/serialization/JsonFormatTestCase.java b/vespajlib/src/test/java/com/yahoo/tensor/serialization/JsonFormatTestCase.java index 15017dc95ca..cdfd19eb5c8 100644 --- a/vespajlib/src/test/java/com/yahoo/tensor/serialization/JsonFormatTestCase.java +++ b/vespajlib/src/test/java/com/yahoo/tensor/serialization/JsonFormatTestCase.java @@ -98,55 +98,62 @@ public class JsonFormatTestCase { } @Test - public void testDenseTensorShortForm() { + public void testEncodeIndexedShortForm() { assertEncodeShortForm("tensor(x[]):[1.0, 2.0]", - "{\"type\":\"tensor(x[])\",\"value\":[1.0,2.0]}"); + "{\"type\":\"tensor(x[])\",\"values\":[1.0,2.0]}"); assertEncodeShortForm("tensor(x[]):[1.0, 2.0]", - "{\"type\":\"tensor(x[])\",\"value\":[1.0,2.0]}"); + "{\"type\":\"tensor(x[])\",\"values\":[1.0,2.0]}"); assertEncodeShortForm("tensor(x[],y[]):[[1,2,3,4]]", - "{\"type\":\"tensor(x[],y[])\",\"value\":[[1.0,2.0,3.0,4.0]]}"); + "{\"type\":\"tensor(x[],y[])\",\"values\":[[1.0,2.0,3.0,4.0]]}"); assertEncodeShortForm("tensor(x[],y[]):[[1,2],[3,4]]", - "{\"type\":\"tensor(x[],y[])\",\"value\":[[1.0,2.0],[3.0,4.0]]}"); + "{\"type\":\"tensor(x[],y[])\",\"values\":[[1.0,2.0],[3.0,4.0]]}"); assertEncodeShortForm("tensor(x[],y[]):[[1],[2],[3],[4]]", - "{\"type\":\"tensor(x[],y[])\",\"value\":[[1.0],[2.0],[3.0],[4.0]]}"); + "{\"type\":\"tensor(x[],y[])\",\"values\":[[1.0],[2.0],[3.0],[4.0]]}"); assertEncodeShortForm("tensor(x[],y[],z[]):[[[1,2],[3,4]]]", - "{\"type\":\"tensor(x[],y[],z[])\",\"value\":[[[1.0,2.0],[3.0,4.0]]]}"); + "{\"type\":\"tensor(x[],y[],z[])\",\"values\":[[[1.0,2.0],[3.0,4.0]]]}"); assertEncodeShortForm("tensor(x[],y[],z[]):[[[1],[2],[3],[4]]]", - "{\"type\":\"tensor(x[],y[],z[])\",\"value\":[[[1.0],[2.0],[3.0],[4.0]]]}"); + "{\"type\":\"tensor(x[],y[],z[])\",\"values\":[[[1.0],[2.0],[3.0],[4.0]]]}"); assertEncodeShortForm("tensor(x[],y[],z[]):[[[1,2,3,4]]]", - "{\"type\":\"tensor(x[],y[],z[])\",\"value\":[[[1.0,2.0,3.0,4.0]]]}"); + "{\"type\":\"tensor(x[],y[],z[])\",\"values\":[[[1.0,2.0,3.0,4.0]]]}"); assertEncodeShortForm("tensor(x[],y[],z[]):[[[1]],[[2]],[[3]],[[4]]]", - "{\"type\":\"tensor(x[],y[],z[])\",\"value\":[[[1.0]],[[2.0]],[[3.0]],[[4.0]]]}"); + "{\"type\":\"tensor(x[],y[],z[])\",\"values\":[[[1.0]],[[2.0]],[[3.0]],[[4.0]]]}"); assertEncodeShortForm("tensor(x[],y[],z[2]):[[[1, 2]],[[3, 4]]]", - "{\"type\":\"tensor(x[],y[],z[2])\",\"value\":[[[1.0,2.0]],[[3.0,4.0]]]}"); + "{\"type\":\"tensor(x[],y[],z[2])\",\"values\":[[[1.0,2.0]],[[3.0,4.0]]]}"); } @Test - public void testSingleDimensionSparseTensorShortForm() { - assertEncodeShortForm("tensor(x{}):{a:1, b:2}", - "{\"type\":\"tensor(x{})\",\"value\":{\"a\":1.0,\"b\":2.0}}"); - + public void testEncodeMappedSingleDimensionShortForm() { + assertEncodeShortForm("tensor(x{}):{}", + "{\"type\":\"tensor(x{})\",\"cells\":{}}"); + assertEncodeShortForm("tensor(x{}):{a:1,b:2}", + "{\"type\":\"tensor(x{})\",\"cells\":{\"a\":1.0,\"b\":2.0}}"); // Multiple mapped dimensions: no short form available assertEncodeShortForm("tensor(x{},y{}):{{x:a,y:b}:1,{x:c,y:d}:2}", - "{\"type\":\"tensor(x{},y{})\",\"value\":{\"cells\":[{\"address\":{\"x\":\"a\",\"y\":\"b\"},\"value\":1.0},{\"address\":{\"x\":\"c\",\"y\":\"d\"},\"value\":2.0}]}}"); + "{\"type\":\"tensor(x{},y{})\",\"cells\":[{\"address\":{\"x\":\"a\",\"y\":\"b\"},\"value\":1.0},{\"address\":{\"x\":\"c\",\"y\":\"d\"},\"value\":2.0}]}"); } @Test - public void testSingleMappedDimensionMixedTensorShortForm() { + public void testEncodeMixedShortForm() { assertEncodeShortForm("tensor(x{},y[2]):{a:[1,2], b:[3,4] }", - "{\"type\":\"tensor(x{},y[2])\",\"value\":{\"a\":[1.0,2.0],\"b\":[3.0,4.0]}}"); + "{\"type\":\"tensor(x{},y[2])\",\"blocks\":{\"a\":[1.0,2.0],\"b\":[3.0,4.0]}}"); assertEncodeShortForm("tensor(x[2],y{}):{a:[1,2], b:[3,4] }", - "{\"type\":\"tensor(x[2],y{})\",\"value\":{\"a\":[1.0,2.0],\"b\":[3.0,4.0]}}"); + "{\"type\":\"tensor(x[2],y{})\",\"blocks\":{\"a\":[1.0,2.0],\"b\":[3.0,4.0]}}"); assertEncodeShortForm("tensor(x{},y[2],z[2]):{a:[[1,2],[3,4]], b:[[5,6],[7,8]] }", - "{\"type\":\"tensor(x{},y[2],z[2])\",\"value\":{\"a\":[[1.0,2.0],[3.0,4.0]],\"b\":[[5.0,6.0],[7.0,8.0]]}}"); + "{\"type\":\"tensor(x{},y[2],z[2])\",\"blocks\":{\"a\":[[1.0,2.0],[3.0,4.0]],\"b\":[[5.0,6.0],[7.0,8.0]]}}"); assertEncodeShortForm("tensor(x[1],y{},z[4]):{a:[[1,2,3,4]], b:[[5,6,7,8]] }", - "{\"type\":\"tensor(x[1],y{},z[4])\",\"value\":{\"a\":[[1.0,2.0,3.0,4.0]],\"b\":[[5.0,6.0,7.0,8.0]]}}"); + "{\"type\":\"tensor(x[1],y{},z[4])\",\"blocks\":{\"a\":[[1.0,2.0,3.0,4.0]],\"b\":[[5.0,6.0,7.0,8.0]]}}"); assertEncodeShortForm("tensor(x[4],y[1],z{}):{a:[[1],[2],[3],[4]], b:[[5],[6],[7],[8]] }", - "{\"type\":\"tensor(x[4],y[1],z{})\",\"value\":{\"a\":[[1.0],[2.0],[3.0],[4.0]],\"b\":[[5.0],[6.0],[7.0],[8.0]]}}"); + "{\"type\":\"tensor(x[4],y[1],z{})\",\"blocks\":{\"a\":[[1.0],[2.0],[3.0],[4.0]],\"b\":[[5.0],[6.0],[7.0],[8.0]]}}"); assertEncodeShortForm("tensor(a[2],b[2],c{},d[2]):{a:[[[1,2], [3,4]], [[5,6], [7,8]]], b:[[[1,2], [3,4]], [[5,6], [7,8]]] }", - "{\"type\":\"tensor(a[2],b[2],c{},d[2])\",\"value\":{" + + "{\"type\":\"tensor(a[2],b[2],c{},d[2])\",\"blocks\":{" + "\"a\":[[[1.0,2.0],[3.0,4.0]],[[5.0,6.0],[7.0,8.0]]]," + "\"b\":[[[1.0,2.0],[3.0,4.0]],[[5.0,6.0],[7.0,8.0]]]}}"); + + // Multiple mapped dimensions + assertEncodeShortForm("tensor(x{},y{},z[2]):{{x:a,y:0,z:0}:1, {x:a,y:0,z:1}:2, {x:b,y:1,z:0}:3, {x:b,y:1,z:1}:4 }", + "{\"type\":\"tensor(x{},y{},z[2])\",\"blocks\":[{\"address\":{\"x\":\"a\",\"y\":\"0\"},\"values\":[1.0,2.0]},{\"address\":{\"x\":\"b\",\"y\":\"1\"},\"values\":[3.0,4.0]}]}"); + assertEncodeShortForm("tensor(x{},y[2],z{}):{{x:a,y:0,z:0}:1, {x:a,y:1,z:0}:2, {x:b,y:0,z:1}:3, {x:b,y:1,z:1}:4 }", + "{\"type\":\"tensor(x{},y[2],z{})\",\"blocks\":[{\"address\":{\"x\":\"a\",\"z\":\"0\"},\"values\":[1.0,2.0]},{\"address\":{\"x\":\"b\",\"z\":\"1\"},\"values\":[3.0,4.0]}]}"); } @Test -- cgit v1.2.3