summaryrefslogtreecommitdiffstats
path: root/document
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2023-01-12 22:13:12 +0100
committerJon Bratseth <bratseth@gmail.com>2023-01-12 22:13:12 +0100
commit2229a4d2e3141010850fa23f5ad731c9038052a8 (patch)
treed35ed23f65ef1ee793367e28450eda483372f031 /document
parent844eeeeebfd8cdffb28ee7d64e05a803aa2f0e5a (diff)
Parse tensor JSON values at root
Our current tensor JSON formats require a "blocks", "cells" or "values" key at the root, containing values in various forms. This adds support for skipping that extra level and adding content at the root, where the permissible content format depends on the tensor type, and matches the formats below "blocks", "cells" or "values" for the corresponding tensor types.
Diffstat (limited to 'document')
-rw-r--r--document/src/main/java/com/yahoo/document/json/JsonSerializationHelper.java9
-rw-r--r--document/src/main/java/com/yahoo/document/json/TokenBuffer.java20
-rw-r--r--document/src/main/java/com/yahoo/document/json/document/DocumentParser.java36
-rw-r--r--document/src/main/java/com/yahoo/document/json/readers/JsonParserHelpers.java7
-rw-r--r--document/src/main/java/com/yahoo/document/json/readers/TensorReader.java58
-rw-r--r--document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java35
6 files changed, 111 insertions, 54 deletions
diff --git a/document/src/main/java/com/yahoo/document/json/JsonSerializationHelper.java b/document/src/main/java/com/yahoo/document/json/JsonSerializationHelper.java
index d33cc8078dd..7f6ead528fe 100644
--- a/document/src/main/java/com/yahoo/document/json/JsonSerializationHelper.java
+++ b/document/src/main/java/com/yahoo/document/json/JsonSerializationHelper.java
@@ -48,6 +48,7 @@ import java.util.Set;
* @author Vegard Sjonfjell
*/
public class JsonSerializationHelper {
+
private final static Base64.Encoder base64Encoder = Base64.getEncoder(); // Important: _basic_ format
static class JsonSerializationException extends RuntimeException {
@@ -99,14 +100,6 @@ public class JsonSerializationHelper {
});
}
- private static void serializeTensorDimensions(JsonGenerator generator, Set<String> dimensions) throws IOException {
- generator.writeArrayFieldStart(TensorReader.TENSOR_DIMENSIONS);
- for (String dimension : dimensions) {
- generator.writeString(dimension);
- }
- generator.writeEndArray();
- }
-
static void serializeTensorCells(JsonGenerator generator, Tensor tensor) throws IOException {
generator.writeArrayFieldStart(TensorReader.TENSOR_CELLS);
for (Map.Entry<TensorAddress, Double> cell : tensor.cells().entrySet()) {
diff --git a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java
index 6b2bdbd53d8..e1214920296 100644
--- a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java
+++ b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java
@@ -18,13 +18,15 @@ import com.google.common.base.Preconditions;
public class TokenBuffer {
private final Deque<Token> buffer;
+
private int nesting = 0;
+ private Token previousToken;
public TokenBuffer() {
this(new ArrayDeque<>());
}
- private TokenBuffer(Deque<Token> buffer) {
+ public TokenBuffer(Deque<Token> buffer) {
this.buffer = buffer;
if (buffer.size() > 0) {
updateNesting(buffer.peekFirst().token);
@@ -35,7 +37,7 @@ public class TokenBuffer {
public boolean isEmpty() { return size() == 0; }
public JsonToken next() {
- buffer.removeFirst();
+ previousToken = buffer.removeFirst();
Token t = buffer.peekFirst();
if (t == null) {
return null;
@@ -44,6 +46,16 @@ public class TokenBuffer {
return t.token;
}
+ /** Goes one token back. Repeated calls to this method will *not* go back further. */
+ public JsonToken previous() {
+ if (previousToken == null) return null;
+ updateNestingGoingBackwards(currentToken());
+ Token newCurrent = previousToken;
+ previousToken = null;
+ buffer.push(newCurrent);
+ return newCurrent.token;
+ }
+
/** Returns the current token without changing position, or null if none */
public JsonToken currentToken() {
Token token = buffer.peekFirst();
@@ -130,6 +142,10 @@ public class TokenBuffer {
nesting += nestingOffset(t);
}
+ private void updateNestingGoingBackwards(JsonToken t) {
+ nesting -= nestingOffset(t);
+ }
+
public int nesting() {
return nesting;
}
diff --git a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java
index 5e1c1eb6ac4..b63a39f51c5 100644
--- a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java
+++ b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java
@@ -76,18 +76,10 @@ public class DocumentParser {
throw new IllegalArgumentException("Could not read document, no document?");
}
switch (currentToken) {
- case START_OBJECT:
- indentLevel++;
- break;
- case END_OBJECT:
- indentLevel--;
- break;
- case START_ARRAY:
- indentLevel += 10000L;
- break;
- case END_ARRAY:
- indentLevel -= 10000L;
- break;
+ case START_OBJECT -> indentLevel++;
+ case END_OBJECT -> indentLevel--;
+ case START_ARRAY -> indentLevel += 10000L;
+ case END_ARRAY -> indentLevel -= 10000L;
}
}
@@ -133,18 +125,12 @@ public class DocumentParser {
}
private static DocumentOperationType operationNameToOperationType(String operationName) {
- switch (operationName) {
- case PUT:
- case ID:
- return DocumentOperationType.PUT;
- case REMOVE:
- return DocumentOperationType.REMOVE;
- case UPDATE:
- return DocumentOperationType.UPDATE;
- default:
- throw new IllegalArgumentException(
- "Got " + operationName + " as document operation, only \"put\", " +
- "\"remove\" and \"update\" are supported.");
- }
+ return switch (operationName) {
+ case PUT, ID -> DocumentOperationType.PUT;
+ case REMOVE -> DocumentOperationType.REMOVE;
+ case UPDATE -> DocumentOperationType.UPDATE;
+ default -> throw new IllegalArgumentException("Got " + operationName + " as document operation, only \"put\", " +
+ "\"remove\" and \"update\" are supported.");
+ };
}
}
diff --git a/document/src/main/java/com/yahoo/document/json/readers/JsonParserHelpers.java b/document/src/main/java/com/yahoo/document/json/readers/JsonParserHelpers.java
index 1723df2bd54..594dfc5ab06 100644
--- a/document/src/main/java/com/yahoo/document/json/readers/JsonParserHelpers.java
+++ b/document/src/main/java/com/yahoo/document/json/readers/JsonParserHelpers.java
@@ -5,6 +5,8 @@ package com.yahoo.document.json.readers;
import com.fasterxml.jackson.core.JsonToken;
import com.google.common.base.Preconditions;
+import java.util.Arrays;
+
public class JsonParserHelpers {
public static void expectArrayStart(JsonToken token) {
@@ -61,4 +63,9 @@ public class JsonParserHelpers {
}
}
+ public static void expectOneOf(JsonToken token, JsonToken ... tokens) {
+ if (Arrays.stream(tokens).noneMatch(t -> t == token))
+ throw new IllegalArgumentException("Expected one of " + tokens + " but got " + token);
+ }
+
}
diff --git a/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java b/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java
index 193c9491e86..4b2da912c37 100644
--- a/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java
+++ b/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java
@@ -4,12 +4,18 @@ package com.yahoo.document.json.readers;
import com.fasterxml.jackson.core.JsonToken;
import com.yahoo.document.datatypes.TensorFieldValue;
import com.yahoo.document.json.TokenBuffer;
+import com.yahoo.document.select.parser.Token;
+import com.yahoo.slime.Inspector;
+import com.yahoo.slime.Type;
import com.yahoo.tensor.IndexedTensor;
import com.yahoo.tensor.MixedTensor;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorAddress;
import com.yahoo.tensor.TensorType;
+import java.util.ArrayDeque;
+import java.util.Deque;
+
import static com.yahoo.document.json.readers.JsonParserHelpers.*;
import static com.yahoo.tensor.serialization.JsonFormat.decodeHexString;
@@ -23,7 +29,6 @@ import static com.yahoo.tensor.serialization.JsonFormat.decodeHexString;
public class TensorReader {
public static final String TENSOR_ADDRESS = "address";
- public static final String TENSOR_DIMENSIONS = "dimensions";
public static final String TENSOR_CELLS = "cells";
public static final String TENSOR_VALUES = "values";
public static final String TENSOR_BLOCKS = "blocks";
@@ -32,22 +37,39 @@ public class TensorReader {
// MUST be kept in sync with com.yahoo.tensor.serialization.JsonFormat.decode in vespajlib
static void fillTensor(TokenBuffer buffer, TensorFieldValue tensorFieldValue) {
Tensor.Builder builder = Tensor.Builder.of(tensorFieldValue.getDataType().getTensorType());
- expectObjectStart(buffer.currentToken());
+ expectOneOf(buffer.currentToken(), JsonToken.START_OBJECT, JsonToken.START_ARRAY);
int initNesting = buffer.nesting();
for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) {
- if (TENSOR_CELLS.equals(buffer.currentName()))
+ if (TENSOR_CELLS.equals(buffer.currentName()) && ! primitiveContent(new TokenBuffer(new ArrayDeque<>(buffer.rest())))) {
readTensorCells(buffer, builder);
- else if (TENSOR_VALUES.equals(buffer.currentName()))
+ }
+ else if (TENSOR_VALUES.equals(buffer.currentName()) && builder.type().dimensions().stream().allMatch(d -> d.isIndexed())) {
readTensorValues(buffer, builder);
- else if (TENSOR_BLOCKS.equals(buffer.currentName()))
+ }
+ else if (TENSOR_BLOCKS.equals(buffer.currentName())) {
readTensorBlocks(buffer, builder);
- else if (builder.type().dimensions().stream().anyMatch(d -> d.isIndexed())) // sparse can be empty
- throw new IllegalArgumentException("Expected a tensor value to contain either 'cells', 'values' or 'blocks', but got: "+buffer.currentName());
+ }
+ else {
+ buffer.previous(); // Back up to the start of the enclosing block
+ readDirectTensorValue(buffer, builder);
+ buffer.previous(); // ... and back up to the end of the enclosing block
+ }
}
- expectObjectEnd(buffer.currentToken());
+ expectOneOf(buffer.currentToken(), JsonToken.END_OBJECT, JsonToken.END_ARRAY);
tensorFieldValue.assign(builder.build());
}
+ static boolean primitiveContent(TokenBuffer buffer) {
+ JsonToken cellsValue = buffer.currentToken();
+ if (cellsValue.isScalarValue()) return true;
+ if (cellsValue == JsonToken.START_ARRAY) {
+ JsonToken firstArrayValue = buffer.next();
+ if (firstArrayValue == JsonToken.END_ARRAY) return false;
+ if (firstArrayValue.isScalarValue()) return true;
+ }
+ return false;
+ }
+
static void readTensorCells(TokenBuffer buffer, Tensor.Builder builder) {
if (buffer.currentToken() == JsonToken.START_ARRAY) {
int initNesting = buffer.nesting();
@@ -88,10 +110,9 @@ public class TensorReader {
}
private static void readTensorValues(TokenBuffer buffer, Tensor.Builder builder) {
- if ( ! (builder instanceof IndexedTensor.BoundBuilder))
+ if ( ! (builder instanceof IndexedTensor.BoundBuilder indexedBuilder))
throw new IllegalArgumentException("The 'values' field can only be used with dense tensors. " +
"Use 'cells' or 'blocks' instead");
- IndexedTensor.BoundBuilder indexedBuilder = (IndexedTensor.BoundBuilder)builder;
if (buffer.currentToken() == JsonToken.VALUE_STRING) {
double[] decoded = decodeHexString(buffer.currentText(), builder.type().valueType());
if (decoded.length == 0)
@@ -112,11 +133,9 @@ public class TensorReader {
}
static void readTensorBlocks(TokenBuffer buffer, Tensor.Builder builder) {
- if ( ! (builder instanceof MixedTensor.BoundBuilder))
+ if ( ! (builder instanceof MixedTensor.BoundBuilder mixedBuilder))
throw new IllegalArgumentException("The 'blocks' field can only be used with mixed tensors with bound dimensions. " +
"Use 'cells' or 'values' instead");
-
- MixedTensor.BoundBuilder mixedBuilder = (MixedTensor.BoundBuilder) builder;
if (buffer.currentToken() == JsonToken.START_ARRAY) {
int initNesting = buffer.nesting();
for (buffer.next(); buffer.nesting() >= initNesting; buffer.next())
@@ -160,6 +179,19 @@ public class TensorReader {
mixedBuilder.block(address, values);
}
+ /** Reads a tensor value directly at the root, where the format is decided by the tensor type. */
+ private static void readDirectTensorValue(TokenBuffer buffer, Tensor.Builder builder) {
+ boolean hasIndexed = builder.type().dimensions().stream().anyMatch(TensorType.Dimension::isIndexed);
+ boolean hasMapped = builder.type().dimensions().stream().anyMatch(TensorType.Dimension::isMapped);
+
+ if ( ! hasMapped)
+ readTensorValues(buffer, builder);
+ else if (hasMapped && hasIndexed)
+ readTensorBlocks(buffer, builder);
+ else
+ readTensorCells(buffer, builder);
+ }
+
private static TensorAddress readAddress(TokenBuffer buffer, TensorType type) {
expectObjectStart(buffer.currentToken());
TensorAddress.Builder builder = new TensorAddress.Builder(type);
diff --git a/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java b/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java
index 41d607b0d8e..c19094ff231 100644
--- a/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java
+++ b/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java
@@ -1403,12 +1403,6 @@ public class JsonReaderTestCase {
}
@Test
- public void testParsingOfTensorWithEmptyDimensions() {
- assertSparseTensorField("tensor(x{},y{}):{}",
- createPutWithSparseTensor(inputJson("{ 'dimensions': [] }")));
- }
-
- @Test
public void testParsingOfTensorWithEmptyCells() {
assertSparseTensorField("tensor(x{},y{}):{}",
createPutWithSparseTensor(inputJson("{ 'cells': [] }")));
@@ -1511,6 +1505,32 @@ public class JsonReaderTestCase {
Tensor tensor = assertTensorField(expected, put, "mixed_bfloat16_tensor");
}
+ /** Tests parsing of various tensor values set at the root, i.e. no 'cells', 'blocks' or 'values' */
+ @Test
+ public void testDirectValue() {
+ assertTensorField("tensor(x{}):{a:2, b:3}", "sparse_single_dimension_tensor", "{'a':2.0, 'b':3.0}");
+ assertTensorField("tensor(x[2],y[3]):[2, 3, 4, 5, 6, 7]]", "dense_tensor", "[2, 3, 4, 5, 6, 7]");
+ assertTensorField("tensor(x{},y[3]):{a:[2, 3, 4], b:[4, 5, 6]}", "mixed_tensor", "{'a':[2, 3, 4], 'b':[4, 5, 6]}");
+ assertTensorField("tensor(x{},y{}):{{x:a,y:0}:2, {x:b,y:1}:3}", "sparse_tensor",
+ "[{'address':{'x':'a','y':'0'},'value':2}, {'address':{'x':'b','y':'1'},'value':3}]");
+ }
+
+ @Test
+ public void testDirectValueReservedNameKeys() {
+ // Single-valued
+ assertTensorField("tensor(x{}):{cells:2, b:3}", "sparse_single_dimension_tensor", "{'cells':2.0, 'b':3.0}");
+ assertTensorField("tensor(x{}):{values:2, b:3}", "sparse_single_dimension_tensor", "{'values':2.0, 'b':3.0}");
+ assertTensorField("tensor(x{}):{block:2, b:3}", "sparse_single_dimension_tensor", "{'block':2.0, 'b':3.0}");
+
+ // Multi-valued
+ assertTensorField("tensor(x{},y[3]):{cells:[2, 3, 4], b:[4, 5, 6]}", "mixed_tensor",
+ "{'cells':[2, 3, 4], 'b':[4, 5, 6]}");
+ assertTensorField("tensor(x{},y[3]):{values:[2, 3, 4], b:[4, 5, 6]}", "mixed_tensor",
+ "{'values':[2, 3, 4], 'b':[4, 5, 6]}");
+ assertTensorField("tensor(x{},y[3]):{block:[2, 3, 4], b:[4, 5, 6]}", "mixed_tensor",
+ "{'block':[2, 3, 4], 'b':[4, 5, 6]}");
+ }
+
@Test
public void testParsingOfMixedTensorOnMixedForm() {
Tensor.Builder builder = Tensor.Builder.of(TensorType.fromSpec("tensor(x{},y[3])"));
@@ -2070,6 +2090,9 @@ public class JsonReaderTestCase {
private static Tensor assertSparseTensorField(String expectedTensor, DocumentPut put) {
return assertTensorField(expectedTensor, put, "sparse_tensor");
}
+ private Tensor assertTensorField(String expectedTensor, String fieldName, String inputJson) {
+ return assertTensorField(expectedTensor, createPutWithTensor(inputJson, fieldName), fieldName);
+ }
private static Tensor assertTensorField(String expectedTensor, DocumentPut put, String tensorFieldName) {
return assertTensorField(Tensor.from(expectedTensor), put, tensorFieldName);
}