summary | refs | log | tree | commit | diff | stats
path: root/document
diff options
context:
space:
mode:
author jonmv <venstad@gmail.com> 2024-01-24 11:11:23 +0100
committer jonmv <venstad@gmail.com> 2024-01-24 11:11:23 +0100
commit 201ca8994616ef61961853efac69027696615900 (patch)
tree e1c5a1b5caae536fa80cd63c882879d2be33f582 /document
parent 4ae8dfd3a0a4a94f7c93fa3e92b3691bf9907f98 (diff)
Add new LazyTokenBuffer, and wire it into existing tensor parse tests
Diffstat (limited to 'document')
-rw-r--r-- document/src/main/java/com/yahoo/document/json/JsonReader.java | 51
-rw-r--r-- document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java | 61
-rw-r--r-- document/src/main/java/com/yahoo/document/json/TokenBuffer.java | 16
-rw-r--r-- document/src/main/java/com/yahoo/document/json/document/DocumentParser.java | 2
-rw-r--r-- document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java | 25
-rw-r--r-- document/src/test/java/com/yahoo/document/json/LazyTokenBufferTest.java | 132
6 files changed, 277 insertions, 10 deletions
diff --git a/document/src/main/java/com/yahoo/document/json/JsonReader.java b/document/src/main/java/com/yahoo/document/json/JsonReader.java
index 3e1743b8d45..08d1fe688ed 100644
--- a/document/src/main/java/com/yahoo/document/json/JsonReader.java
+++ b/document/src/main/java/com/yahoo/document/json/JsonReader.java
@@ -18,6 +18,7 @@ import java.io.InputStream;
import java.util.Optional;
import static com.yahoo.document.json.JsonReader.ReaderState.END_OF_FEED;
+import static com.yahoo.document.json.document.DocumentParser.FIELDS;
import static com.yahoo.document.json.readers.JsonParserHelpers.expectArrayStart;
/**
@@ -60,7 +61,7 @@ public class JsonReader {
* @param docIdString document ID
* @return the parsed document operation
*/
- public ParsedDocumentOperation readSingleDocument(DocumentOperationType operationType, String docIdString) {
+ ParsedDocumentOperation readSingleDocument(DocumentOperationType operationType, String docIdString) {
DocumentId docId = new DocumentId(docIdString);
DocumentParseInfo documentParseInfo;
try {
@@ -78,6 +79,54 @@ public class JsonReader {
return operation;
}
+ /**
+ * Reads a JSON which is expected to contain only the "fields" object of a document,
+ * and where other parameters, like the document ID and operation type, are supplied by other means.
+ * The fields are read through a {@link LazyTokenBuffer}; unexpected JSON structure and I/O failures are thrown as {@link IllegalArgumentException}.
+ * @param operationType the type of operation (update or put)
+ * @param docIdString document ID
+ * @return the parsed document operation
+ */
+ public ParsedDocumentOperation readSingleDocumentStreaming(DocumentOperationType operationType, String docIdString) {
+ try {
+ DocumentId docId = new DocumentId(docIdString);
+ DocumentParseInfo documentParseInfo = new DocumentParseInfo();
+ documentParseInfo.documentId = docId;
+ documentParseInfo.operationType = operationType;
+
+ if (JsonToken.START_OBJECT != parser.nextValue()) // The input must open with the root object.
+ throw new IllegalArgumentException("expected start of root object, got " + parser.currentToken());
+
+ parser.nextValue(); // Move to the first value inside the root object; its name and type are checked below.
+ if ( ! FIELDS.equals(parser.getCurrentName()))
+ throw new IllegalArgumentException("expected field \"fields\", but got " + parser.getCurrentName());
+
+ if (JsonToken.START_OBJECT != parser.currentToken())
+ throw new IllegalArgumentException("expected start of \"fields\" object, got " + parser.currentToken());
+
+ documentParseInfo.fieldsBuffer = new LazyTokenBuffer(parser); // The parser is at the start of the "fields" object here.
+ VespaJsonDocumentReader vespaJsonDocumentReader = new VespaJsonDocumentReader(typeManager.getIgnoreUndefinedFields());
+ ParsedDocumentOperation operation = vespaJsonDocumentReader.createDocumentOperation(
+ getDocumentTypeFromString(documentParseInfo.documentId.getDocType(), typeManager), documentParseInfo);
+
+ if ( ! documentParseInfo.fieldsBuffer.isEmpty()) // Anything left in the buffer was not consumed by document parsing.
+ throw new IllegalArgumentException("expected all content to be consumed by document parsing, but " +
+ documentParseInfo.fieldsBuffer.nesting() + " levels remain");
+ // The parser must now be at the end of "fields", followed only by the end of the root object, and then end of input.
+ if (JsonToken.END_OBJECT != parser.currentToken())
+ throw new IllegalArgumentException("expected end of \"fields\" object, got " + parser.currentToken());
+ if (JsonToken.END_OBJECT != parser.nextToken())
+ throw new IllegalArgumentException("expected end of root object, got " + parser.currentToken());
+ if (null != parser.nextToken())
+ throw new IllegalArgumentException("expected end of input, got " + parser.currentToken());
+
+ return operation;
+ }
+ catch (IOException e) { // Read failures from the parser are reported as invalid input, keeping the cause.
+ throw new IllegalArgumentException("failed parsing document", e);
+ }
+ }
+
/** Returns the next document operation, or null if we have reached the end */
public DocumentOperation next() {
switch (state) {
diff --git a/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java
new file mode 100644
index 00000000000..7798cd93909
--- /dev/null
+++ b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java
@@ -0,0 +1,61 @@
+package com.yahoo.document.json;
+
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonToken;
+
+import java.io.IOException;
+import java.util.function.Supplier;
+
+/**
+ * A {@link TokenBuffer} which only buffers tokens when needed, i.e., when peeking;
+ * otherwise, the next token is read from the wrapped parser when advancing past the last buffered one.
+ * @author jonmv
+ */
+public class LazyTokenBuffer extends TokenBuffer {
+
+    private final JsonParser parser;
+    /** Creates a buffer over the given parser, whose current token must be the start of a JSON object. */
+    public LazyTokenBuffer(JsonParser parser) {
+        this.parser = parser;
+        try { addFromParser(parser); }
+        catch (IOException e) { throw new IllegalArgumentException("failed parsing document JSON", e); }
+        if (JsonToken.START_OBJECT != current())
+            throw new IllegalArgumentException("expected start of JSON object, but got " + current());
+        updateNesting(current()); // Account for the initial START_OBJECT, which next() never passes over.
+    }
+    /** Overrides {@link TokenBuffer#advance()}: drops the current token, then reads a new one if nothing is buffered. */
+    @Override void advance() {
+        super.advance();
+        if (tokens.isEmpty() && nesting() > 0) tokens.add(nextToken()); // Fill current token if needed and possible.
+    }
+    /** Returns a supplier of upcoming tokens, reading from the parser and buffering when needed; null once its nesting count is 0. */
+    @Override
+    public Supplier<Token> lookahead() {
+        return new Supplier<>() {
+            int localNesting = nesting(); // Tracked separately, so peeking leaves the nesting of the buffer itself unchanged.
+            Supplier<Token> buffered = LazyTokenBuffer.super.lookahead();
+            @Override public Token get() {
+                if (localNesting == 0)
+                    return null;
+
+                Token token = buffered.get();
+                if (token == null) tokens.add(token = nextToken()); // Nothing more buffered: read one, and keep it for later.
+                localNesting += nestingOffset(token.token);
+                return token;
+            }
+        };
+    }
+    /** Reads the next value token, with its name and text, from the parser; fails if the parser has no more tokens. */
+    private Token nextToken() {
+        try {
+            JsonToken token = parser.nextValue();
+            if (token == null)
+                throw new IllegalStateException("no more JSON tokens");
+            return new Token(token, parser.getCurrentName(), parser.getText());
+        }
+        catch (IOException e) {
+            throw new IllegalArgumentException("failed reading document JSON", e);
+        }
+    }
+
+}
diff --git a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java
index 52a2816334a..3a48f71c4cd 100644
--- a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java
+++ b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java
@@ -18,7 +18,7 @@ import java.util.function.Supplier;
*/
public class TokenBuffer {
- private final Deque<Token> tokens = new ArrayDeque<>();
+ final Deque<Token> tokens = new ArrayDeque<>();
private int nesting = 0;
@@ -29,12 +29,16 @@ public class TokenBuffer {
/** Returns the next token, or null, and updates the nesting count of this. */
public JsonToken next() {
- tokens.poll();
+ advance(); // Moving past the current token is a separate, overridable step; LazyTokenBuffer refills the buffer in it.
JsonToken token = current();
updateNesting(token);
return token;
}
+ void advance() { // Moves past the current token; called by next() before it reads the new current token.
+ tokens.poll(); // Removes the head of the buffer, which is the token current() returns.
+ }
+
/** Returns the current token without changing position, or null if none */
public JsonToken current() {
return isEmpty() ? null : tokens.peek().token;
@@ -72,7 +76,7 @@ public class TokenBuffer {
JsonToken token = parser.currentToken();
Preconditions.checkArgument(token == firstToken,
"Expected %s, got %s.", firstToken.name(), token);
- if (isEmpty()) updateNesting(token);
+ updateNesting(token);
try {
for (int nesting = addFromParser(parser); nesting > 0; nesting += addFromParser(parser))
@@ -83,7 +87,7 @@ public class TokenBuffer {
}
}
- private int nestingOffset(JsonToken token) {
+ int nestingOffset(JsonToken token) {
if (token == null) return 0;
if (token.isStructStart()) {
return 1;
@@ -94,12 +98,12 @@ public class TokenBuffer {
}
}
- private int addFromParser(JsonParser tokens) throws IOException {
+ int addFromParser(JsonParser tokens) throws IOException { // Package-private, as the LazyTokenBuffer constructor calls it. Note: the parameter shadows the tokens field.
add(tokens.currentToken(), tokens.getCurrentName(), tokens.getText()); // Buffers the parser's current token with its name and text.
return nestingOffset(tokens.currentToken()); // The nesting offset of the token just added.
}
- private void updateNesting(JsonToken token) {
+ void updateNesting(JsonToken token) { // Package-private, as the LazyTokenBuffer constructor calls it for its initial token.
nesting += nestingOffset(token); // Adjusts the nesting count by the offset of the given token.
}
diff --git a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java
index b41159811f3..aef7e1cffe2 100644
--- a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java
+++ b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java
@@ -101,7 +101,7 @@ public class DocumentParser {
}
}
- private void handleIdentLevelTwo(DocumentParseInfo documentParseInfo) {
+ private void handleIdentLevelTwo(DocumentParseInfo documentParseInfo) {
try {
// "fields" opens a dictionary and is therefore on level two which might be surprising.
if (parser.currentToken() == JsonToken.START_OBJECT && FIELDS.equals(parser.getCurrentName())) {
diff --git a/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java b/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java
index b2bb51d7c97..080528fea77 100644
--- a/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java
+++ b/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java
@@ -2687,6 +2687,14 @@ public class JsonReaderTestCase {
return createPutWithTensor(inputTensor, "sparse_tensor");
}
private DocumentPut createPutWithTensor(String inputTensor, String tensorFieldName) {
+ JsonReader streaming = createReader("""
+ {
+ "fields": {
+ "%s": %s
+ }
+ }
+ """.formatted(tensorFieldName, inputTensor));
+ DocumentPut lazyParsed = (DocumentPut) streaming.readSingleDocumentStreaming(DocumentOperationType.PUT, TENSOR_DOC_ID).operation();
JsonReader reader = createReader("""
[
{
@@ -2696,7 +2704,9 @@ public class JsonReaderTestCase {
}
}
]""".formatted(TENSOR_DOC_ID, tensorFieldName, inputTensor));
- return (DocumentPut) reader.next();
+ DocumentPut bufferParsed = (DocumentPut) reader.next();
+ assertEquals(lazyParsed, bufferParsed);
+ return bufferParsed;
}
private DocumentUpdate createAssignUpdateWithSparseTensor(String inputTensor) {
@@ -2783,6 +2793,15 @@ public class JsonReaderTestCase {
}
private DocumentUpdate createTensorUpdate(String operation, String tensorJson, String tensorFieldName) {
+ JsonReader streaming = createReader("""
+ {
+ "fields": {
+ "%s": {
+ "%s": %s
+ }
+ }
+ }""".formatted(tensorFieldName, operation, tensorJson));
+ DocumentUpdate lazyParsed = (DocumentUpdate) streaming.readSingleDocumentStreaming(DocumentOperationType.UPDATE, TENSOR_DOC_ID).operation();
JsonReader reader = createReader("""
[
{
@@ -2794,7 +2813,9 @@ public class JsonReaderTestCase {
}
}
]""".formatted(TENSOR_DOC_ID, tensorFieldName, operation, tensorJson));
- return (DocumentUpdate) reader.next();
+ DocumentUpdate bufferParsed = (DocumentUpdate) reader.next();
+ assertEquals(lazyParsed, bufferParsed);
+ return bufferParsed;
}
private void assertTensorAddUpdate(String expectedTensor, String tensorFieldName, String tensorJson) {
diff --git a/document/src/test/java/com/yahoo/document/json/LazyTokenBufferTest.java b/document/src/test/java/com/yahoo/document/json/LazyTokenBufferTest.java
new file mode 100644
index 00000000000..3ed2ed531c3
--- /dev/null
+++ b/document/src/test/java/com/yahoo/document/json/LazyTokenBufferTest.java
@@ -0,0 +1,132 @@
+package com.yahoo.document.json;
+
+import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonToken;
+import com.yahoo.document.json.TokenBuffer.Token;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.function.Supplier;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+/**
+ * @author jonmv
+ */
+public class LazyTokenBufferTest {
+ // Walks the same "fields" object twice: first by peeking through a lookahead, then by advancing the buffer.
+ @Test
+ public void testBuffer() throws IOException {
+ String json = """
+ {
+ "fields": {
+ "foo": "bar",
+ "baz": [1, 2, 3],
+ "quu": { "qux": null }
+ }
+ }""";
+ JsonParser parser = new JsonFactory().createParser(json);
+ parser.nextValue(); // Start of the root object.
+ parser.nextValue(); // Start of the "fields" object, as asserted below.
+ assertEquals(JsonToken.START_OBJECT, parser.currentToken());
+ assertEquals("fields", parser.currentName());
+
+ // Peeking through the buffer doesn't change nesting.
+ LazyTokenBuffer buffer = new LazyTokenBuffer(parser);
+ assertEquals(JsonToken.START_OBJECT, buffer.current());
+ assertEquals("fields", buffer.currentName());
+ assertEquals(1, buffer.nesting());
+
+ Supplier<Token> lookahead = buffer.lookahead();
+ Token peek = lookahead.get();
+ assertEquals(JsonToken.VALUE_STRING, peek.token);
+ assertEquals("foo", peek.name);
+ assertEquals("bar", peek.text);
+ assertEquals(1, buffer.nesting());
+
+ peek = lookahead.get();
+ assertEquals(JsonToken.START_ARRAY, peek.token);
+ assertEquals("baz", peek.name);
+ assertEquals(1, buffer.nesting());
+
+ peek = lookahead.get();
+ assertEquals(JsonToken.VALUE_NUMBER_INT, peek.token);
+ assertEquals("1", peek.text);
+
+ peek = lookahead.get();
+ assertEquals(JsonToken.VALUE_NUMBER_INT, peek.token);
+ assertEquals("2", peek.text);
+
+ peek = lookahead.get();
+ assertEquals(JsonToken.VALUE_NUMBER_INT, peek.token);
+ assertEquals("3", peek.text);
+
+ peek = lookahead.get();
+ assertEquals(JsonToken.END_ARRAY, peek.token);
+ assertEquals(1, buffer.nesting());
+
+ peek = lookahead.get();
+ assertEquals(JsonToken.START_OBJECT, peek.token);
+ assertEquals("quu", peek.name);
+ assertEquals(1, buffer.nesting());
+
+ peek = lookahead.get();
+ assertEquals(JsonToken.VALUE_NULL, peek.token);
+ assertEquals("qux", peek.name);
+
+ peek = lookahead.get();
+ assertEquals(JsonToken.END_OBJECT, peek.token);
+ assertEquals(1, buffer.nesting());
+
+ peek = lookahead.get();
+ assertEquals(JsonToken.END_OBJECT, peek.token);
+ assertEquals(1, buffer.nesting());
+
+ peek = lookahead.get();
+ assertNull(peek); // The lookahead ends once the "fields" object is closed.
+
+ // The lookahead has read all of "fields" from the parser; only the end of the root object remains.
+ assertEquals(JsonToken.END_OBJECT, parser.nextToken());
+ assertNull(parser.nextToken());
+
+ // Repeat iterating through the buffer, this time advancing it, and see that nesting changes.
+ assertEquals(JsonToken.VALUE_STRING, buffer.next());
+ assertEquals("foo", buffer.currentName());
+ assertEquals("bar", buffer.currentText());
+ assertEquals(1, buffer.nesting());
+
+ assertEquals(JsonToken.START_ARRAY, buffer.next());
+ assertEquals("baz", buffer.currentName());
+ assertEquals(2, buffer.nesting());
+
+ assertEquals(JsonToken.VALUE_NUMBER_INT, buffer.next());
+ assertEquals("1", buffer.currentText());
+
+ assertEquals(JsonToken.VALUE_NUMBER_INT, buffer.next());
+ assertEquals("2", buffer.currentText());
+
+ assertEquals(JsonToken.VALUE_NUMBER_INT, buffer.next());
+ assertEquals("3", buffer.currentText());
+
+ assertEquals(JsonToken.END_ARRAY, buffer.next());
+ assertEquals(1, buffer.nesting());
+
+ assertEquals(JsonToken.START_OBJECT, buffer.next());
+ assertEquals("quu", buffer.currentName());
+ assertEquals(2, buffer.nesting());
+
+ assertEquals(JsonToken.VALUE_NULL, buffer.next());
+ assertEquals("qux", buffer.currentName());
+
+ assertEquals(JsonToken.END_OBJECT, buffer.next());
+ assertEquals(1, buffer.nesting());
+
+ assertEquals(JsonToken.END_OBJECT, buffer.next());
+ assertEquals(0, buffer.nesting());
+
+ assertNull(buffer.next()); // The buffer is drained, and with nesting at 0 nothing more is read from the parser.
+ }
+
+}