diff options
author | jonmv <venstad@gmail.com> | 2024-01-24 11:11:23 +0100 |
---|---|---|
committer | jonmv <venstad@gmail.com> | 2024-01-24 11:11:23 +0100 |
commit | 201ca8994616ef61961853efac69027696615900 (patch) | |
tree | e1c5a1b5caae536fa80cd63c882879d2be33f582 /document | |
parent | 4ae8dfd3a0a4a94f7c93fa3e92b3691bf9907f98 (diff) |
Add new LazyTokenBuffer, and wire it into existing tensor parse tests
Diffstat (limited to 'document')
6 files changed, 277 insertions, 10 deletions
diff --git a/document/src/main/java/com/yahoo/document/json/JsonReader.java b/document/src/main/java/com/yahoo/document/json/JsonReader.java index 3e1743b8d45..08d1fe688ed 100644 --- a/document/src/main/java/com/yahoo/document/json/JsonReader.java +++ b/document/src/main/java/com/yahoo/document/json/JsonReader.java @@ -18,6 +18,7 @@ import java.io.InputStream; import java.util.Optional; import static com.yahoo.document.json.JsonReader.ReaderState.END_OF_FEED; +import static com.yahoo.document.json.document.DocumentParser.FIELDS; import static com.yahoo.document.json.readers.JsonParserHelpers.expectArrayStart; /** @@ -60,7 +61,7 @@ public class JsonReader { * @param docIdString document ID * @return the parsed document operation */ - public ParsedDocumentOperation readSingleDocument(DocumentOperationType operationType, String docIdString) { + ParsedDocumentOperation readSingleDocument(DocumentOperationType operationType, String docIdString) { DocumentId docId = new DocumentId(docIdString); DocumentParseInfo documentParseInfo; try { @@ -78,6 +79,54 @@ public class JsonReader { return operation; } + /** + * Reads a JSON which is expected to contain only the "fields" object of a document, + * and where other parameters, like the document ID and operation type, are supplied by other means. + * + * @param operationType the type of operation (update or put) + * @param docIdString document ID + * @return the parsed document operation + */ + public ParsedDocumentOperation readSingleDocumentStreaming(DocumentOperationType operationType, String docIdString) { + try { + DocumentId docId = new DocumentId(docIdString); + DocumentParseInfo documentParseInfo = new DocumentParseInfo(); + documentParseInfo.documentId = docId; + documentParseInfo.operationType = operationType; + + if (JsonToken.START_OBJECT != parser.nextValue()) + throw new IllegalArgumentException("expected start of root object, got " + parser.currentToken()); + + parser.nextValue(); + if ( ! FIELDS.equals(parser.getCurrentName())) + throw new IllegalArgumentException("expected field \"fields\", but got " + parser.getCurrentName()); + + if (JsonToken.START_OBJECT != parser.currentToken()) + throw new IllegalArgumentException("expected start of \"fields\" object, got " + parser.currentToken()); + + documentParseInfo.fieldsBuffer = new LazyTokenBuffer(parser); + VespaJsonDocumentReader vespaJsonDocumentReader = new VespaJsonDocumentReader(typeManager.getIgnoreUndefinedFields()); + ParsedDocumentOperation operation = vespaJsonDocumentReader.createDocumentOperation( + getDocumentTypeFromString(documentParseInfo.documentId.getDocType(), typeManager), documentParseInfo); + + if ( ! documentParseInfo.fieldsBuffer.isEmpty()) + throw new IllegalArgumentException("expected all content to be consumed by document parsing, but " + + documentParseInfo.fieldsBuffer.nesting() + " levels remain"); + + if (JsonToken.END_OBJECT != parser.currentToken()) + throw new IllegalArgumentException("expected end of \"fields\" object, got " + parser.currentToken()); + if (JsonToken.END_OBJECT != parser.nextToken()) + throw new IllegalArgumentException("expected end of root object, got " + parser.currentToken()); + if (null != parser.nextToken()) + throw new IllegalArgumentException("expected end of input, got " + parser.currentToken()); + + return operation; + } + catch (IOException e) { + throw new IllegalArgumentException("failed parsing document", e); + } + } + /** Returns the next document operation, or null if we have reached the end */ public DocumentOperation next() { switch (state) { diff --git a/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java new file mode 100644 index 00000000000..7798cd93909 --- /dev/null +++ b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java @@ -0,0 +1,61 @@ +package com.yahoo.document.json; + +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonToken; + +import java.io.IOException; +import java.util.function.Supplier; + +/** + * A {@link TokenBuffer} which only buffers tokens when needed, i.e., when peeking. + * + * @author jonmv + */ +public class LazyTokenBuffer extends TokenBuffer { + + private final JsonParser parser; + + public LazyTokenBuffer(JsonParser parser) { + this.parser = parser; + try { addFromParser(parser); } + catch (IOException e) { throw new IllegalArgumentException("failed parsing document JSON", e); } + if (JsonToken.START_OBJECT != current()) + throw new IllegalArgumentException("expected start of JSON object, but got " + current()); + updateNesting(current()); + } + + void advance() { + super.advance(); + if (tokens.isEmpty() && nesting() > 0) tokens.add(nextToken()); // Fill current token if needed and possible. + } + + @Override + public Supplier<Token> lookahead() { + return new Supplier<>() { + int localNesting = nesting(); + Supplier<Token> buffered = LazyTokenBuffer.super.lookahead(); + @Override public Token get() { + if (localNesting == 0) + return null; + + Token token = buffered.get(); + if (token == null) tokens.add(token = nextToken()); + localNesting += nestingOffset(token.token); + return token; + } + }; + } + + private Token nextToken() { + try { + JsonToken token = parser.nextValue(); + if (token == null) + throw new IllegalStateException("no more JSON tokens"); + return new Token(token, parser.getCurrentName(), parser.getText()); + } + catch (IOException e) { + throw new IllegalArgumentException("failed reading document JSON", e); + } + } + +} diff --git a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java index 52a2816334a..3a48f71c4cd 100644 --- a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java +++ b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java @@ -18,7 +18,7 @@ import java.util.function.Supplier; */ public class TokenBuffer { - private final Deque<Token> tokens = new ArrayDeque<>(); + final Deque<Token> tokens = new ArrayDeque<>(); private int nesting = 0; @@ -29,12 +29,16 @@ public class TokenBuffer { /** Returns the next token, or null, and updates the nesting count of this. */ public JsonToken next() { - tokens.poll(); + advance(); JsonToken token = current(); updateNesting(token); return token; } + void advance() { + tokens.poll(); + } + /** Returns the current token without changing position, or null if none */ public JsonToken current() { return isEmpty() ? null : tokens.peek().token; @@ -72,7 +76,7 @@ public class TokenBuffer { JsonToken token = parser.currentToken(); Preconditions.checkArgument(token == firstToken, "Expected %s, got %s.", firstToken.name(), token); - if (isEmpty()) updateNesting(token); + updateNesting(token); try { for (int nesting = addFromParser(parser); nesting > 0; nesting += addFromParser(parser)) @@ -83,7 +87,7 @@ public class TokenBuffer { } } - private int nestingOffset(JsonToken token) { + int nestingOffset(JsonToken token) { if (token == null) return 0; if (token.isStructStart()) { return 1; @@ -94,12 +98,12 @@ public class TokenBuffer { } } - private int addFromParser(JsonParser tokens) throws IOException { + int addFromParser(JsonParser tokens) throws IOException { add(tokens.currentToken(), tokens.getCurrentName(), tokens.getText()); return nestingOffset(tokens.currentToken()); } - private void updateNesting(JsonToken token) { + void updateNesting(JsonToken token) { nesting += nestingOffset(token); } diff --git a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java index b41159811f3..aef7e1cffe2 100644 --- a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java +++ b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java @@ -101,7 +101,7 @@ public class DocumentParser { } } - private void handleIdentLevelTwo(DocumentParseInfo documentParseInfo) { + private void handleIdentLevelTwo(DocumentParseInfo documentParseInfo) { try { // "fields" opens a dictionary and is therefore on level two which might be surprising. if (parser.currentToken() == JsonToken.START_OBJECT && FIELDS.equals(parser.getCurrentName())) { diff --git a/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java b/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java index b2bb51d7c97..080528fea77 100644 --- a/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java +++ b/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java @@ -2687,6 +2687,14 @@ public class JsonReaderTestCase { return createPutWithTensor(inputTensor, "sparse_tensor"); } private DocumentPut createPutWithTensor(String inputTensor, String tensorFieldName) { + JsonReader streaming = createReader(""" + { + "fields": { + "%s": %s + } + } + """.formatted(tensorFieldName, inputTensor)); + DocumentPut lazyParsed = (DocumentPut) streaming.readSingleDocumentStreaming(DocumentOperationType.PUT, TENSOR_DOC_ID).operation(); JsonReader reader = createReader(""" [ { @@ -2696,7 +2704,9 @@ public class JsonReaderTestCase { } } ]""".formatted(TENSOR_DOC_ID, tensorFieldName, inputTensor)); - return (DocumentPut) reader.next(); + DocumentPut bufferParsed = (DocumentPut) reader.next(); + assertEquals(lazyParsed, bufferParsed); + return bufferParsed; } private DocumentUpdate createAssignUpdateWithSparseTensor(String inputTensor) { @@ -2783,6 +2793,15 @@ public class JsonReaderTestCase { } private DocumentUpdate createTensorUpdate(String operation, String tensorJson, String tensorFieldName) { + JsonReader streaming = createReader(""" + { + "fields": { + "%s": { + "%s": %s + } + } + }""".formatted(tensorFieldName, operation, tensorJson)); + DocumentUpdate lazyParsed = (DocumentUpdate) streaming.readSingleDocumentStreaming(DocumentOperationType.UPDATE, TENSOR_DOC_ID).operation(); JsonReader reader = createReader(""" [ { @@ -2794,7 +2813,9 @@ public class JsonReaderTestCase { } } ]""".formatted(TENSOR_DOC_ID, tensorFieldName, operation, tensorJson)); - return (DocumentUpdate) reader.next(); + DocumentUpdate bufferParsed = (DocumentUpdate) reader.next(); + assertEquals(lazyParsed, bufferParsed); + return bufferParsed; } private void assertTensorAddUpdate(String expectedTensor, String tensorFieldName, String tensorJson) { diff --git a/document/src/test/java/com/yahoo/document/json/LazyTokenBufferTest.java b/document/src/test/java/com/yahoo/document/json/LazyTokenBufferTest.java new file mode 100644 index 00000000000..3ed2ed531c3 --- /dev/null +++ b/document/src/test/java/com/yahoo/document/json/LazyTokenBufferTest.java @@ -0,0 +1,132 @@ +package com.yahoo.document.json; + +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonToken; +import com.yahoo.document.json.TokenBuffer.Token; +import org.junit.Test; + +import java.io.IOException; +import java.util.function.Supplier; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +/** + * @author jonmv + */ +public class LazyTokenBufferTest { + + @Test + public void testBuffer() throws IOException { + String json = """ + { + "fields": { + "foo": "bar", + "baz": [1, 2, 3], + "quu": { "qux": null } + } + }"""; + JsonParser parser = new JsonFactory().createParser(json); + parser.nextValue(); + parser.nextValue(); + assertEquals(JsonToken.START_OBJECT, parser.currentToken()); + assertEquals("fields", parser.currentName()); + + // Peeking through the buffer doesn't change nesting. + LazyTokenBuffer buffer = new LazyTokenBuffer(parser); + assertEquals(JsonToken.START_OBJECT, buffer.current()); + assertEquals("fields", buffer.currentName()); + assertEquals(1, buffer.nesting()); + + Supplier<Token> lookahead = buffer.lookahead(); + Token peek = lookahead.get(); + assertEquals(JsonToken.VALUE_STRING, peek.token); + assertEquals("foo", peek.name); + assertEquals("bar", peek.text); + assertEquals(1, buffer.nesting()); + + peek = lookahead.get(); + assertEquals(JsonToken.START_ARRAY, peek.token); + assertEquals("baz", peek.name); + assertEquals(1, buffer.nesting()); + + peek = lookahead.get(); + assertEquals(JsonToken.VALUE_NUMBER_INT, peek.token); + assertEquals("1", peek.text); + + peek = lookahead.get(); + assertEquals(JsonToken.VALUE_NUMBER_INT, peek.token); + assertEquals("2", peek.text); + + peek = lookahead.get(); + assertEquals(JsonToken.VALUE_NUMBER_INT, peek.token); + assertEquals("3", peek.text); + + peek = lookahead.get(); + assertEquals(JsonToken.END_ARRAY, peek.token); + assertEquals(1, buffer.nesting()); + + peek = lookahead.get(); + assertEquals(JsonToken.START_OBJECT, peek.token); + assertEquals("quu", peek.name); + assertEquals(1, buffer.nesting()); + + peek = lookahead.get(); + assertEquals(JsonToken.VALUE_NULL, peek.token); + assertEquals("qux", peek.name); + + peek = lookahead.get(); + assertEquals(JsonToken.END_OBJECT, peek.token); + assertEquals(1, buffer.nesting()); + + peek = lookahead.get(); + assertEquals(JsonToken.END_OBJECT, peek.token); + assertEquals(1, buffer.nesting()); + + peek = lookahead.get(); + assertNull(peek); + + // Parser is now at the end. + assertEquals(JsonToken.END_OBJECT, parser.nextToken()); + assertNull(parser.nextToken()); + + // Repeat iterating through the buffer, this time advancing it, and see that nesting changes. + assertEquals(JsonToken.VALUE_STRING, buffer.next()); + assertEquals("foo", buffer.currentName()); + assertEquals("bar", buffer.currentText()); + assertEquals(1, buffer.nesting()); + + assertEquals(JsonToken.START_ARRAY, buffer.next()); + assertEquals("baz", buffer.currentName()); + assertEquals(2, buffer.nesting()); + + assertEquals(JsonToken.VALUE_NUMBER_INT, buffer.next()); + assertEquals("1", buffer.currentText()); + + assertEquals(JsonToken.VALUE_NUMBER_INT, buffer.next()); + assertEquals("2", buffer.currentText()); + + assertEquals(JsonToken.VALUE_NUMBER_INT, buffer.next()); + assertEquals("3", buffer.currentText()); + + assertEquals(JsonToken.END_ARRAY, buffer.next()); + assertEquals(1, buffer.nesting()); + + assertEquals(JsonToken.START_OBJECT, buffer.next()); + assertEquals("quu", buffer.currentName()); + assertEquals(2, buffer.nesting()); + + assertEquals(JsonToken.VALUE_NULL, buffer.next()); + assertEquals("qux", buffer.currentName()); + + assertEquals(JsonToken.END_OBJECT, buffer.next()); + assertEquals(1, buffer.nesting()); + + assertEquals(JsonToken.END_OBJECT, buffer.next()); + assertEquals(0, buffer.nesting()); + + assertNull(buffer.next()); + } + +} |