diff options
author | jonmv <venstad@gmail.com> | 2024-01-26 09:37:11 +0100 |
---|---|---|
committer | jonmv <venstad@gmail.com> | 2024-01-26 09:37:11 +0100 |
commit | bc89d03da6c10eb38577c279cd26c82bf914a3bc (patch) | |
tree | 1cfd4180d6711d0368c87ff8ca3dd89c3c60616b /document/src/main/java/com/yahoo/document | |
parent | cdf8355c309a01aa512aef66af540e5346173b73 (diff) |
Revert "Merge pull request #30067 from vespa-engine/revert-30038-jonmv/leaner-token-buffer"
This reverts commit b771fbe5fe648cf4c64e04341542e11c2e21cb9d, reversing
changes made to 7b578506b5c4c59f4273e74af1f0db4a74f82175.
Diffstat (limited to 'document/src/main/java/com/yahoo/document')
7 files changed, 199 insertions, 129 deletions
diff --git a/document/src/main/java/com/yahoo/document/json/JsonReader.java b/document/src/main/java/com/yahoo/document/json/JsonReader.java index 3e1743b8d45..08d1fe688ed 100644 --- a/document/src/main/java/com/yahoo/document/json/JsonReader.java +++ b/document/src/main/java/com/yahoo/document/json/JsonReader.java @@ -18,6 +18,7 @@ import java.io.InputStream; import java.util.Optional; import static com.yahoo.document.json.JsonReader.ReaderState.END_OF_FEED; +import static com.yahoo.document.json.document.DocumentParser.FIELDS; import static com.yahoo.document.json.readers.JsonParserHelpers.expectArrayStart; /** @@ -60,7 +61,7 @@ public class JsonReader { * @param docIdString document ID * @return the parsed document operation */ - public ParsedDocumentOperation readSingleDocument(DocumentOperationType operationType, String docIdString) { + ParsedDocumentOperation readSingleDocument(DocumentOperationType operationType, String docIdString) { DocumentId docId = new DocumentId(docIdString); DocumentParseInfo documentParseInfo; try { @@ -78,6 +79,54 @@ public class JsonReader { return operation; } + /** + * Reads a JSON which is expected to contain only the "fields" object of a document, + * and where other parameters, like the document ID and operation type, are supplied by other means. + * + * @param operationType the type of operation (update or put) + * @param docIdString document ID + * @return the parsed document operation + */ + public ParsedDocumentOperation readSingleDocumentStreaming(DocumentOperationType operationType, String docIdString) { + try { + DocumentId docId = new DocumentId(docIdString); + DocumentParseInfo documentParseInfo = new DocumentParseInfo(); + documentParseInfo.documentId = docId; + documentParseInfo.operationType = operationType; + + if (JsonToken.START_OBJECT != parser.nextValue()) + throw new IllegalArgumentException("expected start of root object, got " + parser.currentToken()); + + parser.nextValue(); + if ( ! FIELDS.equals(parser.getCurrentName())) + throw new IllegalArgumentException("expected field \"fields\", but got " + parser.getCurrentName()); + + if (JsonToken.START_OBJECT != parser.currentToken()) + throw new IllegalArgumentException("expected start of \"fields\" object, got " + parser.currentToken()); + + documentParseInfo.fieldsBuffer = new LazyTokenBuffer(parser); + VespaJsonDocumentReader vespaJsonDocumentReader = new VespaJsonDocumentReader(typeManager.getIgnoreUndefinedFields()); + ParsedDocumentOperation operation = vespaJsonDocumentReader.createDocumentOperation( + getDocumentTypeFromString(documentParseInfo.documentId.getDocType(), typeManager), documentParseInfo); + + if ( ! documentParseInfo.fieldsBuffer.isEmpty()) + throw new IllegalArgumentException("expected all content to be consumed by document parsing, but " + + documentParseInfo.fieldsBuffer.nesting() + " levels remain"); + + if (JsonToken.END_OBJECT != parser.currentToken()) + throw new IllegalArgumentException("expected end of \"fields\" object, got " + parser.currentToken()); + if (JsonToken.END_OBJECT != parser.nextToken()) + throw new IllegalArgumentException("expected end of root object, got " + parser.currentToken()); + if (null != parser.nextToken()) + throw new IllegalArgumentException("expected end of input, got " + parser.currentToken()); + + return operation; + } + catch (IOException e) { + throw new IllegalArgumentException("failed parsing document", e); + } + } + /** Returns the next document operation, or null if we have reached the end */ public DocumentOperation next() { switch (state) { diff --git a/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java new file mode 100644 index 00000000000..0fbdd0b28c7 --- /dev/null +++ b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java @@ -0,0 +1,64 @@ +package com.yahoo.document.json; + +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonToken; + +import java.io.IOException; +import java.util.function.Supplier; + +/** + * A {@link TokenBuffer} which only buffers tokens when needed, i.e., when peeking. + * + * @author jonmv + */ +public class LazyTokenBuffer extends TokenBuffer { + + private final JsonParser parser; + + public LazyTokenBuffer(JsonParser parser) { + this.parser = parser; + try { addFromParser(parser); } + catch (IOException e) { throw new IllegalArgumentException("failed parsing document JSON", e); } + if (JsonToken.START_OBJECT != current()) + throw new IllegalArgumentException("expected start of JSON object, but got " + current()); + updateNesting(current()); + } + + void advance() { + super.advance(); + if (tokens.isEmpty() && nesting() > 0) tokens.add(nextToken()); // Fill current token if needed and possible. + } + + @Override + public Supplier<Token> lookahead() { + return new Supplier<>() { + int localNesting = nesting(); + Supplier<Token> buffered = LazyTokenBuffer.super.lookahead(); + @Override public Token get() { + if (localNesting == 0) + return null; + + Token token = buffered.get(); + if (token == null) { + token = nextToken(); + tokens.add(token); + } + localNesting += nestingOffset(token.token); + return token; + } + }; + } + + private Token nextToken() { + try { + JsonToken token = parser.nextValue(); + if (token == null) + throw new IllegalStateException("no more JSON tokens"); + return new Token(token, parser.getCurrentName(), parser.getText()); + } + catch (IOException e) { + throw new IllegalArgumentException("failed reading document JSON", e); + } + } + +} diff --git a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java index dec84e46b77..3a48f71c4cd 100644 --- a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java +++ b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java @@ -1,15 +1,16 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.document.json; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; import com.google.common.base.Preconditions; +import java.io.IOException; +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.Iterator; +import java.util.function.Supplier; + /** * Helper class to enable lookahead in the token stream. * @@ -17,101 +18,76 @@ import com.google.common.base.Preconditions; */ public class TokenBuffer { - private final List<Token> tokens; + final Deque<Token> tokens = new ArrayDeque<>(); - private int position = 0; private int nesting = 0; - public TokenBuffer() { - this(new ArrayList<>()); - } - - public TokenBuffer(List<Token> tokens) { - this.tokens = tokens; - if (tokens.size() > 0) - updateNesting(tokens.get(position).token); - } + public TokenBuffer() { } /** Returns whether any tokens are available in this */ - public boolean isEmpty() { return remaining() == 0; } - - public JsonToken previous() { - updateNestingGoingBackwards(current()); - position--; - return current(); - } - - /** Returns the current token without changing position, or null if none */ - public JsonToken current() { - if (isEmpty()) return null; - Token token = tokens.get(position); - if (token == null) return null; - return token.token; - } + public boolean isEmpty() { return tokens.isEmpty(); } + /** Returns the next token, or null, and updates the nesting count of this. */ public JsonToken next() { - position++; + advance(); JsonToken token = current(); updateNesting(token); return token; } - /** Returns a given number of tokens ahead, or null if none */ - public JsonToken peek(int ahead) { - if (tokens.size() <= position + ahead) return null; - return tokens.get(position + ahead).token; + void advance() { + tokens.poll(); + } + + /** Returns the current token without changing position, or null if none */ + public JsonToken current() { + return isEmpty() ? null : tokens.peek().token; } /** Returns the current token name without changing position, or null if none */ public String currentName() { - if (isEmpty()) return null; - Token token = tokens.get(position); - if (token == null) return null; - return token.name; + return isEmpty() ? null : tokens.peek().name; } /** Returns the current token text without changing position, or null if none */ public String currentText() { - if (isEmpty()) return null; - Token token = tokens.get(position); - if (token == null) return null; - return token.text; + return isEmpty() ? null : tokens.peek().text; } - public int remaining() { - return tokens.size() - position; + /** + * Returns a sequence of remaining tokens in this, or nulls when none remain. + * This may fill the token buffer, but not otherwise modify it. + */ + public Supplier<Token> lookahead() { + Iterator<Token> iterator = tokens.iterator(); + if (iterator.hasNext()) iterator.next(); + return () -> iterator.hasNext() ? iterator.next() : null; } private void add(JsonToken token, String name, String text) { - tokens.add(tokens.size(), new Token(token, name, text)); + tokens.add(new Token(token, name, text)); } - public void bufferObject(JsonToken first, JsonParser tokens) { - bufferJsonStruct(first, tokens, JsonToken.START_OBJECT); + public void bufferObject(JsonParser parser) { + bufferJsonStruct(parser, JsonToken.START_OBJECT); } - private void bufferJsonStruct(JsonToken first, JsonParser tokens, JsonToken firstToken) { - int localNesting = 0; - JsonToken t = first; + private void bufferJsonStruct(JsonParser parser, JsonToken firstToken) { + JsonToken token = parser.currentToken(); + Preconditions.checkArgument(token == firstToken, + "Expected %s, got %s.", firstToken.name(), token); + updateNesting(token); - Preconditions.checkArgument(first == firstToken, - "Expected %s, got %s.", firstToken.name(), t); - if (remaining() == 0) { - updateNesting(t); + try { + for (int nesting = addFromParser(parser); nesting > 0; nesting += addFromParser(parser)) + parser.nextValue(); } - localNesting = storeAndPeekNesting(t, localNesting, tokens); - while (localNesting > 0) { - t = nextValue(tokens); - localNesting = storeAndPeekNesting(t, localNesting, tokens); + catch (IOException e) { + throw new IllegalArgumentException(e); } } - private int storeAndPeekNesting(JsonToken t, int nesting, JsonParser tokens) { - addFromParser(t, tokens); - return nesting + nestingOffset(t); - } - - private int nestingOffset(JsonToken token) { + int nestingOffset(JsonToken token) { if (token == null) return 0; if (token.isStructStart()) { return 1; @@ -122,43 +98,23 @@ public class TokenBuffer { } } - private void addFromParser(JsonToken t, JsonParser tokens) { - try { - add(t, tokens.getCurrentName(), tokens.getText()); - } catch (IOException e) { - throw new IllegalArgumentException(e); - } - } - - private JsonToken nextValue(JsonParser tokens) { - try { - return tokens.nextValue(); - } catch (IOException e) { - throw new IllegalArgumentException(e); - } + int addFromParser(JsonParser tokens) throws IOException { + add(tokens.currentToken(), tokens.getCurrentName(), tokens.getText()); + return nestingOffset(tokens.currentToken()); } - private void updateNesting(JsonToken token) { + void updateNesting(JsonToken token) { nesting += nestingOffset(token); } - private void updateNestingGoingBackwards(JsonToken token) { - nesting -= nestingOffset(token); - } - public int nesting() { return nesting; } public void skipToRelativeNesting(int relativeNesting) { int initialNesting = nesting(); - do { - next(); - } while ( nesting() > initialNesting + relativeNesting); - } - - public List<Token> rest() { - return tokens.subList(position, tokens.size()); + do next(); + while (nesting() > initialNesting + relativeNesting); } public static final class Token { diff --git a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java index 74656762fe1..aef7e1cffe2 100644 --- a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java +++ b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java @@ -86,16 +86,6 @@ public class DocumentParser { private void handleIdentLevelOne(DocumentParseInfo documentParseInfo, boolean docIdAndOperationIsSetExternally) throws IOException { JsonToken currentToken = parser.getCurrentToken(); - if (currentToken == JsonToken.VALUE_TRUE || currentToken == JsonToken.VALUE_FALSE) { - try { - if (CREATE_IF_NON_EXISTENT.equals(parser.getCurrentName())) { - documentParseInfo.create = Optional.ofNullable(parser.getBooleanValue()); - return; - } - } catch (IOException e) { - throw new RuntimeException("Got IO exception while parsing document", e); - } - } if ((currentToken == JsonToken.VALUE_TRUE || currentToken == JsonToken.VALUE_FALSE) && CREATE_IF_NON_EXISTENT.equals(parser.getCurrentName())) { documentParseInfo.create = Optional.of(currentToken == JsonToken.VALUE_TRUE); @@ -111,12 +101,11 @@ public class DocumentParser { } } - private void handleIdentLevelTwo(DocumentParseInfo documentParseInfo) { + private void handleIdentLevelTwo(DocumentParseInfo documentParseInfo) { try { - JsonToken currentToken = parser.getCurrentToken(); // "fields" opens a dictionary and is therefore on level two which might be surprising. - if (currentToken == JsonToken.START_OBJECT && FIELDS.equals(parser.getCurrentName())) { - documentParseInfo.fieldsBuffer.bufferObject(currentToken, parser); + if (parser.currentToken() == JsonToken.START_OBJECT && FIELDS.equals(parser.getCurrentName())) { + documentParseInfo.fieldsBuffer.bufferObject(parser); processIndent(); } } catch (IOException e) { diff --git a/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java b/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java index 2dce07cdbe6..e859306f04d 100644 --- a/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java +++ b/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java @@ -8,6 +8,7 @@ import com.yahoo.document.json.TokenBuffer; import java.util.Optional; public class DocumentParseInfo { + public DocumentParseInfo() { } public DocumentId documentId; public Optional<Boolean> create = Optional.empty(); public Optional<String> condition = Optional.empty(); diff --git a/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java b/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java index 0b7b1ae9996..1fd4029b1a5 100644 --- a/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java +++ b/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java @@ -4,13 +4,15 @@ package com.yahoo.document.json.readers; import com.fasterxml.jackson.core.JsonToken; import com.yahoo.document.datatypes.TensorFieldValue; import com.yahoo.document.json.TokenBuffer; -import com.yahoo.slime.Inspector; -import com.yahoo.slime.Type; +import com.yahoo.document.json.TokenBuffer.Token; import com.yahoo.tensor.IndexedTensor; import com.yahoo.tensor.MixedTensor; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorAddress; import com.yahoo.tensor.TensorType; +import com.yahoo.tensor.TensorType.Dimension; + +import java.util.function.Supplier; import static com.yahoo.document.json.readers.JsonParserHelpers.*; import static com.yahoo.tensor.serialization.JsonFormat.decodeHexString; @@ -37,36 +39,43 @@ public class TensorReader { Tensor.Builder builder = Tensor.Builder.of(tensorFieldValue.getDataType().getTensorType()); expectOneOf(buffer.current(), JsonToken.START_OBJECT, JsonToken.START_ARRAY); int initNesting = buffer.nesting(); - for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) { - if (TENSOR_CELLS.equals(buffer.currentName()) && ! primitiveContent(buffer)) { + while (true) { + Supplier<Token> lookahead = buffer.lookahead(); + Token next = lookahead.get(); + if (TENSOR_CELLS.equals(next.name) && ! primitiveContent(next.token, lookahead.get().token)) { + buffer.next(); readTensorCells(buffer, builder); } - else if (TENSOR_VALUES.equals(buffer.currentName()) && builder.type().dimensions().stream().allMatch(d -> d.isIndexed())) { + else if (TENSOR_VALUES.equals(next.name) && builder.type().dimensions().stream().allMatch(Dimension::isIndexed)) { + buffer.next(); readTensorValues(buffer, builder); } - else if (TENSOR_BLOCKS.equals(buffer.currentName())) { + else if (TENSOR_BLOCKS.equals(next.name)) { + buffer.next(); readTensorBlocks(buffer, builder); } - else if (TENSOR_TYPE.equals(buffer.currentName()) && buffer.current() == JsonToken.VALUE_STRING) { + else if (TENSOR_TYPE.equals(next.name) && next.token == JsonToken.VALUE_STRING) { + buffer.next(); // Ignore input tensor type } + else if (buffer.nesting() == initNesting && JsonToken.END_OBJECT == next.token) { + buffer.next(); + break; + } else { - buffer.previous(); // Back up to the start of the enclosing block readDirectTensorValue(buffer, builder); - buffer.previous(); // ... and back up to the end of the enclosing block + break; } } expectOneOf(buffer.current(), JsonToken.END_OBJECT, JsonToken.END_ARRAY); tensorFieldValue.assign(builder.build()); } - static boolean primitiveContent(TokenBuffer buffer) { - JsonToken cellsValue = buffer.current(); - if (cellsValue.isScalarValue()) return true; - if (cellsValue == JsonToken.START_ARRAY) { - JsonToken firstArrayValue = buffer.peek(1); - if (firstArrayValue == JsonToken.END_ARRAY) return false; - if (firstArrayValue.isScalarValue()) return true; + static boolean primitiveContent(JsonToken current, JsonToken next) { + if (current.isScalarValue()) return true; + if (current == JsonToken.START_ARRAY) { + if (next == JsonToken.END_ARRAY) return false; + if (next.isScalarValue()) return true; } return false; } @@ -186,7 +195,7 @@ public class TensorReader { boolean hasIndexed = builder.type().dimensions().stream().anyMatch(TensorType.Dimension::isIndexed); boolean hasMapped = builder.type().dimensions().stream().anyMatch(TensorType.Dimension::isMapped); - if (isArrayOfObjects(buffer, 0)) + if (isArrayOfObjects(buffer)) readTensorCells(buffer, builder); else if ( ! hasMapped) readTensorValues(buffer, builder); @@ -196,10 +205,12 @@ public class TensorReader { readTensorCells(buffer, builder); } - private static boolean isArrayOfObjects(TokenBuffer buffer, int ahead) { - if (buffer.peek(ahead++) != JsonToken.START_ARRAY) return false; - if (buffer.peek(ahead) == JsonToken.START_ARRAY) return isArrayOfObjects(buffer, ahead); // nested array - return buffer.peek(ahead) == JsonToken.START_OBJECT; + private static boolean isArrayOfObjects(TokenBuffer buffer) { + if (buffer.current() != JsonToken.START_ARRAY) return false; + Supplier<Token> lookahead = buffer.lookahead(); + Token next; + while ((next = lookahead.get()).token == JsonToken.START_ARRAY) { } + return next.token == JsonToken.START_OBJECT; } private static TensorAddress readAddress(TokenBuffer buffer, TensorType type) { diff --git a/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java b/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java index 113b8732b23..c7303d31ea2 100644 --- a/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java +++ b/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java @@ -238,7 +238,7 @@ public class VespaJsonDocumentReader { "Expected end of JSON struct (%s), got %s", expectedFinalToken, buffer.current()); Preconditions.checkState(buffer.nesting() == 0, "Nesting not zero at end of operation"); Preconditions.checkState(buffer.next() == null, "Dangling data at end of operation"); - Preconditions.checkState(buffer.remaining() == 0, "Dangling data at end of operation"); + Preconditions.checkState(buffer.isEmpty(), "Dangling data at end of operation"); } } |