diff options
author | Jon Marius Venstad <jonmv@users.noreply.github.com> | 2024-01-26 16:29:55 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-26 16:29:55 +0100 |
commit | e5c90cf30238605a4c7c64a348a7a1ddade54e12 (patch) | |
tree | 7b1f18032c52093ba00d9052e4da01539895b5f0 /document/src/main/java/com/yahoo | |
parent | 46d51a5f036423d3dbd1a7af86dd568f30babe36 (diff) |
Revert "Jonmv/reapply leaner token buffer"
Diffstat (limited to 'document/src/main/java/com/yahoo')
7 files changed, 131 insertions, 230 deletions
diff --git a/document/src/main/java/com/yahoo/document/json/JsonReader.java b/document/src/main/java/com/yahoo/document/json/JsonReader.java index b6cf8c6e18b..3e1743b8d45 100644 --- a/document/src/main/java/com/yahoo/document/json/JsonReader.java +++ b/document/src/main/java/com/yahoo/document/json/JsonReader.java @@ -6,10 +6,8 @@ import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; import com.yahoo.document.DocumentId; import com.yahoo.document.DocumentOperation; -import com.yahoo.document.DocumentPut; import com.yahoo.document.DocumentType; import com.yahoo.document.DocumentTypeManager; -import com.yahoo.document.DocumentUpdate; import com.yahoo.document.TestAndSetCondition; import com.yahoo.document.json.document.DocumentParser; import com.yahoo.document.json.readers.DocumentParseInfo; @@ -20,9 +18,6 @@ import java.io.InputStream; import java.util.Optional; import static com.yahoo.document.json.JsonReader.ReaderState.END_OF_FEED; -import static com.yahoo.document.json.document.DocumentParser.CONDITION; -import static com.yahoo.document.json.document.DocumentParser.CREATE_IF_NON_EXISTENT; -import static com.yahoo.document.json.document.DocumentParser.FIELDS; import static com.yahoo.document.json.readers.JsonParserHelpers.expectArrayStart; /** @@ -65,7 +60,7 @@ public class JsonReader { * @param docIdString document ID * @return the parsed document operation */ - ParsedDocumentOperation readSingleDocument(DocumentOperationType operationType, String docIdString) { + public ParsedDocumentOperation readSingleDocument(DocumentOperationType operationType, String docIdString) { DocumentId docId = new DocumentId(docIdString); DocumentParseInfo documentParseInfo; try { @@ -83,79 +78,6 @@ public class JsonReader { return operation; } - /** - * Reads a JSON which is expected to contain a single document operation, - * and where other parameters, like the document ID and operation type, are supplied by other means. - * - * @param operationType the type of operation (update or put) - * @param docIdString document ID - * @return the parsed document operation - */ - public ParsedDocumentOperation readSingleDocumentStreaming(DocumentOperationType operationType, String docIdString) { - try { - DocumentId docId = new DocumentId(docIdString); - DocumentParseInfo documentParseInfo = new DocumentParseInfo(); - documentParseInfo.documentId = docId; - documentParseInfo.operationType = operationType; - - if (JsonToken.START_OBJECT != parser.nextValue()) - throw new IllegalArgumentException("expected start of root object, got " + parser.currentToken()); - - Boolean create = null; - String condition = null; - ParsedDocumentOperation operation = null; - while (JsonToken.END_OBJECT != parser.nextValue()) { - switch (parser.getCurrentName()) { - case FIELDS -> { - documentParseInfo.fieldsBuffer = new LazyTokenBuffer(parser); - VespaJsonDocumentReader vespaJsonDocumentReader = new VespaJsonDocumentReader(typeManager.getIgnoreUndefinedFields()); - operation = vespaJsonDocumentReader.createDocumentOperation( - getDocumentTypeFromString(documentParseInfo.documentId.getDocType(), typeManager), documentParseInfo); - - if ( ! documentParseInfo.fieldsBuffer.isEmpty()) - throw new IllegalArgumentException("expected all content to be consumed by document parsing, but " + - documentParseInfo.fieldsBuffer.nesting() + " levels remain"); - - } - case CONDITION -> { - if ( ! JsonToken.VALUE_STRING.equals(parser.currentToken()) && ! JsonToken.VALUE_NULL.equals(parser.currentToken())) - throw new IllegalArgumentException("expected string value for condition, got " + parser.currentToken()); - - condition = parser.getValueAsString(); - } - case CREATE_IF_NON_EXISTENT -> { - create = parser.getBooleanValue(); // Throws if not boolean. - } - default -> { - // We ignore stray fields, but need to ensure structural balance in doing do. - if (parser.currentToken().isStructStart()) parser.skipChildren(); - } - } - } - - if (null != parser.nextToken()) - throw new IllegalArgumentException("expected end of input, got " + parser.currentToken()); - - if (null == operation) - throw new IllegalArgumentException("document is missing the required \"fields\" field"); - - if (null != create) { - switch (operationType) { - case PUT -> ((DocumentPut) operation.operation()).setCreateIfNonExistent(create); - case UPDATE -> ((DocumentUpdate) operation.operation()).setCreateIfNonExistent(create); - case REMOVE -> throw new IllegalArgumentException(CREATE_IF_NON_EXISTENT + " is not supported for remove operations"); - } - } - - operation.operation().setCondition(TestAndSetCondition.fromConditionString(Optional.ofNullable(condition))); - - return operation; - } - catch (IOException e) { - throw new IllegalArgumentException("failed parsing document", e); - } - } - /** Returns the next document operation, or null if we have reached the end */ public DocumentOperation next() { switch (state) { diff --git a/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java deleted file mode 100644 index 0fbdd0b28c7..00000000000 --- a/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java +++ /dev/null @@ -1,64 +0,0 @@ -package com.yahoo.document.json; - -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.JsonToken; - -import java.io.IOException; -import java.util.function.Supplier; - -/** - * A {@link TokenBuffer} which only buffers tokens when needed, i.e., when peeking. - * - * @author jonmv - */ -public class LazyTokenBuffer extends TokenBuffer { - - private final JsonParser parser; - - public LazyTokenBuffer(JsonParser parser) { - this.parser = parser; - try { addFromParser(parser); } - catch (IOException e) { throw new IllegalArgumentException("failed parsing document JSON", e); } - if (JsonToken.START_OBJECT != current()) - throw new IllegalArgumentException("expected start of JSON object, but got " + current()); - updateNesting(current()); - } - - void advance() { - super.advance(); - if (tokens.isEmpty() && nesting() > 0) tokens.add(nextToken()); // Fill current token if needed and possible. - } - - @Override - public Supplier<Token> lookahead() { - return new Supplier<>() { - int localNesting = nesting(); - Supplier<Token> buffered = LazyTokenBuffer.super.lookahead(); - @Override public Token get() { - if (localNesting == 0) - return null; - - Token token = buffered.get(); - if (token == null) { - token = nextToken(); - tokens.add(token); - } - localNesting += nestingOffset(token.token); - return token; - } - }; - } - - private Token nextToken() { - try { - JsonToken token = parser.nextValue(); - if (token == null) - throw new IllegalStateException("no more JSON tokens"); - return new Token(token, parser.getCurrentName(), parser.getText()); - } - catch (IOException e) { - throw new IllegalArgumentException("failed reading document JSON", e); - } - } - -} diff --git a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java index 3a48f71c4cd..dec84e46b77 100644 --- a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java +++ b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java @@ -1,16 +1,15 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.document.json; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; import com.google.common.base.Preconditions; -import java.io.IOException; -import java.util.ArrayDeque; -import java.util.Deque; -import java.util.Iterator; -import java.util.function.Supplier; - /** * Helper class to enable lookahead in the token stream. * @@ -18,76 +17,101 @@ import java.util.function.Supplier; */ public class TokenBuffer { - final Deque<Token> tokens = new ArrayDeque<>(); + private final List<Token> tokens; + private int position = 0; private int nesting = 0; - public TokenBuffer() { } + public TokenBuffer() { + this(new ArrayList<>()); + } + + public TokenBuffer(List<Token> tokens) { + this.tokens = tokens; + if (tokens.size() > 0) + updateNesting(tokens.get(position).token); + } /** Returns whether any tokens are available in this */ - public boolean isEmpty() { return tokens.isEmpty(); } + public boolean isEmpty() { return remaining() == 0; } + + public JsonToken previous() { + updateNestingGoingBackwards(current()); + position--; + return current(); + } + + /** Returns the current token without changing position, or null if none */ + public JsonToken current() { + if (isEmpty()) return null; + Token token = tokens.get(position); + if (token == null) return null; + return token.token; + } - /** Returns the next token, or null, and updates the nesting count of this. */ public JsonToken next() { - advance(); + position++; JsonToken token = current(); updateNesting(token); return token; } - void advance() { - tokens.poll(); - } - - /** Returns the current token without changing position, or null if none */ - public JsonToken current() { - return isEmpty() ? null : tokens.peek().token; + /** Returns a given number of tokens ahead, or null if none */ + public JsonToken peek(int ahead) { + if (tokens.size() <= position + ahead) return null; + return tokens.get(position + ahead).token; } /** Returns the current token name without changing position, or null if none */ public String currentName() { - return isEmpty() ? null : tokens.peek().name; + if (isEmpty()) return null; + Token token = tokens.get(position); + if (token == null) return null; + return token.name; } /** Returns the current token text without changing position, or null if none */ public String currentText() { - return isEmpty() ? null : tokens.peek().text; + if (isEmpty()) return null; + Token token = tokens.get(position); + if (token == null) return null; + return token.text; } - /** - * Returns a sequence of remaining tokens in this, or nulls when none remain. - * This may fill the token buffer, but not otherwise modify it. - */ - public Supplier<Token> lookahead() { - Iterator<Token> iterator = tokens.iterator(); - if (iterator.hasNext()) iterator.next(); - return () -> iterator.hasNext() ? iterator.next() : null; + public int remaining() { + return tokens.size() - position; } private void add(JsonToken token, String name, String text) { - tokens.add(new Token(token, name, text)); + tokens.add(tokens.size(), new Token(token, name, text)); } - public void bufferObject(JsonParser parser) { - bufferJsonStruct(parser, JsonToken.START_OBJECT); + public void bufferObject(JsonToken first, JsonParser tokens) { + bufferJsonStruct(first, tokens, JsonToken.START_OBJECT); } - private void bufferJsonStruct(JsonParser parser, JsonToken firstToken) { - JsonToken token = parser.currentToken(); - Preconditions.checkArgument(token == firstToken, - "Expected %s, got %s.", firstToken.name(), token); - updateNesting(token); + private void bufferJsonStruct(JsonToken first, JsonParser tokens, JsonToken firstToken) { + int localNesting = 0; + JsonToken t = first; - try { - for (int nesting = addFromParser(parser); nesting > 0; nesting += addFromParser(parser)) - parser.nextValue(); + Preconditions.checkArgument(first == firstToken, + "Expected %s, got %s.", firstToken.name(), t); + if (remaining() == 0) { + updateNesting(t); } - catch (IOException e) { - throw new IllegalArgumentException(e); + localNesting = storeAndPeekNesting(t, localNesting, tokens); + while (localNesting > 0) { + t = nextValue(tokens); + localNesting = storeAndPeekNesting(t, localNesting, tokens); } } - int nestingOffset(JsonToken token) { + private int storeAndPeekNesting(JsonToken t, int nesting, JsonParser tokens) { + addFromParser(t, tokens); + return nesting + nestingOffset(t); + } + + private int nestingOffset(JsonToken token) { if (token == null) return 0; if (token.isStructStart()) { return 1; @@ -98,23 +122,43 @@ public class TokenBuffer { } } - int addFromParser(JsonParser tokens) throws IOException { - add(tokens.currentToken(), tokens.getCurrentName(), tokens.getText()); - return nestingOffset(tokens.currentToken()); + private void addFromParser(JsonToken t, JsonParser tokens) { + try { + add(t, tokens.getCurrentName(), tokens.getText()); + } catch (IOException e) { + throw new IllegalArgumentException(e); + } + } + + private JsonToken nextValue(JsonParser tokens) { + try { + return tokens.nextValue(); + } catch (IOException e) { + throw new IllegalArgumentException(e); + } } - void updateNesting(JsonToken token) { + private void updateNesting(JsonToken token) { nesting += nestingOffset(token); } + private void updateNestingGoingBackwards(JsonToken token) { + nesting -= nestingOffset(token); + } + public int nesting() { return nesting; } public void skipToRelativeNesting(int relativeNesting) { int initialNesting = nesting(); - do next(); - while (nesting() > initialNesting + relativeNesting); + do { + next(); + } while ( nesting() > initialNesting + relativeNesting); + } + + public List<Token> rest() { + return tokens.subList(position, tokens.size()); } public static final class Token { diff --git a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java index 77e11dcf2a8..74656762fe1 100644 --- a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java +++ b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java @@ -20,7 +20,7 @@ public class DocumentParser { private static final String UPDATE = "update"; private static final String PUT = "put"; private static final String ID = "id"; - public static final String CONDITION = "condition"; + private static final String CONDITION = "condition"; public static final String CREATE_IF_NON_EXISTENT = "create"; public static final String FIELDS = "fields"; public static final String REMOVE = "remove"; @@ -86,6 +86,16 @@ public class DocumentParser { private void handleIdentLevelOne(DocumentParseInfo documentParseInfo, boolean docIdAndOperationIsSetExternally) throws IOException { JsonToken currentToken = parser.getCurrentToken(); + if (currentToken == JsonToken.VALUE_TRUE || currentToken == JsonToken.VALUE_FALSE) { + try { + if (CREATE_IF_NON_EXISTENT.equals(parser.getCurrentName())) { + documentParseInfo.create = Optional.ofNullable(parser.getBooleanValue()); + return; + } + } catch (IOException e) { + throw new RuntimeException("Got IO exception while parsing document", e); + } + } if ((currentToken == JsonToken.VALUE_TRUE || currentToken == JsonToken.VALUE_FALSE) && CREATE_IF_NON_EXISTENT.equals(parser.getCurrentName())) { documentParseInfo.create = Optional.of(currentToken == JsonToken.VALUE_TRUE); @@ -101,11 +111,12 @@ public class DocumentParser { } } - private void handleIdentLevelTwo(DocumentParseInfo documentParseInfo) { + private void handleIdentLevelTwo(DocumentParseInfo documentParseInfo) { try { + JsonToken currentToken = parser.getCurrentToken(); // "fields" opens a dictionary and is therefore on level two which might be surprising. - if (parser.currentToken() == JsonToken.START_OBJECT && FIELDS.equals(parser.getCurrentName())) { - documentParseInfo.fieldsBuffer.bufferObject(parser); + if (currentToken == JsonToken.START_OBJECT && FIELDS.equals(parser.getCurrentName())) { + documentParseInfo.fieldsBuffer.bufferObject(currentToken, parser); processIndent(); } } catch (IOException e) { diff --git a/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java b/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java index e859306f04d..2dce07cdbe6 100644 --- a/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java +++ b/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java @@ -8,7 +8,6 @@ import com.yahoo.document.json.TokenBuffer; import java.util.Optional; public class DocumentParseInfo { - public DocumentParseInfo() { } public DocumentId documentId; public Optional<Boolean> create = Optional.empty(); public Optional<String> condition = Optional.empty(); diff --git a/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java b/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java index 1fd4029b1a5..0b7b1ae9996 100644 --- a/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java +++ b/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java @@ -4,15 +4,13 @@ package com.yahoo.document.json.readers; import com.fasterxml.jackson.core.JsonToken; import com.yahoo.document.datatypes.TensorFieldValue; import com.yahoo.document.json.TokenBuffer; -import com.yahoo.document.json.TokenBuffer.Token; +import com.yahoo.slime.Inspector; +import com.yahoo.slime.Type; import com.yahoo.tensor.IndexedTensor; import com.yahoo.tensor.MixedTensor; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorAddress; import com.yahoo.tensor.TensorType; -import com.yahoo.tensor.TensorType.Dimension; - -import java.util.function.Supplier; import static com.yahoo.document.json.readers.JsonParserHelpers.*; import static com.yahoo.tensor.serialization.JsonFormat.decodeHexString; @@ -39,43 +37,36 @@ public class TensorReader { Tensor.Builder builder = Tensor.Builder.of(tensorFieldValue.getDataType().getTensorType()); expectOneOf(buffer.current(), JsonToken.START_OBJECT, JsonToken.START_ARRAY); int initNesting = buffer.nesting(); - while (true) { - Supplier<Token> lookahead = buffer.lookahead(); - Token next = lookahead.get(); - if (TENSOR_CELLS.equals(next.name) && ! primitiveContent(next.token, lookahead.get().token)) { - buffer.next(); + for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) { + if (TENSOR_CELLS.equals(buffer.currentName()) && ! primitiveContent(buffer)) { readTensorCells(buffer, builder); } - else if (TENSOR_VALUES.equals(next.name) && builder.type().dimensions().stream().allMatch(Dimension::isIndexed)) { - buffer.next(); + else if (TENSOR_VALUES.equals(buffer.currentName()) && builder.type().dimensions().stream().allMatch(d -> d.isIndexed())) { readTensorValues(buffer, builder); } - else if (TENSOR_BLOCKS.equals(next.name)) { - buffer.next(); + else if (TENSOR_BLOCKS.equals(buffer.currentName())) { readTensorBlocks(buffer, builder); } - else if (TENSOR_TYPE.equals(next.name) && next.token == JsonToken.VALUE_STRING) { - buffer.next(); + else if (TENSOR_TYPE.equals(buffer.currentName()) && buffer.current() == JsonToken.VALUE_STRING) { // Ignore input tensor type } - else if (buffer.nesting() == initNesting && JsonToken.END_OBJECT == next.token) { - buffer.next(); - break; - } else { + buffer.previous(); // Back up to the start of the enclosing block readDirectTensorValue(buffer, builder); - break; + buffer.previous(); // ... and back up to the end of the enclosing block } } expectOneOf(buffer.current(), JsonToken.END_OBJECT, JsonToken.END_ARRAY); tensorFieldValue.assign(builder.build()); } - static boolean primitiveContent(JsonToken current, JsonToken next) { - if (current.isScalarValue()) return true; - if (current == JsonToken.START_ARRAY) { - if (next == JsonToken.END_ARRAY) return false; - if (next.isScalarValue()) return true; + static boolean primitiveContent(TokenBuffer buffer) { + JsonToken cellsValue = buffer.current(); + if (cellsValue.isScalarValue()) return true; + if (cellsValue == JsonToken.START_ARRAY) { + JsonToken firstArrayValue = buffer.peek(1); + if (firstArrayValue == JsonToken.END_ARRAY) return false; + if (firstArrayValue.isScalarValue()) return true; } return false; } @@ -195,7 +186,7 @@ public class TensorReader { boolean hasIndexed = builder.type().dimensions().stream().anyMatch(TensorType.Dimension::isIndexed); boolean hasMapped = builder.type().dimensions().stream().anyMatch(TensorType.Dimension::isMapped); - if (isArrayOfObjects(buffer)) + if (isArrayOfObjects(buffer, 0)) readTensorCells(buffer, builder); else if ( ! hasMapped) readTensorValues(buffer, builder); @@ -205,12 +196,10 @@ public class TensorReader { readTensorCells(buffer, builder); } - private static boolean isArrayOfObjects(TokenBuffer buffer) { - if (buffer.current() != JsonToken.START_ARRAY) return false; - Supplier<Token> lookahead = buffer.lookahead(); - Token next; - while ((next = lookahead.get()).token == JsonToken.START_ARRAY) { } - return next.token == JsonToken.START_OBJECT; + private static boolean isArrayOfObjects(TokenBuffer buffer, int ahead) { + if (buffer.peek(ahead++) != JsonToken.START_ARRAY) return false; + if (buffer.peek(ahead) == JsonToken.START_ARRAY) return isArrayOfObjects(buffer, ahead); // nested array + return buffer.peek(ahead) == JsonToken.START_OBJECT; } private static TensorAddress readAddress(TokenBuffer buffer, TensorType type) { diff --git a/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java b/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java index 067dabdbdab..113b8732b23 100644 --- a/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java +++ b/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java @@ -230,7 +230,7 @@ public class VespaJsonDocumentReader { private static boolean isFieldPath(String field) { - return field.matches("^.*?[.\\[{].*$"); + return field.matches("^.*?[.\\[\\{].*$"); } private static void verifyEndState(TokenBuffer buffer, JsonToken expectedFinalToken) { @@ -238,7 +238,7 @@ public class VespaJsonDocumentReader { "Expected end of JSON struct (%s), got %s", expectedFinalToken, buffer.current()); Preconditions.checkState(buffer.nesting() == 0, "Nesting not zero at end of operation"); Preconditions.checkState(buffer.next() == null, "Dangling data at end of operation"); - Preconditions.checkState(buffer.isEmpty(), "Dangling data at end of operation"); + Preconditions.checkState(buffer.remaining() == 0, "Dangling data at end of operation"); } } |