aboutsummaryrefslogtreecommitdiffstats
path: root/document/src/main/java/com/yahoo/document
diff options
context:
space:
mode:
author jonmv <venstad@gmail.com> 2024-01-26 09:37:11 +0100
committer jonmv <venstad@gmail.com> 2024-01-26 09:37:11 +0100
commit bc89d03da6c10eb38577c279cd26c82bf914a3bc (patch)
tree 1cfd4180d6711d0368c87ff8ca3dd89c3c60616b /document/src/main/java/com/yahoo/document
parent cdf8355c309a01aa512aef66af540e5346173b73 (diff)
Revert "Merge pull request #30067 from vespa-engine/revert-30038-jonmv/leaner-token-buffer"
This reverts commit b771fbe5fe648cf4c64e04341542e11c2e21cb9d, reversing changes made to 7b578506b5c4c59f4273e74af1f0db4a74f82175.
Diffstat (limited to 'document/src/main/java/com/yahoo/document')
-rw-r--r-- document/src/main/java/com/yahoo/document/json/JsonReader.java 51
-rw-r--r-- document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java 64
-rw-r--r-- document/src/main/java/com/yahoo/document/json/TokenBuffer.java 140
-rw-r--r-- document/src/main/java/com/yahoo/document/json/document/DocumentParser.java 17
-rw-r--r-- document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java 1
-rw-r--r-- document/src/main/java/com/yahoo/document/json/readers/TensorReader.java 53
-rw-r--r-- document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java 2
7 files changed, 199 insertions, 129 deletions
diff --git a/document/src/main/java/com/yahoo/document/json/JsonReader.java b/document/src/main/java/com/yahoo/document/json/JsonReader.java
index 3e1743b8d45..08d1fe688ed 100644
--- a/document/src/main/java/com/yahoo/document/json/JsonReader.java
+++ b/document/src/main/java/com/yahoo/document/json/JsonReader.java
@@ -18,6 +18,7 @@ import java.io.InputStream;
import java.util.Optional;
import static com.yahoo.document.json.JsonReader.ReaderState.END_OF_FEED;
+import static com.yahoo.document.json.document.DocumentParser.FIELDS;
import static com.yahoo.document.json.readers.JsonParserHelpers.expectArrayStart;
/**
@@ -60,7 +61,7 @@ public class JsonReader {
* @param docIdString document ID
* @return the parsed document operation
*/
- public ParsedDocumentOperation readSingleDocument(DocumentOperationType operationType, String docIdString) {
+ ParsedDocumentOperation readSingleDocument(DocumentOperationType operationType, String docIdString) {
DocumentId docId = new DocumentId(docIdString);
DocumentParseInfo documentParseInfo;
try {
@@ -78,6 +79,54 @@ public class JsonReader {
return operation;
}
+    /**
+     * Parses a single document operation from a JSON payload which holds nothing but the "fields"
+     * object of a document; the document ID and the operation type are passed as arguments
+     * instead of being read from the JSON.
+     *
+     * @param operationType the type of operation (update or put)
+     * @param docIdString document ID
+     * @return the parsed document operation
+     * @throws IllegalArgumentException if the JSON does not have the expected shape,
+     *                                  or if reading from the underlying parser fails
+     */
+    public ParsedDocumentOperation readSingleDocumentStreaming(DocumentOperationType operationType, String docIdString) {
+        try {
+            DocumentId id = new DocumentId(docIdString);
+            DocumentParseInfo parseInfo = new DocumentParseInfo();
+            parseInfo.documentId = id;
+            parseInfo.operationType = operationType;
+
+            // The payload must open with the root object ...
+            if (parser.nextValue() != JsonToken.START_OBJECT)
+                throw new IllegalArgumentException("expected start of root object, got " + parser.currentToken());
+
+            // ... whose first member is named "fields" ...
+            parser.nextValue();
+            String firstMemberName = parser.getCurrentName();
+            if ( ! FIELDS.equals(firstMemberName))
+                throw new IllegalArgumentException("expected field \"fields\", but got " + firstMemberName);
+
+            // ... and is itself an object.
+            if (parser.currentToken() != JsonToken.START_OBJECT)
+                throw new IllegalArgumentException("expected start of \"fields\" object, got " + parser.currentToken());
+
+            // Let the document reader pull tokens from the parser through a lazy buffer.
+            parseInfo.fieldsBuffer = new LazyTokenBuffer(parser);
+            VespaJsonDocumentReader documentReader = new VespaJsonDocumentReader(typeManager.getIgnoreUndefinedFields());
+            ParsedDocumentOperation parsed = documentReader.createDocumentOperation(
+                    getDocumentTypeFromString(parseInfo.documentId.getDocType(), typeManager), parseInfo);
+
+            if ( ! parseInfo.fieldsBuffer.isEmpty())
+                throw new IllegalArgumentException("expected all content to be consumed by document parsing, but " +
+                                                   parseInfo.fieldsBuffer.nesting() + " levels remain");
+
+            // The parser must now rest on the end of "fields", followed only by the end of the root object.
+            if (parser.currentToken() != JsonToken.END_OBJECT)
+                throw new IllegalArgumentException("expected end of \"fields\" object, got " + parser.currentToken());
+            if (parser.nextToken() != JsonToken.END_OBJECT)
+                throw new IllegalArgumentException("expected end of root object, got " + parser.currentToken());
+            if (parser.nextToken() != null)
+                throw new IllegalArgumentException("expected end of input, got " + parser.currentToken());
+
+            return parsed;
+        }
+        catch (IOException e) {
+            throw new IllegalArgumentException("failed parsing document", e);
+        }
+    }
+
/** Returns the next document operation, or null if we have reached the end */
public DocumentOperation next() {
switch (state) {
diff --git a/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java
new file mode 100644
index 00000000000..0fbdd0b28c7
--- /dev/null
+++ b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java
@@ -0,0 +1,64 @@
+package com.yahoo.document.json;
+
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonToken;
+
+import java.io.IOException;
+import java.util.function.Supplier;
+
+/**
+ * A {@link TokenBuffer} which only buffers tokens when needed, i.e., when peeking.
+ * Outside of peeking, at most the current token is held, and the next one is read
+ * from the wrapped parser as this is advanced.
+ *
+ * @author jonmv
+ */
+public class LazyTokenBuffer extends TokenBuffer {
+
+    private final JsonParser parser;
+
+    /**
+     * Creates a buffer on top of the given parser, whose current token must be the start of a JSON object.
+     *
+     * @param parser the parser to read tokens from
+     * @throws IllegalArgumentException if the parser's current token is not the start of an object,
+     *                                  or if reading the current token from the parser fails
+     */
+    public LazyTokenBuffer(JsonParser parser) {
+        this.parser = parser;
+        try { addFromParser(parser); }
+        catch (IOException e) { throw new IllegalArgumentException("failed parsing document JSON", e); }
+        if (JsonToken.START_OBJECT != current())
+            throw new IllegalArgumentException("expected start of JSON object, but got " + current());
+        updateNesting(current());
+    }
+
+    /** Drops the current token, then reads a new one from the parser if nothing is buffered and nesting is still positive. */
+    @Override
+    void advance() {
+        super.advance();
+        if (tokens.isEmpty() && nesting() > 0) tokens.add(nextToken()); // Fill current token if needed and possible.
+    }
+
+    /**
+     * Returns a supplier of the tokens following the current one: first those already buffered,
+     * then tokens read from the parser, each of which is appended to the buffer as it is returned.
+     * The supplier tracks nesting on its own, starting from the nesting of this at the time of the call,
+     * and yields null once that count reaches zero, so it never reads past the enclosing structure.
+     */
+    @Override
+    public Supplier<Token> lookahead() {
+        return new Supplier<>() {
+            private int localNesting = nesting();
+            private final Supplier<Token> buffered = LazyTokenBuffer.super.lookahead();
+            @Override public Token get() {
+                if (localNesting == 0)
+                    return null;
+
+                Token token = buffered.get();
+                if (token == null) { // Buffered tokens are exhausted: continue from the parser, and keep what is read.
+                    token = nextToken();
+                    tokens.add(token);
+                }
+                localNesting += nestingOffset(token.token);
+                return token;
+            }
+        };
+    }
+
+    /**
+     * Reads the next value token from the parser, together with its current name and text.
+     *
+     * @throws IllegalStateException if the parser has no more tokens
+     * @throws IllegalArgumentException if reading from the parser fails
+     */
+    private Token nextToken() {
+        try {
+            JsonToken token = parser.nextValue();
+            if (token == null)
+                throw new IllegalStateException("no more JSON tokens");
+            return new Token(token, parser.getCurrentName(), parser.getText());
+        }
+        catch (IOException e) {
+            throw new IllegalArgumentException("failed reading document JSON", e);
+        }
+    }
+
+}
diff --git a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java
index dec84e46b77..3a48f71c4cd 100644
--- a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java
+++ b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java
@@ -1,15 +1,16 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.document.json;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.google.common.base.Preconditions;
+import java.io.IOException;
+import java.util.ArrayDeque;
+import java.util.Deque;
+import java.util.Iterator;
+import java.util.function.Supplier;
+
/**
* Helper class to enable lookahead in the token stream.
*
@@ -17,101 +18,76 @@ import com.google.common.base.Preconditions;
*/
public class TokenBuffer {
- private final List<Token> tokens;
+ final Deque<Token> tokens = new ArrayDeque<>();
- private int position = 0;
private int nesting = 0;
- public TokenBuffer() {
- this(new ArrayList<>());
- }
-
- public TokenBuffer(List<Token> tokens) {
- this.tokens = tokens;
- if (tokens.size() > 0)
- updateNesting(tokens.get(position).token);
- }
+ public TokenBuffer() { }
/** Returns whether any tokens are available in this */
- public boolean isEmpty() { return remaining() == 0; }
-
- public JsonToken previous() {
- updateNestingGoingBackwards(current());
- position--;
- return current();
- }
-
- /** Returns the current token without changing position, or null if none */
- public JsonToken current() {
- if (isEmpty()) return null;
- Token token = tokens.get(position);
- if (token == null) return null;
- return token.token;
- }
+ public boolean isEmpty() { return tokens.isEmpty(); }
+ /** Returns the next token, or null, and updates the nesting count of this. */
public JsonToken next() {
- position++;
+ advance();
JsonToken token = current();
updateNesting(token);
return token;
}
- /** Returns a given number of tokens ahead, or null if none */
- public JsonToken peek(int ahead) {
- if (tokens.size() <= position + ahead) return null;
- return tokens.get(position + ahead).token;
+ void advance() { // Drops the current token, i.e., the head of the token queue; does nothing when the queue is empty.
+ tokens.poll();
+ }
+
+ /** Returns the current token without changing position, or null if none */
+ public JsonToken current() {
+ return isEmpty() ? null : tokens.peek().token;
}
/** Returns the current token name without changing position, or null if none */
public String currentName() {
- if (isEmpty()) return null;
- Token token = tokens.get(position);
- if (token == null) return null;
- return token.name;
+ return isEmpty() ? null : tokens.peek().name;
}
/** Returns the current token text without changing position, or null if none */
public String currentText() {
- if (isEmpty()) return null;
- Token token = tokens.get(position);
- if (token == null) return null;
- return token.text;
+ return isEmpty() ? null : tokens.peek().text;
}
- public int remaining() {
- return tokens.size() - position;
+ /**
+ * Returns a sequence of remaining tokens in this, or nulls when none remain.
+ * The sequence starts at the token following the current one; the current token is not part of it.
+ * This may fill the token buffer, but not otherwise modify it.
+ */
+ public Supplier<Token> lookahead() {
+ Iterator<Token> iterator = tokens.iterator();
+ if (iterator.hasNext()) iterator.next(); // Skip the current token, which is the head of the queue.
+ return () -> iterator.hasNext() ? iterator.next() : null;
}
private void add(JsonToken token, String name, String text) {
- tokens.add(tokens.size(), new Token(token, name, text));
+ tokens.add(new Token(token, name, text));
}
- public void bufferObject(JsonToken first, JsonParser tokens) {
- bufferJsonStruct(first, tokens, JsonToken.START_OBJECT);
+ public void bufferObject(JsonParser parser) {
+ bufferJsonStruct(parser, JsonToken.START_OBJECT);
}
- private void bufferJsonStruct(JsonToken first, JsonParser tokens, JsonToken firstToken) {
- int localNesting = 0;
- JsonToken t = first;
+ private void bufferJsonStruct(JsonParser parser, JsonToken firstToken) {
+ JsonToken token = parser.currentToken();
+ Preconditions.checkArgument(token == firstToken,
+ "Expected %s, got %s.", firstToken.name(), token);
+ updateNesting(token);
- Preconditions.checkArgument(first == firstToken,
- "Expected %s, got %s.", firstToken.name(), t);
- if (remaining() == 0) {
- updateNesting(t);
+ try {
+ for (int nesting = addFromParser(parser); nesting > 0; nesting += addFromParser(parser))
+ parser.nextValue();
}
- localNesting = storeAndPeekNesting(t, localNesting, tokens);
- while (localNesting > 0) {
- t = nextValue(tokens);
- localNesting = storeAndPeekNesting(t, localNesting, tokens);
+ catch (IOException e) {
+ throw new IllegalArgumentException(e);
}
}
- private int storeAndPeekNesting(JsonToken t, int nesting, JsonParser tokens) {
- addFromParser(t, tokens);
- return nesting + nestingOffset(t);
- }
-
- private int nestingOffset(JsonToken token) {
+ int nestingOffset(JsonToken token) {
if (token == null) return 0;
if (token.isStructStart()) {
return 1;
@@ -122,43 +98,23 @@ public class TokenBuffer {
}
}
- private void addFromParser(JsonToken t, JsonParser tokens) {
- try {
- add(t, tokens.getCurrentName(), tokens.getText());
- } catch (IOException e) {
- throw new IllegalArgumentException(e);
- }
- }
-
- private JsonToken nextValue(JsonParser tokens) {
- try {
- return tokens.nextValue();
- } catch (IOException e) {
- throw new IllegalArgumentException(e);
- }
+ int addFromParser(JsonParser tokens) throws IOException {
+ add(tokens.currentToken(), tokens.getCurrentName(), tokens.getText());
+ return nestingOffset(tokens.currentToken());
}
- private void updateNesting(JsonToken token) {
+ void updateNesting(JsonToken token) {
nesting += nestingOffset(token);
}
- private void updateNestingGoingBackwards(JsonToken token) {
- nesting -= nestingOffset(token);
- }
-
public int nesting() {
return nesting;
}
public void skipToRelativeNesting(int relativeNesting) {
int initialNesting = nesting();
- do {
- next();
- } while ( nesting() > initialNesting + relativeNesting);
- }
-
- public List<Token> rest() {
- return tokens.subList(position, tokens.size());
+ do next();
+ while (nesting() > initialNesting + relativeNesting);
}
public static final class Token {
diff --git a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java
index 74656762fe1..aef7e1cffe2 100644
--- a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java
+++ b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java
@@ -86,16 +86,6 @@ public class DocumentParser {
private void handleIdentLevelOne(DocumentParseInfo documentParseInfo, boolean docIdAndOperationIsSetExternally)
throws IOException {
JsonToken currentToken = parser.getCurrentToken();
- if (currentToken == JsonToken.VALUE_TRUE || currentToken == JsonToken.VALUE_FALSE) {
- try {
- if (CREATE_IF_NON_EXISTENT.equals(parser.getCurrentName())) {
- documentParseInfo.create = Optional.ofNullable(parser.getBooleanValue());
- return;
- }
- } catch (IOException e) {
- throw new RuntimeException("Got IO exception while parsing document", e);
- }
- }
if ((currentToken == JsonToken.VALUE_TRUE || currentToken == JsonToken.VALUE_FALSE) &&
CREATE_IF_NON_EXISTENT.equals(parser.getCurrentName())) {
documentParseInfo.create = Optional.of(currentToken == JsonToken.VALUE_TRUE);
@@ -111,12 +101,11 @@ public class DocumentParser {
}
}
- private void handleIdentLevelTwo(DocumentParseInfo documentParseInfo) {
+ private void handleIdentLevelTwo(DocumentParseInfo documentParseInfo) {
try {
- JsonToken currentToken = parser.getCurrentToken();
// "fields" opens a dictionary and is therefore on level two which might be surprising.
- if (currentToken == JsonToken.START_OBJECT && FIELDS.equals(parser.getCurrentName())) {
- documentParseInfo.fieldsBuffer.bufferObject(currentToken, parser);
+ if (parser.currentToken() == JsonToken.START_OBJECT && FIELDS.equals(parser.getCurrentName())) {
+ documentParseInfo.fieldsBuffer.bufferObject(parser);
processIndent();
}
} catch (IOException e) {
diff --git a/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java b/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java
index 2dce07cdbe6..e859306f04d 100644
--- a/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java
+++ b/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java
@@ -8,6 +8,7 @@ import com.yahoo.document.json.TokenBuffer;
import java.util.Optional;
public class DocumentParseInfo {
+ public DocumentParseInfo() { } // Creates an instance with nothing parsed yet; the public fields are filled in by the code doing the parsing.
public DocumentId documentId;
public Optional<Boolean> create = Optional.empty();
public Optional<String> condition = Optional.empty();
diff --git a/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java b/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java
index 0b7b1ae9996..1fd4029b1a5 100644
--- a/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java
+++ b/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java
@@ -4,13 +4,15 @@ package com.yahoo.document.json.readers;
import com.fasterxml.jackson.core.JsonToken;
import com.yahoo.document.datatypes.TensorFieldValue;
import com.yahoo.document.json.TokenBuffer;
-import com.yahoo.slime.Inspector;
-import com.yahoo.slime.Type;
+import com.yahoo.document.json.TokenBuffer.Token;
import com.yahoo.tensor.IndexedTensor;
import com.yahoo.tensor.MixedTensor;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorAddress;
import com.yahoo.tensor.TensorType;
+import com.yahoo.tensor.TensorType.Dimension;
+
+import java.util.function.Supplier;
import static com.yahoo.document.json.readers.JsonParserHelpers.*;
import static com.yahoo.tensor.serialization.JsonFormat.decodeHexString;
@@ -37,36 +39,43 @@ public class TensorReader {
Tensor.Builder builder = Tensor.Builder.of(tensorFieldValue.getDataType().getTensorType());
expectOneOf(buffer.current(), JsonToken.START_OBJECT, JsonToken.START_ARRAY);
int initNesting = buffer.nesting();
- for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) {
- if (TENSOR_CELLS.equals(buffer.currentName()) && ! primitiveContent(buffer)) {
+ while (true) {
+ Supplier<Token> lookahead = buffer.lookahead();
+ Token next = lookahead.get();
+ if (TENSOR_CELLS.equals(next.name) && ! primitiveContent(next.token, lookahead.get().token)) {
+ buffer.next();
readTensorCells(buffer, builder);
}
- else if (TENSOR_VALUES.equals(buffer.currentName()) && builder.type().dimensions().stream().allMatch(d -> d.isIndexed())) {
+ else if (TENSOR_VALUES.equals(next.name) && builder.type().dimensions().stream().allMatch(Dimension::isIndexed)) {
+ buffer.next();
readTensorValues(buffer, builder);
}
- else if (TENSOR_BLOCKS.equals(buffer.currentName())) {
+ else if (TENSOR_BLOCKS.equals(next.name)) {
+ buffer.next();
readTensorBlocks(buffer, builder);
}
- else if (TENSOR_TYPE.equals(buffer.currentName()) && buffer.current() == JsonToken.VALUE_STRING) {
+ else if (TENSOR_TYPE.equals(next.name) && next.token == JsonToken.VALUE_STRING) {
+ buffer.next();
// Ignore input tensor type
}
+ else if (buffer.nesting() == initNesting && JsonToken.END_OBJECT == next.token) {
+ buffer.next();
+ break;
+ }
else {
- buffer.previous(); // Back up to the start of the enclosing block
readDirectTensorValue(buffer, builder);
- buffer.previous(); // ... and back up to the end of the enclosing block
+ break;
}
}
expectOneOf(buffer.current(), JsonToken.END_OBJECT, JsonToken.END_ARRAY);
tensorFieldValue.assign(builder.build());
}
- static boolean primitiveContent(TokenBuffer buffer) {
- JsonToken cellsValue = buffer.current();
- if (cellsValue.isScalarValue()) return true;
- if (cellsValue == JsonToken.START_ARRAY) {
- JsonToken firstArrayValue = buffer.peek(1);
- if (firstArrayValue == JsonToken.END_ARRAY) return false;
- if (firstArrayValue.isScalarValue()) return true;
+ static boolean primitiveContent(JsonToken current, JsonToken next) {
+ if (current.isScalarValue()) return true;
+ if (current == JsonToken.START_ARRAY) {
+ if (next == JsonToken.END_ARRAY) return false;
+ if (next.isScalarValue()) return true;
}
return false;
}
@@ -186,7 +195,7 @@ public class TensorReader {
boolean hasIndexed = builder.type().dimensions().stream().anyMatch(TensorType.Dimension::isIndexed);
boolean hasMapped = builder.type().dimensions().stream().anyMatch(TensorType.Dimension::isMapped);
- if (isArrayOfObjects(buffer, 0))
+ if (isArrayOfObjects(buffer))
readTensorCells(buffer, builder);
else if ( ! hasMapped)
readTensorValues(buffer, builder);
@@ -196,10 +205,12 @@ public class TensorReader {
readTensorCells(buffer, builder);
}
- private static boolean isArrayOfObjects(TokenBuffer buffer, int ahead) {
- if (buffer.peek(ahead++) != JsonToken.START_ARRAY) return false;
- if (buffer.peek(ahead) == JsonToken.START_ARRAY) return isArrayOfObjects(buffer, ahead); // nested array
- return buffer.peek(ahead) == JsonToken.START_OBJECT;
+ private static boolean isArrayOfObjects(TokenBuffer buffer) {
+ if (buffer.current() != JsonToken.START_ARRAY) return false;
+ Supplier<Token> lookahead = buffer.lookahead();
+ Token next;
+ while ((next = lookahead.get()).token == JsonToken.START_ARRAY) { }
+ return next.token == JsonToken.START_OBJECT;
}
private static TensorAddress readAddress(TokenBuffer buffer, TensorType type) {
diff --git a/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java b/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java
index 113b8732b23..c7303d31ea2 100644
--- a/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java
+++ b/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java
@@ -238,7 +238,7 @@ public class VespaJsonDocumentReader {
"Expected end of JSON struct (%s), got %s", expectedFinalToken, buffer.current());
Preconditions.checkState(buffer.nesting() == 0, "Nesting not zero at end of operation");
Preconditions.checkState(buffer.next() == null, "Dangling data at end of operation");
- Preconditions.checkState(buffer.remaining() == 0, "Dangling data at end of operation");
+ Preconditions.checkState(buffer.isEmpty(), "Dangling data at end of operation");
}
}