diff options
Diffstat (limited to 'document/src/main')
20 files changed, 1673 insertions, 297 deletions
diff --git a/document/src/main/java/com/yahoo/document/Document.java b/document/src/main/java/com/yahoo/document/Document.java index 222ebe29c6d..cf0951fb035 100644 --- a/document/src/main/java/com/yahoo/document/Document.java +++ b/document/src/main/java/com/yahoo/document/Document.java @@ -137,7 +137,7 @@ public class Document extends StructuredFieldValue { } public int getSerializedSize() throws SerializationException { - DocumentSerializer data = DocumentSerializerFactory.create42(new GrowableByteBuffer(64 * 1024, 2.0f)); + DocumentSerializer data = DocumentSerializerFactory.create6(new GrowableByteBuffer(64 * 1024, 2.0f)); data.write(this); return data.getBuf().position(); } @@ -149,7 +149,7 @@ public class Document extends StructuredFieldValue { public final int getApproxSize() { return 4096; } public void serialize(OutputStream out) throws SerializationException { - DocumentSerializer writer = DocumentSerializerFactory.create42(new GrowableByteBuffer(64 * 1024, 2.0f)); + DocumentSerializer writer = DocumentSerializerFactory.create6(new GrowableByteBuffer(64 * 1024, 2.0f)); writer.write(this); GrowableByteBuffer data = writer.getBuf(); byte[] array; @@ -345,38 +345,6 @@ public class Document extends StructuredFieldValue { serialize((DocumentWriter)data); } - @SuppressWarnings("deprecation") - @Deprecated - public void serializeHeader(Serializer data) throws SerializationException { - if (data instanceof DocumentWriter) { - if (data instanceof com.yahoo.document.serialization.VespaDocumentSerializer42) { - ((com.yahoo.document.serialization.VespaDocumentSerializer42)data).setHeaderOnly(true); - } - serialize((DocumentWriter)data); - } else if (data instanceof BufferSerializer) { - serialize(DocumentSerializerFactory.create42(((BufferSerializer) data).getBuf(), true)); - } else { - DocumentSerializer fw = DocumentSerializerFactory.create42(new GrowableByteBuffer(), true); - serialize(fw); - data.put(null, fw.getBuf().getByteBuffer()); - } - } - - @Deprecated - public void serializeBody(Serializer data) throws SerializationException { - if (getBody().getFieldCount() > 0) { - if (data instanceof FieldWriter) { - getBody().serialize(new Field("body", getBody().getDataType()), (FieldWriter) data); - } else if (data instanceof BufferSerializer) { - getBody().serialize(new Field("body", getBody().getDataType()), DocumentSerializerFactory.create42(((BufferSerializer) data).getBuf())); - } else { - DocumentSerializer fw = DocumentSerializerFactory.create42(new GrowableByteBuffer()); - getBody().serialize(new Field("body", getBody().getDataType()), fw); - data.put(null, fw.getBuf().getByteBuffer()); - } - } - } - @Override public DocumentType getDataType() { return (DocumentType)super.getDataType(); diff --git a/document/src/main/java/com/yahoo/document/DocumentTypeManager.java b/document/src/main/java/com/yahoo/document/DocumentTypeManager.java index 5fad35a2287..a3ba27b640f 100644 --- a/document/src/main/java/com/yahoo/document/DocumentTypeManager.java +++ b/document/src/main/java/com/yahoo/document/DocumentTypeManager.java @@ -266,18 +266,13 @@ public class DocumentTypeManager { } final public Document createDocument(GrowableByteBuffer buf) { - DocumentDeserializer data = DocumentDeserializerFactory.create42(this, buf); + DocumentDeserializer data = DocumentDeserializerFactory.create6(this, buf); return new Document(data); } public Document createDocument(DocumentDeserializer data) { return new Document(data); } - public Document createDocument(GrowableByteBuffer header, GrowableByteBuffer body) { - DocumentDeserializer data = DocumentDeserializerFactory.create42(this, header, body); - return new Document(data); - } - /** * Returns a read only view of the registered data types * diff --git a/document/src/main/java/com/yahoo/document/DocumentUpdate.java b/document/src/main/java/com/yahoo/document/DocumentUpdate.java index 0bbb57ec60b..ef075662ee7 100644 --- a/document/src/main/java/com/yahoo/document/DocumentUpdate.java +++ b/document/src/main/java/com/yahoo/document/DocumentUpdate.java @@ -15,8 +15,10 @@ import com.yahoo.io.GrowableByteBuffer; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Optional; /** @@ -38,7 +40,6 @@ import java.util.Optional; * @see com.yahoo.document.update.FieldUpdate * @see com.yahoo.document.update.ValueUpdate */ -//TODO Vespa 7 Remove all deprecated methods and use a map to avoid quadratic scaling on insert/update/remove public class DocumentUpdate extends DocumentOperation implements Iterable<FieldPathUpdate> { @@ -46,7 +47,7 @@ public class DocumentUpdate extends DocumentOperation implements Iterable<FieldP public static final int CLASSID = 0x1000 + 6; private DocumentId docId; - private final List<FieldUpdate> fieldUpdates; + private final Map<Integer, FieldUpdate> id2FieldUpdates; private final List<FieldPathUpdate> fieldPathUpdates; private DocumentType documentType; private Boolean createIfNonExistent; @@ -60,7 +61,7 @@ public class DocumentUpdate extends DocumentOperation implements Iterable<FieldP public DocumentUpdate(DocumentType docType, DocumentId docId) { this.docId = docId; this.documentType = docType; - this.fieldUpdates = new ArrayList<>(); + this.id2FieldUpdates = new HashMap<>(); this.fieldPathUpdates = new ArrayList<>(); } @@ -70,7 +71,7 @@ public class DocumentUpdate extends DocumentOperation implements Iterable<FieldP public DocumentUpdate(DocumentUpdateReader reader) { docId = null; documentType = null; - fieldUpdates = new ArrayList<>(); + id2FieldUpdates = new HashMap<>(); fieldPathUpdates = new ArrayList<>(); reader.read(this); } @@ -113,7 +114,7 @@ public class DocumentUpdate extends DocumentOperation implements Iterable<FieldP public DocumentUpdate applyTo(Document doc) { verifyType(doc); - for (FieldUpdate fieldUpdate : fieldUpdates) { + for (FieldUpdate fieldUpdate : id2FieldUpdates.values()) { fieldUpdate.applyTo(doc); } for (FieldPathUpdate fieldPathUpdate : fieldPathUpdates) { @@ -131,8 +132,9 @@ public class DocumentUpdate extends DocumentOperation implements Iterable<FieldP public DocumentUpdate prune(Document doc) { verifyType(doc); - for (Iterator<FieldUpdate> iter = fieldUpdates.iterator(); iter.hasNext();) { - FieldUpdate update = iter.next(); + for (Iterator<Map.Entry<Integer, FieldUpdate>> iter = id2FieldUpdates.entrySet().iterator(); iter.hasNext();) { + Map.Entry<Integer, FieldUpdate> entry = iter.next(); + FieldUpdate update = entry.getValue(); if (!update.isEmpty()) { ValueUpdate last = update.getValueUpdate(update.size() - 1); if (last instanceof AssignValueUpdate) { @@ -158,34 +160,12 @@ public class DocumentUpdate extends DocumentOperation implements Iterable<FieldP } /** - * Get an unmodifiable list of all field updates that this document update specifies. - * - * @return a list of all FieldUpdates in this DocumentUpdate - * @deprecated Use fieldUpdates() instead. - */ - @Deprecated - public List<FieldUpdate> getFieldUpdates() { - return Collections.unmodifiableList(fieldUpdates); - } - - /** * Get an unmodifiable collection of all field updates that this document update specifies. * * @return a collection of all FieldUpdates in this DocumentUpdate */ public Collection<FieldUpdate> fieldUpdates() { - return Collections.unmodifiableCollection(fieldUpdates); - } - - /** - * Get an unmodifiable list of all field path updates this document update specifies. - * - * @return Returns a list of all field path updates in this document update. - * @deprecated Use fieldPathUpdates() instead. - */ - @Deprecated - public List<FieldPathUpdate> getFieldPathUpdates() { - return Collections.unmodifiableList(fieldPathUpdates); + return Collections.unmodifiableCollection(id2FieldUpdates.values()); } /** @@ -214,36 +194,6 @@ public class DocumentUpdate extends DocumentOperation implements Iterable<FieldP } /** - * Get the field update at the specified index in the list of field updates. - * - * @param index the index of the FieldUpdate to return - * @return the FieldUpdate at the specified index - * @throws IndexOutOfBoundsException if index is out of range - * @deprecated use getFieldUpdate(Field field) instead. - */ - @Deprecated - public FieldUpdate getFieldUpdate(int index) { - return fieldUpdates.get(index); - } - - /** - * Replaces the field update at the specified index in the list of field updates. - * - * @param index index of the FieldUpdate to replace - * @param upd the FieldUpdate to be stored at the specified position - * @return the FieldUpdate previously at the specified position - * @throws IndexOutOfBoundsException if index is out of range - * @deprecated Use removeFieldUpdate/addFieldUpdate instead - */ - @Deprecated - public FieldUpdate setFieldUpdate(int index, FieldUpdate upd) { - FieldUpdate old = fieldUpdates.get(index); - fieldUpdates.set(index, upd); - - return old; - } - - /** * Returns the update for a field * * @param field the field to return the update of @@ -255,7 +205,7 @@ public class DocumentUpdate extends DocumentOperation implements Iterable<FieldP /** Removes all field updates from the list for field updates. */ public void clearFieldUpdates() { - fieldUpdates.clear(); + id2FieldUpdates.clear(); } /** @@ -269,12 +219,7 @@ public class DocumentUpdate extends DocumentOperation implements Iterable<FieldP return field != null ? getFieldUpdate(field) : null; } private FieldUpdate getFieldUpdateById(Integer fieldId) { - for (FieldUpdate fieldUpdate : fieldUpdates) { - if (fieldUpdate.getField().getId() == fieldId) { - return fieldUpdate; - } - } - return null; + return id2FieldUpdates.get(fieldId); } /** @@ -310,7 +255,7 @@ public class DocumentUpdate extends DocumentOperation implements Iterable<FieldP * @return the size of the List of FieldUpdates */ public int size() { - return fieldUpdates.size(); + return id2FieldUpdates.size(); } /** @@ -332,7 +277,7 @@ public class DocumentUpdate extends DocumentOperation implements Iterable<FieldP if (prevUpdate != null) { prevUpdate.addAll(update); } else { - fieldUpdates.add(update); + id2FieldUpdates.put(fieldId, update); } } return this; @@ -372,28 +317,8 @@ public class DocumentUpdate extends DocumentOperation implements Iterable<FieldP } } - /** - * Removes the field update at the specified position in the list of field updates. - * - * @param index the index of the FieldUpdate to remove - * @return the FieldUpdate previously at the specified position - * @throws IndexOutOfBoundsException if index is out of range - * @deprecated use removeFieldUpdate(Field field) instead. - */ - @Deprecated - public FieldUpdate removeFieldUpdate(int index) { - return fieldUpdates.remove(index); - } - public FieldUpdate removeFieldUpdate(Field field) { - for (Iterator<FieldUpdate> it = fieldUpdates.iterator(); it.hasNext();) { - FieldUpdate fieldUpdate = it.next(); - if (fieldUpdate.getField().equals(field)) { - it.remove(); - return fieldUpdate; - } - } - return null; + return id2FieldUpdates.remove(field.getId()); } public FieldUpdate removeFieldUpdate(String fieldName) { @@ -411,7 +336,7 @@ public class DocumentUpdate extends DocumentOperation implements Iterable<FieldP } public final void serialize(GrowableByteBuffer buf) { - serialize(DocumentSerializerFactory.create42(buf)); + serialize(DocumentSerializerFactory.create6(buf)); } public void serialize(DocumentUpdateWriter data) { @@ -427,9 +352,8 @@ public class DocumentUpdate extends DocumentOperation implements Iterable<FieldP if (docId != null ? !docId.equals(that.docId) : that.docId != null) return false; if (documentType != null ? !documentType.equals(that.documentType) : that.documentType != null) return false; - if (fieldPathUpdates != null ? !fieldPathUpdates.equals(that.fieldPathUpdates) : that.fieldPathUpdates != null) - return false; - if (fieldUpdates != null ? !fieldUpdates.equals(that.fieldUpdates) : that.fieldUpdates != null) return false; + if (!fieldPathUpdates.equals(that.fieldPathUpdates)) return false; + if (!id2FieldUpdates.equals(that.id2FieldUpdates)) return false; if (this.getCreateIfNonExistent() != ((DocumentUpdate) o).getCreateIfNonExistent()) return false; return true; @@ -438,8 +362,8 @@ public class DocumentUpdate extends DocumentOperation implements Iterable<FieldP @Override public int hashCode() { int result = docId != null ? docId.hashCode() : 0; - result = 31 * result + (fieldUpdates != null ? fieldUpdates.hashCode() : 0); - result = 31 * result + (fieldPathUpdates != null ? fieldPathUpdates.hashCode() : 0); + result = 31 * result + id2FieldUpdates.hashCode(); + result = 31 * result + fieldPathUpdates.hashCode(); result = 31 * result + (documentType != null ? documentType.hashCode() : 0); return result; } @@ -455,7 +379,7 @@ public class DocumentUpdate extends DocumentOperation implements Iterable<FieldP string.append(": "); string.append("["); - for (FieldUpdate fieldUpdate : fieldUpdates) { + for (FieldUpdate fieldUpdate : id2FieldUpdates.values()) { string.append(fieldUpdate).append(" "); } string.append("]"); @@ -482,14 +406,13 @@ public class DocumentUpdate extends DocumentOperation implements Iterable<FieldP * @return True if this update is empty. */ public boolean isEmpty() { - return fieldUpdates.isEmpty() && fieldPathUpdates.isEmpty(); + return id2FieldUpdates.isEmpty() && fieldPathUpdates.isEmpty(); } /** * Sets whether this update should create the document it updates if that document does not exist. * In this case an empty document is created before the update is applied. * - * @since 5.17 * @param value Whether the document it updates should be created. */ public void setCreateIfNonExistent(boolean value) { @@ -499,8 +422,7 @@ public class DocumentUpdate extends DocumentOperation implements Iterable<FieldP /** * Gets whether this update should create the document it updates if that document does not exist. * - * @since 5.17 - * @return Whether the document it updates should be created. + * @return whether the document it updates should be created. */ public boolean getCreateIfNonExistent() { return createIfNonExistent != null && createIfNonExistent; diff --git a/document/src/main/java/com/yahoo/document/Field.java b/document/src/main/java/com/yahoo/document/Field.java index 5d3d148d832..d0a19a50073 100644 --- a/document/src/main/java/com/yahoo/document/Field.java +++ b/document/src/main/java/com/yahoo/document/Field.java @@ -187,7 +187,7 @@ public class Field extends FieldBase implements FieldSet, Comparable, Serializab * @param type The new type of the field. * @deprecated do not use */ - @Deprecated // TODO: Remove on Vespa 8 + @Deprecated // todo - refactor SD processing to avoid needing this public void setDataType(DataType type) { dataType = type; fieldId = calculateIdV7(null); diff --git a/document/src/main/java/com/yahoo/document/PositionDataType.java b/document/src/main/java/com/yahoo/document/PositionDataType.java index bb110ee7219..0e69171c106 100644 --- a/document/src/main/java/com/yahoo/document/PositionDataType.java +++ b/document/src/main/java/com/yahoo/document/PositionDataType.java @@ -100,12 +100,12 @@ public final class PositionDataType { } public static String getPositionSummaryFieldName(String fieldName) { - // TODO for 6.0, rename to _position to use a field name that is actually legal + // TODO for Vespa 8, consider renaming to _position to use a field name that is actually legal return fieldName + ".position"; } public static String getDistanceSummaryFieldName(String fieldName) { - // TODO for 6.0, rename to _distance to use a field name that is actually legal + // TODO for Vespa 8, consider renaming to _distance to use a field name that is actually legal return fieldName + ".distance"; } diff --git a/document/src/main/java/com/yahoo/document/datatypes/FieldValue.java b/document/src/main/java/com/yahoo/document/datatypes/FieldValue.java index dc3fd36b367..f895ad21b0a 100644 --- a/document/src/main/java/com/yahoo/document/datatypes/FieldValue.java +++ b/document/src/main/java/com/yahoo/document/datatypes/FieldValue.java @@ -52,7 +52,7 @@ public abstract class FieldValue extends Identifiable implements Comparable<Fiel } final public void serialize(GrowableByteBuffer buf) { - serialize(DocumentSerializerFactory.create42(buf)); + serialize(DocumentSerializerFactory.create6(buf)); } public abstract void printXml(XmlStream xml); @@ -140,9 +140,9 @@ public abstract class FieldValue extends Identifiable implements Comparable<Fiel if (target instanceof FieldWriter) { serialize(null, (FieldWriter) target); } else if (target instanceof BufferSerializer) { - serialize(null, DocumentSerializerFactory.create42(((BufferSerializer) target).getBuf())); + serialize(null, DocumentSerializerFactory.create6(((BufferSerializer) target).getBuf())); } else { - DocumentSerializer fw = DocumentSerializerFactory.create42(new GrowableByteBuffer()); + DocumentSerializer fw = DocumentSerializerFactory.create6(new GrowableByteBuffer()); serialize(null, fw); target.put(null, fw.getBuf().getByteBuffer()); } diff --git a/document/src/main/java/com/yahoo/document/fieldpathupdate/AddFieldPathUpdate.java b/document/src/main/java/com/yahoo/document/fieldpathupdate/AddFieldPathUpdate.java index 9fac025c15e..74b94b8135e 100644 --- a/document/src/main/java/com/yahoo/document/fieldpathupdate/AddFieldPathUpdate.java +++ b/document/src/main/java/com/yahoo/document/fieldpathupdate/AddFieldPathUpdate.java @@ -8,7 +8,7 @@ import com.yahoo.document.datatypes.CollectionFieldValue; import com.yahoo.document.datatypes.FieldPathIteratorHandler; import com.yahoo.document.datatypes.FieldValue; import com.yahoo.document.serialization.DocumentUpdateReader; -import com.yahoo.document.serialization.VespaDocumentSerializerHead; +import com.yahoo.document.serialization.VespaDocumentSerializer6; /** * @author <a href="mailto:thomasg@yahoo-inc.com">Thomas Gundersen</a> @@ -96,7 +96,7 @@ public class AddFieldPathUpdate extends FieldPathUpdate { } @Override - public void serialize(VespaDocumentSerializerHead data) { + public void serialize(VespaDocumentSerializer6 data) { data.write(this); } diff --git a/document/src/main/java/com/yahoo/document/fieldpathupdate/AssignFieldPathUpdate.java b/document/src/main/java/com/yahoo/document/fieldpathupdate/AssignFieldPathUpdate.java index e31eefa2c00..b01742018a2 100644 --- a/document/src/main/java/com/yahoo/document/fieldpathupdate/AssignFieldPathUpdate.java +++ b/document/src/main/java/com/yahoo/document/fieldpathupdate/AssignFieldPathUpdate.java @@ -9,7 +9,7 @@ import com.yahoo.document.datatypes.FieldValue; import com.yahoo.document.datatypes.NumericFieldValue; import com.yahoo.document.select.parser.ParseException; import com.yahoo.document.serialization.DocumentUpdateReader; -import com.yahoo.document.serialization.VespaDocumentSerializerHead; +import com.yahoo.document.serialization.VespaDocumentSerializer6; import java.util.HashMap; import java.util.Map; @@ -237,7 +237,7 @@ public class AssignFieldPathUpdate extends FieldPathUpdate { } @Override - public void serialize(VespaDocumentSerializerHead data) { + public void serialize(VespaDocumentSerializer6 data) { data.write(this); } diff --git a/document/src/main/java/com/yahoo/document/fieldpathupdate/FieldPathUpdate.java b/document/src/main/java/com/yahoo/document/fieldpathupdate/FieldPathUpdate.java index 29c876b85fe..d4144116a03 100644 --- a/document/src/main/java/com/yahoo/document/fieldpathupdate/FieldPathUpdate.java +++ b/document/src/main/java/com/yahoo/document/fieldpathupdate/FieldPathUpdate.java @@ -11,7 +11,7 @@ import com.yahoo.document.select.Result; import com.yahoo.document.select.ResultList; import com.yahoo.document.select.parser.ParseException; import com.yahoo.document.serialization.DocumentUpdateReader; -import com.yahoo.document.serialization.VespaDocumentSerializerHead; +import com.yahoo.document.serialization.VespaDocumentSerializer6; import java.util.ListIterator; /** @@ -130,7 +130,7 @@ public abstract class FieldPathUpdate { } } - public void serialize(VespaDocumentSerializerHead data) { + public void serialize(VespaDocumentSerializer6 data) { data.write(this); } diff --git a/document/src/main/java/com/yahoo/document/select/package-info.java b/document/src/main/java/com/yahoo/document/select/package-info.java index e6de6497008..813643a7f06 100644 --- a/document/src/main/java/com/yahoo/document/select/package-info.java +++ b/document/src/main/java/com/yahoo/document/select/package-info.java @@ -1,7 +1,5 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. @ExportPackage -@PublicApi package com.yahoo.document.select; -import com.yahoo.api.annotations.PublicApi; import com.yahoo.osgi.annotation.ExportPackage; diff --git a/document/src/main/java/com/yahoo/document/serialization/DocumentDeserializerFactory.java b/document/src/main/java/com/yahoo/document/serialization/DocumentDeserializerFactory.java index 072f11f5f22..03aa409ee8d 100644 --- a/document/src/main/java/com/yahoo/document/serialization/DocumentDeserializerFactory.java +++ b/document/src/main/java/com/yahoo/document/serialization/DocumentDeserializerFactory.java @@ -13,25 +13,26 @@ public class DocumentDeserializerFactory { /** * Creates a de-serializer for the current head document format. - * This format is an extension of the 4.2 format. + * This format is an extension of the 6.x format. */ public static DocumentDeserializer createHead(DocumentTypeManager manager, GrowableByteBuffer buf) { return new VespaDocumentDeserializerHead(manager, buf); } /** - * Creates a de-serializer for the document format that was created on Vespa 4.2. + * Creates a de-serializer for the 6.x document format. + * This format is an extension of the 4.2 format. */ - @SuppressWarnings("deprecation") - public static DocumentDeserializer create42(DocumentTypeManager manager, GrowableByteBuffer buf) { - return new VespaDocumentDeserializer42(manager, buf); + public static DocumentDeserializer create6(DocumentTypeManager manager, GrowableByteBuffer buf) { + return new VespaDocumentDeserializer6(manager, buf); } /** * Creates a de-serializer for the document format that was created on Vespa 4.2. */ @SuppressWarnings("deprecation") - public static DocumentDeserializer create42(DocumentTypeManager manager, GrowableByteBuffer buf, GrowableByteBuffer body) { - return new VespaDocumentDeserializer42(manager, buf, body); + public static DocumentDeserializer create42(DocumentTypeManager manager, GrowableByteBuffer buf) { + return new VespaDocumentDeserializer42(manager, buf); } + } diff --git a/document/src/main/java/com/yahoo/document/serialization/DocumentSerializerFactory.java b/document/src/main/java/com/yahoo/document/serialization/DocumentSerializerFactory.java index 7ff58855c34..54ec4e2fcca 100644 --- a/document/src/main/java/com/yahoo/document/serialization/DocumentSerializerFactory.java +++ b/document/src/main/java/com/yahoo/document/serialization/DocumentSerializerFactory.java @@ -12,26 +12,34 @@ public class DocumentSerializerFactory { /** * Creates a serializer for the current head document format. - * This format is an extension of the 4.2 format. + * This format is an extension of the 6.x format. */ public static DocumentSerializer createHead(GrowableByteBuffer buf) { return new VespaDocumentSerializerHead(buf); } /** - * Creates a serializer for the document format that was created on Vespa 4.2. + * Creates a serializer for the 6.x document format. + * This format is an extension of the 4.2 format. */ - @SuppressWarnings("deprecation") - public static DocumentSerializer create42(GrowableByteBuffer buf) { - return new VespaDocumentSerializer42(buf); + public static DocumentSerializer create6(GrowableByteBuffer buf) { + return new VespaDocumentSerializer6(buf); + } + + /** + * Creates a serializer for the 6.x document format. + * This format is an extension of the 4.2 format. + */ + public static DocumentSerializer create6() { + return new VespaDocumentSerializer6(new GrowableByteBuffer()); } /** * Creates a serializer for the document format that was created on Vespa 4.2. */ @SuppressWarnings("deprecation") - public static DocumentSerializer create42(GrowableByteBuffer buf, boolean headerOnly) { - return new VespaDocumentSerializer42(buf, headerOnly); + public static DocumentSerializer create42(GrowableByteBuffer buf) { + return new VespaDocumentSerializer42(buf); } /** diff --git a/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer42.java b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer42.java index 7ff5729ca39..7ec4433a24f 100644 --- a/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer42.java +++ b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer42.java @@ -63,6 +63,7 @@ import com.yahoo.tensor.serialization.TypedBinaryFormat; import com.yahoo.text.Utf8; import com.yahoo.text.Utf8Array; import com.yahoo.text.Utf8String; +import com.yahoo.vespa.objects.BufferSerializer; import com.yahoo.vespa.objects.FieldBase; import java.nio.ByteBuffer; @@ -80,9 +81,9 @@ import static com.yahoo.text.Utf8.calculateStringPositions; * @deprecated Please use {@link com.yahoo.document.serialization.VespaDocumentDeserializerHead} instead for new code. * @author baldersheim */ -@Deprecated // Remove on Vespa 7 +@Deprecated // TODO: Remove on Vespa 8 // When removing: Move content of this class into VespaDocumentDeserializerHead -public class VespaDocumentDeserializer42 extends VespaDocumentSerializer42 implements DocumentDeserializer { +public class VespaDocumentDeserializer42 extends BufferSerializer implements DocumentDeserializer { private final Compressor compressor = new Compressor(); private DocumentTypeManager manager; diff --git a/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer6.java b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer6.java new file mode 100644 index 00000000000..5424798110c --- /dev/null +++ b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer6.java @@ -0,0 +1,880 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.document.serialization; + +import com.yahoo.collections.Tuple2; +import com.yahoo.compress.CompressionType; +import com.yahoo.compress.Compressor; +import com.yahoo.document.annotation.AlternateSpanList; +import com.yahoo.document.annotation.Annotation; +import com.yahoo.document.annotation.AnnotationReference; +import com.yahoo.document.annotation.AnnotationType; +import com.yahoo.document.annotation.Span; +import com.yahoo.document.annotation.SpanList; +import com.yahoo.document.annotation.SpanNode; +import com.yahoo.document.annotation.SpanNodeParent; +import com.yahoo.document.annotation.SpanTree; +import com.yahoo.document.ArrayDataType; +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.DataTypeName; +import com.yahoo.document.datatypes.Array; +import com.yahoo.document.datatypes.BoolFieldValue; +import com.yahoo.document.datatypes.ByteFieldValue; +import com.yahoo.document.datatypes.CollectionFieldValue; +import com.yahoo.document.datatypes.DoubleFieldValue; +import com.yahoo.document.datatypes.FieldValue; +import com.yahoo.document.datatypes.FloatFieldValue; +import com.yahoo.document.datatypes.IntegerFieldValue; +import com.yahoo.document.datatypes.LongFieldValue; +import com.yahoo.document.datatypes.MapFieldValue; +import com.yahoo.document.datatypes.PredicateFieldValue; +import com.yahoo.document.datatypes.Raw; +import com.yahoo.document.datatypes.ReferenceFieldValue; +import com.yahoo.document.datatypes.StringFieldValue; +import com.yahoo.document.datatypes.Struct; +import com.yahoo.document.datatypes.StructuredFieldValue; +import com.yahoo.document.datatypes.TensorFieldValue; +import com.yahoo.document.datatypes.WeightedSet; +import com.yahoo.document.Document; +import com.yahoo.document.DocumentId; +import com.yahoo.document.DocumentType; +import com.yahoo.document.DocumentTypeManager; +import com.yahoo.document.DocumentUpdate; +import com.yahoo.document.Field; +import com.yahoo.document.fieldpathupdate.AddFieldPathUpdate; +import com.yahoo.document.fieldpathupdate.AssignFieldPathUpdate; +import com.yahoo.document.fieldpathupdate.FieldPathUpdate; +import com.yahoo.document.fieldpathupdate.RemoveFieldPathUpdate; +import com.yahoo.document.MapDataType; +import com.yahoo.document.predicate.BinaryFormat; +import com.yahoo.document.select.parser.ParseException; +import com.yahoo.document.StructDataType; +import com.yahoo.document.update.AddValueUpdate; +import com.yahoo.document.update.ArithmeticValueUpdate; +import com.yahoo.document.update.AssignValueUpdate; +import com.yahoo.document.update.ClearValueUpdate; +import com.yahoo.document.update.FieldUpdate; +import com.yahoo.document.update.MapValueUpdate; +import com.yahoo.document.update.RemoveValueUpdate; +import com.yahoo.document.update.ValueUpdate; +import com.yahoo.document.WeightedSetDataType; +import com.yahoo.io.GrowableByteBuffer; +import com.yahoo.tensor.serialization.TypedBinaryFormat; +import com.yahoo.text.Utf8; +import com.yahoo.text.Utf8Array; +import com.yahoo.text.Utf8String; +import com.yahoo.vespa.objects.BufferSerializer; +import com.yahoo.vespa.objects.FieldBase; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import static com.yahoo.text.Utf8.calculateStringPositions; + +/** + * Class used for de-serializing documents on the Vespa 6.x document format. + * + * @author baldersheim + */ +public class VespaDocumentDeserializer6 extends BufferSerializer implements DocumentDeserializer { + + private final Compressor compressor = new Compressor(); + private DocumentTypeManager manager; + private short version; + private List<SpanNode> spanNodes; + private List<Annotation> annotations; + private int[] stringPositions; + + VespaDocumentDeserializer6(DocumentTypeManager manager, GrowableByteBuffer buf) { + super(buf); + this.manager = manager; + this.version = Document.SERIALIZED_VERSION; + } + + final public DocumentTypeManager getDocumentTypeManager() { return manager; } + + public void read(Document document) { + read(null, document); + } + + @SuppressWarnings("deprecation") + public void read(FieldBase field, Document doc) { + // Verify that we have correct version + version = getShort(null); + if (version < 6 || version > Document.SERIALIZED_VERSION) { + throw new DeserializationException("Unknown version " + version + ", expected " + + Document.SERIALIZED_VERSION + "."); + } + + int dataLength = 0; + int dataPos = 0; + + if (version < 7) { + getInt2_4_8Bytes(null); // Total document size.. Ignore + } else { + dataLength = getInt(null); + dataPos = position(); + } + + doc.setId(readDocumentId()); + + Byte content = getByte(null); + + doc.setDataType(readDocumentType()); + + Struct h = doc.getHeader(); + Struct b = doc.getBody(); + h.clear(); + b.clear(); + if ((content & 0x2) != 0) { + readHeaderBody(h, b); + } + if ((content & 0x4) != 0) { + readHeaderBody(b, h); + } + + if (version < 8) { + int crcVal = getInt(null); + } + + if (version > 6) { + if (dataLength != (position() - dataPos)) { + throw new DeserializationException("Length mismatch"); + } + } + } + public void read(FieldBase field, FieldValue value) { + throw new IllegalArgumentException("read not implemented yet."); + } + + public <T extends FieldValue> void read(FieldBase field, Array<T> array) { + int numElements = getNumCollectionElems(); + ArrayList<T> list = new ArrayList<T>(numElements); + ArrayDataType type = array.getDataType(); + for (int i = 0; i < numElements; i++) { + if (version < 7) { + getInt(null); // We don't need size for anything + } + FieldValue fv = type.getNestedType().createFieldValue(); + fv.deserialize(null, this); + list.add((T) fv); + } + array.clear(); + array.addAll(list); + } + + public <K extends FieldValue, V extends FieldValue> void read(FieldBase field, MapFieldValue<K, V> map) { + int numElements = getNumCollectionElems(); + Map<K,V> hash = new HashMap<>(); + MapDataType type = map.getDataType(); + for (int i = 0; i < numElements; i++) { + if (version < 7) { + getInt(null); // We don't need size for anything + } + K key = (K) type.getKeyType().createFieldValue(); + V val = (V) type.getValueType().createFieldValue(); + key.deserialize(null, this); + val.deserialize(null, this); + hash.put(key, val); + } + map.clear(); + map.putAll(hash); + } + + private int getNumCollectionElems() { + int numElements; + if (version < 7) { + getInt(null); // We already know the nested type, so ignore that.. + numElements = getInt(null); + } else { + numElements = getInt1_2_4Bytes(null); + } + if (numElements < 0) { + throw new DeserializationException("Bad number of array/map elements, " + numElements); + } + return numElements; + } + + public <T extends FieldValue> void read(FieldBase field, CollectionFieldValue<T> value) { + throw new IllegalArgumentException("read not implemented yet."); + } + public void read(FieldBase field, ByteFieldValue value) { value.assign(getByte(null)); } + + @Override + public void read(FieldBase field, BoolFieldValue value) { + value.setBoolean((getByte(null) != 0)); + } + + public void read(FieldBase field, DoubleFieldValue value) { value.assign(getDouble(null)); } + public void read(FieldBase field, FloatFieldValue value) { value.assign(getFloat(null)); } + public void read(FieldBase field, IntegerFieldValue value) { value.assign(getInt(null)); } + public void read(FieldBase field, LongFieldValue value) { value.assign(getLong(null)); } + + public void read(FieldBase field, Raw value) { + int rawsize = getInt(null); + byte[] rawBytes = getBytes(null, rawsize); + value.assign(rawBytes); + } + + @Override + public void read(FieldBase field, PredicateFieldValue value) { + int len = getInt(null); + byte[] buf = getBytes(null, len); + value.assign(BinaryFormat.decode(buf)); + } + + public void read(FieldBase field, StringFieldValue value) { + byte coding = getByte(null); + + int length = getInt1_4Bytes(null); + + //OK, it seems that this length includes null termination. + //NOTE: the following four lines are basically parseNullTerminatedString() inlined, + //but we need to use the UTF-8 buffer below, so not using that method... + byte[] stringArray = new byte[length - 1]; + buf.get(stringArray); + buf.get(); //move past 0-termination + value.setUnChecked(Utf8.toString(stringArray)); + + if ((coding & 64) == 64) { + //we have a span tree! + try { + //we don't support serialization of nested span trees, so this is safe: + stringPositions = calculateStringPositions(stringArray); + //total length: + int size = buf.getInt(); + int startPos = buf.position(); + + int numSpanTrees = buf.getInt1_2_4Bytes(); + + for (int i = 0; i < numSpanTrees; i++) { + SpanTree tree = new SpanTree(); + StringFieldValue treeName = new StringFieldValue(); + treeName.deserialize(this); + tree.setName(treeName.getString()); + value.setSpanTree(tree); + readSpanTree(tree, false); + } + + buf.position(startPos + size); + } finally { + stringPositions = null; + } + } + } + + @Override + public void read(FieldBase field, TensorFieldValue value) { + int encodedTensorLength = buf.getInt1_4Bytes(); + if (encodedTensorLength > 0) { + byte[] encodedTensor = getBytes(null, encodedTensorLength); + value.assign(TypedBinaryFormat.decode(Optional.of(value.getDataType().getTensorType()), + GrowableByteBuffer.wrap(encodedTensor))); + } else { + value.clear(); + } + } + + @Override + public void read(FieldBase field, ReferenceFieldValue value) { + final boolean documentIdPresent = (buf.get() != 0); + if (documentIdPresent) { + value.assign(readDocumentId()); + } else { + value.clear(); + } + } + + public void read(FieldBase fieldDef, Struct s) { + s.setVersion(version); + int startPos = position(); + + if (version < 6) { + throw new DeserializationException("Illegal document serialization version " + version); + } + + int dataSize; + if (version < 7) { + long rSize = getInt2_4_8Bytes(null); + //TODO: Look into how to support data segments larger than INT_MAX bytes + if (rSize > Integer.MAX_VALUE) { + throw new DeserializationException("Raw size of data block is too large."); + } + dataSize = (int)rSize; + } else { + dataSize = getInt(null); + } + + byte comprCode = getByte(null); + CompressionType compression = CompressionType.valueOf(comprCode); + + int uncompressedSize = 0; + if (compression != CompressionType.NONE && + compression != CompressionType.INCOMPRESSIBLE) + { + // uncompressedsize (full size of FIELDS only, after decompression) + long pSize = getInt2_4_8Bytes(null); + //TODO: Look into how to support data segments larger than INT_MAX bytes + if (pSize > Integer.MAX_VALUE) { + throw new DeserializationException("Uncompressed size of data block is too large."); + } + uncompressedSize = (int) pSize; + } + + int numberOfFields = getInt1_4Bytes(null); + + List<Tuple2<Integer, Long>> fieldIdsAndLengths = new ArrayList<>(numberOfFields); + for (int i=0; i<numberOfFields; ++i) { + // id, length (length only used for unknown fields + fieldIdsAndLengths.add(new Tuple2<>(getInt1_4Bytes(null), getInt2_4_8Bytes(null))); + } + + // save a reference to the big buffer we're reading from: + GrowableByteBuffer bigBuf = buf; + + if (version < 7) { + // In V6 and earlier, the length included the header. + int headerSize = position() - startPos; + dataSize -= headerSize; + } + byte[] destination = compressor.decompress(compression, getBuf().array(), position(), uncompressedSize, Optional.of(dataSize)); + + // set position in original buffer to after data + position(position() + dataSize); + + // for a while: deserialize from this buffer instead: + buf = GrowableByteBuffer.wrap(destination); + + s.clear(); + StructDataType type = s.getDataType(); + for (int i=0; i<numberOfFields; ++i) { + Field structField = type.getField(fieldIdsAndLengths.get(i).first, version); + if (structField == null) { + //ignoring unknown field: + position(position() + fieldIdsAndLengths.get(i).second.intValue()); + } else { + int posBefore = position(); + FieldValue value = structField.getDataType().createFieldValue(); + value.deserialize(structField, this); + s.setFieldValue(structField, value); + //jump to beginning of next field: + position(posBefore + fieldIdsAndLengths.get(i).second.intValue()); + } + } + + // restore the original buffer + buf = bigBuf; + } + + private void readHeaderBody(Struct primary, Struct alternate) { + primary.setVersion(version); + int startPos = position(); + + if (version < 6) { + throw new DeserializationException("Illegal document serialization version " + version); + } + + int dataSize; + if (version < 7) { + long rSize = getInt2_4_8Bytes(null); + //TODO: Look into how to support data segments larger than INT_MAX bytes + if (rSize > Integer.MAX_VALUE) { + throw new DeserializationException("Raw size of data block is too large."); + } + dataSize = (int)rSize; + } else { + dataSize = getInt(null); + } + + byte comprCode = getByte(null); + CompressionType compression = CompressionType.valueOf(comprCode); + + int uncompressedSize = 0; + if (compression != CompressionType.NONE && + compression != CompressionType.INCOMPRESSIBLE) + { + // uncompressedsize (full size of FIELDS only, after decompression) + long pSize = getInt2_4_8Bytes(null); + //TODO: Look into how to support data segments larger than INT_MAX bytes + if (pSize > Integer.MAX_VALUE) { + throw new DeserializationException("Uncompressed size of data block is too large."); + } + uncompressedSize = (int) pSize; + } + + int numberOfFields = getInt1_4Bytes(null); + + List<Tuple2<Integer, Long>> fieldIdsAndLengths = new ArrayList<>(numberOfFields); + for (int i=0; i<numberOfFields; ++i) { + // id, length (length only used for unknown fields + fieldIdsAndLengths.add(new Tuple2<>(getInt1_4Bytes(null), getInt2_4_8Bytes(null))); + } + + // save a reference to the big buffer we're reading from: + GrowableByteBuffer bigBuf = buf; + + if (version < 7) { + // In V6 and earlier, the length included the header. + int headerSize = position() - startPos; + dataSize -= headerSize; + } + byte[] destination = compressor.decompress(compression, getBuf().array(), position(), uncompressedSize, Optional.of(dataSize)); + + // set position in original buffer to after data + position(position() + dataSize); + + // for a while: deserialize from this buffer instead: + buf = GrowableByteBuffer.wrap(destination); + + StructDataType priType = primary.getDataType(); + StructDataType altType = alternate.getDataType(); + for (int i=0; i<numberOfFields; ++i) { + int posBefore = position(); + Struct s = null; + Integer f_id = fieldIdsAndLengths.get(i).first; + Field structField = priType.getField(f_id, version); + if (structField != null) { + s = primary; + } else { + structField = altType.getField(f_id, version); + if (structField != null) { + s = alternate; + } + } + if (s != null) { + FieldValue value = structField.getDataType().createFieldValue(); + value.deserialize(structField, this); + s.setFieldValue(structField, value); + } + //jump to beginning of next field: + position(posBefore + fieldIdsAndLengths.get(i).second.intValue()); + } + + // restore the original buffer + buf = bigBuf; + } + + public void read(FieldBase field, StructuredFieldValue value) { + throw new IllegalArgumentException("read not implemented yet."); + } + public <T extends FieldValue> void read(FieldBase field, WeightedSet<T> ws) { + WeightedSetDataType type = ws.getDataType(); + getInt(null); // Have no need for type + + int numElements = getInt(null); + if (numElements < 0) { + throw new DeserializationException("Bad number of weighted set elements, " + numElements); + } + + ws.clearAndReserve(numElements * 2); // Avoid resizing + for (int i = 0; i < numElements; i++) { + int size = getInt(null); + FieldValue value = type.getNestedType().createFieldValue(); + value.deserialize(null, this); + IntegerFieldValue weight = new IntegerFieldValue(getInt(null)); + ws.putUnChecked((T) value, weight); + } + + } + + public void read(FieldBase field, AnnotationReference value) { + int seqId = buf.getInt1_2_4Bytes(); + try { + Annotation a = annotations.get(seqId); + value.setReferenceNoCompatibilityCheck(a); + } catch (IndexOutOfBoundsException iiobe) { + throw new SerializationException("Could not serialize AnnotationReference value, reference not found.", iiobe); + } + } + + private Utf8String deserializeAttributeString() throws DeserializationException { + int length = getByte(null); + return new Utf8String(parseNullTerminatedString(length)); + } + + private Utf8Array parseNullTerminatedString() { return parseNullTerminatedString(getBuf().getByteBuffer()); } + private Utf8Array parseNullTerminatedString(int lengthExcludingNull) { return parseNullTerminatedString(getBuf().getByteBuffer(), lengthExcludingNull); } + + static Utf8Array parseNullTerminatedString(ByteBuffer buf, int lengthExcludingNull) throws DeserializationException { + Utf8Array utf8 = new Utf8Array(buf, lengthExcludingNull); + buf.get(); //move past 0-termination + return utf8; + } + + static Utf8Array parseNullTerminatedString(ByteBuffer buf) throws DeserializationException { + //search for 0-byte + int end = getFirstNullByte(buf); + + if (end == -1) { + throw new DeserializationException("Could not locate terminating 0-byte for string"); + } + + return parseNullTerminatedString(buf, end - buf.position()); + } + + private static int getFirstNullByte(ByteBuffer buf) { + int end = -1; + int start = buf.position(); + + while (true) { + try { + byte dataByte = buf.get(); + if (dataByte == (byte) 0) { + end = buf.position() - 1; + break; + } + } catch (Exception e) { + break; + } + } + + buf.position(start); + return end; + } + + public void read(DocumentUpdate update) { + update.setId(new DocumentId(this)); + update.setDocumentType(readDocumentType()); + + int size = getInt(null); + + for (int i = 0; i < size; i++) { + update.addFieldUpdate(new FieldUpdate(this, update.getDocumentType(), 8)); + } + + int sizeAndFlags = getInt(null); + update.setCreateIfNonExistent(DocumentUpdateFlags.extractFlags(sizeAndFlags).getCreateIfNonExistent()); + size = DocumentUpdateFlags.extractValue(sizeAndFlags); + + for (int i = 0; i < size; i++) { + int type = getByte(null); + update.addFieldPathUpdate(FieldPathUpdate.create(FieldPathUpdate.Type.valueOf(type), + update.getDocumentType(), this)); + } + } + + + public void read(FieldPathUpdate update) { + String fieldPath = getString(null); + String whereClause = getString(null); + update.setFieldPath(fieldPath); + + try { + update.setWhereClause(whereClause); + } catch (ParseException e) { + throw new DeserializationException(e); + } + } + + public void read(AssignFieldPathUpdate update) { + byte flags = getByte(null); + update.setRemoveIfZero((flags & AssignFieldPathUpdate.REMOVE_IF_ZERO) != 0); + update.setCreateMissingPath((flags & AssignFieldPathUpdate.CREATE_MISSING_PATH) != 0); + if ((flags & AssignFieldPathUpdate.ARITHMETIC_EXPRESSION) != 0) { + update.setExpression(getString(null)); + } else { + DataType dt = update.getFieldPath().getResultingDataType(); + FieldValue fv = dt.createFieldValue(); + fv.deserialize(this); + update.setNewValue(fv); + } + } + + public void read(RemoveFieldPathUpdate update) { + + } + + public void read(AddFieldPathUpdate update) { + DataType dt = update.getFieldPath().getResultingDataType(); + FieldValue fv = dt.createFieldValue(); + dt.createFieldValue(); + fv.deserialize(this); + + if (!(fv instanceof Array)) { + throw new DeserializationException("Add only applicable to array types"); + } + update.setNewValues((Array)fv); + } + + public ValueUpdate getValueUpdate(DataType superType, DataType subType) { + int vuTypeId = getInt(null); + + ValueUpdate.ValueUpdateClassID op = ValueUpdate.ValueUpdateClassID.getID(vuTypeId); + if (op == null) { + throw new IllegalArgumentException("Read type "+vuTypeId+" of bytebuffer, but this is not a legal value update type."); + } + + switch (op) { + case ADD: + { + FieldValue fval = subType.createFieldValue(); + fval.deserialize(this); + int weight = getInt(null); + return new AddValueUpdate(fval, weight); + } + case ARITHMETIC: + int opId = getInt(null); + ArithmeticValueUpdate.Operator operator = ArithmeticValueUpdate.Operator.getID(opId); + double operand = getDouble(null); + return new ArithmeticValueUpdate(operator, operand); + case ASSIGN: + { + byte contents = getByte(null); + FieldValue fval = null; + if (contents == (byte) 1) { + fval = superType.createFieldValue(); + fval.deserialize(this); + } + return new AssignValueUpdate(fval); + } + case CLEAR: + return new ClearValueUpdate(); + case MAP: + if (superType instanceof ArrayDataType) { + CollectionDataType type = (CollectionDataType) superType; + IntegerFieldValue index = new IntegerFieldValue(); + index.deserialize(this); + ValueUpdate update = getValueUpdate(type.getNestedType(), null); + return new MapValueUpdate(index, update); + } else if (superType instanceof WeightedSetDataType) { + CollectionDataType type = (CollectionDataType) superType; + FieldValue fval = type.getNestedType().createFieldValue(); + fval.deserialize(this); + ValueUpdate update = getValueUpdate(DataType.INT, null); + return new MapValueUpdate(fval, update); + } else { + throw new DeserializationException("MapValueUpdate only works for arrays and weighted sets"); + } + case REMOVE: + FieldValue fval = ((CollectionDataType) superType).getNestedType().createFieldValue(); + fval.deserialize(this); + return new RemoveValueUpdate(fval); + default: + throw new DeserializationException( + "Could not deserialize ValueUpdate, unknown valueUpdateClassID type " + vuTypeId); + } + } + + public void read(FieldUpdate fieldUpdate) { + int fieldId = getInt(null); + Field field = fieldUpdate.getDocumentType().getField(fieldId, fieldUpdate.getSerializationVersion()); + if (field == null) { + throw new DeserializationException( + "Cannot deserialize FieldUpdate, field fieldId " + fieldId + " not found in " + fieldUpdate.getDocumentType()); + } + + fieldUpdate.setField(field); + int size = getInt(null); + + for (int i = 0; i < size; i++) { + if (field.getDataType() instanceof CollectionDataType) { + CollectionDataType collType = (CollectionDataType) field.getDataType(); + fieldUpdate.addValueUpdate(getValueUpdate(collType, collType.getNestedType())); + } else { + fieldUpdate.addValueUpdate(getValueUpdate(field.getDataType(), null)); + } + } + } + + public DocumentId readDocumentId() { + Utf8String uri = new Utf8String(parseNullTerminatedString(getBuf().getByteBuffer())); + return DocumentId.createFromSerialized(uri.toString()); + } + + public DocumentType readDocumentType() { + Utf8Array docTypeName = parseNullTerminatedString(); + int ignored = getShort(null); // used to hold the version + + DocumentType docType = manager.getDocumentType(new DataTypeName(docTypeName)); + if (docType == null) { + throw new DeserializationException("No known document type with name " + + new Utf8String(docTypeName).toString()); + } + return docType; + } + + private SpanNode readSpanNode() { + byte type = buf.get(); + buf.position(buf.position() - 1); + + SpanNode retval; + if ((type & Span.ID) == Span.ID) { + retval = new Span(); + if (spanNodes != null) { + spanNodes.add(retval); + } + read((Span) retval); + } else if ((type & SpanList.ID) == SpanList.ID) { + retval = new SpanList(); + if (spanNodes != null) { + spanNodes.add(retval); + } + read((SpanList) retval); + } else if ((type & AlternateSpanList.ID) == AlternateSpanList.ID) { + retval = new AlternateSpanList(); + if (spanNodes != null) { + spanNodes.add(retval); + } + read((AlternateSpanList) retval); + } else { + throw new DeserializationException("Cannot read SpanNode of type " + type); + } + return retval; + } + + private void readSpanTree(SpanTree tree, boolean readName) { + //we don't support serialization of nested span trees: + if (spanNodes != null || annotations != null) { + throw new SerializationException("Deserialization of nested SpanTrees is not supported."); + } + + //we're going to write a new SpanTree, create a new Map for nodes: + spanNodes = new ArrayList<SpanNode>(); + annotations = new ArrayList<Annotation>(); + + try { + if (readName) { + StringFieldValue treeName = new StringFieldValue(); + treeName.deserialize(this); + tree.setName(treeName.getString()); + } + + SpanNode root = readSpanNode(); + tree.setRoot(root); + + int numAnnotations = buf.getInt1_2_4Bytes(); + + for (int i = 0; i < numAnnotations; i++) { + Annotation a = new Annotation(); + annotations.add(a); + } + for (int i = 0; i < numAnnotations; i++) { + read(annotations.get(i)); + } + for (Annotation a : annotations) { + tree.annotate(a); + } + + for (SpanNode node: spanNodes) { + if (node instanceof Span) { + correctIndexes((Span) node); + } + } + } finally { + //we're done, let's set this to null to save memory and prevent madness: + spanNodes = null; + annotations = null; + } + } + + public void read(SpanTree tree) { + readSpanTree(tree, true); + } + + public void read(Annotation annotation) { + int annotationTypeId = buf.getInt(); + AnnotationType type = manager.getAnnotationTypeRegistry().getType(annotationTypeId); + + if (type == null) { + throw new DeserializationException("Cannot deserialize annotation of type " + annotationTypeId + " (unknown type)"); + } + + annotation.setType(type); + + byte features = buf.get(); + int length = buf.getInt1_2_4Bytes(); + + if ((features & (byte) 1) == (byte) 1) { + //we have a span node + int spanNodeId = buf.getInt1_2_4Bytes(); + try { + SpanNode node = spanNodes.get(spanNodeId); + annotation.setSpanNode(node); + } catch (IndexOutOfBoundsException ioobe) { + throw new DeserializationException("Could not deserialize annotation, associated span node not found ", ioobe); + } + } + if ((features & (byte) 2) == (byte) 2) { + //we have a value: + int dataTypeId = buf.getInt(); + + //if this data type ID the same as the one in our config? + if (dataTypeId != type.getDataType().getId()) { + //not the same, but we will handle it gracefully, and just skip past the data: + buf.position(buf.position() + length - 4); + } else { + FieldValue value = type.getDataType().createFieldValue(); + value.deserialize(this); + annotation.setFieldValue(value); + } + } + } + + public void read(Span span) { + byte type = buf.get(); + if ((type & Span.ID) != Span.ID) { + throw new DeserializationException("Cannot deserialize Span with type " + type); + } + span.setFrom(buf.getInt1_2_4Bytes()); + span.setLength(buf.getInt1_2_4Bytes()); + } + + private void correctIndexes(Span span) { + if (stringPositions == null) { + throw new DeserializationException("Cannot deserialize Span, no access to parent StringFieldValue."); + } + int fromIndex = stringPositions[span.getFrom()]; + int toIndex = stringPositions[span.getTo()]; + int length = toIndex - fromIndex; + + span.setFrom(fromIndex); + span.setLength(length); + } + + public void read(SpanList spanList) { + byte type = buf.get(); + if ((type & SpanList.ID) != SpanList.ID) { + throw new DeserializationException("Cannot deserialize SpanList with type " + type); + } + List<SpanNode> nodes = readSpanList(spanList); + for (SpanNode node : nodes) { + spanList.add(node); + } + } + + public void read(AlternateSpanList altSpanList) { + byte type = buf.get(); + if ((type & AlternateSpanList.ID) != AlternateSpanList.ID) { + throw new DeserializationException("Cannot deserialize AlternateSpanList with type " + type); + } + int numSubTrees = buf.getInt1_2_4Bytes(); + + for (int i = 0; i < numSubTrees; i++) { + double prob = buf.getDouble(); + List<SpanNode> list = readSpanList(altSpanList); + + if (i == 0) { + for (SpanNode node : list) { + altSpanList.add(node); + } + altSpanList.setProbability(0, prob); + } else { + altSpanList.addChildren(i, list, prob); + } + } + } + + private List<SpanNode> readSpanList(SpanNodeParent parent) { + int size = buf.getInt1_2_4Bytes(); + List<SpanNode> spanList = new ArrayList<SpanNode>(); + for (int i = 0; i < size; i++) { + spanList.add(readSpanNode()); + } + return spanList; + } + +} diff --git a/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializerHead.java b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializerHead.java index 40aec94aec6..1e90395a153 100644 --- a/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializerHead.java +++ b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializerHead.java @@ -1,14 +1,8 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.document.serialization; -import com.yahoo.document.DocumentId; import com.yahoo.document.DocumentTypeManager; -import com.yahoo.document.DocumentUpdate; -import com.yahoo.document.datatypes.BoolFieldValue; -import com.yahoo.document.fieldpathupdate.FieldPathUpdate; -import com.yahoo.document.update.FieldUpdate; import com.yahoo.io.GrowableByteBuffer; -import com.yahoo.vespa.objects.FieldBase; /** * Class used for de-serializing documents on the current head document format. @@ -16,36 +10,10 @@ import com.yahoo.vespa.objects.FieldBase; * @author baldersheim */ @SuppressWarnings("deprecation") -public class VespaDocumentDeserializerHead extends VespaDocumentDeserializer42 { +public class VespaDocumentDeserializerHead extends VespaDocumentDeserializer6 { public VespaDocumentDeserializerHead(DocumentTypeManager manager, GrowableByteBuffer buffer) { super(manager, buffer); } - @Override - public void read(DocumentUpdate update) { - update.setId(new DocumentId(this)); - update.setDocumentType(readDocumentType()); - - int size = getInt(null); - - for (int i = 0; i < size; i++) { - update.addFieldUpdate(new FieldUpdate(this, update.getDocumentType(), 8)); - } - - int sizeAndFlags = getInt(null); - update.setCreateIfNonExistent(DocumentUpdateFlags.extractFlags(sizeAndFlags).getCreateIfNonExistent()); - size = DocumentUpdateFlags.extractValue(sizeAndFlags); - - for (int i = 0; i < size; i++) { - int type = getByte(null); - update.addFieldPathUpdate(FieldPathUpdate.create(FieldPathUpdate.Type.valueOf(type), - update.getDocumentType(), this)); - } - } - - @Override - public void read(FieldBase field, BoolFieldValue value) { - value.setBoolean((getByte(null) != 0)); - } } diff --git a/document/src/main/java/com/yahoo/document/serialization/VespaDocumentSerializer42.java b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentSerializer42.java index 581c7df8aee..ebe9a124033 100644 --- a/document/src/main/java/com/yahoo/document/serialization/VespaDocumentSerializer42.java +++ b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentSerializer42.java @@ -67,11 +67,10 @@ import static com.yahoo.text.Utf8.calculateBytePositions; * @deprecated use {@link com.yahoo.document.serialization.VespaDocumentSerializerHead} instead for new code * @author baldersheim */ -@Deprecated // OK: Don't remove on Vespa 6: Mail may have documents on this format still +@Deprecated // TODO: Remove on Vespa 8 // When removing: Move content into VespaDocumentSerializerHead public class VespaDocumentSerializer42 extends BufferSerializer implements DocumentSerializer { - private boolean headerOnly; private int spanNodeCounter = -1; private int[] bytePositions; @@ -83,55 +82,45 @@ public class VespaDocumentSerializer42 extends BufferSerializer implements Docum super(); } - VespaDocumentSerializer42(GrowableByteBuffer buf, boolean headerOnly) { - this(buf); - this.headerOnly = headerOnly; - } - - public void setHeaderOnly(boolean headerOnly) { - this.headerOnly = headerOnly; - } - public void write(Document doc) { write(new Field(doc.getDataType().getName(), 0, doc.getDataType(), true), doc); } public void write(FieldBase field, Document doc) { - //save the starting position in the buffer - int startPos = buf.position(); - buf.putShort(Document.SERIALIZED_VERSION); + //save the starting position in the buffer + int lenPos = buf.position(); // Temporary length, fill in after serialization is done. buf.putInt(0); doc.getId().serialize(this); + Struct head = doc.getHeader(); + Struct body = doc.getBody(); + boolean hasHead = (head.getFieldCount() != 0); + boolean hasBody = (body.getFieldCount() != 0); + byte contents = 0x01; // Indicating we have document type which we always have - if (doc.getHeader().getFieldCount() > 0) { + if (hasHead) { contents |= 0x2; // Indicate we have header } - if (!headerOnly && doc.getBody().getFieldCount() > 0) { + if (hasBody) { contents |= 0x4; // Indicate we have a body } buf.put(contents); doc.getDataType().serialize(this); - - if (doc.getHeader().getFieldCount() > 0) { - doc.getHeader().serialize(doc.getDataType().getField("header"), this); + if (hasHead) { + head.serialize(null, this); } - - if (!headerOnly && doc.getBody().getFieldCount() > 0) { - doc.getBody().serialize(doc.getDataType().getField("body"), this); + if (hasBody) { + body.serialize(null, this); } - int finalPos = buf.position(); - - buf.position(startPos + 2); - buf.putInt(finalPos - startPos - 2 - 4); // Don't include the length itself or the version + buf.position(lenPos); + buf.putInt(finalPos - lenPos - 4); // Don't include the length itself or the version buf.position(finalPos); - } /** diff --git a/document/src/main/java/com/yahoo/document/serialization/VespaDocumentSerializer6.java b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentSerializer6.java new file mode 100644 index 00000000000..7b7878083a2 --- /dev/null +++ b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentSerializer6.java @@ -0,0 +1,709 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.document.serialization; + +import com.yahoo.compress.Compressor; + +import com.yahoo.document.ArrayDataType; +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.Document; +import com.yahoo.document.DocumentId; +import com.yahoo.document.DocumentType; +import com.yahoo.document.DocumentUpdate; +import com.yahoo.document.Field; +import com.yahoo.document.WeightedSetDataType; +import com.yahoo.document.annotation.AlternateSpanList; +import com.yahoo.document.annotation.Annotation; +import com.yahoo.document.annotation.AnnotationReference; +import com.yahoo.document.annotation.Span; +import com.yahoo.document.annotation.SpanList; +import com.yahoo.document.annotation.SpanNode; +import com.yahoo.document.annotation.SpanTree; +import com.yahoo.document.datatypes.Array; +import com.yahoo.document.datatypes.BoolFieldValue; +import com.yahoo.document.datatypes.ByteFieldValue; +import com.yahoo.document.datatypes.CollectionFieldValue; +import com.yahoo.document.datatypes.DoubleFieldValue; +import com.yahoo.document.datatypes.FieldValue; +import com.yahoo.document.datatypes.FloatFieldValue; +import com.yahoo.document.datatypes.IntegerFieldValue; +import com.yahoo.document.datatypes.LongFieldValue; +import com.yahoo.document.datatypes.MapFieldValue; +import com.yahoo.document.datatypes.PredicateFieldValue; +import com.yahoo.document.datatypes.Raw; +import com.yahoo.document.datatypes.ReferenceFieldValue; +import com.yahoo.document.datatypes.StringFieldValue; +import com.yahoo.document.datatypes.Struct; +import com.yahoo.document.datatypes.StructuredFieldValue; +import com.yahoo.document.datatypes.TensorFieldValue; +import com.yahoo.document.datatypes.WeightedSet; +import com.yahoo.document.fieldpathupdate.AddFieldPathUpdate; +import com.yahoo.document.fieldpathupdate.AssignFieldPathUpdate; +import com.yahoo.document.fieldpathupdate.FieldPathUpdate; +import com.yahoo.document.predicate.BinaryFormat; +import com.yahoo.document.update.AddValueUpdate; +import com.yahoo.document.update.ArithmeticValueUpdate; +import com.yahoo.document.update.AssignValueUpdate; +import com.yahoo.document.update.ClearValueUpdate; +import com.yahoo.document.update.FieldUpdate; +import com.yahoo.document.update.MapValueUpdate; +import com.yahoo.document.update.RemoveValueUpdate; +import com.yahoo.document.update.ValueUpdate; +import com.yahoo.io.GrowableByteBuffer; +import com.yahoo.tensor.serialization.TypedBinaryFormat; +import com.yahoo.vespa.objects.BufferSerializer; +import com.yahoo.vespa.objects.FieldBase; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; + +import static com.yahoo.text.Utf8.calculateBytePositions; + +/** + * Class used for serializing documents on the Vespa 6.x document format. + * + * @author baldersheim + **/ +public class VespaDocumentSerializer6 extends BufferSerializer implements DocumentSerializer { + + private int spanNodeCounter = -1; + private int[] bytePositions; + + VespaDocumentSerializer6(GrowableByteBuffer buf) { + super(buf); + } + + public void write(Document doc) { + write(new Field(doc.getDataType().getName(), 0, doc.getDataType(), true), doc); + } + + @SuppressWarnings("deprecation") + public void write(FieldBase field, Document doc) { + buf.putShort(Document.SERIALIZED_VERSION); + + //save the position of the length in the buffer + int lenPos = buf.position(); + // Temporary length, fill in after serialization is done. + buf.putInt(0); + + doc.getId().serialize(this); + + Struct head = doc.getHeader(); + Struct body = doc.getBody(); + boolean hasHead = (head.getFieldCount() != 0); + boolean hasBody = (body.getFieldCount() != 0); + + byte contents = 0x01; // Indicating we have document type which we always have + if (hasHead) { + contents |= 0x2; // Indicate we have header + } + if (hasBody) { + contents |= 0x4; // Indicate we have a body + } + buf.put(contents); + + doc.getDataType().serialize(this); + if (hasHead) { + head.serialize(null, this); + } + if (hasBody) { + body.serialize(null, this); + } + int finalPos = buf.position(); + buf.position(lenPos); + buf.putInt(finalPos - lenPos - 4); // Don't include the length itself or the version + buf.position(finalPos); + } + + /** + * Write out the value of field + * + * @param field - field description (name and data type) + * @param value - field value + */ + public void write(FieldBase field, FieldValue value) { + throw new IllegalArgumentException("Not Implemented"); + } + + /** + * Write out the value of array field + * + * @param field - field description (name and data type) + * @param array - field value + */ + public <T extends FieldValue> void write(FieldBase field, Array<T> array) { + buf.putInt1_2_4Bytes(array.size()); + + List<T> lst = array.getValues(); + for (FieldValue value : lst) { + value.serialize(this); + } + + } + + public <K extends FieldValue, V extends FieldValue> void write(FieldBase field, MapFieldValue<K, V> map) { + buf.putInt1_2_4Bytes(map.size()); + for (Map.Entry<K, V> e : map.entrySet()) { + e.getKey().serialize(this); + e.getValue().serialize(this); + } + } + + /** + * Write out the value of byte field + * + * @param field - field description (name and data type) + * @param value - field value + */ + public void write(FieldBase field, ByteFieldValue value) { + buf.put(value.getByte()); + } + + @Override + public void write(FieldBase field, BoolFieldValue value) { + byte asByte = value.getBoolean() ? (byte)1 : (byte)0; + buf.put(asByte); + } + + /** + * Write out the value of collection field + * + * @param field - field description (name and data type) + * @param value - field value + */ + public <T extends FieldValue> void write(FieldBase field, CollectionFieldValue<T> value) { + throw new IllegalArgumentException("Not Implemented"); + } + + /** + * Write out the value of double field + * + * @param field - field description (name and data type) + * @param value - field value + */ + public void write(FieldBase field, DoubleFieldValue value) { + buf.putDouble(value.getDouble()); + } + + /** + * Write out the value of float field + * + * @param field - field description (name and data type) + * @param value - field value + */ + public void write(FieldBase field, FloatFieldValue value) { + buf.putFloat(value.getFloat()); + } + + /** + * Write out the value of integer field + * + * @param field - field description (name and data type) + * @param value - field value + */ + public void write(FieldBase field, IntegerFieldValue value) { + buf.putInt(value.getInteger()); + } + + /** + * Write out the value of long field + * + * @param field - field description (name and data type) + * @param value - field value + */ + public void write(FieldBase field, LongFieldValue value) { + buf.putLong(value.getLong()); + } + + /** + * Write out the value of raw field + * + * @param field - field description (name and data type) + * @param value - field value + */ + public void write(FieldBase field, Raw value) { + ByteBuffer rawBuf = value.getByteBuffer(); + int origPos = rawBuf.position(); + buf.putInt(rawBuf.remaining()); + buf.put(rawBuf); + rawBuf.position(origPos); + + } + + @Override + public void write(FieldBase field, PredicateFieldValue value) { + byte[] buf = BinaryFormat.encode(value.getPredicate()); + this.buf.putInt(buf.length); + this.buf.put(buf); + } + + /** + * Write out the value of string field + * + * @param field - field description (name and data type) + * @param value - field value + */ + public void write(FieldBase field, StringFieldValue value) { + byte[] stringBytes = createUTF8CharArray(value.getString()); + + byte coding = 0; + //Use bit 6 of "coding" to say whether span tree is available or not + if (!value.getSpanTrees().isEmpty()) { + coding |= 64; + } + buf.put(coding); + buf.putInt1_4Bytes(stringBytes.length + 1); + + buf.put(stringBytes); + buf.put(((byte) 0)); + + Map<String, SpanTree> trees = value.getSpanTreeMap(); + if ((trees != null) && !trees.isEmpty()) { + try { + //we don't support serialization of nested span trees, so this is safe: + bytePositions = calculateBytePositions(value.getString()); + //total length. record position and go back here if necessary: + int posBeforeSize = buf.position(); + buf.putInt(0); + buf.putInt1_2_4Bytes(trees.size()); + + for (SpanTree tree : trees.values()) { + try { + write(tree); + } catch (SerializationException e) { + throw e; + } catch (RuntimeException e) { + throw new SerializationException("Exception thrown while serializing span tree '" + + tree.getName() + "'; string='" + value.getString() + "'", e); + } + } + int endPos = buf.position(); + buf.position(posBeforeSize); + buf.putInt(endPos - posBeforeSize - 4); //length shall exclude itself + buf.position(endPos); + } finally { + bytePositions = null; + } + } + } + + @Override + public void write(FieldBase field, TensorFieldValue value) { + if (value.getTensor().isPresent()) { + byte[] encodedTensor = TypedBinaryFormat.encode(value.getTensor().get()); + buf.putInt1_4Bytes(encodedTensor.length); + buf.put(encodedTensor); + } else { + buf.putInt1_4Bytes(0); + } + } + + @Override + public void write(FieldBase field, ReferenceFieldValue value) { + if (value.getDocumentId().isPresent()) { + // We piggyback on DocumentId's existing serialization code, but need to know + // whether or not it's present or merely the empty string. + buf.put((byte)1); + write(value.getDocumentId().get()); + } else { + buf.put((byte)0); + } + } + + /** + * Write out the value of struct field + * + * @param field - field description (name and data type) + * @param s - field value + */ + public void write(FieldBase field, Struct s) { + // Serialize all parts first.. As we need to know length before starting + // Serialize all the fields. + + //keep the buffer we're serializing everything into: + GrowableByteBuffer bigBuffer = buf; + + //create a new buffer and serialize into that for a while: + GrowableByteBuffer buffer = new GrowableByteBuffer(4096, 2.0f); + buf = buffer; + + List<Integer> fieldIds = new LinkedList<>(); + List<java.lang.Integer> fieldLengths = new LinkedList<>(); + + for (Map.Entry<Field, FieldValue> value : s.getFields()) { + + int startPos = buffer.position(); + value.getValue().serialize(value.getKey(), this); + + fieldLengths.add(buffer.position() - startPos); + fieldIds.add(value.getKey().getId(s.getVersion())); + } + + // Switch buffers again: + buffer.flip(); + buf = bigBuffer; + + int uncompressedSize = buffer.remaining(); + Compressor.Compression compression = + s.getDataType().getCompressor().compress(buffer.getByteBuffer().array(), buffer.remaining()); + + // Actual serialization starts here. + int lenPos = buf.position(); + putInt(null, 0); // Move back to this after compression is done. + buf.put(compression.type().getCode()); + + if (compression.data() != null && compression.type().isCompressed()) { + buf.putInt2_4_8Bytes(uncompressedSize); + } + + buf.putInt1_4Bytes(s.getFieldCount()); + + for (int i = 0; i < s.getFieldCount(); ++i) { + putInt1_4Bytes(null, fieldIds.get(i)); + putInt2_4_8Bytes(null, fieldLengths.get(i)); + } + + int pos = buf.position(); + if (compression.data() != null && compression.type().isCompressed()) { + put(null, compression.data()); + } else { + put(null, buffer.getByteBuffer()); + } + int dataLength = buf.position() - pos; + + int posNow = buf.position(); + buf.position(lenPos); + putInt(null, dataLength); + buf.position(posNow); + } + + /** + * Write out the value of structured field + * + * @param field - field description (name and data type) + * @param value - field value + */ + public void write(FieldBase field, StructuredFieldValue value) { + throw new IllegalArgumentException("Not Implemented"); + } + + /** + * Write out the value of weighted set field + * + * @param field - field description (name and data type) + * @param ws - field value + */ + public <T extends FieldValue> void write(FieldBase field, WeightedSet<T> ws) { + WeightedSetDataType type = ws.getDataType(); + putInt(null, type.getNestedType().getId()); + putInt(null, ws.size()); + + Iterator<T> it = ws.fieldValueIterator(); + while (it.hasNext()) { + FieldValue key = it.next(); + java.lang.Integer value = ws.get(key); + int sizePos = buf.position(); + putInt(null, 0); + int startPos = buf.position(); + key.serialize(this); + putInt(null, value); + int finalPos = buf.position(); + int size = finalPos - startPos; + buf.position(sizePos); + putInt(null, size); + buf.position(finalPos); + } + + } + + public void write(FieldBase field, AnnotationReference value) { + int annotationId = value.getReference().getScratchId(); + if (annotationId >= 0) { + buf.putInt1_2_4Bytes(annotationId); + } else { + throw new SerializationException("Could not serialize AnnotationReference value, reference not found (" + value + ")"); + } + } + + public void write(DocumentId id) { + put(null, id.getScheme().toUtf8().getBytes()); + putByte(null, (byte) 0); + } + + public void write(DocumentType type) { + byte[] docType = createUTF8CharArray(type.getName()); + put(null, docType); + putByte(null, ((byte) 0)); + putShort(null, (short) 0); // Used to hold the version. Is now always 0. + } + + public void write(Annotation annotation) { + buf.putInt(annotation.getType().getId()); //name hash + + byte features = 0; + if (annotation.isSpanNodeValid()) { + features |= ((byte) 1); + } + if (annotation.hasFieldValue()) { + features |= ((byte) 2); + } + buf.put(features); + + int posBeforeSize = buf.position(); + buf.putInt1_2_4BytesAs4(0); + + //write ID of span node: + if (annotation.isSpanNodeValid()) { + int spanNodeId = annotation.getSpanNode().getScratchId(); + if (spanNodeId >= 0) { + buf.putInt1_2_4Bytes(spanNodeId); + } else { + throw new SerializationException("Could not serialize annotation, associated SpanNode not found (" + annotation + ")"); + } + } + + //write annotation value: + if (annotation.hasFieldValue()) { + buf.putInt(annotation.getType().getDataType().getId()); + annotation.getFieldValue().serialize(this); + } + + int end = buf.position(); + buf.position(posBeforeSize); + buf.putInt1_2_4BytesAs4(end - posBeforeSize - 4); + buf.position(end); + } + + public void write(SpanTree tree) { + //we don't support serialization of nested span trees: + if (spanNodeCounter >= 0) { + throw new SerializationException("Serialization of nested SpanTrees is not supported."); + } + + //we're going to write a new SpanTree, create a new Map for nodes: + spanNodeCounter = 0; + + //make sure tree is consistent before continuing: + tree.cleanup(); + + try { + new StringFieldValue(tree.getName()).serialize(this); + + write(tree.getRoot()); + { + //add all annotations to temporary list and sort it, to get predictable serialization + List<Annotation> tmpAnnotationList = new ArrayList<Annotation>(tree.numAnnotations()); + for (Annotation annotation : tree) { + tmpAnnotationList.add(annotation); + } + Collections.sort(tmpAnnotationList); + + int annotationCounter = 0; + //add all annotations to map here, in case of back-references: + for (Annotation annotation : tmpAnnotationList) { + annotation.setScratchId(annotationCounter++); + } + + buf.putInt1_2_4Bytes(tmpAnnotationList.size()); + for (Annotation annotation : tmpAnnotationList) { + write(annotation); + } + } + } finally { + //we're done, let's set these to null to save memory and prevent madness: + spanNodeCounter = -1; + } + } + + public void write(SpanNode spanNode) { + if (spanNodeCounter >= 0) { + spanNode.setScratchId(spanNodeCounter++); + } + if (spanNode instanceof Span) { + write((Span) spanNode); + } else if (spanNode instanceof AlternateSpanList) { + write((AlternateSpanList) spanNode); + } else if (spanNode instanceof SpanList) { + write((SpanList) spanNode); + } else { + throw new IllegalStateException("BUG!! Unable to serialize " + spanNode); + } + } + + public void write(Span span) { + buf.put(Span.ID); + + if (bytePositions != null) { + int byteFrom = bytePositions[span.getFrom()]; + int byteLength = bytePositions[span.getFrom() + span.getLength()] - byteFrom; + + buf.putInt1_2_4Bytes(byteFrom); + buf.putInt1_2_4Bytes(byteLength); + } else { + throw new SerializationException("Cannot serialize Span " + span + ", no access to parent StringFieldValue."); + } + } + + public void write(SpanList spanList) { + buf.put(SpanList.ID); + buf.putInt1_2_4Bytes(spanList.numChildren()); + Iterator<SpanNode> children = spanList.childIterator(); + while (children.hasNext()) { + write(children.next()); + } + } + + public void write(AlternateSpanList altSpanList) { + buf.put(AlternateSpanList.ID); + buf.putInt1_2_4Bytes(altSpanList.getNumSubTrees()); + for (int i = 0; i < altSpanList.getNumSubTrees(); i++) { + buf.putDouble(altSpanList.getProbability(i)); + buf.putInt1_2_4Bytes(altSpanList.numChildren(i)); + Iterator<SpanNode> children = altSpanList.childIterator(i); + while (children.hasNext()) { + write(children.next()); + } + } + } + + @Override + public void write(DocumentUpdate update) { + update.getId().serialize(this); + + update.getDocumentType().serialize(this); + + putInt(null, update.fieldUpdates().size()); + + for (FieldUpdate up : update.fieldUpdates()) { + up.serialize(this); + } + + DocumentUpdateFlags flags = new DocumentUpdateFlags(); + flags.setCreateIfNonExistent(update.getCreateIfNonExistent()); + putInt(null, flags.injectInto(update.fieldPathUpdates().size())); + + for (FieldPathUpdate up : update.fieldPathUpdates()) { + up.serialize(this); + } + } + + public void write(FieldPathUpdate update) { + putByte(null, (byte)update.getUpdateType().getCode()); + put(null, update.getOriginalFieldPath()); + put(null, update.getOriginalWhereClause()); + } + + public void write(AssignFieldPathUpdate update) { + write((FieldPathUpdate)update); + byte flags = 0; + if (update.getRemoveIfZero()) { + flags |= AssignFieldPathUpdate.REMOVE_IF_ZERO; + } + if (update.getCreateMissingPath()) { + flags |= AssignFieldPathUpdate.CREATE_MISSING_PATH; + } + if (update.isArithmetic()) { + flags |= AssignFieldPathUpdate.ARITHMETIC_EXPRESSION; + putByte(null, flags); + put(null, update.getExpression()); + } else { + putByte(null, flags); + update.getFieldValue().serialize(this); + } + } + + public void write(AddFieldPathUpdate update) { + write((FieldPathUpdate)update); + update.getNewValues().serialize(this); + } + + @Override + public void write(FieldUpdate update) { + putInt(null, update.getField().getId(Document.SERIALIZED_VERSION)); + putInt(null, update.getValueUpdates().size()); + for (ValueUpdate vupd : update.getValueUpdates()) { + putInt(null, vupd.getValueUpdateClassID().id); + vupd.serialize(this, update.getField().getDataType()); + } + } + + @Override + public void write(AddValueUpdate update, DataType superType) { + writeValue(this, ((CollectionDataType)superType).getNestedType(), update.getValue()); + putInt(null, update.getWeight()); + } + + @Override + public void write(MapValueUpdate update, DataType superType) { + if (superType instanceof ArrayDataType) { + CollectionDataType type = (CollectionDataType) superType; + IntegerFieldValue index = (IntegerFieldValue) update.getValue(); + index.serialize(this); + putInt(null, update.getUpdate().getValueUpdateClassID().id); + update.getUpdate().serialize(this, type.getNestedType()); + } else if (superType instanceof WeightedSetDataType) { + writeValue(this, ((CollectionDataType)superType).getNestedType(), update.getValue()); + putInt(null, update.getUpdate().getValueUpdateClassID().id); + update.getUpdate().serialize(this, DataType.INT); + } else { + throw new SerializationException("MapValueUpdate only works for arrays and weighted sets"); + } + } + + @Override + public void write(ArithmeticValueUpdate update) { + putInt(null, update.getOperator().id); + putDouble(null, update.getOperand().doubleValue()); + } + + @Override + public void write(AssignValueUpdate update, DataType superType) { + if (update.getValue() == null) { + putByte(null, (byte) 0); + } else { + putByte(null, (byte) 1); + writeValue(this, superType, update.getValue()); + } + } + + @Override + public void write(RemoveValueUpdate update, DataType superType) { + writeValue(this, ((CollectionDataType)superType).getNestedType(), update.getValue()); + } + + @Override + public void write(ClearValueUpdate clearValueUpdate, DataType superType) { + //TODO: This has never ever been implemented. Has this ever worked? + } + + /** + * Returns the serialized size of the given {@link Document}. Please note that this method performs actual + * serialization of the document, but simply return the size of the final {@link GrowableByteBuffer}. If you need + * the buffer itself, do NOT use this method. + * + * @param doc The Document whose size to calculate. + * @return The size in bytes. + */ + public static long getSerializedSize(Document doc) { + DocumentSerializer serializer = new VespaDocumentSerializer6(new GrowableByteBuffer()); + serializer.write(doc); + return serializer.getBuf().position(); + } + + private static void writeValue(VespaDocumentSerializer6 serializer, DataType dataType, Object value) { + FieldValue fieldValue; + if (value instanceof FieldValue) { + fieldValue = (FieldValue)value; + } else { + fieldValue = dataType.createFieldValue(value); + } + fieldValue.serialize(serializer); + } + +} diff --git a/document/src/main/java/com/yahoo/document/serialization/VespaDocumentSerializerHead.java b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentSerializerHead.java index 92bce41ba8c..bfa746a02b1 100644 --- a/document/src/main/java/com/yahoo/document/serialization/VespaDocumentSerializerHead.java +++ b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentSerializerHead.java @@ -1,80 +1,17 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.document.serialization; -import com.yahoo.document.DocumentUpdate; -import com.yahoo.document.datatypes.ByteFieldValue; -import com.yahoo.document.fieldpathupdate.AddFieldPathUpdate; -import com.yahoo.document.fieldpathupdate.AssignFieldPathUpdate; -import com.yahoo.document.fieldpathupdate.FieldPathUpdate; -import com.yahoo.document.update.FieldUpdate; import com.yahoo.io.GrowableByteBuffer; -import com.yahoo.vespa.objects.FieldBase; /** * Class used for serializing documents on the current head document format. * * @author baldersheim */ -@SuppressWarnings("deprecation") -public class VespaDocumentSerializerHead extends VespaDocumentSerializer42 { +public class VespaDocumentSerializerHead extends VespaDocumentSerializer6 { public VespaDocumentSerializerHead(GrowableByteBuffer buf) { super(buf); } - @Override - public void write(DocumentUpdate update) { - update.getId().serialize(this); - - update.getDocumentType().serialize(this); - - putInt(null, update.fieldUpdates().size()); - - for (FieldUpdate up : update.fieldUpdates()) { - up.serialize(this); - } - - DocumentUpdateFlags flags = new DocumentUpdateFlags(); - flags.setCreateIfNonExistent(update.getCreateIfNonExistent()); - putInt(null, flags.injectInto(update.fieldPathUpdates().size())); - - for (FieldPathUpdate up : update.fieldPathUpdates()) { - up.serialize(this); - } - } - - public void write(FieldPathUpdate update) { - putByte(null, (byte)update.getUpdateType().getCode()); - put(null, update.getOriginalFieldPath()); - put(null, update.getOriginalWhereClause()); - } - - public void write(AssignFieldPathUpdate update) { - write((FieldPathUpdate)update); - byte flags = 0; - if (update.getRemoveIfZero()) { - flags |= AssignFieldPathUpdate.REMOVE_IF_ZERO; - } - if (update.getCreateMissingPath()) { - flags |= AssignFieldPathUpdate.CREATE_MISSING_PATH; - } - if (update.isArithmetic()) { - flags |= AssignFieldPathUpdate.ARITHMETIC_EXPRESSION; - putByte(null, flags); - put(null, update.getExpression()); - } else { - putByte(null, flags); - update.getFieldValue().serialize(this); - } - } - - public void write(AddFieldPathUpdate update) { - write((FieldPathUpdate)update); - update.getNewValues().serialize(this); - } - - @Override - public void write(FieldBase field, ByteFieldValue value) { - buf.put(value.getByte()); - } } diff --git a/document/src/main/java/com/yahoo/document/update/FieldUpdate.java b/document/src/main/java/com/yahoo/document/update/FieldUpdate.java index 163bda5b623..192afba80b3 100644 --- a/document/src/main/java/com/yahoo/document/update/FieldUpdate.java +++ b/document/src/main/java/com/yahoo/document/update/FieldUpdate.java @@ -291,7 +291,7 @@ public class FieldUpdate { } public final void serialize(GrowableByteBuffer buf) { - serialize(DocumentSerializerFactory.create42(buf)); + serialize(DocumentSerializerFactory.create6(buf)); } public void serialize(DocumentUpdateWriter data) { diff --git a/document/src/main/java/com/yahoo/vespaxmlparser/package-info.java b/document/src/main/java/com/yahoo/vespaxmlparser/package-info.java index ba33b6a83ad..68116e777d6 100644 --- a/document/src/main/java/com/yahoo/vespaxmlparser/package-info.java +++ b/document/src/main/java/com/yahoo/vespaxmlparser/package-info.java @@ -1,5 +1,5 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -// TODO: Remove this package on Vespa 7 +// TODO: Remove this package on Vespa 8 @ExportPackage package com.yahoo.vespaxmlparser; |