diff options
Diffstat (limited to 'document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer6.java')
-rw-r--r-- | document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer6.java | 880 |
1 files changed, 880 insertions, 0 deletions
diff --git a/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer6.java b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer6.java new file mode 100644 index 00000000000..5424798110c --- /dev/null +++ b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer6.java @@ -0,0 +1,880 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.document.serialization; + +import com.yahoo.collections.Tuple2; +import com.yahoo.compress.CompressionType; +import com.yahoo.compress.Compressor; +import com.yahoo.document.annotation.AlternateSpanList; +import com.yahoo.document.annotation.Annotation; +import com.yahoo.document.annotation.AnnotationReference; +import com.yahoo.document.annotation.AnnotationType; +import com.yahoo.document.annotation.Span; +import com.yahoo.document.annotation.SpanList; +import com.yahoo.document.annotation.SpanNode; +import com.yahoo.document.annotation.SpanNodeParent; +import com.yahoo.document.annotation.SpanTree; +import com.yahoo.document.ArrayDataType; +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.DataTypeName; +import com.yahoo.document.datatypes.Array; +import com.yahoo.document.datatypes.BoolFieldValue; +import com.yahoo.document.datatypes.ByteFieldValue; +import com.yahoo.document.datatypes.CollectionFieldValue; +import com.yahoo.document.datatypes.DoubleFieldValue; +import com.yahoo.document.datatypes.FieldValue; +import com.yahoo.document.datatypes.FloatFieldValue; +import com.yahoo.document.datatypes.IntegerFieldValue; +import com.yahoo.document.datatypes.LongFieldValue; +import com.yahoo.document.datatypes.MapFieldValue; +import com.yahoo.document.datatypes.PredicateFieldValue; +import com.yahoo.document.datatypes.Raw; +import com.yahoo.document.datatypes.ReferenceFieldValue; +import com.yahoo.document.datatypes.StringFieldValue; +import com.yahoo.document.datatypes.Struct; +import com.yahoo.document.datatypes.StructuredFieldValue; +import com.yahoo.document.datatypes.TensorFieldValue; +import com.yahoo.document.datatypes.WeightedSet; +import com.yahoo.document.Document; +import com.yahoo.document.DocumentId; +import com.yahoo.document.DocumentType; +import com.yahoo.document.DocumentTypeManager; +import com.yahoo.document.DocumentUpdate; +import com.yahoo.document.Field; +import com.yahoo.document.fieldpathupdate.AddFieldPathUpdate; +import com.yahoo.document.fieldpathupdate.AssignFieldPathUpdate; +import com.yahoo.document.fieldpathupdate.FieldPathUpdate; +import com.yahoo.document.fieldpathupdate.RemoveFieldPathUpdate; +import com.yahoo.document.MapDataType; +import com.yahoo.document.predicate.BinaryFormat; +import com.yahoo.document.select.parser.ParseException; +import com.yahoo.document.StructDataType; +import com.yahoo.document.update.AddValueUpdate; +import com.yahoo.document.update.ArithmeticValueUpdate; +import com.yahoo.document.update.AssignValueUpdate; +import com.yahoo.document.update.ClearValueUpdate; +import com.yahoo.document.update.FieldUpdate; +import com.yahoo.document.update.MapValueUpdate; +import com.yahoo.document.update.RemoveValueUpdate; +import com.yahoo.document.update.ValueUpdate; +import com.yahoo.document.WeightedSetDataType; +import com.yahoo.io.GrowableByteBuffer; +import com.yahoo.tensor.serialization.TypedBinaryFormat; +import com.yahoo.text.Utf8; +import com.yahoo.text.Utf8Array; +import com.yahoo.text.Utf8String; +import com.yahoo.vespa.objects.BufferSerializer; +import com.yahoo.vespa.objects.FieldBase; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import static com.yahoo.text.Utf8.calculateStringPositions; + +/** + * Class used for de-serializing documents on the Vespa 6.x document format. + * + * @author baldersheim + */ +public class VespaDocumentDeserializer6 extends BufferSerializer implements DocumentDeserializer { + + private final Compressor compressor = new Compressor(); + private DocumentTypeManager manager; + private short version; + private List<SpanNode> spanNodes; + private List<Annotation> annotations; + private int[] stringPositions; + + VespaDocumentDeserializer6(DocumentTypeManager manager, GrowableByteBuffer buf) { + super(buf); + this.manager = manager; + this.version = Document.SERIALIZED_VERSION; + } + + final public DocumentTypeManager getDocumentTypeManager() { return manager; } + + public void read(Document document) { + read(null, document); + } + + @SuppressWarnings("deprecation") + public void read(FieldBase field, Document doc) { + // Verify that we have correct version + version = getShort(null); + if (version < 6 || version > Document.SERIALIZED_VERSION) { + throw new DeserializationException("Unknown version " + version + ", expected " + + Document.SERIALIZED_VERSION + "."); + } + + int dataLength = 0; + int dataPos = 0; + + if (version < 7) { + getInt2_4_8Bytes(null); // Total document size.. Ignore + } else { + dataLength = getInt(null); + dataPos = position(); + } + + doc.setId(readDocumentId()); + + Byte content = getByte(null); + + doc.setDataType(readDocumentType()); + + Struct h = doc.getHeader(); + Struct b = doc.getBody(); + h.clear(); + b.clear(); + if ((content & 0x2) != 0) { + readHeaderBody(h, b); + } + if ((content & 0x4) != 0) { + readHeaderBody(b, h); + } + + if (version < 8) { + int crcVal = getInt(null); + } + + if (version > 6) { + if (dataLength != (position() - dataPos)) { + throw new DeserializationException("Length mismatch"); + } + } + } + public void read(FieldBase field, FieldValue value) { + throw new IllegalArgumentException("read not implemented yet."); + } + + public <T extends FieldValue> void read(FieldBase field, Array<T> array) { + int numElements = getNumCollectionElems(); + ArrayList<T> list = new ArrayList<T>(numElements); + ArrayDataType type = array.getDataType(); + for (int i = 0; i < numElements; i++) { + if (version < 7) { + getInt(null); // We don't need size for anything + } + FieldValue fv = type.getNestedType().createFieldValue(); + fv.deserialize(null, this); + list.add((T) fv); + } + array.clear(); + array.addAll(list); + } + + public <K extends FieldValue, V extends FieldValue> void read(FieldBase field, MapFieldValue<K, V> map) { + int numElements = getNumCollectionElems(); + Map<K,V> hash = new HashMap<>(); + MapDataType type = map.getDataType(); + for (int i = 0; i < numElements; i++) { + if (version < 7) { + getInt(null); // We don't need size for anything + } + K key = (K) type.getKeyType().createFieldValue(); + V val = (V) type.getValueType().createFieldValue(); + key.deserialize(null, this); + val.deserialize(null, this); + hash.put(key, val); + } + map.clear(); + map.putAll(hash); + } + + private int getNumCollectionElems() { + int numElements; + if (version < 7) { + getInt(null); // We already know the nested type, so ignore that.. + numElements = getInt(null); + } else { + numElements = getInt1_2_4Bytes(null); + } + if (numElements < 0) { + throw new DeserializationException("Bad number of array/map elements, " + numElements); + } + return numElements; + } + + public <T extends FieldValue> void read(FieldBase field, CollectionFieldValue<T> value) { + throw new IllegalArgumentException("read not implemented yet."); + } + public void read(FieldBase field, ByteFieldValue value) { value.assign(getByte(null)); } + + @Override + public void read(FieldBase field, BoolFieldValue value) { + value.setBoolean((getByte(null) != 0)); + } + + public void read(FieldBase field, DoubleFieldValue value) { value.assign(getDouble(null)); } + public void read(FieldBase field, FloatFieldValue value) { value.assign(getFloat(null)); } + public void read(FieldBase field, IntegerFieldValue value) { value.assign(getInt(null)); } + public void read(FieldBase field, LongFieldValue value) { value.assign(getLong(null)); } + + public void read(FieldBase field, Raw value) { + int rawsize = getInt(null); + byte[] rawBytes = getBytes(null, rawsize); + value.assign(rawBytes); + } + + @Override + public void read(FieldBase field, PredicateFieldValue value) { + int len = getInt(null); + byte[] buf = getBytes(null, len); + value.assign(BinaryFormat.decode(buf)); + } + + public void read(FieldBase field, StringFieldValue value) { + byte coding = getByte(null); + + int length = getInt1_4Bytes(null); + + //OK, it seems that this length includes null termination. + //NOTE: the following four lines are basically parseNullTerminatedString() inlined, + //but we need to use the UTF-8 buffer below, so not using that method... + byte[] stringArray = new byte[length - 1]; + buf.get(stringArray); + buf.get(); //move past 0-termination + value.setUnChecked(Utf8.toString(stringArray)); + + if ((coding & 64) == 64) { + //we have a span tree! + try { + //we don't support serialization of nested span trees, so this is safe: + stringPositions = calculateStringPositions(stringArray); + //total length: + int size = buf.getInt(); + int startPos = buf.position(); + + int numSpanTrees = buf.getInt1_2_4Bytes(); + + for (int i = 0; i < numSpanTrees; i++) { + SpanTree tree = new SpanTree(); + StringFieldValue treeName = new StringFieldValue(); + treeName.deserialize(this); + tree.setName(treeName.getString()); + value.setSpanTree(tree); + readSpanTree(tree, false); + } + + buf.position(startPos + size); + } finally { + stringPositions = null; + } + } + } + + @Override + public void read(FieldBase field, TensorFieldValue value) { + int encodedTensorLength = buf.getInt1_4Bytes(); + if (encodedTensorLength > 0) { + byte[] encodedTensor = getBytes(null, encodedTensorLength); + value.assign(TypedBinaryFormat.decode(Optional.of(value.getDataType().getTensorType()), + GrowableByteBuffer.wrap(encodedTensor))); + } else { + value.clear(); + } + } + + @Override + public void read(FieldBase field, ReferenceFieldValue value) { + final boolean documentIdPresent = (buf.get() != 0); + if (documentIdPresent) { + value.assign(readDocumentId()); + } else { + value.clear(); + } + } + + public void read(FieldBase fieldDef, Struct s) { + s.setVersion(version); + int startPos = position(); + + if (version < 6) { + throw new DeserializationException("Illegal document serialization version " + version); + } + + int dataSize; + if (version < 7) { + long rSize = getInt2_4_8Bytes(null); + //TODO: Look into how to support data segments larger than INT_MAX bytes + if (rSize > Integer.MAX_VALUE) { + throw new DeserializationException("Raw size of data block is too large."); + } + dataSize = (int)rSize; + } else { + dataSize = getInt(null); + } + + byte comprCode = getByte(null); + CompressionType compression = CompressionType.valueOf(comprCode); + + int uncompressedSize = 0; + if (compression != CompressionType.NONE && + compression != CompressionType.INCOMPRESSIBLE) + { + // uncompressedsize (full size of FIELDS only, after decompression) + long pSize = getInt2_4_8Bytes(null); + //TODO: Look into how to support data segments larger than INT_MAX bytes + if (pSize > Integer.MAX_VALUE) { + throw new DeserializationException("Uncompressed size of data block is too large."); + } + uncompressedSize = (int) pSize; + } + + int numberOfFields = getInt1_4Bytes(null); + + List<Tuple2<Integer, Long>> fieldIdsAndLengths = new ArrayList<>(numberOfFields); + for (int i=0; i<numberOfFields; ++i) { + // id, length (length only used for unknown fields + fieldIdsAndLengths.add(new Tuple2<>(getInt1_4Bytes(null), getInt2_4_8Bytes(null))); + } + + // save a reference to the big buffer we're reading from: + GrowableByteBuffer bigBuf = buf; + + if (version < 7) { + // In V6 and earlier, the length included the header. + int headerSize = position() - startPos; + dataSize -= headerSize; + } + byte[] destination = compressor.decompress(compression, getBuf().array(), position(), uncompressedSize, Optional.of(dataSize)); + + // set position in original buffer to after data + position(position() + dataSize); + + // for a while: deserialize from this buffer instead: + buf = GrowableByteBuffer.wrap(destination); + + s.clear(); + StructDataType type = s.getDataType(); + for (int i=0; i<numberOfFields; ++i) { + Field structField = type.getField(fieldIdsAndLengths.get(i).first, version); + if (structField == null) { + //ignoring unknown field: + position(position() + fieldIdsAndLengths.get(i).second.intValue()); + } else { + int posBefore = position(); + FieldValue value = structField.getDataType().createFieldValue(); + value.deserialize(structField, this); + s.setFieldValue(structField, value); + //jump to beginning of next field: + position(posBefore + fieldIdsAndLengths.get(i).second.intValue()); + } + } + + // restore the original buffer + buf = bigBuf; + } + + private void readHeaderBody(Struct primary, Struct alternate) { + primary.setVersion(version); + int startPos = position(); + + if (version < 6) { + throw new DeserializationException("Illegal document serialization version " + version); + } + + int dataSize; + if (version < 7) { + long rSize = getInt2_4_8Bytes(null); + //TODO: Look into how to support data segments larger than INT_MAX bytes + if (rSize > Integer.MAX_VALUE) { + throw new DeserializationException("Raw size of data block is too large."); + } + dataSize = (int)rSize; + } else { + dataSize = getInt(null); + } + + byte comprCode = getByte(null); + CompressionType compression = CompressionType.valueOf(comprCode); + + int uncompressedSize = 0; + if (compression != CompressionType.NONE && + compression != CompressionType.INCOMPRESSIBLE) + { + // uncompressedsize (full size of FIELDS only, after decompression) + long pSize = getInt2_4_8Bytes(null); + //TODO: Look into how to support data segments larger than INT_MAX bytes + if (pSize > Integer.MAX_VALUE) { + throw new DeserializationException("Uncompressed size of data block is too large."); + } + uncompressedSize = (int) pSize; + } + + int numberOfFields = getInt1_4Bytes(null); + + List<Tuple2<Integer, Long>> fieldIdsAndLengths = new ArrayList<>(numberOfFields); + for (int i=0; i<numberOfFields; ++i) { + // id, length (length only used for unknown fields + fieldIdsAndLengths.add(new Tuple2<>(getInt1_4Bytes(null), getInt2_4_8Bytes(null))); + } + + // save a reference to the big buffer we're reading from: + GrowableByteBuffer bigBuf = buf; + + if (version < 7) { + // In V6 and earlier, the length included the header. + int headerSize = position() - startPos; + dataSize -= headerSize; + } + byte[] destination = compressor.decompress(compression, getBuf().array(), position(), uncompressedSize, Optional.of(dataSize)); + + // set position in original buffer to after data + position(position() + dataSize); + + // for a while: deserialize from this buffer instead: + buf = GrowableByteBuffer.wrap(destination); + + StructDataType priType = primary.getDataType(); + StructDataType altType = alternate.getDataType(); + for (int i=0; i<numberOfFields; ++i) { + int posBefore = position(); + Struct s = null; + Integer f_id = fieldIdsAndLengths.get(i).first; + Field structField = priType.getField(f_id, version); + if (structField != null) { + s = primary; + } else { + structField = altType.getField(f_id, version); + if (structField != null) { + s = alternate; + } + } + if (s != null) { + FieldValue value = structField.getDataType().createFieldValue(); + value.deserialize(structField, this); + s.setFieldValue(structField, value); + } + //jump to beginning of next field: + position(posBefore + fieldIdsAndLengths.get(i).second.intValue()); + } + + // restore the original buffer + buf = bigBuf; + } + + public void read(FieldBase field, StructuredFieldValue value) { + throw new IllegalArgumentException("read not implemented yet."); + } + public <T extends FieldValue> void read(FieldBase field, WeightedSet<T> ws) { + WeightedSetDataType type = ws.getDataType(); + getInt(null); // Have no need for type + + int numElements = getInt(null); + if (numElements < 0) { + throw new DeserializationException("Bad number of weighted set elements, " + numElements); + } + + ws.clearAndReserve(numElements * 2); // Avoid resizing + for (int i = 0; i < numElements; i++) { + int size = getInt(null); + FieldValue value = type.getNestedType().createFieldValue(); + value.deserialize(null, this); + IntegerFieldValue weight = new IntegerFieldValue(getInt(null)); + ws.putUnChecked((T) value, weight); + } + + } + + public void read(FieldBase field, AnnotationReference value) { + int seqId = buf.getInt1_2_4Bytes(); + try { + Annotation a = annotations.get(seqId); + value.setReferenceNoCompatibilityCheck(a); + } catch (IndexOutOfBoundsException iiobe) { + throw new SerializationException("Could not serialize AnnotationReference value, reference not found.", iiobe); + } + } + + private Utf8String deserializeAttributeString() throws DeserializationException { + int length = getByte(null); + return new Utf8String(parseNullTerminatedString(length)); + } + + private Utf8Array parseNullTerminatedString() { return parseNullTerminatedString(getBuf().getByteBuffer()); } + private Utf8Array parseNullTerminatedString(int lengthExcludingNull) { return parseNullTerminatedString(getBuf().getByteBuffer(), lengthExcludingNull); } + + static Utf8Array parseNullTerminatedString(ByteBuffer buf, int lengthExcludingNull) throws DeserializationException { + Utf8Array utf8 = new Utf8Array(buf, lengthExcludingNull); + buf.get(); //move past 0-termination + return utf8; + } + + static Utf8Array parseNullTerminatedString(ByteBuffer buf) throws DeserializationException { + //search for 0-byte + int end = getFirstNullByte(buf); + + if (end == -1) { + throw new DeserializationException("Could not locate terminating 0-byte for string"); + } + + return parseNullTerminatedString(buf, end - buf.position()); + } + + private static int getFirstNullByte(ByteBuffer buf) { + int end = -1; + int start = buf.position(); + + while (true) { + try { + byte dataByte = buf.get(); + if (dataByte == (byte) 0) { + end = buf.position() - 1; + break; + } + } catch (Exception e) { + break; + } + } + + buf.position(start); + return end; + } + + public void read(DocumentUpdate update) { + update.setId(new DocumentId(this)); + update.setDocumentType(readDocumentType()); + + int size = getInt(null); + + for (int i = 0; i < size; i++) { + update.addFieldUpdate(new FieldUpdate(this, update.getDocumentType(), 8)); + } + + int sizeAndFlags = getInt(null); + update.setCreateIfNonExistent(DocumentUpdateFlags.extractFlags(sizeAndFlags).getCreateIfNonExistent()); + size = DocumentUpdateFlags.extractValue(sizeAndFlags); + + for (int i = 0; i < size; i++) { + int type = getByte(null); + update.addFieldPathUpdate(FieldPathUpdate.create(FieldPathUpdate.Type.valueOf(type), + update.getDocumentType(), this)); + } + } + + + public void read(FieldPathUpdate update) { + String fieldPath = getString(null); + String whereClause = getString(null); + update.setFieldPath(fieldPath); + + try { + update.setWhereClause(whereClause); + } catch (ParseException e) { + throw new DeserializationException(e); + } + } + + public void read(AssignFieldPathUpdate update) { + byte flags = getByte(null); + update.setRemoveIfZero((flags & AssignFieldPathUpdate.REMOVE_IF_ZERO) != 0); + update.setCreateMissingPath((flags & AssignFieldPathUpdate.CREATE_MISSING_PATH) != 0); + if ((flags & AssignFieldPathUpdate.ARITHMETIC_EXPRESSION) != 0) { + update.setExpression(getString(null)); + } else { + DataType dt = update.getFieldPath().getResultingDataType(); + FieldValue fv = dt.createFieldValue(); + fv.deserialize(this); + update.setNewValue(fv); + } + } + + public void read(RemoveFieldPathUpdate update) { + + } + + public void read(AddFieldPathUpdate update) { + DataType dt = update.getFieldPath().getResultingDataType(); + FieldValue fv = dt.createFieldValue(); + dt.createFieldValue(); + fv.deserialize(this); + + if (!(fv instanceof Array)) { + throw new DeserializationException("Add only applicable to array types"); + } + update.setNewValues((Array)fv); + } + + public ValueUpdate getValueUpdate(DataType superType, DataType subType) { + int vuTypeId = getInt(null); + + ValueUpdate.ValueUpdateClassID op = ValueUpdate.ValueUpdateClassID.getID(vuTypeId); + if (op == null) { + throw new IllegalArgumentException("Read type "+vuTypeId+" of bytebuffer, but this is not a legal value update type."); + } + + switch (op) { + case ADD: + { + FieldValue fval = subType.createFieldValue(); + fval.deserialize(this); + int weight = getInt(null); + return new AddValueUpdate(fval, weight); + } + case ARITHMETIC: + int opId = getInt(null); + ArithmeticValueUpdate.Operator operator = ArithmeticValueUpdate.Operator.getID(opId); + double operand = getDouble(null); + return new ArithmeticValueUpdate(operator, operand); + case ASSIGN: + { + byte contents = getByte(null); + FieldValue fval = null; + if (contents == (byte) 1) { + fval = superType.createFieldValue(); + fval.deserialize(this); + } + return new AssignValueUpdate(fval); + } + case CLEAR: + return new ClearValueUpdate(); + case MAP: + if (superType instanceof ArrayDataType) { + CollectionDataType type = (CollectionDataType) superType; + IntegerFieldValue index = new IntegerFieldValue(); + index.deserialize(this); + ValueUpdate update = getValueUpdate(type.getNestedType(), null); + return new MapValueUpdate(index, update); + } else if (superType instanceof WeightedSetDataType) { + CollectionDataType type = (CollectionDataType) superType; + FieldValue fval = type.getNestedType().createFieldValue(); + fval.deserialize(this); + ValueUpdate update = getValueUpdate(DataType.INT, null); + return new MapValueUpdate(fval, update); + } else { + throw new DeserializationException("MapValueUpdate only works for arrays and weighted sets"); + } + case REMOVE: + FieldValue fval = ((CollectionDataType) superType).getNestedType().createFieldValue(); + fval.deserialize(this); + return new RemoveValueUpdate(fval); + default: + throw new DeserializationException( + "Could not deserialize ValueUpdate, unknown valueUpdateClassID type " + vuTypeId); + } + } + + public void read(FieldUpdate fieldUpdate) { + int fieldId = getInt(null); + Field field = fieldUpdate.getDocumentType().getField(fieldId, fieldUpdate.getSerializationVersion()); + if (field == null) { + throw new DeserializationException( + "Cannot deserialize FieldUpdate, field fieldId " + fieldId + " not found in " + fieldUpdate.getDocumentType()); + } + + fieldUpdate.setField(field); + int size = getInt(null); + + for (int i = 0; i < size; i++) { + if (field.getDataType() instanceof CollectionDataType) { + CollectionDataType collType = (CollectionDataType) field.getDataType(); + fieldUpdate.addValueUpdate(getValueUpdate(collType, collType.getNestedType())); + } else { + fieldUpdate.addValueUpdate(getValueUpdate(field.getDataType(), null)); + } + } + } + + public DocumentId readDocumentId() { + Utf8String uri = new Utf8String(parseNullTerminatedString(getBuf().getByteBuffer())); + return DocumentId.createFromSerialized(uri.toString()); + } + + public DocumentType readDocumentType() { + Utf8Array docTypeName = parseNullTerminatedString(); + int ignored = getShort(null); // used to hold the version + + DocumentType docType = manager.getDocumentType(new DataTypeName(docTypeName)); + if (docType == null) { + throw new DeserializationException("No known document type with name " + + new Utf8String(docTypeName).toString()); + } + return docType; + } + + private SpanNode readSpanNode() { + byte type = buf.get(); + buf.position(buf.position() - 1); + + SpanNode retval; + if ((type & Span.ID) == Span.ID) { + retval = new Span(); + if (spanNodes != null) { + spanNodes.add(retval); + } + read((Span) retval); + } else if ((type & SpanList.ID) == SpanList.ID) { + retval = new SpanList(); + if (spanNodes != null) { + spanNodes.add(retval); + } + read((SpanList) retval); + } else if ((type & AlternateSpanList.ID) == AlternateSpanList.ID) { + retval = new AlternateSpanList(); + if (spanNodes != null) { + spanNodes.add(retval); + } + read((AlternateSpanList) retval); + } else { + throw new DeserializationException("Cannot read SpanNode of type " + type); + } + return retval; + } + + private void readSpanTree(SpanTree tree, boolean readName) { + //we don't support serialization of nested span trees: + if (spanNodes != null || annotations != null) { + throw new SerializationException("Deserialization of nested SpanTrees is not supported."); + } + + //we're going to write a new SpanTree, create a new Map for nodes: + spanNodes = new ArrayList<SpanNode>(); + annotations = new ArrayList<Annotation>(); + + try { + if (readName) { + StringFieldValue treeName = new StringFieldValue(); + treeName.deserialize(this); + tree.setName(treeName.getString()); + } + + SpanNode root = readSpanNode(); + tree.setRoot(root); + + int numAnnotations = buf.getInt1_2_4Bytes(); + + for (int i = 0; i < numAnnotations; i++) { + Annotation a = new Annotation(); + annotations.add(a); + } + for (int i = 0; i < numAnnotations; i++) { + read(annotations.get(i)); + } + for (Annotation a : annotations) { + tree.annotate(a); + } + + for (SpanNode node: spanNodes) { + if (node instanceof Span) { + correctIndexes((Span) node); + } + } + } finally { + //we're done, let's set this to null to save memory and prevent madness: + spanNodes = null; + annotations = null; + } + } + + public void read(SpanTree tree) { + readSpanTree(tree, true); + } + + public void read(Annotation annotation) { + int annotationTypeId = buf.getInt(); + AnnotationType type = manager.getAnnotationTypeRegistry().getType(annotationTypeId); + + if (type == null) { + throw new DeserializationException("Cannot deserialize annotation of type " + annotationTypeId + " (unknown type)"); + } + + annotation.setType(type); + + byte features = buf.get(); + int length = buf.getInt1_2_4Bytes(); + + if ((features & (byte) 1) == (byte) 1) { + //we have a span node + int spanNodeId = buf.getInt1_2_4Bytes(); + try { + SpanNode node = spanNodes.get(spanNodeId); + annotation.setSpanNode(node); + } catch (IndexOutOfBoundsException ioobe) { + throw new DeserializationException("Could not deserialize annotation, associated span node not found ", ioobe); + } + } + if ((features & (byte) 2) == (byte) 2) { + //we have a value: + int dataTypeId = buf.getInt(); + + //if this data type ID the same as the one in our config? + if (dataTypeId != type.getDataType().getId()) { + //not the same, but we will handle it gracefully, and just skip past the data: + buf.position(buf.position() + length - 4); + } else { + FieldValue value = type.getDataType().createFieldValue(); + value.deserialize(this); + annotation.setFieldValue(value); + } + } + } + + public void read(Span span) { + byte type = buf.get(); + if ((type & Span.ID) != Span.ID) { + throw new DeserializationException("Cannot deserialize Span with type " + type); + } + span.setFrom(buf.getInt1_2_4Bytes()); + span.setLength(buf.getInt1_2_4Bytes()); + } + + private void correctIndexes(Span span) { + if (stringPositions == null) { + throw new DeserializationException("Cannot deserialize Span, no access to parent StringFieldValue."); + } + int fromIndex = stringPositions[span.getFrom()]; + int toIndex = stringPositions[span.getTo()]; + int length = toIndex - fromIndex; + + span.setFrom(fromIndex); + span.setLength(length); + } + + public void read(SpanList spanList) { + byte type = buf.get(); + if ((type & SpanList.ID) != SpanList.ID) { + throw new DeserializationException("Cannot deserialize SpanList with type " + type); + } + List<SpanNode> nodes = readSpanList(spanList); + for (SpanNode node : nodes) { + spanList.add(node); + } + } + + public void read(AlternateSpanList altSpanList) { + byte type = buf.get(); + if ((type & AlternateSpanList.ID) != AlternateSpanList.ID) { + throw new DeserializationException("Cannot deserialize AlternateSpanList with type " + type); + } + int numSubTrees = buf.getInt1_2_4Bytes(); + + for (int i = 0; i < numSubTrees; i++) { + double prob = buf.getDouble(); + List<SpanNode> list = readSpanList(altSpanList); + + if (i == 0) { + for (SpanNode node : list) { + altSpanList.add(node); + } + altSpanList.setProbability(0, prob); + } else { + altSpanList.addChildren(i, list, prob); + } + } + } + + private List<SpanNode> readSpanList(SpanNodeParent parent) { + int size = buf.getInt1_2_4Bytes(); + List<SpanNode> spanList = new ArrayList<SpanNode>(); + for (int i = 0; i < size; i++) { + spanList.add(readSpanNode()); + } + return spanList; + } + +} |