// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespaxmlparser;

import com.yahoo.document.DataType;
import com.yahoo.document.Document;
import com.yahoo.document.DocumentId;
import com.yahoo.document.DocumentType;
import com.yahoo.document.DocumentTypeManager;
import com.yahoo.document.Field;
import com.yahoo.document.MapDataType;
import com.yahoo.document.PositionDataType;
import com.yahoo.document.annotation.AnnotationReference;
import com.yahoo.document.datatypes.*;
import com.yahoo.document.predicate.Predicate;
import com.yahoo.document.serialization.DeserializationException;
import com.yahoo.document.serialization.FieldReader;
import com.yahoo.text.Utf8;
import com.yahoo.vespa.objects.FieldBase;

import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.InputStream;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Optional;

/**
 * XML parser that reads document fields from an XML stream.
 *
 * All read methods assume that the stream is currently positioned at the start element of the relevant field.
 *
 * The {@code reader} and {@code docTypeManager} members, as well as {@code skipToEnd},
 * {@code isBase64EncodedElement} and the single-argument {@code newDeserializeException}, are not declared in
 * this class; they are presumably inherited from {@link VespaXMLReader}.
 */
public class VespaXMLFieldReader extends VespaXMLReader implements FieldReader {

    /** 2^32: subtracted from a positive 32-bit-wide value to reinterpret it as a negative signed int. */
    private static final BigInteger UINT_MAX = new BigInteger("4294967296");

    /** 2^64: subtracted from a positive 64-bit-wide value to reinterpret it as a negative signed long. */
    private static final BigInteger ULONG_MAX = new BigInteger("18446744073709551616");

    public VespaXMLFieldReader(String fileName, DocumentTypeManager docTypeManager) throws Exception {
        super(fileName, docTypeManager);
    }

    public VespaXMLFieldReader(InputStream stream, DocumentTypeManager docTypeManager) throws Exception {
        super(stream, docTypeManager);
    }

    public VespaXMLFieldReader(XMLStreamReader reader, DocumentTypeManager docTypeManager) {
        super(reader, docTypeManager);
    }

    /**
     * Optional test and set condition. Common for document/update/remove elements.
     * This variable is either set in VespaXMLFieldReader#read (reader for document)
     * or in VespaXMLUpdateReader#read (reader for update).
     */
    private Optional<String> condition = Optional.empty();

    /** Returns the value of the last {@code condition} attribute seen, or empty if none has been read. */
    public Optional<String> getCondition() {
        return condition;
    }

    /**
     * Reads a {@code document} element into the given document: first the id, type and condition attributes,
     * then one child element per field of the document type.
     *
     * @param field    used only for error messages; when null and the document has an id, a field named
     *                 after the id is used instead
     * @param document the document to populate
     * @throws RuntimeException if the document type or one of the child fields is unknown, or if the
     *                          underlying XML stream fails
     */
    public void read(FieldBase field, Document document) {
        try {
            // Workaround for documents inside array: advance until positioned on a <document> start element.
            if (reader.getEventType() != XMLStreamReader.START_ELEMENT
                || !"document".equals(reader.getName().toString())) {
                while (reader.hasNext()) {
                    if (reader.getEventType() == XMLStreamReader.START_ELEMENT
                        && "document".equals(reader.getName().toString())) {
                        break;
                    }
                    reader.next();
                }
            }

            // First fetch attributes.
            String typeName = null;
            for (int i = 0; i < reader.getAttributeCount(); i++) {
                final String attributeName = reader.getAttributeName(i).toString();
                if ("documentid".equals(attributeName) || "id".equals(attributeName)) {
                    document.setId(new DocumentId(reader.getAttributeValue(i)));
                } else if ("documenttype".equals(attributeName) || "type".equals(attributeName)) {
                    typeName = reader.getAttributeValue(i);
                } else if ("condition".equals(attributeName)) {
                    condition = Optional.of(reader.getAttributeValue(i));
                }
            }

            // Give error messages something to refer to when the caller passed no field.
            if (document.getId() != null && field == null) {
                field = new FieldBase(document.getId().toString());
            }

            DocumentType doctype = docTypeManager.getDocumentType(typeName);
            if (doctype == null) {
                throw newDeserializeException(field, "Must specify an existing document type, not '" + typeName + "'");
            }
            document.setDataType(doctype);

            // Then fetch fields
            while (reader.hasNext()) {
                int type = reader.next();

                if (type == XMLStreamReader.START_ELEMENT) {
                    Field f = doctype.getField(reader.getName().toString());
                    if (f == null) {
                        throw newDeserializeException(field, "Field " + reader.getName() + " not found.");
                    }

                    FieldValue fv = f.getDataType().createFieldValue();
                    fv.deserialize(f, this);
                    document.setFieldValue(f, fv);
                    skipToEnd(f.getName());
                } else if (type == XMLStreamReader.END_ELEMENT) {
                    return;
                }
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    /**
     * Reads the {@code item} child elements of the current element into the given array.
     * Child elements with any other name are ignored.
     */
    public <T extends FieldValue> void read(FieldBase field, Array<T> value) {
        try {
            while (reader.hasNext()) {
                int type = reader.next();

                if (type == XMLStreamReader.START_ELEMENT) {
                    if ("item".equals(reader.getName().toString())) {
                        FieldValue fv = (value.getDataType()).getNestedType().createFieldValue();
                        deserializeFieldValue(field, fv);
                        // noinspection unchecked
                        value.add((T)fv);
                        skipToEnd("item");
                    }
                } else if (type == XMLStreamReader.END_ELEMENT) {
                    return;
                }
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    /** Mutable holder for the key and value parsed from one map {@code item} element. */
    class KeyAndValue {
        FieldValue key = null;
        FieldValue value = null;
    }

    /**
     * Reads the {@code key} and {@code value} child elements of a map item into {@code val}.
     * Returns at the first end element seen by this loop; either member of {@code val} may still be null.
     *
     * @throws RuntimeException if a child element other than {@code key} or {@code value} is encountered
     */
    void readKeyAndValue(FieldBase field, KeyAndValue val, MapDataType dt) throws XMLStreamException {
        while (reader.hasNext()) {
            int type = reader.next();

            if (type == XMLStreamReader.START_ELEMENT) {
                if ("key".equals(reader.getName().toString())) {
                    val.key = dt.getKeyType().createFieldValue();
                    deserializeFieldValue(field, val.key);
                    skipToEnd("key");
                } else if ("value".equals(reader.getName().toString())) {
                    val.value = dt.getValueType().createFieldValue();
                    deserializeFieldValue(field, val.value);
                    skipToEnd("value");
                } else {
                    throw newDeserializeException("Illegal element inside map item: " + reader.getName());
                }
            } else if (type == XMLStreamReader.END_ELEMENT) {
                return;
            }
        }
    }

    /**
     * Reads the {@code item} child elements of the current element into the given map.
     *
     * @throws RuntimeException if a child element is not named {@code item}, or if an item lacks a key or a value
     */
    public <K extends FieldValue, V extends FieldValue> void read(FieldBase field, MapFieldValue<K, V> map) {
        try {
            MapDataType dt = map.getDataType();

            while (reader.hasNext()) {
                int type = reader.next();

                if (type == XMLStreamReader.START_ELEMENT) {
                    if ("item".equals(reader.getName().toString())) {
                        KeyAndValue kv = new KeyAndValue();
                        readKeyAndValue(field, kv, dt);

                        if (kv.key == null || kv.value == null) {
                            throw newDeserializeException(field, "Map items must specify both key and value");
                        }
                        // noinspection unchecked
                        map.put((K)kv.key, (V)kv.value);
                        skipToEnd("item");
                    } else {
                        throw newDeserializeException(field, "Illegal tag " + reader.getName() + " expected 'item'");
                    }
                } else if (type == XMLStreamReader.END_ELEMENT) {
                    return;
                }
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    /**
     * Reads a struct, either from one child element per struct field or, when no child element precedes it,
     * from the element's text content (which is only applied to position structs).
     *
     * @throws RuntimeException if a child element does not name a field of the struct
     */
    public void read(FieldBase field, Struct value) {
        try {
            // Must be queried before advancing: it inspects the element the reader is currently positioned on.
            boolean base64 = isBase64EncodedElement(reader);
            boolean foundField = false;
            StringBuilder positionBuilder = null;

            while (reader.hasNext()) {
                int type = reader.next();

                if (type == XMLStreamReader.START_ELEMENT) {
                    Field structField = value.getField(reader.getName().toString());
                    if (structField == null) {
                        throw newDeserializeException(field, "Field " + reader.getName() + " not found.");
                    }

                    FieldValue fieldValue = structField.getDataType().createFieldValue();
                    fieldValue.deserialize(structField, this);
                    value.setFieldValue(structField, fieldValue);
                    skipToEnd(structField.getName());
                    foundField = true;
                } else if (type == XMLStreamReader.CHARACTERS) {
                    // Once a field element has been seen, character data between elements is ignored.
                    if (foundField) {
                        continue;
                    }
                    // The text of an XML element may be output using 1-n CHARACTERS
                    // events, so we have to buffer up until the end of the element to
                    // ensure we get everything.
                    String chars = reader.getText();
                    if (positionBuilder == null) {
                        positionBuilder = new StringBuilder(chars);
                    } else {
                        positionBuilder.append(chars);
                    }
                } else if (type == XMLStreamReader.END_ELEMENT) {
                    if (positionBuilder != null) {
                        assignPositionFieldFromStringIfNonEmpty(value, positionBuilder.toString(), base64);
                    }
                    break;
                }
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    /**
     * Assigns a position parsed from the given element text to the struct, provided the (optionally base64
     * decoded) text is non-empty after trimming and the struct's type equals {@link PositionDataType#INSTANCE}.
     * For any other struct type the text is ignored.
     */
    private void assignPositionFieldFromStringIfNonEmpty(Struct value, String elementText, boolean base64) {
        String str = base64 ? Utf8.toString(Base64.getMimeDecoder().decode(elementText)) : elementText;
        str = str.trim();
        if (str.isEmpty()) {
            return;
        }
        DataType valueType = value.getDataType();
        if (valueType.equals(PositionDataType.INSTANCE)) {
            value.assign(PositionDataType.fromString(str));
        }
    }

    /**
     * Reads the {@code item} child elements of the current element into the given weighted set. Each item may
     * carry a {@code weight} attribute; the weight defaults to 1 when the attribute is absent.
     *
     * @throws RuntimeException if a child element is not named {@code item} or a weight is not a valid integer
     */
    public <T extends FieldValue> void read(FieldBase field, WeightedSet<T> value) {
        try {
            while (reader.hasNext()) {
                int type = reader.next();

                if (type == XMLStreamReader.START_ELEMENT) {
                    if ("item".equals(reader.getName().toString())) {
                        FieldValue fv = value.getDataType().getNestedType().createFieldValue();

                        int weight = 1;
                        for (int i = 0; i < reader.getAttributeCount(); i++) {
                            if ("weight".equals(reader.getAttributeName(i).toString())) {
                                String weightText = reader.getAttributeValue(i);
                                try {
                                    weight = Integer.parseInt(weightText);
                                } catch (NumberFormatException e) {
                                    // Report like the other numeric readers do, with field context,
                                    // rather than leaking a bare NumberFormatException.
                                    throw newDeserializeException(field, "Invalid weight \"" + weightText + "\".");
                                }
                            }
                        }

                        deserializeFieldValue(field, fv);
                        // noinspection unchecked
                        value.put((T)fv, weight);
                        skipToEnd("item");
                    } else {
                        throw newDeserializeException(field, "Illegal tag " + reader.getName() + " expected 'item'");
                    }
                } else if (type == XMLStreamReader.END_ELEMENT) {
                    return;
                }
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    /** Reads the element text as a byte, as parsed by {@link Byte#valueOf(String)}. */
    public void read(FieldBase field, ByteFieldValue value) {
        try {
            String dataParsed = reader.getElementText();
            try {
                value.assign(Byte.valueOf(dataParsed));
            } catch (Exception e) {
                throw newDeserializeException(field, "Invalid byte \"" + dataParsed + "\".");
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    /** Reads the element text and assigns it, as a string, to the bool field value. */
    public void read(FieldBase field, BoolFieldValue value) {
        try {
            String dataParsed = reader.getElementText();
            try {
                value.assign(dataParsed);
            } catch (Exception e) {
                throw newDeserializeException(field, e.getMessage());
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    /** Reads the element text as a double, as parsed by {@link Double#valueOf(String)}. */
    public void read(FieldBase field, DoubleFieldValue value) {
        try {
            String dataParsed = reader.getElementText();
            try {
                value.assign(Double.valueOf(dataParsed));
            } catch (Exception e) {
                throw newDeserializeException(field, "Invalid double \"" + dataParsed + "\".");
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    /** Reads the element text as a float, as parsed by {@link Float#valueOf(String)}. */
    public void read(FieldBase field, FloatFieldValue value) {
        try {
            String dataParsed = reader.getElementText();
            try {
                value.assign(Float.valueOf(dataParsed));
            } catch (Exception e) {
                throw newDeserializeException(field, "Invalid float \"" + dataParsed + "\".");
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    /** Creates a deserialize exception whose message is prefixed with the field name ("null" if no field). */
    private RuntimeException newDeserializeException(FieldBase field, String msg) {
        return newDeserializeException("Field '" + ((field == null) ? "null" : field.getName()) + "': " + msg);
    }

    /**
     * Creates a deserialize exception from the message of the given exception, prefixed with the field name.
     * Only the message of {@code e} is carried over.
     */
    private RuntimeException newException(FieldBase field, Exception e) {
        return newDeserializeException("Field '" + ((field == null) ? "null" : field.getName()) + "': " + e.getMessage());
    }

    /**
     * Parses integer text in one of three notations: a leading {@code 0x} means hexadecimal, any other
     * leading {@code 0} (on text longer than one character) means octal, and everything else is decimal.
     *
     * @throws NumberFormatException if the digits are not valid for the selected radix
     */
    private static BigInteger parseInteger(String text) {
        if (text.startsWith("0x")) {
            return new BigInteger(text.substring(2), 16);
        }
        if (text.startsWith("0") && text.length() > 1) {
            return new BigInteger(text.substring(1), 8);
        }
        return new BigInteger(text);
    }

    /**
     * Reads the element text as a 32-bit integer in decimal, hexadecimal ({@code 0x}) or octal ({@code 0})
     * notation. Positive values that need exactly 32 bits are reinterpreted as negative two's complement
     * values.
     *
     * @throws RuntimeException if the text is not a valid integer or does not fit in 32 bits
     */
    public void read(FieldBase field, IntegerFieldValue value) {
        try {
            String dataParsed = reader.getElementText();

            BigInteger val;
            try {
                val = parseInteger(dataParsed);
            } catch (Exception e) {
                throw newDeserializeException(field, "Invalid integer \"" + dataParsed + "\".");
            }

            if (val.bitLength() > 32) {
                throw newDeserializeException(field, "Invalid integer \"" + dataParsed + "\". Out of range.");
            }
            if (val.bitLength() == 32) {
                if (val.signum() > 0) {
                    // Flip to negative
                    val = val.subtract(UINT_MAX);
                } else {
                    // A negative value needing 32 bits excluding sign is below Integer.MIN_VALUE.
                    throw newDeserializeException(field, "Invalid integer \"" + dataParsed + "\". Out of range.");
                }
            }

            value.assign(val.intValue());
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    /**
     * Reads the element text as a 64-bit integer in decimal, hexadecimal ({@code 0x}) or octal ({@code 0})
     * notation. Positive values that need exactly 64 bits are reinterpreted as negative two's complement
     * values.
     *
     * @throws RuntimeException if the text is not a valid integer or its bit length exceeds 64
     */
    public void read(FieldBase field, LongFieldValue value) {
        try {
            String dataParsed = reader.getElementText();

            BigInteger val;
            try {
                val = parseInteger(dataParsed);
            } catch (Exception e) {
                throw newDeserializeException(field, "Invalid long \"" + dataParsed + "\".");
            }

            if (val.bitLength() > 64) {
                throw newDeserializeException(field, "Invalid long \"" + dataParsed + "\". Out of range.");
            }
            if (val.signum() > 0 && val.bitLength() == 64) {
                // Flip to negative
                val = val.subtract(ULONG_MAX);
            }

            value.assign(val.longValue());
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    /**
     * Reads raw data from the element text: base64 (MIME) decoded when the element is flagged as base64
     * encoded, otherwise the UTF-8 bytes of the text.
     */
    public void read(FieldBase field, Raw value) {
        try {
            if (isBase64EncodedElement(reader)) {
                value.assign(Base64.getMimeDecoder().decode(reader.getElementText()));
            } else {
                // Explicit charset: the no-argument getBytes() depends on the platform default.
                value.assign(reader.getElementText().getBytes(StandardCharsets.UTF_8));
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    /**
     * Reads a predicate from the element text: from its binary form when the element is flagged as base64
     * encoded, otherwise from its string form.
     */
    @Override
    public void read(FieldBase field, PredicateFieldValue value) {
        try {
            if (isBase64EncodedElement(reader)) {
                value.assign(Predicate.fromBinary(Base64.getMimeDecoder().decode(reader.getElementText())));
            } else {
                value.assign(Predicate.fromString(reader.getElementText()));
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    /**
     * Reads the element text into a string field value.
     *
     * @throws RuntimeException if the element is flagged as base64 encoded, if assigning the text raises an
     *                          {@link IllegalArgumentException}, or if the underlying XML stream fails
     */
    public void read(FieldBase field, StringFieldValue value) {
        try {
            if (isBase64EncodedElement(reader)) {
                throw new IllegalArgumentException("Attribute binaryencoding=base64 is not allowed for fields of type 'string'. To represent binary data, use type 'raw'.");
            } else {
                value.assign(reader.getElementText());
            }
        } catch (XMLStreamException | IllegalArgumentException e) {
            throw newException(field, e);
        }
    }

    /** Always throws: tensor fields cannot be read from XML. */
    @Override
    public void read(FieldBase field, TensorFieldValue value) {
        throwOnlyJsonSupportedException(field, "TENSOR");
    }

    /** Always throws: reference fields cannot be read from XML. */
    @Override
    public void read(FieldBase field, ReferenceFieldValue value) {
        throwOnlyJsonSupportedException(field, "REFERENCE");
    }

    private static void throwOnlyJsonSupportedException(FieldBase field, String fieldType) {
        throw new DeserializationException("Field '"+ (field != null ? field.getName() : "null") + "': "
                + "XML input for fields of type " + fieldType + " is not supported. Please use JSON input instead.");
    }

    /** Reads nothing; only prints a notice to standard out. */
    public void read(FieldBase field, AnnotationReference value) {
        System.out.println("Annotation value read!");
    }

    /** Deserializes the value using this reader, passing the field along only if it is a {@link Field}. */
    private void deserializeFieldValue(FieldBase field, FieldValue value) {
        value.deserialize(field instanceof Field ? (Field)field : null, this);
    }

    /***********************************************************************/
    /*                           UNUSED METHODS                            */
    /***********************************************************************/

    /** Not supported by this reader; always returns null. */
    @SuppressWarnings("UnusedDeclaration")
    public DocumentId readDocumentId() {
        return null;
    }

    /** Not supported by this reader; always returns null. */
    @SuppressWarnings("UnusedDeclaration")
    public DocumentType readDocumentType() {
        return null;
    }

    @SuppressWarnings("UnusedDeclaration")
    public DocumentTypeManager getDocumentTypeManager() {
        return docTypeManager;
    }

    @Override
    public void read(FieldBase field, CollectionFieldValue value) {
        System.out.println("Should not be called!!!");
    }

    @Override
    public void read(FieldBase field, StructuredFieldValue value) {
        System.out.println("Should not be called!!!");
    }

    @Override
    public void read(FieldBase field, FieldValue value) {
        // String concatenation renders a null field as "null" instead of throwing.
        System.out.println("SHOULD NEVER BE CALLED? " + field);
    }

    // The primitive getters below are not backed by the XML stream; they return fixed defaults.

    @Override
    public byte getByte(FieldBase fieldBase) {
        return 0;
    }

    @Override
    public short getShort(FieldBase fieldBase) {
        return 0;
    }

    @Override
    public int getInt(FieldBase fieldBase) {
        return 0;
    }

    @Override
    public long getLong(FieldBase fieldBase) {
        return 0;
    }

    @Override
    public float getFloat(FieldBase fieldBase) {
        return 0;
    }

    @Override
    public double getDouble(FieldBase fieldBase) {
        return 0;
    }

    @Override
    public byte[] getBytes(FieldBase fieldBase, int i) {
        return new byte[0];
    }

    @Override
    public String getString(FieldBase fieldBase) {
        return null;
    }

}