From 72231250ed81e10d66bfe70701e64fa5fe50f712 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Wed, 15 Jun 2016 23:09:44 +0200 Subject: Publish --- .../java/com/yahoo/vespaxmlparser/FeedReader.java | 21 + .../vespaxmlparser/VespaXMLDocumentReader.java | 49 ++ .../yahoo/vespaxmlparser/VespaXMLFeedReader.java | 313 +++++++++++++ .../yahoo/vespaxmlparser/VespaXMLFieldReader.java | 520 +++++++++++++++++++++ .../com/yahoo/vespaxmlparser/VespaXMLReader.java | 69 +++ .../yahoo/vespaxmlparser/VespaXMLUpdateReader.java | 379 +++++++++++++++ .../com/yahoo/vespaxmlparser/package-info.java | 5 + 7 files changed, 1356 insertions(+) create mode 100644 document/src/main/java/com/yahoo/vespaxmlparser/FeedReader.java create mode 100644 document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLDocumentReader.java create mode 100644 document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLFeedReader.java create mode 100644 document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLFieldReader.java create mode 100644 document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLReader.java create mode 100644 document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLUpdateReader.java create mode 100644 document/src/main/java/com/yahoo/vespaxmlparser/package-info.java (limited to 'document/src/main/java/com/yahoo/vespaxmlparser') diff --git a/document/src/main/java/com/yahoo/vespaxmlparser/FeedReader.java b/document/src/main/java/com/yahoo/vespaxmlparser/FeedReader.java new file mode 100644 index 00000000000..e97bd43d9bf --- /dev/null +++ b/document/src/main/java/com/yahoo/vespaxmlparser/FeedReader.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespaxmlparser; + +import com.yahoo.vespaxmlparser.VespaXMLFeedReader.Operation; + +/** + * Minimal interface for reading operations from a stream for a feeder. + * + * Interface extracted from VespaXMLFeedReader to enable JSON feeding. 
+ * + * @author steinar + */ +public interface FeedReader { + + /** + * Reads the next operation from the stream. + * @param operation The operation to fill in. Operation is unchanged if none was found. + */ + public abstract void read(Operation operation) throws Exception; + +} \ No newline at end of file diff --git a/document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLDocumentReader.java b/document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLDocumentReader.java new file mode 100644 index 00000000000..a5ea5983e29 --- /dev/null +++ b/document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLDocumentReader.java @@ -0,0 +1,49 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespaxmlparser; + +import com.yahoo.document.Document; +import com.yahoo.document.DocumentTypeManager; +import com.yahoo.document.serialization.DocumentReader; + +import javax.xml.stream.XMLStreamReader; +import java.io.InputStream; + +/** + * XML parser that reads Vespa documents from an XML stream. + * + * @author thomasg + */ +public class VespaXMLDocumentReader extends VespaXMLFieldReader implements DocumentReader { + + /** + * Creates a reader that reads from the given file. + */ + public VespaXMLDocumentReader(String fileName, DocumentTypeManager docTypeManager) throws Exception { + super(fileName, docTypeManager); + } + + /** + * Creates a reader that reads from the given stream. + */ + public VespaXMLDocumentReader(InputStream stream, DocumentTypeManager docTypeManager) throws Exception { + super(stream, docTypeManager); + } + + /** + * Creates a reader that reads using the given reader. This is useful if the document is part of a greater + * XML stream. + */ + public VespaXMLDocumentReader(XMLStreamReader reader, DocumentTypeManager docTypeManager) { + super(reader, docTypeManager); + } + + /** + * Reads one document from the stream. 
Function assumes that the current element in the stream is + * the start tag for the document. + * + * @param document the document to be read + */ + public void read(Document document) { + read(null, document); + } +} diff --git a/document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLFeedReader.java b/document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLFeedReader.java new file mode 100644 index 00000000000..0c8b9b22961 --- /dev/null +++ b/document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLFeedReader.java @@ -0,0 +1,313 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespaxmlparser; + +import com.yahoo.document.Document; +import com.yahoo.document.DocumentId; +import com.yahoo.document.DocumentTypeManager; +import com.yahoo.document.DocumentUpdate; +import com.yahoo.document.TestAndSetCondition; + +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import java.util.Optional; + +/** + * XML parser for Vespa document XML. + * + * Parses an entire document "feed", which consists of a vespafeed element containing + * zero or more instances of documents, updates or removes. + * + * Standard usage is to create an Operation object and call read(Operation) until + * operation.getType() returns OperationType.INVALID. + * + * If you are looking to parse only a single document or update, use VespaXMLDocumentReader + * or VespaXMLUpdateReader respectively. + */ +public class VespaXMLFeedReader extends VespaXMLReader implements FeedReader { + + /** + * Creates a reader that reads from the given file. + */ + public VespaXMLFeedReader(String fileName, DocumentTypeManager docTypeManager) throws Exception { + super(fileName, docTypeManager); + readInitial(); + } + + /** + * Creates a reader that reads from the given stream. 
+ */ + public VespaXMLFeedReader(InputStream stream, DocumentTypeManager docTypeManager) throws Exception { + super(stream, docTypeManager); + readInitial(); + } + + /** + * Creates a reader that uses the given reader to read - this can be used if the vespa feed + * is part of a larger XML document. + */ + public VespaXMLFeedReader(XMLStreamReader reader, DocumentTypeManager manager) throws Exception { + super(reader, manager); + readInitial(); + } + + /** + * Skips the initial "vespafeed" tag. + */ + void readInitial() throws Exception { + boolean found = false; + + while (reader.hasNext()) { + int type = reader.next(); + if (type == XMLStreamReader.START_ELEMENT) { + if ("vespafeed".equals(reader.getName().toString())) { + found = true; + break; + } + } + } + + if (!found) { + throw newDeserializeException("Feed information must be contained within a \"vespafeed\" element"); + } + } + + public enum OperationType { + DOCUMENT, + REMOVE, + UPDATE, + INVALID + } + + /** + * Represents a feed operation found by the parser. Can be one of the following types: + * - getType() == DOCUMENT: getDocument() is valid. + * - getType() == REMOVE: getRemove() is valid. + * - getType() == UPDATE: getUpdate() is valid. 
+ */ + public static class Operation { + + private OperationType type; + private Document doc; + private DocumentId remove; + private DocumentUpdate docUpdate; + private FeedOperation feedOperation; + private TestAndSetCondition condition; + + public Operation() { + setInvalid(); + } + + public void setInvalid() { + type = OperationType.INVALID; + doc = null; + remove = null; + docUpdate = null; + feedOperation = null; + condition = null; + } + + public OperationType getType() { + return type; + } + + public Document getDocument() { + return doc; + } + + public void setDocument(Document doc) { + this.type = OperationType.DOCUMENT; + this.doc = doc; + } + + public DocumentId getRemove() { + return remove; + } + + public void setRemove(DocumentId remove) { + this.type = OperationType.REMOVE; + this.remove = remove; + } + + public DocumentUpdate getDocumentUpdate() { + return docUpdate; + } + + public void setDocumentUpdate(DocumentUpdate docUpdate) { + this.type = OperationType.UPDATE; + this.docUpdate = docUpdate; + } + + public FeedOperation getFeedOperation() { + return feedOperation; + } + + public void setCondition(TestAndSetCondition condition) { + this.condition = condition; + } + + public TestAndSetCondition getCondition() { + return condition; + } + + @Override + public String toString() { + return "Operation{" + + "type=" + type + + ", doc=" + doc + + ", remove=" + remove + + ", docUpdate=" + docUpdate + + ", feedOperation=" + feedOperation + + '}'; + } + } + + public static class FeedOperation { + + private String name; + private Integer generation; + private Integer increment; + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public Integer getGeneration() { + return generation; + } + + public void setGeneration(int generation) { + this.generation = generation; + } + + public Integer getIncrement() { + return increment; + } + + public void setIncrement(int increment) { + this.increment = 
increment; + } + } + + /** + *

Reads all operations from the XML stream and puts them into a list. Note + * that if the XML stream is large, this may cause out-of-memory errors, so + * make sure to use this only with small streams.

+ * + * @return The list of all read operations. + */ + public List readAll() throws Exception { + List list = new ArrayList(); + while (true) { + Operation op = new Operation(); + read(op); + if (op.getType() == OperationType.INVALID) { + return list; + } else { + list.add(op); + } + } + } + + /* (non-Javadoc) + * @see com.yahoo.vespaxmlparser.FeedReader#read(com.yahoo.vespaxmlparser.VespaXMLFeedReader.Operation) + */ + @Override + public void read(Operation operation) throws Exception { + String startTag = null; + operation.setInvalid(); + + try { + while (reader.hasNext()) { + int type = reader.next(); + + if (type == XMLStreamReader.START_ELEMENT) { + startTag = reader.getName().toString(); + + if ("document".equals(startTag)) { + VespaXMLDocumentReader documentReader = new VespaXMLDocumentReader(reader, docTypeManager); + Document document = new Document(documentReader); + operation.setDocument(document); + operation.setCondition(TestAndSetCondition.fromConditionString(documentReader.getCondition())); + return; + } else if ("update".equals(startTag)) { + VespaXMLUpdateReader updateReader = new VespaXMLUpdateReader(reader, docTypeManager); + DocumentUpdate update = new DocumentUpdate(updateReader); + operation.setDocumentUpdate(update); + operation.setCondition(TestAndSetCondition.fromConditionString(updateReader.getCondition())); + return; + } else if ("remove".equals(startTag)) { + boolean documentIdFound = false; + + Optional condition = Optional.empty(); + for (int i = 0; i < reader.getAttributeCount(); i++) { + final String attributeName = reader.getAttributeName(i).toString(); + if ("documentid".equals(attributeName) || "id".equals(attributeName)) { + operation.setRemove(new DocumentId(reader.getAttributeValue(i))); + documentIdFound = true; + } else if ("condition".equals(attributeName)) { + condition = Optional.of(reader.getAttributeValue(i)); + } + } + + if (!documentIdFound) { + throw newDeserializeException("Missing \"documentid\" attribute for 
remove operation"); + } + + operation.setCondition(TestAndSetCondition.fromConditionString(condition)); + + return; + } else { + throw newDeserializeException("Element \"" + startTag + "\" not allowed in this context"); + } + } + } + } catch (XMLStreamException e) { + throw(e); + // Skip to end of current tag with other exceptions. + } catch (Exception e) { + try { + if (startTag != null) { + skipToEnd(startTag); + } + } catch (Exception ignore) { + } + + throw(e); + } + } + + public void read(FeedOperation fo) throws XMLStreamException { + while (reader.hasNext()) { + int type = reader.next(); + + if (type == XMLStreamReader.START_ELEMENT) { + if ("name".equals(reader.getName().toString())) { + fo.setName(reader.getElementText().toString()); + skipToEnd("name"); + } else if ("generation".equals(reader.getName().toString())) { + fo.setGeneration(Integer.parseInt(reader.getElementText().toString())); + skipToEnd("generation"); + } else if ("increment".equals(reader.getName().toString())) { + String text = reader.getElementText(); + if ("autodetect".equals(text)) { + fo.setIncrement(-1); + } else { + fo.setIncrement(Integer.parseInt(text)); + } + skipToEnd("increment"); + } + } else if (type == XMLStreamReader.END_ELEMENT) { + return; + } + } + } + +} diff --git a/document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLFieldReader.java b/document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLFieldReader.java new file mode 100644 index 00000000000..cdc676eca5f --- /dev/null +++ b/document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLFieldReader.java @@ -0,0 +1,520 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespaxmlparser; + +import com.yahoo.document.DataType; +import com.yahoo.document.Document; +import com.yahoo.document.DocumentId; +import com.yahoo.document.DocumentType; +import com.yahoo.document.DocumentTypeManager; +import com.yahoo.document.Field; +import com.yahoo.document.MapDataType; +import com.yahoo.document.PositionDataType; +import com.yahoo.document.annotation.AnnotationReference; +import com.yahoo.document.datatypes.*; +import com.yahoo.document.predicate.Predicate; +import com.yahoo.document.serialization.DeserializationException; +import com.yahoo.document.serialization.FieldReader; +import com.yahoo.text.Utf8; +import com.yahoo.vespa.objects.FieldBase; +import org.apache.commons.codec.binary.Base64; + +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; +import java.io.InputStream; +import java.math.BigInteger; +import java.util.Optional; + +/** + * XML parser that reads document fields from an XML stream. + * + * All read methods assume that the stream is currently positioned at the start element of the relevant field. + * + */ +public class VespaXMLFieldReader extends VespaXMLReader implements FieldReader { + private static final BigInteger UINT_MAX = new BigInteger("4294967296"); + private static final BigInteger ULONG_MAX = new BigInteger("18446744073709551616"); + + public VespaXMLFieldReader(String fileName, DocumentTypeManager docTypeManager) throws Exception { + super(fileName, docTypeManager); + } + + public VespaXMLFieldReader(InputStream stream, DocumentTypeManager docTypeManager) throws Exception { + super(stream, docTypeManager); + } + + public VespaXMLFieldReader(XMLStreamReader reader, DocumentTypeManager docTypeManager) { + super(reader, docTypeManager); + } + + /** + * Optional test and set condition. 
Common for document/update/remove elements + * This variable is either set in VespaXMLFieldReader#read (reader for document) + * or in VespaXMLUpdateReader#read (reader for update). + */ + private Optional condition = Optional.empty(); + + public Optional getCondition() { + return condition; + } + + public void read(FieldBase field, Document document) { + try { + //workaround for documents inside array + if (reader.getEventType() != XMLStreamReader.START_ELEMENT || !"document".equals(reader.getName().toString())) { + while (reader.hasNext()) { + if (reader.getEventType() == XMLStreamReader.START_ELEMENT && "document".equals(reader.getName().toString())) { + break; + } + reader.next(); + } + } + + // First fetch attributes. + String typeName = null; + + for (int i = 0; i < reader.getAttributeCount(); i++) { + final String attributeName = reader.getAttributeName(i).toString(); + if ("documentid".equals(attributeName) || "id".equals(attributeName)) { + document.setId(new DocumentId(reader.getAttributeValue(i))); + } else if ("documenttype".equals(attributeName) || "type".equals(attributeName)) { + typeName = reader.getAttributeValue(i); + } else if ("condition".equals(attributeName)) { + condition = Optional.of(reader.getAttributeValue(i)); + } + } + + if (document.getId() != null) { + if (field == null) { + field = new FieldBase(document.getId().toString()); + } + } + + DocumentType doctype = docTypeManager.getDocumentType(typeName); + if (doctype == null) { + throw newDeserializeException(field, "Must specify an existing document type, not '" + typeName + "'"); + } else { + document.setDataType(doctype); + } + + // Then fetch fields + while (reader.hasNext()) { + int type = reader.next(); + + if (type == XMLStreamReader.START_ELEMENT) { + Field f = doctype.getField(reader.getName().toString()); + + if (f == null) { + throw newDeserializeException(field, "Field " + reader.getName() + " not found."); + } + + FieldValue fv = f.getDataType().createFieldValue(); + 
fv.deserialize(f, this); + document.setFieldValue(f, fv); + skipToEnd(f.getName()); + } else if (type == XMLStreamReader.END_ELEMENT) { + return; + } + } + } catch (XMLStreamException e) { + throw newException(field, e); + } + } + + public void read(FieldBase field, Array value) { + try { + while (reader.hasNext()) { + int type = reader.next(); + + if (type == XMLStreamReader.START_ELEMENT) { + if ("item".equals(reader.getName().toString())) { + FieldValue fv = (value.getDataType()).getNestedType().createFieldValue(); + deserializeFieldValue(field, fv); + // noinspection unchecked + value.add((T)fv); + skipToEnd("item"); + } + } else if (type == XMLStreamReader.END_ELEMENT) { + return; + } + } + } catch (XMLStreamException e) { + throw newException(field, e); + } + } + + class KeyAndValue { + FieldValue key = null; + FieldValue value = null; + } + + void readKeyAndValue(FieldBase field, KeyAndValue val, MapDataType dt) throws XMLStreamException { + while (reader.hasNext()) { + int type = reader.next(); + + if (type == XMLStreamReader.START_ELEMENT) { + if ("key".equals(reader.getName().toString())) { + val.key = dt.getKeyType().createFieldValue(); + deserializeFieldValue(field, val.key); + skipToEnd("key"); + } else if ("value".equals(reader.getName().toString())) { + val.value = dt.getValueType().createFieldValue(); + deserializeFieldValue(field, val.value); + skipToEnd("value"); + } else { + throw newDeserializeException("Illegal element inside map item: " + reader.getName()); + } + } else if (type == XMLStreamReader.END_ELEMENT) { + return; + } + } + } + + public void read(FieldBase field, MapFieldValue map) { + try { + MapDataType dt = map.getDataType(); + + while (reader.hasNext()) { + int type = reader.next(); + + if (type == XMLStreamReader.START_ELEMENT) { + if ("item".equals(reader.getName().toString())) { + KeyAndValue kv = new KeyAndValue(); + readKeyAndValue(field, kv, dt); + + if (kv.key == null || kv.value == null) { + throw 
newDeserializeException(field, "Map items must specify both key and value"); + } + // noinspection unchecked + map.put((K)kv.key, (V)kv.value); + skipToEnd("item"); + } else { + throw newDeserializeException(field, "Illegal tag " + reader.getName() + " expected 'item'"); + } + } else if (type == XMLStreamReader.END_ELEMENT) { + return; + } + } + } catch (XMLStreamException e) { + throw newException(field, e); + } + } + + public void read(FieldBase field, Struct value) { + try { + boolean base64 = isBase64EncodedElement(reader); + boolean foundField = false; + StringBuilder positionBuilder = null; + while (reader.hasNext()) { + int type = reader.next(); + if (type == XMLStreamReader.START_ELEMENT) { + Field structField = value.getField(reader.getName().toString()); + if (structField == null) { + throw newDeserializeException(field, "Field " + reader.getName() + " not found."); + } + FieldValue fieldValue = structField.getDataType().createFieldValue(); + fieldValue.deserialize(structField, this); + value.setFieldValue(structField, fieldValue); + skipToEnd(structField.getName()); + foundField = true; + } else if (type == XMLStreamReader.CHARACTERS) { + if (foundField) { + continue; + } + // The text of an XML element may be output using 1-n CHARACTERS + // events, so we have to buffer up until the end of the element to + // ensure we get everything. + String chars = reader.getText(); + if (positionBuilder == null) { + positionBuilder = new StringBuilder(chars); + } else { + positionBuilder.append(chars); + } + } else if (type == XMLStreamReader.END_ELEMENT) { + if (positionBuilder != null) { + assignPositionFieldFromStringIfNonEmpty(value, positionBuilder.toString(), base64); + } + break; + } + } + } catch (XMLStreamException e) { + throw newException(field, e); + } + } + + private void assignPositionFieldFromStringIfNonEmpty(Struct value, String elementText, boolean base64) { + String str = base64 ? 
Utf8.toString(new Base64().decode(elementText)) : elementText; + str = str.trim(); + if (str.isEmpty()) { + return; + } + DataType valueType = value.getDataType(); + if (valueType.equals(PositionDataType.INSTANCE)) { + value.assign(PositionDataType.fromString(str)); + } + } + + public void read(FieldBase field, WeightedSet value) { + try { + while (reader.hasNext()) { + int type = reader.next(); + + if (type == XMLStreamReader.START_ELEMENT) { + if ("item".equals(reader.getName().toString())) { + FieldValue fv = value.getDataType().getNestedType().createFieldValue(); + + int weight = 1; + for (int i = 0; i < reader.getAttributeCount(); i++) { + if ("weight".equals(reader.getAttributeName(i).toString())) { + weight = Integer.parseInt(reader.getAttributeValue(i)); + } + } + + deserializeFieldValue(field, fv); + // noinspection unchecked + value.put((T)fv, weight); + skipToEnd("item"); + } else { + throw newDeserializeException(field, "Illegal tag " + reader.getName() + " expected 'item'"); + } + } else if (type == XMLStreamReader.END_ELEMENT) { + return; + } + } + } catch (XMLStreamException e) { + throw newException(field, e); + } + } + + public void read(FieldBase field, ByteFieldValue value) { + try { + String dataParsed = reader.getElementText(); + try { + value.assign(new Byte(dataParsed)); + } catch (Exception e) { + throw newDeserializeException(field, "Invalid byte \"" + dataParsed + "\"."); + } + } catch (XMLStreamException e) { + throw newException(field, e); + } + } + + public void read(FieldBase field, DoubleFieldValue value) { + try { + String dataParsed = reader.getElementText(); + try { + value.assign(new Double(dataParsed)); + } catch (Exception e) { + throw newDeserializeException(field, "Invalid double \"" + dataParsed + "\"."); + } + } catch (XMLStreamException e) { + throw newException(field, e); + } + } + + public void read(FieldBase field, FloatFieldValue value) { + try { + String dataParsed = reader.getElementText(); + try { + value.assign(new 
Float(dataParsed)); + } catch (Exception e) { + throw newDeserializeException(field, "Invalid float \"" + dataParsed + "\"."); + } + } catch (XMLStreamException e) { + throw newException(field, e); + } + } + + private RuntimeException newDeserializeException(FieldBase field, String msg) { + return newDeserializeException("Field '" + ((field == null) ? "null" : field.getName()) + "': " + msg); + } + private RuntimeException newException(FieldBase field, Exception e) { + return newDeserializeException("Field '" + ((field == null) ? "null" : field.getName()) + "': " + e.getMessage()); + } + public void read(FieldBase field, IntegerFieldValue value) { + try { + String dataParsed = reader.getElementText(); + + BigInteger val; + try { + if (dataParsed.startsWith("0x")) { + val = new BigInteger(dataParsed.substring(2), 16); + } else if (dataParsed.startsWith("0") && dataParsed.length() > 1) { + val = new BigInteger(dataParsed.substring(1), 8); + } else { + val = new BigInteger(dataParsed); + } + } catch (Exception e) { + throw newDeserializeException(field, "Invalid integer \"" + dataParsed + "\"."); + } + if (val.bitLength() > 32) { + throw newDeserializeException(field, "Invalid integer \"" + dataParsed + "\". Out of range."); + } + if (val.bitLength() == 32) { + if (val.compareTo(BigInteger.ZERO) == 1) { + // Flip to negative + val = val.subtract(UINT_MAX); + } else { + throw newDeserializeException(field, "Invalid integer \"" + dataParsed + "\". 
Out of range."); + } + } + + value.assign(val.intValue()); + } catch (XMLStreamException e) { + throw newException(field, e); + } + } + + public void read(FieldBase field, LongFieldValue value) { + try { + String dataParsed = reader.getElementText(); + + BigInteger val; + try { + if (dataParsed.startsWith("0x")) { + val = new BigInteger(dataParsed.substring(2), 16); + } else if (dataParsed.startsWith("0") && dataParsed.length() > 1) { + val = new BigInteger(dataParsed.substring(1), 8); + } else { + val = new BigInteger(dataParsed); + } + } catch (Exception e) { + throw newDeserializeException(field, "Invalid long \"" + dataParsed + "\"."); + } + if (val.bitLength() > 64) { + throw newDeserializeException(field, "Invalid long \"" + dataParsed + "\". Out of range."); + } + if (val.compareTo(BigInteger.ZERO) == 1 && val.bitLength() == 64) { + // Flip to negative + val = val.subtract(ULONG_MAX); + } + value.assign(val.longValue()); + } catch (XMLStreamException e) { + throw newException(field, e); + } + } + + public void read(FieldBase field, Raw value) { + try { + if (isBase64EncodedElement(reader)) { + value.assign(new Base64().decode(reader.getElementText())); + } else { + value.assign(reader.getElementText().getBytes()); + } + } catch (XMLStreamException e) { + throw newException(field, e); + } + } + + @Override + public void read(FieldBase field, PredicateFieldValue value) { + try { + if (isBase64EncodedElement(reader)) { + value.assign(Predicate.fromBinary(new Base64().decode(reader.getElementText()))); + } else { + value.assign(Predicate.fromString(reader.getElementText())); + } + } catch (XMLStreamException e) { + throw newException(field, e); + } + } + + public void read(FieldBase field, StringFieldValue value) { + try { + if (isBase64EncodedElement(reader)) { + throw new IllegalArgumentException("Attribute binaryencoding=base64 is not allowed for fields of type 'string'. 
To represent binary data, use type 'raw'."); + } else { + value.assign(reader.getElementText()); + } + } catch (XMLStreamException | IllegalArgumentException e) { + throw newException(field, e); + } + } + + @Override + public void read(FieldBase field, TensorFieldValue value) { + throw new DeserializationException("Field '"+ (field != null ? field.getName() : "null") + "': " + + "XML input for fields of type TENSOR is not supported. Please use JSON input instead."); + } + + public void read(FieldBase field, AnnotationReference value) { + System.out.println("Annotation value read!"); + } + + private void deserializeFieldValue(FieldBase field, FieldValue value) { + value.deserialize(field instanceof Field ? (Field)field : null, this); + } + + /***********************************************************************/ + /* UNUSED METHODS */ + /***********************************************************************/ + + @SuppressWarnings("UnusedDeclaration") + public DocumentId readDocumentId() { + return null; + } + + @SuppressWarnings("UnusedDeclaration") + public DocumentType readDocumentType() { + return null; //To change body of implemented methods use File | Settings | File Templates. + } + + @SuppressWarnings("UnusedDeclaration") + public DocumentTypeManager getDocumentTypeManager() { + return docTypeManager; + } + + @Override + public void read(FieldBase field, CollectionFieldValue value) { + System.out.println("Should not be called!!!"); + } + + @Override + public void read(FieldBase field, StructuredFieldValue value) { + System.out.println("Should not be called!!!"); + } + + @Override + public void read(FieldBase field, FieldValue value) { + System.out.println("SHOULD NEVER BE CALLED? 
" + field.toString()); + } + + @Override + public byte getByte(FieldBase fieldBase) { + return 0; + } + + @Override + public short getShort(FieldBase fieldBase) { + return 0; + } + + @Override + public int getInt(FieldBase fieldBase) { + return 0; + } + + @Override + public long getLong(FieldBase fieldBase) { + return 0; + } + + @Override + public float getFloat(FieldBase fieldBase) { + return 0; + } + + @Override + public double getDouble(FieldBase fieldBase) { + return 0; + } + + @Override + public byte[] getBytes(FieldBase fieldBase, int i) { + return new byte[0]; + } + + @Override + public String getString(FieldBase fieldBase) { + return null; + } +} diff --git a/document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLReader.java b/document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLReader.java new file mode 100644 index 00000000000..10c3676a965 --- /dev/null +++ b/document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLReader.java @@ -0,0 +1,69 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespaxmlparser;
+
+import com.yahoo.document.DocumentTypeManager;
+import com.yahoo.document.serialization.DeserializationException;
+
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Common base for the XML readers in this package. Holds the {@link XMLStreamReader} being consumed and the
+ * {@link DocumentTypeManager} handed to the constructor, and offers helpers for building position-annotated
+ * exceptions, skipping forward to an end tag and detecting the {@code binaryencoding="base64"} attribute.
+ *
+ * @author thomasg
+ */
+public class VespaXMLReader {
+    DocumentTypeManager docTypeManager;
+    XMLStreamReader reader;
+
+    /**
+     * Creates a reader that reads from the given file.
+     *
+     * @param fileName       path of the XML file to open
+     * @param docTypeManager document type manager stored for use by subclasses
+     * @throws Exception if the file cannot be opened or the XML stream reader cannot be created
+     */
+    public VespaXMLReader(String fileName, DocumentTypeManager docTypeManager) throws Exception {
+        this.docTypeManager = docTypeManager;
+        InputStream stream = new FileInputStream(fileName);
+        try {
+            this.reader = createStreamReader(stream);
+        } catch (Exception e) {
+            // The stream was opened here and nobody else holds it yet, so release it before propagating.
+            try {
+                stream.close();
+            } catch (IOException closeFailure) {
+                e.addSuppressed(closeFailure);
+            }
+            throw e;
+        }
+    }
+
+    /**
+     * Creates a reader that reads from the given stream. The stream is not closed by this class.
+     *
+     * @param stream         the XML input
+     * @param docTypeManager document type manager stored for use by subclasses
+     * @throws Exception if the XML stream reader cannot be created
+     */
+    public VespaXMLReader(InputStream stream, DocumentTypeManager docTypeManager) throws Exception {
+        this.docTypeManager = docTypeManager;
+        this.reader = createStreamReader(stream);
+    }
+
+    /**
+     * Creates a reader that continues reading from an already positioned XML stream reader.
+     *
+     * @param reader         the XML stream reader to consume
+     * @param docTypeManager document type manager stored for use by subclasses
+     */
+    public VespaXMLReader(XMLStreamReader reader, DocumentTypeManager docTypeManager) {
+        this.docTypeManager = docTypeManager;
+        this.reader = reader;
+    }
+
+    /** Builds the XML stream reader for a raw input stream, with external entity support switched off. */
+    private static XMLStreamReader createStreamReader(InputStream stream) throws XMLStreamException {
+        XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
+        xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
+        // NOTE(review): DTD processing (XMLInputFactory.SUPPORT_DTD) is left at the factory default, as before.
+        // Consider disabling it as well if feeds never carry a DOCTYPE - confirm with feed producers first.
+        return xmlInputFactory.createXMLStreamReader(stream);
+    }
+
+    /**
+     * Returns a {@link DeserializationException} whose message is the given text followed by the reader's
+     * current line and column.
+     */
+    protected RuntimeException newDeserializeException(String message) {
+        return new DeserializationException(message + locationSuffix());
+    }
+
+    /**
+     * Returns a {@link DeserializationException} that wraps the given exception as its cause. The message is the
+     * cause's message (or its {@code toString()} when it has none) followed by the reader's current line and column.
+     */
+    protected RuntimeException newException(Exception e) {
+        String message = (e.getMessage() != null) ? e.getMessage() : e.toString();
+        return new DeserializationException(message + locationSuffix(), e);
+    }
+
+    /** Formats the reader's current position the same way for every exception raised by this class. */
+    private String locationSuffix() {
+        return " (at line " + reader.getLocation().getLineNumber() + ", column " + reader.getLocation().getColumnNumber() + ")";
+    }
+
+    /**
+     * Advances the reader until its current event is an end tag with the given name. Returns immediately if the
+     * reader is already positioned on such an end tag.
+     *
+     * Only the tag name is compared; nesting depth is not tracked, so the first end tag with that name stops the scan.
+     *
+     * @param tagName name of the element whose end tag to stop at
+     * @throws XMLStreamException       if the underlying reader fails
+     * @throws DeserializationException if the stream runs out before a matching end tag is seen
+     */
+    protected void skipToEnd(String tagName) throws XMLStreamException {
+        while (reader.hasNext()) {
+            if (reader.getEventType() == XMLStreamReader.END_ELEMENT && tagName.equals(reader.getName().toString())) {
+                return;
+            }
+            reader.next();
+        }
+        throw newDeserializeException("Missing end tag for element '" + tagName + "'");
+    }
+
+    /**
+     * Returns whether the given attribute is {@code binaryencoding} with the value {@code base64}
+     * (the value is compared ignoring case, the name is not).
+     */
+    public static boolean isBase64EncodingAttribute(String attributeName, String attributeValue) {
+        return "binaryencoding".equals(attributeName) &&
+            "base64".equalsIgnoreCase(attributeValue);
+    }
+
+    /**
+     * Returns whether the element the reader is currently positioned on carries a
+     * {@code binaryencoding="base64"} attribute.
+     */
+    public static boolean isBase64EncodedElement(XMLStreamReader reader) {
+        for (int i = 0; i < reader.getAttributeCount(); i++) {
+            if (isBase64EncodingAttribute(reader.getAttributeName(i).toString(),
+                reader.getAttributeValue(i)))
+            {
+                return true;
+            }
+        }
+        return false;
+    }
+}
diff --git a/document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLUpdateReader.java b/document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLUpdateReader.java new file mode 100644 index 00000000000..a4d334848d5 --- /dev/null +++ b/document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLUpdateReader.java @@ -0,0 +1,379 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespaxmlparser; + +import com.yahoo.document.*; +import com.yahoo.document.datatypes.Array; +import com.yahoo.document.datatypes.FieldValue; +import com.yahoo.document.datatypes.IntegerFieldValue; +import com.yahoo.document.datatypes.WeightedSet; +import com.yahoo.document.fieldpathupdate.AddFieldPathUpdate; +import com.yahoo.document.fieldpathupdate.AssignFieldPathUpdate; +import com.yahoo.document.fieldpathupdate.FieldPathUpdate; +import com.yahoo.document.fieldpathupdate.RemoveFieldPathUpdate; +import com.yahoo.document.select.parser.ParseException; +import com.yahoo.document.serialization.DocumentUpdateReader; +import com.yahoo.document.update.FieldUpdate; +import com.yahoo.document.update.ValueUpdate; + +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; +import java.io.InputStream; +import java.util.List; +import java.util.Optional; + +public class VespaXMLUpdateReader extends VespaXMLFieldReader implements DocumentUpdateReader { + public VespaXMLUpdateReader(String fileName, DocumentTypeManager 
docTypeManager) throws Exception { + super(fileName, docTypeManager); + } + + public VespaXMLUpdateReader(InputStream stream, DocumentTypeManager docTypeManager) throws Exception { + super(stream, docTypeManager); + } + + public VespaXMLUpdateReader(XMLStreamReader reader, DocumentTypeManager docTypeManager) { + super(reader, docTypeManager); + } + + private Optional condition = Optional.empty(); + + public Optional getCondition() { + return condition; + } + + public boolean hasFieldPath() { + for (int i = 0; i < reader.getAttributeCount(); i++) { + if (reader.getAttributeName(i).toString().equals("fieldpath")) { + return true; + } + } + + return false; + } + + public void read(DocumentUpdate update) { + try { + // First fetch attributes. + DocumentType doctype = null; + + for (int i = 0; i < reader.getAttributeCount(); i++) { + final String attributeName = reader.getAttributeName(i).toString(); + final String attributeValue = reader.getAttributeValue(i); + + if ("documentid".equals(attributeName) || "id".equals(attributeName)) { + update.setId(new DocumentId(attributeValue)); + } else if ("documenttype".equals(attributeName) || "type".equals(attributeName)) { + doctype = docTypeManager.getDocumentType(attributeValue); + update.setDocumentType(doctype); + } else if ("create-if-non-existent".equals(attributeName)) { + if ("true".equals(attributeValue)) { + update.setCreateIfNonExistent(true); + } else if ("false".equals(attributeValue)) { + update.setCreateIfNonExistent(false); + } else { + throw newDeserializeException("'create-if-non-existent' must be either 'true' or 'false', was '" + attributeValue +"'"); + } + } else if ("condition".equals(attributeName)) { + condition = Optional.of(attributeValue); + } + } + + if (doctype == null) { + throw newDeserializeException("Must specify document type. 
" + reader.getLocation()); + } + + // Then fetch fields + while (reader.hasNext()) { + int type = reader.next(); + + if (type == XMLStreamReader.START_ELEMENT) { + final String currentName = reader.getName().toString(); + if (hasFieldPath()) { + if ("assign".equals(currentName)) { + update.addFieldPathUpdate(new AssignFieldPathUpdate(doctype, this)); + skipToEnd("assign"); + } else if ("add".equals(currentName)) { + update.addFieldPathUpdate(new AddFieldPathUpdate(doctype, this)); + skipToEnd("add"); + } else if ("remove".equals(currentName)) { + update.addFieldPathUpdate(new RemoveFieldPathUpdate(doctype, this)); + skipToEnd("remove"); + } else { + throw newDeserializeException("Unknown field path update operation " + reader.getName()); + } + } else { + if ("assign".equals(currentName)) { + update.addFieldUpdate(readAssign(update)); + skipToEnd("assign"); + } else if ("add".equals(currentName)) { + update.addFieldUpdate(readAdd(update)); + skipToEnd("add"); + } else if ("remove".equals(currentName)) { + update.addFieldUpdate(readRemove(update)); + skipToEnd("remove"); + } else if ("alter".equals(currentName)) { + update.addFieldUpdate(readAlter(update)); + skipToEnd("alter"); + } else if ("increment".equals(currentName) || + "decrement".equals(currentName) || + "multiply".equals(currentName) || + "divide".equals(currentName)) { + update.addFieldUpdate(readArithmeticField(update, currentName)); + skipToEnd(currentName); + } else { + throw newDeserializeException("Unknown update operation " + reader.getName()); + } + } + } else if (type == XMLStreamReader.END_ELEMENT) { + return; + } + } + } catch (XMLStreamException e) { + throw newException(e); + } + } + + FieldUpdate readAdd(DocumentUpdate update) throws XMLStreamException { + for (int i = 0; i < reader.getAttributeCount(); i++) { + if ("field".equals(reader.getAttributeName(i).toString())) { + Field f = update.getDocumentType().getField(reader.getAttributeValue(i)); + + FieldValue value = 
f.getDataType().createFieldValue(); + value.deserialize(f, this); + + if (value instanceof Array) { + List l = ((Array)value).getValues(); + return FieldUpdate.createAddAll(f, l); + } else if (value instanceof WeightedSet) { + return FieldUpdate.createAddAll(f, ((WeightedSet) value)); + } else { + throw newDeserializeException("Add operation only applicable to multivalue lists"); + } + + } + } + throw newDeserializeException("Add update without field attribute"); + } + + + FieldUpdate readRemove(DocumentUpdate update) throws XMLStreamException { + for (int i = 0; i < reader.getAttributeCount(); i++) { + if ("field".equals(reader.getAttributeName(i).toString())) { + Field f = update.getDocumentType().getField(reader.getAttributeValue(i)); + + FieldValue value = f.getDataType().createFieldValue(); + value.deserialize(f, this); + + if (value instanceof Array) { + List l = ((Array)value).getValues(); + return FieldUpdate.createRemoveAll(f, l); + } else if (value instanceof WeightedSet) { + return FieldUpdate.createRemoveAll(f, ((WeightedSet)value)); + } else { + throw newDeserializeException("Remove operation only applicable to multivalue lists"); + } + + } + } + throw newDeserializeException("Remove update without field attribute"); + } + + FieldUpdate readAssign(DocumentUpdate update) throws XMLStreamException { + for (int i = 0; i < reader.getAttributeCount(); i++) { + if ("field".equals(reader.getAttributeName(i).toString())) { + Field f = update.getDocumentType().getField(reader.getAttributeValue(i)); + + if (f == null) { + throw newDeserializeException("Field " + reader.getAttributeValue(i) + " not found."); + } + + FieldValue value = f.getDataType().createFieldValue(); + value.deserialize(f, this); + return FieldUpdate.createAssign(f, value); + } + } + throw newDeserializeException("Assignment update without field attribute"); + } + + + FieldUpdate readAlter(DocumentUpdate update) throws XMLStreamException { + Field f = null; + for (int i = 0; i < 
reader.getAttributeCount(); i++) { + if ("field".equals(reader.getAttributeName(i).toString())) { + f = update.getDocumentType().getField(reader.getAttributeValue(i)); + } + } + + if (f == null) { + throw newDeserializeException("Alter update without \"field\" attribute"); + } + + FieldUpdate fu = FieldUpdate.create(f); + + while (reader.hasNext()) { + int type = reader.next(); + if (type == XMLStreamReader.START_ELEMENT) { + if ("increment".equals(reader.getName().toString()) || + "decrement".equals(reader.getName().toString()) || + "multiply".equals(reader.getName().toString()) || + "divide".equals(reader.getName().toString())) { + update.addFieldUpdate(readArithmetic(update, reader.getName().toString(), f, fu)); + skipToEnd(reader.getName().toString()); + } else { + throw newDeserializeException("Element \"" + reader.getName() + "\" not appropriate within alter element"); + } + } else if (type == XMLStreamReader.END_ELEMENT) { + break; + } + } + + return fu; + } + + FieldUpdate readArithmeticField(DocumentUpdate update, String type) throws XMLStreamException { + Field f = null; + for (int i = 0; i < reader.getAttributeCount(); i++) { + if ("field".equals(reader.getAttributeName(i).toString())) { + f = update.getDocumentType().getField(reader.getAttributeValue(i)); + } + } + + if (f == null) { + throw newDeserializeException("Assignment update without \"field\" attribute"); + } + + FieldUpdate fu = FieldUpdate.create(f); + readArithmetic(update, type, f, fu); + return fu; + } + + FieldUpdate readArithmetic(DocumentUpdate update, String type, Field f, FieldUpdate fu) throws XMLStreamException { + Double by = null; + + for (int i = 0; i < reader.getAttributeCount(); i++) { + if ("by".equals(reader.getAttributeName(i).toString())) { + by = Double.parseDouble(reader.getAttributeValue(i)); + } + } + + if (by == null) { + throw newDeserializeException("Assignment update without \"by\" attribute"); + } + + FieldValue key = null; + do { + reader.next(); + if 
(reader.getEventType() == XMLStreamReader.START_ELEMENT) { + if ("key".equals(reader.getName().toString())) { + if (f.getDataType() instanceof WeightedSetDataType) { + DataType nestedType = ((WeightedSetDataType)f.getDataType()).getNestedType(); + key = nestedType.createFieldValue(); + key.deserialize(this); + } else if (f.getDataType() instanceof MapDataType) { + key = ((MapDataType)f.getDataType()).getKeyType().createFieldValue(); + key.deserialize(this); + } else if (f.getDataType() instanceof ArrayDataType) { + key = new IntegerFieldValue(Integer.parseInt(reader.getElementText())); + } else { + throw newDeserializeException("Key tag only applicable for weighted sets and maps"); + } + skipToEnd("key"); + } else { + throw newDeserializeException("\"" + reader.getName() + "\" not appropriate within " + type + " element."); + } + } + } while (reader.getEventType() != XMLStreamReader.END_ELEMENT); + + if (key != null) { + if ("increment".equals(type)) { fu.addValueUpdate(ValueUpdate.createIncrement(key, by)); } + if ("decrement".equals(type)) { fu.addValueUpdate(ValueUpdate.createDecrement(key, by)); } + if ("multiply".equals(type)) { fu.addValueUpdate(ValueUpdate.createMultiply(key, by)); } + if ("divide".equals(type)) { fu.addValueUpdate(ValueUpdate.createDivide(key, by)); } + } else { + if ("increment".equals(type)) { fu.addValueUpdate(ValueUpdate.createIncrement(by)); } + if ("decrement".equals(type)) { fu.addValueUpdate(ValueUpdate.createDecrement(by)); } + if ("multiply".equals(type)) { fu.addValueUpdate(ValueUpdate.createMultiply(by)); } + if ("divide".equals(type)) { fu.addValueUpdate(ValueUpdate.createDivide(by)); } + } + + return fu; + } + + public void read(FieldUpdate update) { + } + + public void read(FieldPathUpdate update) { + String whereClause = null; + String fieldPath = null; + + for (int i = 0; i < reader.getAttributeCount(); i++) { + if (reader.getAttributeName(i).toString().equals("where")) { + whereClause = reader.getAttributeValue(i); + } 
else if (reader.getAttributeName(i).toString().equals("fieldpath")) { + fieldPath = reader.getAttributeValue(i); + } + } + + if (fieldPath != null) { + update.setFieldPath(fieldPath); + } else { + throw newDeserializeException("Field path is required for document updates."); + } + + if (whereClause != null) { + try { + update.setWhereClause(whereClause); + } catch (ParseException e) { + throw newException(e); + } + } + } + + public void read(AssignFieldPathUpdate update) { + try { + for (int i = 0; i < reader.getAttributeCount(); i++) { + if (reader.getAttributeName(i).toString().equals("removeifzero")) { + update.setRemoveIfZero(Boolean.parseBoolean(reader.getAttributeValue(i))); + } else if (reader.getAttributeName(i).toString().equals("createmissingpath")) { + update.setCreateMissingPath(Boolean.parseBoolean(reader.getAttributeValue(i))); + } + } + DataType dt = update.getFieldPath().getResultingDataType(); + + if (dt instanceof NumericDataType) { + update.setExpression(reader.getElementText()); + } else { + FieldValue fv = dt.createFieldValue(); + fv.deserialize(resolveField(update), this); + update.setNewValue(fv); + } + } catch (XMLStreamException e) { + throw newException(e); + } + } + + public void read(AddFieldPathUpdate update) { + DataType dt = update.getFieldPath().getResultingDataType(); + FieldValue fv = dt.createFieldValue(); + fv.deserialize(resolveField(update), this); + update.setNewValues((Array)fv); + } + + public void read(RemoveFieldPathUpdate update) { + } + + private static Field resolveField(FieldPathUpdate update) { + String orig = update.getOriginalFieldPath(); + if (orig == null) { + return null; + } + FieldPath path = update.getFieldPath(); + if (path == null) { + return null; + } + DataType type = path.getResultingDataType(); + if (type == null) { + return null; + } + return new Field(orig, type); + } +} diff --git a/document/src/main/java/com/yahoo/vespaxmlparser/package-info.java 
b/document/src/main/java/com/yahoo/vespaxmlparser/package-info.java new file mode 100644 index 00000000000..eae7e1320a2 --- /dev/null +++ b/document/src/main/java/com/yahoo/vespaxmlparser/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.vespaxmlparser; + +import com.yahoo.osgi.annotation.ExportPackage; -- cgit v1.2.3