diff options
Diffstat (limited to 'document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLFeedReader.java')
-rw-r--r-- | document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLFeedReader.java | 313 |
1 files changed, 313 insertions, 0 deletions
diff --git a/document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLFeedReader.java b/document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLFeedReader.java new file mode 100644 index 00000000000..0c8b9b22961 --- /dev/null +++ b/document/src/main/java/com/yahoo/vespaxmlparser/VespaXMLFeedReader.java @@ -0,0 +1,313 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespaxmlparser; + +import com.yahoo.document.Document; +import com.yahoo.document.DocumentId; +import com.yahoo.document.DocumentTypeManager; +import com.yahoo.document.DocumentUpdate; +import com.yahoo.document.TestAndSetCondition; + +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import java.util.Optional; + +/** + * XML parser for Vespa document XML. + * + * Parses an entire document "feed", which consists of a vespafeed element containing + * zero or more instances of documents, updates or removes. + * + * Standard usage is to create an Operation object and call read(Operation) until + * operation.getType() returns OperationType.INVALID. + * + * If you are looking to parse only a single document or update, use VespaXMLDocumentReader + * or VespaXMLUpdateReader respectively. + */ +public class VespaXMLFeedReader extends VespaXMLReader implements FeedReader { + + /** + * Creates a reader that reads from the given file. + */ + public VespaXMLFeedReader(String fileName, DocumentTypeManager docTypeManager) throws Exception { + super(fileName, docTypeManager); + readInitial(); + } + + /** + * Creates a reader that reads from the given stream. + */ + public VespaXMLFeedReader(InputStream stream, DocumentTypeManager docTypeManager) throws Exception { + super(stream, docTypeManager); + readInitial(); + } + + /** + * Creates a reader that uses the given reader to read - this can be used if the vespa feed + * is part of a larger XML document. + */ + public VespaXMLFeedReader(XMLStreamReader reader, DocumentTypeManager manager) throws Exception { + super(reader, manager); + readInitial(); + } + + /** + * Skips the initial "vespafeed" tag. + */ + void readInitial() throws Exception { + boolean found = false; + + while (reader.hasNext()) { + int type = reader.next(); + if (type == XMLStreamReader.START_ELEMENT) { + if ("vespafeed".equals(reader.getName().toString())) { + found = true; + break; + } + } + } + + if (!found) { + throw newDeserializeException("Feed information must be contained within a \"vespafeed\" element"); + } + } + + public enum OperationType { + DOCUMENT, + REMOVE, + UPDATE, + INVALID + } + + /** + * Represents a feed operation found by the parser. Can be one of the following types: + * - getType() == DOCUMENT: getDocument() is valid. + * - getType() == REMOVE: getRemove() is valid. + * - getType() == UPDATE: getUpdate() is valid. + */ + public static class Operation { + + private OperationType type; + private Document doc; + private DocumentId remove; + private DocumentUpdate docUpdate; + private FeedOperation feedOperation; + private TestAndSetCondition condition; + + public Operation() { + setInvalid(); + } + + public void setInvalid() { + type = OperationType.INVALID; + doc = null; + remove = null; + docUpdate = null; + feedOperation = null; + condition = null; + } + + public OperationType getType() { + return type; + } + + public Document getDocument() { + return doc; + } + + public void setDocument(Document doc) { + this.type = OperationType.DOCUMENT; + this.doc = doc; + } + + public DocumentId getRemove() { + return remove; + } + + public void setRemove(DocumentId remove) { + this.type = OperationType.REMOVE; + this.remove = remove; + } + + public DocumentUpdate getDocumentUpdate() { + return docUpdate; + } + + public void setDocumentUpdate(DocumentUpdate docUpdate) { + this.type = OperationType.UPDATE; + this.docUpdate = docUpdate; + } + + public FeedOperation getFeedOperation() { + return feedOperation; + } + + public void setCondition(TestAndSetCondition condition) { + this.condition = condition; + } + + public TestAndSetCondition getCondition() { + return condition; + } + + @Override + public String toString() { + return "Operation{" + + "type=" + type + + ", doc=" + doc + + ", remove=" + remove + + ", docUpdate=" + docUpdate + + ", feedOperation=" + feedOperation + + '}'; + } + } + + public static class FeedOperation { + + private String name; + private Integer generation; + private Integer increment; + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public Integer getGeneration() { + return generation; + } + + public void setGeneration(int generation) { + this.generation = generation; + } + + public Integer getIncrement() { + return increment; + } + + public void setIncrement(int increment) { + this.increment = increment; + } + } + + /** + * <p>Reads all operations from the XML stream and puts into a list. Note + * that if the XML stream is large, this may cause out of memory errors, so + * make sure to use this only with small streams.</p> + * + * @return The list of all read operations. + */ + public List<Operation> readAll() throws Exception { + List<Operation> list = new ArrayList<Operation>(); + while (true) { + Operation op = new Operation(); + read(op); + if (op.getType() == OperationType.INVALID) { + return list; + } else { + list.add(op); + } + } + } + + /* (non-Javadoc) + * @see com.yahoo.vespaxmlparser.FeedReader#read(com.yahoo.vespaxmlparser.VespaXMLFeedReader.Operation) + */ + @Override + public void read(Operation operation) throws Exception { + String startTag = null; + operation.setInvalid(); + + try { + while (reader.hasNext()) { + int type = reader.next(); + + if (type == XMLStreamReader.START_ELEMENT) { + startTag = reader.getName().toString(); + + if ("document".equals(startTag)) { + VespaXMLDocumentReader documentReader = new VespaXMLDocumentReader(reader, docTypeManager); + Document document = new Document(documentReader); + operation.setDocument(document); + operation.setCondition(TestAndSetCondition.fromConditionString(documentReader.getCondition())); + return; + } else if ("update".equals(startTag)) { + VespaXMLUpdateReader updateReader = new VespaXMLUpdateReader(reader, docTypeManager); + DocumentUpdate update = new DocumentUpdate(updateReader); + operation.setDocumentUpdate(update); + operation.setCondition(TestAndSetCondition.fromConditionString(updateReader.getCondition())); + return; + } else if ("remove".equals(startTag)) { + boolean documentIdFound = false; + + Optional<String> condition = Optional.empty(); + for (int i = 0; i < reader.getAttributeCount(); i++) { + final String attributeName = reader.getAttributeName(i).toString(); + if ("documentid".equals(attributeName) || "id".equals(attributeName)) { + operation.setRemove(new DocumentId(reader.getAttributeValue(i))); + documentIdFound = true; + } else if ("condition".equals(attributeName)) { + condition = Optional.of(reader.getAttributeValue(i)); + } + } + + if (!documentIdFound) { + throw newDeserializeException("Missing \"documentid\" attribute for remove operation"); + } + + operation.setCondition(TestAndSetCondition.fromConditionString(condition)); + + return; + } else { + throw newDeserializeException("Element \"" + startTag + "\" not allowed in this context"); + } + } + } + } catch (XMLStreamException e) { + throw(e); + // Skip to end of current tag with other exceptions. + } catch (Exception e) { + try { + if (startTag != null) { + skipToEnd(startTag); + } + } catch (Exception ignore) { + } + + throw(e); + } + } + + public void read(FeedOperation fo) throws XMLStreamException { + while (reader.hasNext()) { + int type = reader.next(); + + if (type == XMLStreamReader.START_ELEMENT) { + if ("name".equals(reader.getName().toString())) { + fo.setName(reader.getElementText().toString()); + skipToEnd("name"); + } else if ("generation".equals(reader.getName().toString())) { + fo.setGeneration(Integer.parseInt(reader.getElementText().toString())); + skipToEnd("generation"); + } else if ("increment".equals(reader.getName().toString())) { + String text = reader.getElementText(); + if ("autodetect".equals(text)) { + fo.setIncrement(-1); + } else { + fo.setIncrement(Integer.parseInt(text)); + } + skipToEnd("increment"); + } + } else if (type == XMLStreamReader.END_ELEMENT) { + return; + } + } + } + +} |