summaryrefslogtreecommitdiffstats
path: root/vespa-http-client/src/main/java/com/yahoo/vespa/http/client/core/XmlFeedReader.java
blob: e22f16a200cd6e46309484482db835f067b9ae3c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.http.client.core;

import com.yahoo.vespa.http.client.FeedClient;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.ext.DefaultHandler2;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Reads an input stream of XML and sends the document operations it contains to a {@link FeedClient}.
 *
 * @author dybis
 */
public class XmlFeedReader {

    // Static utility class; not meant to be instantiated.
    private XmlFeedReader() {}

    /**
     * Parses the given stream with a non-validating, non-namespace-aware SAX parser, using a
     * {@code SAXClientFeeder} both as content handler and as lexical handler.
     *
     * @param inputStream byte stream containing the XML feed; the input source is declared as UTF-8
     * @param feedClient  client handed to the SAX handler, which receives the parsed operations
     * @param numSent     counter handed to the SAX handler, which increments it per operation it sends
     * @throws Exception if the parser cannot be created or configured, or if parsing fails
     */
    public static void read(InputStream inputStream, FeedClient feedClient, AtomicInteger numSent) throws Exception {
        SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        parserFactory.setValidating(false);
        parserFactory.setNamespaceAware(false);
        // NOTE(review): DTD and external-entity handling are left at the parser defaults. If feeds can
        // come from untrusted sources, consider disabling them (XXE) - confirm no feed relies on them.
        final SAXParser parser = parserFactory.newSAXParser();
        SAXClientFeeder saxClientFeeder = new SAXClientFeeder(feedClient, numSent);

        InputSource inputSource = new InputSource();
        // name() is the canonical charset name; displayName() is intended for human-readable output
        // and may be localized, so it is not a reliable value for an XML encoding name.
        inputSource.setEncoding(StandardCharsets.UTF_8.name());
        inputSource.setByteStream(inputStream);
        // This is to send events about CDATA to the saxClientFeeder
        // (https://docs.oracle.com/javase/tutorial/jaxp/sax/events.html)
        parser.setProperty("http://xml.org/sax/properties/lexical-handler", saxClientFeeder);

        parser.parse(inputSource, saxClientFeeder);
    }
}

/**
 * Streams XML and sends each document operation to the feeder.
 *
 * <p>The handler re-serializes the SAX events of every outermost {@code document}, {@code update} or
 * {@code remove} element into a buffer and hands that buffer to the {@link FeedClient} when the element
 * closes. Text and attribute values arrive already decoded from the parser, so they are escaped again
 * when written; CDATA sections are copied verbatim between their original markers. Comments are dropped.
 */
class SAXClientFeeder extends DefaultHandler2 {
    public static final String CDATA_START = "<![CDATA[";
    public static final String CDATA_STOP = "]]>";
    private final FeedClient feedClient;
    // Current nesting depth of <vespafeed> elements.
    int vespaIndent = 0;
    // Current nesting depth of operation elements; an operation is sent when this drops back to 0.
    int documentIndent = 0;
    // Value of the "documentid" attribute of the operation currently being buffered.
    String documentId = null;
    // Serialized XML of the operation currently being buffered.
    StringBuilder content = new StringBuilder();
    // Incremented once for every operation handed to the feed client.
    final AtomicInteger numSent;
    // True while the parser is inside a CDATA section.
    boolean isCData = false;

    /**
     * @param feedClient client that receives each completed operation
     * @param numSent    counter incremented after each operation is handed to the client
     */
    public SAXClientFeeder(FeedClient feedClient, AtomicInteger numSent) {
        this.feedClient = feedClient;
        this.numSent = numSent;
    }

    /** Writes the CDATA opening marker and switches character handling to verbatim copying. */
    @Override
    public void startCDATA() throws SAXException {
        content.append(CDATA_START);
        isCData = true;
    }

    /** Writes the CDATA closing marker and switches character handling back to escaping. */
    @Override
    public void endCDATA() throws SAXException {
        content.append(CDATA_STOP);
        isCData = false;
    }

    /** Comments are intentionally not copied to the output. */
    @Override
    public void comment(char[] ch, int start, int length) throws SAXException {

    }

    /**
     * Writes the start tag, including all attributes, to the buffer. When the element opens an outermost
     * operation, the buffer is started afresh and the operation's {@code documentid} is recorded first.
     */
    @SuppressWarnings("fallthrough")
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        switch (qName) {
            case "vespafeed":
                vespaIndent++;
                if (vespaIndent == 1 && documentIndent == 0) {
                    // If this is the first vespafeed tag, it should not be added to content of the first item.
                    return;
                }
                // Intentional fall-through: any other vespafeed element is handled like an operation element.
            case "update":
            case "remove":
            case "document":
                documentIndent++;
                // Only the outermost element starts a new operation. Resetting on nested elements with
                // one of these names would discard the enclosing operation's id and buffered start tag,
                // and the enclosing end tag would then fail with "no docid".
                if (documentIndent == 1) {
                    documentId = attributes == null ? null : attributes.getValue("documentid");
                    content = new StringBuilder();
                }
        }
        content.append('<').append(qName);
        if (attributes != null) {
            for (int i = 0; i < attributes.getLength(); i++) {
                content.append(' ')
                        .append(attributes.getQName(i))
                        .append("=\"");
                appendEscaped(attributes.getValue(i));
                content.append('"');
            }
        }
        content.append('>');
    }

    /**
     * Writes the end tag to the buffer and, when it closes an outermost operation, hands the buffer to
     * the feed client and increments the sent counter.
     *
     * @throws IllegalArgumentException if the completed operation has no (or an empty) document id
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        content.append("</")
                .append(qName)
                .append('>');
        switch (qName) {
            case "vespafeed":
                vespaIndent--;
                return;
            case "update":
            case "remove":
            case "document":
                documentIndent--;
                if (documentIndent == 0) {
                    if (documentId == null || documentId.isEmpty()) {
                        throw new IllegalArgumentException("no docid");
                    }
                    feedClient.stream(documentId, content);
                    numSent.incrementAndGet();
                    // Continue on a fresh buffer so that text following the operation (whitespace, the
                    // closing vespafeed tag) is not appended to the instance just handed to the client.
                    // Whether the client keeps a reference to it is not visible from this class.
                    content = new StringBuilder();
                }
        }
    }

    /**
     * Appends character data to the buffer: verbatim inside a CDATA section, XML-escaped otherwise.
     */
    @Override
    public void characters(char[] buf, int offset, int len)
            throws SAXException {
        if (isCData) {
            content.append(buf, offset, len);
            return;
        }

        // This is on the critical loop for performance, otherwise a library would have been used.
        // We can do a few shortcuts as well as this data is already decoded by SAX parser.
        for (int x = offset; x < len + offset; x++) {
            appendEscaped(buf[x]);
        }
    }

    /** Appends every character of the given text to the buffer, XML-escaped. */
    private void appendEscaped(CharSequence text) {
        for (int i = 0; i < text.length(); i++) {
            appendEscaped(text.charAt(i));
        }
    }

    /** Appends one character to the buffer, replacing the five XML special characters by entities. */
    private void appendEscaped(char c) {
        switch (c) {
            case '&': content.append("&amp;"); break;
            case '<': content.append("&lt;"); break;
            case '>': content.append("&gt;"); break;
            case '"': content.append("&quot;"); break;
            case '\'': content.append("&apos;"); break;
            default: content.append(c);
        }
    }
}