aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/search/federation/http/HTTPClientSearcher.java
blob: ee093adc6fc54fdb1612c5d1068f9de3f498456a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.federation.http;

import com.yahoo.component.ComponentId;
import com.yahoo.jdisc.http.CertificateStore;
import com.yahoo.yolean.Exceptions;
import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.processing.request.CompoundName;
import com.yahoo.search.result.ErrorMessage;
import com.yahoo.search.result.Hit;
import com.yahoo.search.searchchain.Execution;
import com.yahoo.statistics.Statistics;

import org.apache.http.HttpEntity;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;

/**
 * A utility parent for searchers which get data from web services and incorporate it into the query.
 * This searcher will take care of implementing the search method while the extending class implements
 * {@link #getQueryMap} and {@link #handleResponse} to create the http request and handle the response, respectively.
 *
 * <p>This class automatically adds a meta hit containing latency and other
 * meta information about the obtained HTTP data using createRequestMeta().
 * The fields available in the hit are:</p>
 *
 * <dl><dt>
 * HTTPSearcher.LOG_LATENCY_START
 * <dd>
 *     The latency of the external provider answering a request.
 * <dt>
 * HTTPSearcher.LOG_LATENCY_FINISH
 * <dd>
 *     Total time of the HTTP traffic, but also decoding of the data, if this
 *     happens at the same time.
 * <dt>
 * HTTPSearcher.LOG_URI
 * <dd>
 *     The complete URI used for external service.
 * <dt>
 * HTTPSearcher.LOG_SCHEME
 * <dd>
 *     The scheme of the request URI sent.
 * <dt>
 * HTTPSearcher.LOG_HOST
 * <dd>
 *     The host used for the request URI sent.
 * <dt>
 * HTTPSearcher.LOG_PORT
 * <dd>
 *     The port used for the request URI sent.
 * <dt>
 * HTTPSearcher.LOG_PATH
 * <dd>
 *     Path element of the request URI sent.
 * <dt>
 * HTTPSearcher.LOG_STATUS
 * <dd>
 *     Status code of the HTTP response.
 * <dt>
 * HTTPSearcher.LOG_PROXY_TYPE
 * <dd>
 *     The proxy type used, if any. Default is "http".
 * <dt>
 * HTTPSearcher.LOG_PROXY_HOST
 * <dd>
 *     The proxy host, if any.
 * <dt>
 * HTTPSearcher.LOG_PROXY_PORT
 * <dd>
 *     The proxy port, if any.
 * <dt>
 * HTTPSearcher.LOG_HEADER_PREFIX prepended to request header field name
 * <dd>
 *     The content of any additional request header fields.
 * <dt>
 * HTTPSearcher.LOG_RESPONSE_HEADER_PREFIX prepended to response header field name
 * <dd>
 *     The content of any additional response header fields.
 * </dl>
 *
 * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a>
 * @author bratseth
 * @deprecated
 */
// TODO: Remove on Vespa 7
@Deprecated // OK
public abstract class HTTPClientSearcher extends HTTPSearcher {

    /** The query property name under which the request meta hit is stored on the query before the HTTP request is made */
    static final CompoundName REQUEST_META_CARRIER = new CompoundName("com.yahoo.search.federation.http.HTTPClientSearcher_requestMeta");

    /** Logger named after this class */
    protected final static Logger log = Logger.getLogger(HTTPClientSearcher.class.getName());

    /**
     * Creates a client searcher which uses a fixed path for its requests.
     *
     * @param id the id of this instance
     * @param connections the connections this will load balance and fail over between
     * @param path the path portion of the url to be used
     * @param statistics passed on unchanged to the superclass constructor
     */
    public HTTPClientSearcher(ComponentId id,
                              List<Connection> connections,
                              String path,
                              Statistics statistics) {
        super(id, connections, path, statistics);
    }

    /**
     * Creates a client searcher which uses a fixed path for its requests and is given a certificate store.
     *
     * @param id the id of this instance
     * @param connections the connections this will load balance and fail over between
     * @param path the path portion of the url to be used
     * @param statistics passed on unchanged to the superclass constructor
     * @param certificateStore the certificate store to use to pass certificates in requests
     */
    public HTTPClientSearcher(ComponentId id,
                              List<Connection> connections,
                              String path,
                              Statistics statistics,
                              CertificateStore certificateStore) {
        super(id, connections, path, statistics, certificateStore);
    }

    /**
     * Creates a client searcher configured by a set of http parameters.
     *
     * @param id the id of this instance
     * @param connections the connections this will load balance and fail over between
     * @param parameters the parameters to use when making http calls
     * @param statistics passed on unchanged to the superclass constructor
     */
    public HTTPClientSearcher(ComponentId id,
                              List<Connection> connections,
                              HTTPParameters parameters,
                              Statistics statistics) {
        super(id, connections, parameters, statistics);
    }
    /**
     * Creates a client searcher configured by a set of http parameters and given a certificate store.
     *
     * @param id the id of this instance
     * @param connections the connections this will load balance and fail over between
     * @param parameters the parameters to use when making http calls
     * @param statistics passed on unchanged to the superclass constructor
     * @param certificateStore the certificate store to use to pass certificates in requests
     */
    public HTTPClientSearcher(ComponentId id,
                              List<Connection> connections,
                              HTTPParameters parameters,
                              Statistics statistics,
                              CertificateStore certificateStore) {
        super(id, connections, parameters, statistics, certificateStore);
    }

    /**
     * Delegates directly to {@link #search(Query, Execution, Connection)}.
     * Overridden to avoid interfering with errors from nested searchers, which is inappropriate for a <i>client</i>.
     */
    @Override
    public Result robustSearch(Query query, Execution execution, Connection connection) {
        Result searchResult = search(query, execution, connection);
        return searchResult;
    }

    /**
     * Implements a search towards the connection chosen by the cluster searcher for this query.
     *
     * <p>A request meta hit is created and stored on the query under {@link #REQUEST_META_CARRIER},
     * the http request is made and its response handled, and then the query returned from
     * {@link #handleResponse(HttpEntity, Query)} is passed on down the chain. The request meta hit
     * is added to the result returned from the rest of the chain.</p>
     *
     * @param query the query to search for
     * @param execution the execution used to pass the query on to the next searcher
     * @param connection the connection to make the http request towards
     * @return the result from the rest of the chain, with the request meta hit added
     */
    @Override
    public Result search(Query query, Execution execution, Connection connection) {
        // Create default meta hit for holding logging information
        Hit requestMeta = createRequestMeta();
        query.properties().set(REQUEST_META_CARRIER, requestMeta);
        query.trace("Created request information hit", false, 9);

        // Propagate the query handed back by handleResponse, as its contract promises,
        // rather than unconditionally continuing with the incoming instance
        Query queryToPropagate = doHttpRequest(query, connection, requestMeta);
        Result result = execution.search(queryToPropagate);
        result.hits().add(requestMeta);
        return result;
    }

    /**
     * Makes the http request for the given query and lets
     * {@link #handleResponse(HttpEntity, Query)} consume the response.
     * Malformed URIs, I/O errors, timeouts and missing responses are recorded as errors
     * on the query instead of being thrown.
     *
     * @param query the query to make a request for
     * @param connection the connection to make the request towards
     * @param requestMeta the meta hit handed to getEntity for this request
     * @return the query to pass down the chain: the one returned by handleResponse if the response
     *         was handled and a non-null query was returned, the given query otherwise
     */
    private Query doHttpRequest(Query query, Connection connection, Hit requestMeta) {
        URI uri;
        try {
            uri = getURI(query, connection);
        } catch (MalformedURLException e) {
            query.errors().add(createMalformedUrlError(query, e));
            return query;
        } catch (URISyntaxException e) {
            query.errors().add(createMalformedUrlError(query, e));
            return query;
        }

        HttpEntity entity;
        try {
            if (query.getTraceLevel() >= 1)
                query.trace("Fetching " + uri.toString(), false, 1);
            entity = getEntity(uri, requestMeta, query);
        } catch (IOException e) {
            query.errors().add(ErrorMessage.createBackendCommunicationError(
                    "Error when trying to connect to HTTP backend in " + this + " using " + connection + " for " +
                    query + ": " + Exceptions.toMessageString(e)));
            return query;
        } catch (TimeoutException e) {
            query.errors().add(ErrorMessage.createTimeout("HTTP traffic timed out in "
                    + this + " for " + query + ": " + e.getMessage()));
            return query;
        }
        if (entity == null) {
            query.errors().add(ErrorMessage.createBackendCommunicationError(
                    "No result from connecting to HTTP backend in " + this + " using " + connection + " for " +  query));
            return query;
        }

        try {
            Query handledQuery = handleResponse(entity, query);
            // A null return keeps the incoming query, so the chain always receives a query
            if (handledQuery != null)
                query = handledQuery;
        } catch (IOException e) {
            query.errors().add(ErrorMessage.createBackendCommunicationError(
                    "Error when trying to consume input in " + this + ": " + Exceptions.toMessageString(e)));
        } finally {
            // Always release the entity, also when handling the response fails
            cleanupHttpEntity(entity);
        }
        return query;
    }

    /**
     * Records the given error on the query and then passes the query on to the next searcher,
     * returning whatever the rest of the chain produces.
     */
    @Override
    public Result search(Query query, Execution execution, ErrorMessage error) {
        query.errors().add(error);
        Result downstreamResult = execution.search(query);
        return downstreamResult;
    }

    /**
     * Does nothing: there is nothing to fill in client searchers.
     */
    @Override
    public void fill(Result result, String summaryClass, Execution execution, Connection connection) {
        // Intentionally empty
    }

    /**
     * Convenience hook for unmarshalling the response and adding the information to the query.
     * Subclasses implement either this or <code>handleResponse(entity,query)</code>.
     * Unless overridden, this always throws an UnsupportedOperationException.
     *
     * @param inputStream the stream containing the data from the http service
     * @param contentLength the length of the content in the stream in bytes, or a negative number if not known
     * @param query the current query, to which information from the stream should be added
     * @return query the query to propagate down the chain. This should almost always be the
     *         query instance given as a parameter.
     * @throws UnsupportedOperationException always, in this default implementation
     */
    public Query handleResponse(InputStream inputStream, long contentLength, Query query) throws IOException {
        String message = "handleResponse must be implemented by " + this;
        throw new UnsupportedOperationException(message);
    }

    /**
     * Unmarshals the response and adds the resulting data to the given query.
     * Unless overridden, this hands the content stream and content length of the entity to
     * {@link #handleResponse(InputStream, long, Query)}, tracing the content length before
     * (at trace level 4) and after (at trace level 2) that call.
     *
     * @param entity the http entity holding the response to unmarshal
     * @param query the current query, to which information from the stream should be added
     * @return query the query to propagate down the chain. This should almost always be the
     *         query instance given as a parameter.
     */
    public Query handleResponse(HttpEntity entity, Query query) throws IOException {
        final long contentLength = entity.getContentLength();
        if (query.getTraceLevel() >= 4) {
            query.trace("Received " + contentLength + " bytes response in " + this, false, 4);
        }

        // From here on, trace on the query which came back from the stream-level handler
        Query handledQuery = handleResponse(entity.getContent(), contentLength, query);
        if (handledQuery.getTraceLevel() >= 2) {
            handledQuery.trace("Handled " + contentLength + " bytes response in " + this, false, 2);
        }
        return handledQuery;
    }

    /** Individual queries to clients are never retried for now: this always returns false */
    @Override
    protected boolean shouldRetry(Query query, Result result) {
        return false;
    }

    /**
     * Returns the cache key to use for the given query.
     * numHits and offset should not be part of the cache key, as the cache supports
     * partial read/write: that is, only one cache entry is maintained per query,
     * irrespective of the offset and numHits.
     *
     * @param q the query to return the cache key of
     * @return the key-value pairs making up the cache key
     */
    public abstract Map<String, String> getCacheKey(Query q);

    /**
     * Returns all key-values found under "service." + {@link #getServiceName} in query.properties(),
     * with each value converted to a string.
     * Returns the empty map if {@link #getServiceName} is not overridden, or returns null or the empty string.
     * Properties which have a null value are left out of the map.
     *
     * @param query the query to read service specific properties from
     * @return a new map, in the iteration order of the listed properties
     */
    @Override
    public Map<String, String> getQueryMap(Query query) {
        Map<String, String> queryMap = new LinkedHashMap<>();

        // Look the name up once; a null name is treated like the empty default
        String serviceName = getServiceName();
        if (serviceName == null || serviceName.isEmpty()) return queryMap;

        // TODO: Make more efficient using CompoundName
        Map<String, Object> serviceProperties = query.properties().listProperties("service." + serviceName);
        for (Map.Entry<String, Object> property : serviceProperties.entrySet()) {
            Object value = property.getValue();
            if (value == null) continue; // nothing to send for a property without a value
            queryMap.put(property.getKey(), value.toString());
        }
        return queryMap;
    }

    /**
     * Returns the name of the service this is a client of; subclasses override this to supply it.
     * The name is used to look up service specific properties as service.getServiceName.serviceSpecificProperty.
     * Unless overridden this returns "", which means service specific parameters will not be used.
     */
    protected String getServiceName() {
        final String noServiceName = "";
        return noServiceName;
    }

}