diff options
Diffstat (limited to 'fbench/src/httpclient/httpclient.h')
-rw-r--r-- | fbench/src/httpclient/httpclient.h | 335 |
1 files changed, 335 insertions, 0 deletions
diff --git a/fbench/src/httpclient/httpclient.h b/fbench/src/httpclient/httpclient.h new file mode 100644 index 00000000000..e69a10346cd --- /dev/null +++ b/fbench/src/httpclient/httpclient.h @@ -0,0 +1,335 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <ostream> +#include <memory> +#include <vespa/fastos/socket.h> + +/** + * This class implements a HTTP client that may be used to fetch + * documents from a HTTP server. It uses the HTTP 1.1 protocol, but in + * order to keep the external interface simple, it does not support + * request pipelining. + **/ +class HTTPClient +{ +private: + HTTPClient(const HTTPClient &); + HTTPClient &operator=(const HTTPClient &); + +protected: + + /** + * abstract superclass of classes used to handle reading of URL + * content depending on how the content length may be determined. + **/ + class ReaderInterface + { + public: + ReaderInterface() {} + virtual ~ReaderInterface() {} + + /** + * This method is called by the @ref HTTPClient::Read(char *, + * size_t) method in order to read from the URL in the appropriate + * way. + * + * @return bytes read or -1 on failure. + * @param client the client object doing the read. + * @param buf where to store the incoming data. + * @param len length of buf. + **/ + virtual ssize_t Read(HTTPClient &client, void *buf, size_t len) = 0; + }; + friend class HTTPClient::ReaderInterface; + + /** + * Class used to handle reading of URL content when content length + * is indicated by the server closing the connection. + **/ + class ConnCloseReader : public ReaderInterface + { + private: + static ConnCloseReader _instance; + public: + ConnCloseReader() {} + virtual ~ConnCloseReader() {} + static ReaderInterface *GetInstance() { return &_instance; } + ssize_t Read(HTTPClient &client, void *buf, size_t len); + }; + friend class HTTPClient::ConnCloseReader; + + /** + * Class used to handle reading of URL content when content length + * is given by a Content-Length header value. + **/ + class ContentLengthReader : public ReaderInterface + { + private: + static ContentLengthReader _instance; + public: + ContentLengthReader() {} + virtual ~ContentLengthReader() {} + static ReaderInterface *GetInstance() { return &_instance; } + ssize_t Read(HTTPClient &client, void *buf, size_t len); + }; + friend class HTTPClient::ContentLengthReader; + + /** + * Class used to handle reading of URL content sent with chunked + * transfer encoding. + **/ + class ChunkedReader : public ReaderInterface + { + private: + static ChunkedReader _instance; + public: + ChunkedReader() {} + virtual ~ChunkedReader() {} + static ReaderInterface *GetInstance() { return &_instance; } + ssize_t Read(HTTPClient &client, void *buf, size_t len); + }; + friend class HTTPClient::ChunkedReader; + + std::unique_ptr<FastOS_Socket> _socket; + std::string _hostname; + int _port; + bool _keepAlive; + bool _headerBenchmarkdataCoverage; + std::string _extraHeaders; + std::string _authority; + uint64_t _reuseCount; + + size_t _bufsize; + char *_buf; + ssize_t _bufused; + ssize_t _bufpos; + + std::string _headerinfo; + unsigned int _headerinfoPos; + + bool _isOpen; + unsigned int _httpVersion; + unsigned int _requestStatus; + int _totalHitCount; + bool _connectionCloseGiven; + bool _contentLengthGiven; + bool _chunkedEncodingGiven; + bool _keepAliveGiven; + unsigned int _contentLength; + + unsigned int _chunkSeq; // chunk sequence number + unsigned int _chunkLeft; // bytes left of current chunk + unsigned int _dataRead; // total bytes read from URL + bool _dataDone; // all URL content read ? + ReaderInterface *_reader; // handles core URL reading + + + /** + * Discard all data currently present in the internal buffer. + **/ + void ResetBuffer() + { + _bufpos = 0; + _bufused = 0; + } + + /** + * Fill the internal buffer with data from the url we are connected + * to. + * + * @return the number of bytes put into the buffer or -1 on fail. + **/ + ssize_t FillBuffer(); + + /** + * Return the next byte from the data stream we are reading. + * + * @return next byte from the data stream or -1 on EOF/ERROR + **/ + int ReadByte() + { + if (_bufpos == _bufused) + FillBuffer(); + return (_bufused > _bufpos) ? _buf[_bufpos++] & 0x0ff : -1; + } + + /** + * Connect to the given url. + * + * @return success(true)/failure(false) + * @param url the url you want to connect to + **/ + bool Connect(const char *url); + + /** + * Read the next line of text from the data stream into 'buf'. If + * the line is longer than ('bufsize' - 1), the first ('bufsize' - + * 1) bytes will be placed in buf (the rest of the line will be + * discarded), and the true length of the line will be returned. The + * string placed in buf will be terminated with a null + * character. Newline characters will be discarded. A line is + * terminated by either '\n', "\r\n" or EOF (EOF - connection + * closed) + * + * @return the actual length of the next line, or -1 if no line was read. + * @param buf where to put the line. + * @param bufsize the length of buf. + **/ + ssize_t ReadLine(char *buf, size_t bufsize); + + /** + * Split a string into parts by inserting null characters into the + * string and index the parts by putting pointers to them in the + * argument array given. Only non-empty parts will be indexed in the + * argument array. + * + * @return NULL(complete split)/rest of string(incomplete split) + * @param input the null-terminated input string. + * @param argc the number of parts found. + * @param argv the argument array. + * @param maxargs the size of 'argv'. + **/ + static char *SplitString(char *input, int &argc, char **argv, + int maxargs); + + /** + * Read and parse the HTTP Header. + * + * @return success(true)/failure(fail) + **/ + bool ReadHTTPHeader(); + + /** + * Read and parse a chunk header. Only used with chunked encoding. + * + * @return success(true)/failure(fail) + **/ + bool ReadChunkHeader(); + +public: + + /** + * Create a HTTP client that may be used to fetch documents from the + * given host. + * + * @param hostname the host you want to fetch documents from. + * @param port the TCP port to use when contacting the host. + * @param keepAlive flag indicating if keep-alive should be enabled. + **/ + HTTPClient(const char *hostname, int port, bool keepAlive, + bool headerBenchmarkdataCoverage, const std::string & extraHeaders="", const std::string &authority = ""); + + /** + * Disconnect from server and free memory. + **/ + ~HTTPClient(); + + /** + * This method may be used to obtain information about how many + * times a physical connection has been reused to send an additional + * HTTP request. Note that connections may only be reused if + * keep-alive is enabled. + * + * @return connection reuse count + **/ + uint64_t GetReuseCount() const + { + return _reuseCount; + } + + /** + * Connect to the given url and read the response HTTP header. Note + * that this method will fail if the host returns a status code + * other than 200. This is done in order to make the interface as + * simple as possible. + * + * @return success(true)/failure(false) + * @param url the url you want to connect to + **/ + bool Open(const char *url); + + /** + * Read data from the url we are currently connected to. This method + * should be called repeatedly until it returns 0 in order to + * completely read the URL content. If @ref Close is called before + * all URL content is read the physical connection will be closed + * even if keepAlive is enabled. + * + * @return bytes read or -1 on failure. + * @param buf where to store the incoming data. + * @param len length of buf. + **/ + ssize_t Read(void *buf, size_t len); + + /** + * Close the connection to the url we are currently reading + * from. Will also close the physical connection if keepAlive is not + * enabled or if all the url content was not read. This is done + * because skipping will probably be more expencive than creating a + * new connection. + * + * @return success(true)/failure(false) + **/ + bool Close(); + + /** + * Class that provides status about the executed fetch method. + **/ + class FetchStatus final + { + public: + /** + * Create a status for the executed fetch. + * + * @param requestStatus The status from the HTTP server. + * @param totalHitCount The total number of hits. + * @param resultSize The number of bytes in result. + **/ + FetchStatus(bool ok, uint32_t requestStatus, int32_t totalHitCount, int32_t resultSize) : + _ok(ok), + _requestStatus(requestStatus), + _totalHitCount(totalHitCount), + _resultSize(resultSize) + {} + /** + * Query if the operation was successful. + * @return Status of operation. + **/ + auto Ok() const { return _ok; } + /** + Query HTTP request status. + @return HTTP request status. + **/ + auto RequestStatus() const { return _requestStatus; } + /** + * Query total hit count. Returns -1 if the total hit count + * could not be found. + * @return Total hit count for query. + **/ + auto TotalHitCount() const { return _totalHitCount; } + /** + * Query the number of bytes in the result buffer. + * @return Number of bytes in result buffer. + **/ + auto ResultSize() const { return _resultSize; } + + private: + bool _ok; + uint32_t _requestStatus; + int32_t _totalHitCount; + int32_t _resultSize; + }; + + /** + * High-level method that may be used to fetch a document in a + * single method call and save the content to the given file. + * + * @return FetchStatus object which can be queried for status. + * @param url the url to fetch. + * @param file where to save the fetched document. If this parameter + * is NULL, the content will be read and then discarded. + **/ + FetchStatus Fetch(const char *url, std::ostream *file = NULL); +}; + |