diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /fbench/src/util |
Publish
Diffstat (limited to 'fbench/src/util')
-rw-r--r-- | fbench/src/util/.gitignore | 3 | ||||
-rw-r--r-- | fbench/src/util/CMakeLists.txt | 9 | ||||
-rw-r--r-- | fbench/src/util/clientstatus.cpp | 176 | ||||
-rw-r--r-- | fbench/src/util/clientstatus.h | 217 | ||||
-rw-r--r-- | fbench/src/util/description.html | 2 | ||||
-rw-r--r-- | fbench/src/util/filereader.cpp | 124 | ||||
-rw-r--r-- | fbench/src/util/filereader.h | 127 | ||||
-rw-r--r-- | fbench/src/util/httpclient.cpp | 549 | ||||
-rw-r--r-- | fbench/src/util/httpclient.h | 335 | ||||
-rw-r--r-- | fbench/src/util/timer.cpp | 90 | ||||
-rw-r--r-- | fbench/src/util/timer.h | 88 |
11 files changed, 1720 insertions, 0 deletions
diff --git a/fbench/src/util/.gitignore b/fbench/src/util/.gitignore new file mode 100644 index 00000000000..316ace34e7b --- /dev/null +++ b/fbench/src/util/.gitignore @@ -0,0 +1,3 @@ +.depend +Makefile +util.lib diff --git a/fbench/src/util/CMakeLists.txt b/fbench/src/util/CMakeLists.txt new file mode 100644 index 00000000000..9efca452cea --- /dev/null +++ b/fbench/src/util/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(fbench_util STATIC + SOURCES + filereader.cpp + httpclient.cpp + timer.cpp + clientstatus.cpp + DEPENDS +) diff --git a/fbench/src/util/clientstatus.cpp b/fbench/src/util/clientstatus.cpp new file mode 100644 index 00000000000..6c117d7e0e6 --- /dev/null +++ b/fbench/src/util/clientstatus.cpp @@ -0,0 +1,176 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "clientstatus.h" +#include <string.h> +#include <math.h> + +ClientStatus::ClientStatus() + : _error(false), + _errorMsg(), + _skipCnt(0), + _failCnt(0), + _overtimeCnt(0), + _totalTime(0), + _realTime(0), + _requestCnt(0), + _timetableResolution(10), + _timetable(10240 * _timetableResolution, 0), + _higherCnt(0), + _minTime(0), + _maxTime(0), + _reuseCnt(0), + _zeroHitQueries(0), + _requestStatusDistribution() +{ +} + +ClientStatus::~ClientStatus() +{ +} + +void +ClientStatus::SetError(const char *errorMsg) +{ + _error = true; + _errorMsg = errorMsg; +} + +void +ClientStatus::ResponseTime(double ms) +{ + if (ms < 0) return; // should never happen. + if (ms > _maxTime) + _maxTime = ms; + if (ms < _minTime || _requestCnt == 0) + _minTime = ms; + _totalTime += ms; + + size_t t = (size_t)(ms * _timetableResolution + 0.5); + if (t >= _timetable.size()) + _higherCnt++; + else + _timetable[t]++; + _requestCnt++; +} + +void +ClientStatus::AddRequestStatus(uint32_t status) +{ + auto it = _requestStatusDistribution.find(status); + + if (it != _requestStatusDistribution.end()) + it->second++; + else + _requestStatusDistribution[status] = 1; +} + +void +ClientStatus::Merge(const ClientStatus & status) +{ + if (_timetable.size() != status._timetable.size()) { + printf("ClientStatus::Merge() : incompatible data structures!\n"); + return; + } + + if (_maxTime < status._maxTime) + _maxTime = status._maxTime; + if ((_requestCnt == 0) || + (_minTime > status._minTime && status._requestCnt > 0)) + _minTime = status._minTime; + _skipCnt += status._skipCnt; + _failCnt += status._failCnt; + _overtimeCnt += status._overtimeCnt; + _totalTime += status._totalTime; + _realTime += status._realTime; + _requestCnt += status._requestCnt; + for (size_t i = 0; i < _timetable.size(); i++) + _timetable[i] += status._timetable[i]; + _higherCnt += status._higherCnt; + _reuseCnt += status._reuseCnt; + _zeroHitQueries += status._zeroHitQueries; + + for (const auto& entry : status._requestStatusDistribution) { + auto it = _requestStatusDistribution.find(entry.first); + if (it != _requestStatusDistribution.end()) + it->second += entry.second; + else + _requestStatusDistribution[entry.first] = entry.second; + } +} + +double +ClientStatus::GetMin() +{ + return _minTime; +} + +double +ClientStatus::GetMax() +{ + return _maxTime; +} + +double +ClientStatus::GetAverage() +{ + return (_requestCnt == 0) ? + 0 : _totalTime / ((double)_requestCnt); +} + +double +ClientStatus::GetPercentile(double percent) +{ + if (percent < 0.0) percent = 0.0; + if (percent > 100.0) percent = 100.0; + + double target = ((double)(_requestCnt - 1)) * (percent / 100.0); + long t1 = (long)floor(target); + long t2 = (long)ceil(target); + double k = ceil(target) - target; + int i1 = 0; + int i2 = 0; + long cnt = 0; + double val1 = 0; + double val2 = 0; + + cnt = _timetable[0]; + while (cnt <= t1) { + if (i1 + 1 < int(_timetable.size())) { + cnt += _timetable[++i1]; + } else { + i1 = -1; + break; + } + } + i2 = i1; + if (i1 >= 0) { + val1 = i1; + while (cnt <= t2) { + if (i2 + 1 < int(_timetable.size())) { + cnt += _timetable[++i2]; + } else { + i2 = -1; + break; + } + } + } else { + if (_higherCnt < 2) { + val1 = _maxTime * _timetableResolution; + } else { + // use uniform distribution for approximation + val1 = (((double)(t1 - (_requestCnt - _higherCnt))) / ((double)(_higherCnt - 1))) + * (_maxTime * _timetableResolution - ((double)_timetable.size())) + ((double)_timetable.size()); + } + } + if (i2 >= 0) { + val2 = i2; + } else { + if (_higherCnt < 2) { + val2 = _maxTime * _timetableResolution; + } else { + // use uniform distribution for approximation + val2 = (((double)(t2 - (_requestCnt - _higherCnt))) / ((double)(_higherCnt - 1))) + * (_maxTime * _timetableResolution - ((double)_timetable.size())) + ((double)_timetable.size()); + } + } + return (k * val1 + (1 - k) * val2) / _timetableResolution; +} diff --git a/fbench/src/util/clientstatus.h b/fbench/src/util/clientstatus.h new file mode 100644 index 00000000000..5248f2618c5 --- /dev/null +++ b/fbench/src/util/clientstatus.h @@ -0,0 +1,217 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <map> +#include <vector> + +/** + * This is a helper struct that is used by the @ref Client class to + * aggregate runtime statistics. It is also used to record warnings + * and errors. + **/ +struct ClientStatus +{ + /** + * Indicates wether a fatal error has occurred. + **/ + bool _error; + + /** + * Message explaining the error indicated by _error. + **/ + std::string _errorMsg; + + /** + * The number of requests that has been skipped. + **/ + long _skipCnt; + + /** + * The number of requests that have failed. + **/ + long _failCnt; + + /** + * The number of requests that had response time greater than the + * cycle time. + **/ + long _overtimeCnt; + + /** + * Total response time for all requests. + **/ + double _totalTime; + + /** + * Real time passed. This is used to calculate the actual query + * rate. + **/ + double _realTime; + + /** + * Total number of (successful) requests. Overtime requests are + * counted with, but not failed or skipped ones. + **/ + long _requestCnt; + + /** + * Resolution of timetable. A resolution of 1 means each entry in + * the timetable is 1 millisecond. A resolution of 10 means each + * entry is 1/10th of a millisecond. + **/ + const int _timetableResolution; + + /** + * Table where _timetable[i] is the number of requests with response + * time in milliseconds (i is multiplied with the resolution). + **/ + std::vector<int> _timetable; + + /** + * Number of requests with response time greater than or equal + * _timetableSize divided by _timetableResolution milliseconds. + **/ + long _higherCnt; + + /** + * The minimum response time measured. + **/ + double _minTime; + + /** + * The maximum response time measured. + **/ + double _maxTime; + + /** + * Connection reuse count. Tells us how many requests were made + * without having to open a new connection. If keep-alive is not + * enabled, this will always be 0. + **/ + uint64_t _reuseCnt; + + /** + * The number of zero hit queries + **/ + long _zeroHitQueries; + + /** + * The request status distribution. Key=Status, Value=Count. + **/ + std::map<uint32_t, uint32_t> _requestStatusDistribution; + + ClientStatus(); + ~ClientStatus(); + + /** + * Notify that an error occurred and set an error message describing + * the error. The client should only call this method once right + * before exiting due to a fatal error. + * + * @param errorMsg A string explaining the error. + **/ + void SetError(const char* errorMsg); + + /** + * Notify that a request was skipped. Long requests (measured in + * bytes) will be skipped due to intenal buffer limitations. This + * should happen very rarely. + **/ + void SkippedRequest() { _skipCnt++; } + + /** + * Notify that a request failed. This should be called when the + * client could not establish a connection to the server or a read + * error occurred while fetching the response. + **/ + void RequestFailed() { _failCnt++; } + + /** + * Notify that the cycle time could not be held. This typically + * indicates that either the server response time is longer than the + * cycle time or that your thread/socket libraries are unable to + * handle the number of clients currently running. + **/ + void OverTime() { _overtimeCnt++; } + + /** + * This method is used to register response times measured by the + * client. Response times should only be registered for successful + * requests. + * + * @param ms Response time measured in milliseconds. + **/ + void ResponseTime(double ms); + + /** + * Set real time passed while benchmarking. + * + * @param ms time passed while benchmarking (in milliseconds) + **/ + void SetRealTime(double ms) { _realTime = ms; } + + /** + * Set connection reuse count. + * + * @param cnt connection reuse count + **/ + void SetReuseCount(uint64_t cnt) { _reuseCnt = cnt; } + + /** + * Add request status to request status distribution. + * + * @param status The status to insert + **/ + void AddRequestStatus(uint32_t status); + + /** + * Merge the info held by 'status' into the info held by this + * struct. Note that the error flag and error messages are ignored. If + * you do not want to use data held by a status struct with an error + * you should check the error flag before merging. + * + * @param status The ClientStatus that should be merged into this one. + **/ + void Merge(const ClientStatus & status); + + /** + * @return the minimum response time. + **/ + double GetMin(); + + /** + * @return the maximum response time. + **/ + double GetMax(); + + /** + * @return the average response time. + **/ + double GetAverage(); + + /** + * @return The 50 percent percentile (aka median). + **/ + double GetMedian() { return GetPercentile(50); } + + /** + * This method calculates a response time that separates the 'percent' + * percent fastest requests from the (100 - 'percent') percent slowest + * requests. A single request may be classified by comparing the + * request response time with the percentile returned by this + * method. If the requested percentile lies outside the time table + * measuring interval, -1 is returned. This indicates that the + * requested percentile was greater than _timetableSize (divided by + * resolution) milliseconds. + * + * @return the calculated percentile or -1 if it was outside the time table. + * @param percent percent of requests that should have response time lower + * than the percentile to be calculated by this method. Legal values + * of this parameter is in the range [0,100]. + **/ + double GetPercentile(double percent); + +private: + ClientStatus(const ClientStatus &); + ClientStatus &operator=(const ClientStatus &); +}; diff --git a/fbench/src/util/description.html b/fbench/src/util/description.html new file mode 100644 index 00000000000..4ac9f11ca21 --- /dev/null +++ b/fbench/src/util/description.html @@ -0,0 +1,2 @@ +<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> +Library containing utility classes. diff --git a/fbench/src/util/filereader.cpp b/fbench/src/util/filereader.cpp new file mode 100644 index 00000000000..b1eebbcb2f0 --- /dev/null +++ b/fbench/src/util/filereader.cpp @@ -0,0 +1,124 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "filereader.h" +#include <iostream> +#include <unistd.h> + +int GetOpt (int argc, char *argv[], const char *optionsString, + const char* &optionArgument, + int &optionIndex) +{ + optind = optionIndex; + + int rc = getopt(argc, argv, optionsString); + optionArgument = optarg; + optionIndex = optind; + return rc; +} + +FileReader::FileReader() + : _backing(), + _file(&std::cin), + _bufsize(1024*1024), + _buf(_bufsize), + _bufused(0), + _bufpos(0) +{ +} + +FileReader::~FileReader() +{ +} + +bool +FileReader::Open(const char *filename) +{ + _backing = std::make_unique<std::ifstream>(filename); + _file = _backing.get(); + return (bool)*_file; +} + +bool +FileReader::OpenStdin() +{ + _file = &std::cin; + return true; +} + +bool +FileReader::Reset() +{ + _file->clear(); + _file->seekg(0); + return bool(*_file); +} + +bool +FileReader::SetFilePos(int64_t pos) +{ + _bufpos = 0; + _file->seekg(pos); + return bool(*_file); +} + +int64_t +FileReader::GetFileSize() +{ + _file->seekg (0, std::ifstream::end); + return _file->tellg(); +} + +uint64_t +FileReader::FindNewline(int64_t pos) +{ + char buf[100]; + SetFilePos(pos); + ssize_t len = ReadLine(buf, 100); + ++_bufpos; + + return pos+len; +} + +void +FileReader::FillBuffer() +{ + _file->read(&_buf[0], _bufsize); + _bufused = _file->gcount(); // may be -1 + _bufpos = 0; +} + +ssize_t +FileReader::ReadLine(char *buf, size_t bufsize) +{ + int c; + size_t len; + + len = 0; + c = ReadByte(); + if (c == -1) + return -1; + while (c != -1 && c != '\n' && c != '\r') { + if (len < bufsize - 1) + buf[len] = c; + len++; + c = ReadByte(); + } + if (_bufpos == _bufused) + FillBuffer(); + if ((_bufused > _bufpos) && + ((c == '\n' && _buf[_bufpos] == '\r') || + (c == '\r' && _buf[_bufpos] == '\n'))) + _bufpos++; + if (len < bufsize) + buf[len] = '\0'; // terminate string + else + buf[bufsize - 1] = '\0'; // terminate string + return len; +} + +void +FileReader::Close() +{ + if (_backing) { + _backing->close(); + } +} diff --git a/fbench/src/util/filereader.h b/fbench/src/util/filereader.h new file mode 100644 index 00000000000..b553c73a262 --- /dev/null +++ b/fbench/src/util/filereader.h @@ -0,0 +1,127 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <fstream> +#include <memory> +#include <vector> + +int GetOpt (int argc, char *argv[], const char *optionsString, + const char* &optionArgument, + int &optionIndex); + +/** + * This is a wrapper class for std::ifstream that may be used when + * reading line based text files. An internal buffer is used to + * improve performance. + **/ +class FileReader +{ +private: + std::unique_ptr<std::ifstream> _backing; + std::istream *_file; + int _bufsize; + std::vector<char> _buf; + int _bufused; + int _bufpos; + + /** + * Fill the internal buffer with data from the currently open file. + **/ + void FillBuffer(); + + FileReader(const FileReader &); + FileReader &operator=(const FileReader &); + +public: + + /** + * Creates a used for disk-access. An + * internal buffer of 5120 bytes is also created. + **/ + FileReader(); + + /** + * Frees memory used by the underlying file and the internal buffer. + **/ + ~FileReader(); + + /** + * Read a single byte from the currently open input file. You should + * call @ref Open before calling this method. The internal buffer is + * used to reduce the number of reads performed on the underlying + * file. + * + * @return the read byte or -1 if EOF was reached or an error occurred. + **/ + int ReadByte() + { + if(_bufpos == _bufused) + FillBuffer(); + return (_bufused > _bufpos) ? _buf[_bufpos++] & 0x0ff : -1; + } + + /** + * Open a file for reading. + * + * @return success(true)/failure(false) + * @param filename the name of the file to open. + **/ + bool Open(const char *filename); + + /** + * Open the standard input for reading. + * + * @return success(true)/failure(false) + **/ + bool OpenStdin(); + + /** + * Reset the file pointer and flush the internal buffer. The next + * read operation will apply to the beginning of the file. + * + * @return success(true)/failure(false) + **/ + bool Reset(); + + /** + * Works like Reset(), but sets the file pointer to 'pos + **/ + bool SetFilePos(int64_t pos); + + /** + * @return size of file in bytes + **/ + int64_t GetFileSize(); + + /** + * @returns _bufpos + **/ + uint64_t GetBufPos() const { return _bufpos; } + + /** + * @returns offset of next newline from pos + **/ + uint64_t FindNewline(int64_t pos); + + /** + * Read the next line of text from the the currently open file into + * 'buf'. If the line is longer than ('bufsize' - 1), the first + * ('bufsize' - 1) bytes will be placed in 'buf' and the true length + * of the line will be returned. The string placed in 'buf' will be + * terminated with a null character. Newline characters will be + * discarded. A line is terminated by either '\n', '\r', "\r\n", + * "\n\r" or EOF. This method uses @ref ReadByte to read single + * bytes from the file. + * + * @return the actual length of the next line, or -1 if no line was read. + * @param buf where to put the line. + * @param bufsize the length of buf. + **/ + ssize_t ReadLine(char *buf, size_t bufsize); + + /** + * Close the file. + **/ + void Close(); +}; + diff --git a/fbench/src/util/httpclient.cpp b/fbench/src/util/httpclient.cpp new file mode 100644 index 00000000000..ce2157335e4 --- /dev/null +++ b/fbench/src/util/httpclient.cpp @@ -0,0 +1,549 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "httpclient.h" + +#define FETCH_BUFLEN 5120 +#define FIXED_REQ_MAX 256 + + +HTTPClient::ConnCloseReader +HTTPClient::ConnCloseReader::_instance; + +HTTPClient::ContentLengthReader +HTTPClient::ContentLengthReader::_instance; + +HTTPClient::ChunkedReader +HTTPClient::ChunkedReader::_instance; + + +HTTPClient::HTTPClient(const char *hostname, int port, + bool keepAlive, bool headerBenchmarkdataCoverage, + const std::string & extraHeaders, const std::string &authority) + : _socket(new FastOS_Socket()), + _hostname(hostname), + _port(port), + _keepAlive(keepAlive), + _headerBenchmarkdataCoverage(headerBenchmarkdataCoverage), + _extraHeaders(extraHeaders), + _authority(authority), + _reuseCount(0), + _bufsize(10240), + _buf(new char[_bufsize]), + _bufused(0), + _bufpos(0), + _headerinfo(), + _isOpen(false), + _httpVersion(0), + _requestStatus(0), + _totalHitCount(-1), + _connectionCloseGiven(false), + _contentLengthGiven(false), + _chunkedEncodingGiven(false), + _keepAliveGiven(false), + _contentLength(0), + _chunkSeq(0), + _chunkLeft(0), + _dataRead(0), + _dataDone(false), + _reader(NULL) +{ + _socket->SetAddressByHostName(port, hostname); + if (_authority == "") { + char tmp[1024]; + snprintf(tmp, 1024, "%s:%d", hostname, port); + _authority = tmp; + } +} + +ssize_t +HTTPClient::FillBuffer() { + _bufused = _socket->Read(_buf, _bufsize); // may be -1 + _bufpos = 0; + return _bufused; +} + +HTTPClient::~HTTPClient() +{ + if (_socket) + _socket->Close(); + delete [] _buf; +} + +ssize_t +HTTPClient::ReadLine(char *buf, size_t bufsize) +{ + size_t len = 0; + int lastC = 0; + int c = ReadByte(); + + if (c == -1) + return -1; + while (c != '\n' && c != -1) { + if (len + 1 < bufsize) + buf[len] = c; + len++; + lastC = c; + c = ReadByte(); + } + if (lastC == '\r') + len--; + if (len < bufsize) + buf[len] = '\0'; // terminate string + else if (bufsize > 0) + buf[bufsize - 1] = '\0'; // terminate string + return len; +} + +bool +HTTPClient::Connect(const char *url) +{ + char tmp[4096]; + char *req = NULL; + uint32_t req_max = 0; + uint32_t url_len = strlen(url); + uint32_t host_len = _hostname.size(); + + // Add additional headers + std::string headers = _extraHeaders; + + // this is always requested to get robust info on total hit count. + headers += "X-Yahoo-Vespa-Benchmarkdata: true\r\n"; + + if ( _headerBenchmarkdataCoverage ) { + headers += "X-Yahoo-Vespa-Benchmarkdata-Coverage: true\r\n"; + } + + if (url_len + host_len + headers.length() + FIXED_REQ_MAX < sizeof(tmp)) { + req = tmp; + req_max = sizeof(tmp); + } else { + req_max = url_len + host_len + headers.length() + FIXED_REQ_MAX; + req = new char[req_max]; + assert(req != NULL); + } + + if (headers.length() > 0) { + headers += "\r\n"; + } + // create request + if(_keepAlive) { + snprintf(req, req_max, + "GET %s HTTP/1.1\r\n" + "Host: %s\r\n" + "User-Agent: fbench/4.2.10\r\n" + "%s" + "\r\n", + url, _authority.c_str(), headers.c_str()); + } else { + snprintf(req, req_max, + "GET %s HTTP/1.1\r\n" + "Host: %s\r\n" + "Connection: close\r\n" + "User-Agent: fbench/4.2.10\r\n" + "%s" + "\r\n", + url, _authority.c_str(), headers.c_str()); + } + + // try to reuse connection if keep-alive is enabled + if (_keepAlive + && _socket->IsOpened() + && _socket->Write(req, strlen(req)) == (ssize_t)strlen(req) + && FillBuffer() > 0) { + + // DEBUG + // printf("Socket Connection reused!\n"); + _reuseCount++; + if (req != tmp) { + delete [] req; + } + return true; + } else { + _socket->Close(); + ResetBuffer(); + } + + // try to open new connection to server + if (_socket->SetSoBlocking(true) + && _socket->Connect() + && _socket->SetNoDelay(true) + && _socket->SetSoLinger(false, 0) + && _socket->Write(req, strlen(req)) == (ssize_t)strlen(req)) { + + // DEBUG + // printf("New Socket connection!\n"); + if (req != tmp) { + delete [] req; + } + return true; + } else { + _socket->Close(); + } + + // DEBUG + // printf("Connect FAILED!\n"); + if (req != tmp) { + delete [] req; + } + return false; +} + +char * +HTTPClient::SplitString(char *input, int &argc, char **argv, int maxargs) +{ + for (argc = 0, argv[0] = input; *input != '\0'; input++) + if (*input == '\t' || *input == ' ') { + *input = '\0'; + if (*(argv[argc]) != '\0' && ++argc >= maxargs) + return (input + 1); // INCOMPLETE + argv[argc] = (input + 1); + } + if (*(argv[argc]) != '\0') + argc++; + return NULL; // COMPLETE +} + +bool +HTTPClient::ReadHTTPHeader() +{ + int lineLen; + char line[4096]; + int argc; + char *argv[32]; + int i; + + // clear HTTP header flags + _connectionCloseGiven = false; + _contentLengthGiven = false; + _chunkedEncodingGiven = false; + _keepAliveGiven = false; + + // read and split status line + if ((lineLen = ReadLine(line, 4096)) <= 0) + return false; + SplitString(line, argc, argv, 32); + + // parse status line + if (argc >= 2) { + if (strncmp(argv[0], "HTTP/", 5) != 0) + return false; + _httpVersion = (strncmp(argv[0], "HTTP/1.0", 8) == 0) ? + 0 : 1; + _requestStatus = atoi(argv[1]); + } else { + return false; + } + + // DEBUG + // printf("HTTP: version: 1.%d\n", _httpVersion); + // printf("HTTP: status: %d\n", _requestStatus); + + // read and parse rest of header + while((lineLen = ReadLine(line, 4096)) > 0) { + + // DEBUG + // printf("HTTP-Header: '%s'\n", line); + + if (strncmp(line, "X-Yahoo-Vespa-", strlen("X-Yahoo-Vespa")) == 0) { + const auto benchmark_data = std::string(line + 14); + + auto strpos = benchmark_data.find("TotalHitCount:"); + if (strpos != std::string::npos) { + _totalHitCount = atoi(benchmark_data.substr(14).c_str()); + } + + // Make sure to have enough memory in _headerinfo + _headerinfo += benchmark_data; + _headerinfo += "\n"; + } + + SplitString(line, argc, argv, 32); + if (argc > 1) { + if (strcasecmp(argv[0], "connection:") == 0) { + for(i = 1; i < argc; i++) { + // DEBUG + // printf("HTTP: Connection: '%s'\n", argv[i]); + + if (strcasecmp(argv[i], "keep-alive") == 0) { + _keepAliveGiven = true; + + // DEBUG + // printf("HTTP: connection keep-alive given\n"); + } + if (strcasecmp(argv[i], "close") == 0) { + _connectionCloseGiven = true; + + // DEBUG + // printf("HTTP: connection close given\n"); + } + } + } + if (strcasecmp(argv[0], "content-length:") == 0) { + _contentLengthGiven = true; + _contentLength = atoi(argv[1]); + + // DEBUG + // printf("HTTP: content length : %d\n", _contentLength); + } + if (strcasecmp(argv[0], "transfer-encoding:") == 0 + && strcasecmp(argv[1], "chunked") == 0) { + _chunkedEncodingGiven = true; + + // DEBUG + // printf("HTTP: chunked encoding given\n"); + } + } + } + return (lineLen == 0); +} + +bool +HTTPClient::ReadChunkHeader() +{ + int lineLen; + char numStr[10]; + char c; + int i; + + if (_chunkSeq++ > 0 && ReadLine(NULL, 0) != 0) + return false; // no CRLF(/LF) after data block + + assert(_chunkLeft == 0); + if (ReadLine(numStr, 10) <= 0) + return false; // chunk length not found + for (i = 0; i < 10; i++) { + c = numStr[i]; + if (c >= 'a' && c <= 'f') + c = c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + c = c - 'A' + 10; + else if (c >= '0' && c <= '9') + c = c - '0'; + else + break; + if (i >= 8) // can't handle chunks this big + return false; + _chunkLeft = (_chunkLeft << 4) + c; + } + + // DEBUG + // printf("CHUNK: Length: %d\n", _chunkLeft); + + if (_chunkLeft == 0) { + while ((lineLen = ReadLine(NULL, 0)) > 0); // skip trailer + if (lineLen < 0) + return false; // data error + _dataDone = true; // got last chunk + } + return true; +} + +bool +HTTPClient::Open(const char *url) +{ + if (_isOpen) + Close(); + + ResetBuffer(); + _dataRead = 0; + _dataDone = false; + _isOpen = Connect(url); + if(!_isOpen || !ReadHTTPHeader()) { + Close(); + return false; + } + if(_chunkedEncodingGiven) { + _chunkSeq = 0; + _chunkLeft = 0; + + // DEBUG + // printf("READER = Chunked\n"); + _reader = ChunkedReader::GetInstance(); + } else if(_contentLengthGiven) { + + // DEBUG + // printf("READER = ContentLength\n"); + _reader = ContentLengthReader::GetInstance(); + } else { + + // DEBUG + // printf("READER = ConnClose\n"); + _reader = ConnCloseReader::GetInstance(); + } + return true; +} + +ssize_t +HTTPClient::ConnCloseReader::Read(HTTPClient &client, + void *buf, size_t len) +{ + size_t fromBuffer = 0; + ssize_t res = 0; + ssize_t readRes; + + if (client._bufused > client._bufpos) { // data in buffer ? + fromBuffer = (((size_t)(client._bufused - client._bufpos)) > len) ? + len : client._bufused - client._bufpos; + memcpy(buf, client._buf + client._bufpos, fromBuffer); + client._bufpos += fromBuffer; + client._dataRead += fromBuffer; + res = fromBuffer; + } + if ((len - fromBuffer) > (len >> 1)) { + readRes = client._socket->Read(static_cast<char *>(buf) + + fromBuffer, len - fromBuffer); + if (readRes < 0) { + client.Close(); + return -1; + } + if (readRes == 0) + client._dataDone = true; + client._dataRead += readRes; + res += readRes; + } + return res; +} + +ssize_t +HTTPClient::ContentLengthReader::Read(HTTPClient &client, + void *buf, size_t len) +{ + size_t fromBuffer = 0; + ssize_t res = 0; + ssize_t readLen; + ssize_t readRes; + + if (client._bufused > client._bufpos) { // data in buffer ? + fromBuffer = (((size_t)(client._bufused - client._bufpos)) > len) ? + len : client._bufused - client._bufpos; + memcpy(buf, client._buf + client._bufpos, fromBuffer); + client._bufpos += fromBuffer; + client._dataRead += fromBuffer; + res = fromBuffer; + if (client._dataRead >= client._contentLength) { + client._dataDone = true; + return res; + } + } + if ((len - fromBuffer) > (len >> 1)) { + readLen = (len - fromBuffer + < client._contentLength - client._dataRead) ? + len - fromBuffer : client._contentLength - client._dataRead; + readRes = client._socket->Read(static_cast<char *>(buf) + + fromBuffer, readLen); + if (readRes < 0) { + client.Close(); + return -1; + } + client._dataRead += readRes; + res += readRes; + if (client._dataRead >= client._contentLength) { + client._dataDone = true; + return res; + } + if (readRes == 0) { // data lost because server closed connection + client.Close(); + return -1; + } + } + return res; +} + +ssize_t +HTTPClient::ChunkedReader::Read(HTTPClient &client, + void *buf, size_t len) +{ + size_t fromBuffer = 0; + ssize_t res = 0; + + while ((len - res) > (len >> 1)) { + if (client._chunkLeft == 0) { + if (!client.ReadChunkHeader()) { + client.Close(); + return -1; + } + if (client._dataDone) + return res; + } + if (client._bufused == client._bufpos) { + if (client.FillBuffer() <= 0) { + client.Close(); + return -1; + } + } + fromBuffer = ((len - res) < ((size_t)(client._bufused - client._bufpos))) ? + len - res : client._bufused - client._bufpos; + fromBuffer = (client._chunkLeft < fromBuffer) ? + client._chunkLeft : fromBuffer; + memcpy(static_cast<char *>(buf) + res, client._buf + client._bufpos, fromBuffer); + client._bufpos += fromBuffer; + client._dataRead += fromBuffer; + client._chunkLeft -= fromBuffer; + res += fromBuffer; + } + return res; +} + +ssize_t +HTTPClient::Read(void *buf, size_t len) +{ + if (!_isOpen) + return -1; + if (_dataDone) + return 0; + return _reader->Read(*this, buf, len); +} + +bool +HTTPClient::Close() +{ + if (!_isOpen) + return true; + + _isOpen = false; + return (!_keepAlive + || _connectionCloseGiven + || !_dataDone + || (_httpVersion == 0 && !_keepAliveGiven)) ? + _socket->Close() : true; +} + +HTTPClient::FetchStatus +HTTPClient::Fetch(const char *url, std::ostream *file) +{ + size_t buflen = FETCH_BUFLEN; + char buf[FETCH_BUFLEN]; // NB: ensure big enough thread stack. + ssize_t readRes = 0; + ssize_t written = 0; + + if (!Open(url)) { + return FetchStatus(false, _requestStatus, _totalHitCount, 0); + } + + // Write headerinfo + if (file) { + file->write(_headerinfo.c_str(), _headerinfo.length()); + if (file->fail()) { + Close(); + return FetchStatus(false, _requestStatus, _totalHitCount, 0); + } + file->write("\r\n", 2); + // Reset header data. + _headerinfo = ""; + } + + while((readRes = Read(buf, buflen)) > 0) { + if(file != NULL) { + if (!file->write(buf, readRes)) { + Close(); + return FetchStatus(false, _requestStatus, _totalHitCount, written); + } + } + written += readRes; + } + Close(); + + return FetchStatus(_requestStatus == 200 && readRes == 0 && _totalHitCount >= 0, + _requestStatus, + _totalHitCount, + written); +} diff --git a/fbench/src/util/httpclient.h b/fbench/src/util/httpclient.h new file mode 100644 index 00000000000..e69a10346cd --- /dev/null +++ b/fbench/src/util/httpclient.h @@ -0,0 +1,335 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <ostream> +#include <memory> +#include <vespa/fastos/socket.h> + +/** + * This class implements a HTTP client that may be used to fetch + * documents from a HTTP server. It uses the HTTP 1.1 protocol, but in + * order to keep the external interface simple, it does not support + * request pipelining. + **/ +class HTTPClient +{ +private: + HTTPClient(const HTTPClient &); + HTTPClient &operator=(const HTTPClient &); + +protected: + + /** + * abstract superclass of classes used to handle reading of URL + * content depending on how the content length may be determined. + **/ + class ReaderInterface + { + public: + ReaderInterface() {} + virtual ~ReaderInterface() {} + + /** + * This method is called by the @ref HTTPClient::Read(char *, + * size_t) method in order to read from the URL in the appropriate + * way. + * + * @return bytes read or -1 on failure. + * @param client the client object doing the read. + * @param buf where to store the incoming data. + * @param len length of buf. + **/ + virtual ssize_t Read(HTTPClient &client, void *buf, size_t len) = 0; + }; + friend class HTTPClient::ReaderInterface; + + /** + * Class used to handle reading of URL content when content length + * is indicated by the server closing the connection. + **/ + class ConnCloseReader : public ReaderInterface + { + private: + static ConnCloseReader _instance; + public: + ConnCloseReader() {} + virtual ~ConnCloseReader() {} + static ReaderInterface *GetInstance() { return &_instance; } + ssize_t Read(HTTPClient &client, void *buf, size_t len); + }; + friend class HTTPClient::ConnCloseReader; + + /** + * Class used to handle reading of URL content when content length + * is given by a Content-Length header value. + **/ + class ContentLengthReader : public ReaderInterface + { + private: + static ContentLengthReader _instance; + public: + ContentLengthReader() {} + virtual ~ContentLengthReader() {} + static ReaderInterface *GetInstance() { return &_instance; } + ssize_t Read(HTTPClient &client, void *buf, size_t len); + }; + friend class HTTPClient::ContentLengthReader; + + /** + * Class used to handle reading of URL content sent with chunked + * transfer encoding. + **/ + class ChunkedReader : public ReaderInterface + { + private: + static ChunkedReader _instance; + public: + ChunkedReader() {} + virtual ~ChunkedReader() {} + static ReaderInterface *GetInstance() { return &_instance; } + ssize_t Read(HTTPClient &client, void *buf, size_t len); + }; + friend class HTTPClient::ChunkedReader; + + std::unique_ptr<FastOS_Socket> _socket; + std::string _hostname; + int _port; + bool _keepAlive; + bool _headerBenchmarkdataCoverage; + std::string _extraHeaders; + std::string _authority; + uint64_t _reuseCount; + + size_t _bufsize; + char *_buf; + ssize_t _bufused; + ssize_t _bufpos; + + std::string _headerinfo; + unsigned int _headerinfoPos; + + bool _isOpen; + unsigned int _httpVersion; + unsigned int _requestStatus; + int _totalHitCount; + bool _connectionCloseGiven; + bool _contentLengthGiven; + bool _chunkedEncodingGiven; + bool _keepAliveGiven; + unsigned int _contentLength; + + unsigned int _chunkSeq; // chunk sequence number + unsigned int _chunkLeft; // bytes left of current chunk + unsigned int _dataRead; // total bytes read from URL + bool _dataDone; // all URL content read ? + ReaderInterface *_reader; // handles core URL reading + + + /** + * Discard all data currently present in the internal buffer. + **/ + void ResetBuffer() + { + _bufpos = 0; + _bufused = 0; + } + + /** + * Fill the internal buffer with data from the url we are connected + * to. + * + * @return the number of bytes put into the buffer or -1 on fail. + **/ + ssize_t FillBuffer(); + + /** + * Return the next byte from the data stream we are reading. + * + * @return next byte from the data stream or -1 on EOF/ERROR + **/ + int ReadByte() + { + if (_bufpos == _bufused) + FillBuffer(); + return (_bufused > _bufpos) ? _buf[_bufpos++] & 0x0ff : -1; + } + + /** + * Connect to the given url. + * + * @return success(true)/failure(false) + * @param url the url you want to connect to + **/ + bool Connect(const char *url); + + /** + * Read the next line of text from the data stream into 'buf'. If + * the line is longer than ('bufsize' - 1), the first ('bufsize' - + * 1) bytes will be placed in buf (the rest of the line will be + * discarded), and the true length of the line will be returned. The + * string placed in buf will be terminated with a null + * character. Newline characters will be discarded. A line is + * terminated by either '\n', "\r\n" or EOF (EOF - connection + * closed) + * + * @return the actual length of the next line, or -1 if no line was read. + * @param buf where to put the line. + * @param bufsize the length of buf. + **/ + ssize_t ReadLine(char *buf, size_t bufsize); + + /** + * Split a string into parts by inserting null characters into the + * string and index the parts by putting pointers to them in the + * argument array given. Only non-empty parts will be indexed in the + * argument array. + * + * @return NULL(complete split)/rest of string(incomplete split) + * @param input the null-terminated input string. + * @param argc the number of parts found. + * @param argv the argument array. + * @param maxargs the size of 'argv'. + **/ + static char *SplitString(char *input, int &argc, char **argv, + int maxargs); + + /** + * Read and parse the HTTP Header. + * + * @return success(true)/failure(fail) + **/ + bool ReadHTTPHeader(); + + /** + * Read and parse a chunk header. Only used with chunked encoding. + * + * @return success(true)/failure(fail) + **/ + bool ReadChunkHeader(); + +public: + + /** + * Create a HTTP client that may be used to fetch documents from the + * given host. + * + * @param hostname the host you want to fetch documents from. + * @param port the TCP port to use when contacting the host. + * @param keepAlive flag indicating if keep-alive should be enabled. + **/ + HTTPClient(const char *hostname, int port, bool keepAlive, + bool headerBenchmarkdataCoverage, const std::string & extraHeaders="", const std::string &authority = ""); + + /** + * Disconnect from server and free memory. + **/ + ~HTTPClient(); + + /** + * This method may be used to obtain information about how many + * times a physical connection has been reused to send an additional + * HTTP request. Note that connections may only be reused if + * keep-alive is enabled. + * + * @return connection reuse count + **/ + uint64_t GetReuseCount() const + { + return _reuseCount; + } + + /** + * Connect to the given url and read the response HTTP header. Note + * that this method will fail if the host returns a status code + * other than 200. This is done in order to make the interface as + * simple as possible. + * + * @return success(true)/failure(false) + * @param url the url you want to connect to + **/ + bool Open(const char *url); + + /** + * Read data from the url we are currently connected to. This method + * should be called repeatedly until it returns 0 in order to + * completely read the URL content. If @ref Close is called before + * all URL content is read the physical connection will be closed + * even if keepAlive is enabled. + * + * @return bytes read or -1 on failure. + * @param buf where to store the incoming data. + * @param len length of buf. + **/ + ssize_t Read(void *buf, size_t len); + + /** + * Close the connection to the url we are currently reading + * from. Will also close the physical connection if keepAlive is not + * enabled or if all the url content was not read. This is done + * because skipping will probably be more expencive than creating a + * new connection. + * + * @return success(true)/failure(false) + **/ + bool Close(); + + /** + * Class that provides status about the executed fetch method. + **/ + class FetchStatus final + { + public: + /** + * Create a status for the executed fetch. + * + * @param requestStatus The status from the HTTP server. + * @param totalHitCount The total number of hits. + * @param resultSize The number of bytes in result. + **/ + FetchStatus(bool ok, uint32_t requestStatus, int32_t totalHitCount, int32_t resultSize) : + _ok(ok), + _requestStatus(requestStatus), + _totalHitCount(totalHitCount), + _resultSize(resultSize) + {} + /** + * Query if the operation was successful. + * @return Status of operation. + **/ + auto Ok() const { return _ok; } + /** + Query HTTP request status. + @return HTTP request status. + **/ + auto RequestStatus() const { return _requestStatus; } + /** + * Query total hit count. Returns -1 if the total hit count + * could not be found. + * @return Total hit count for query. + **/ + auto TotalHitCount() const { return _totalHitCount; } + /** + * Query the number of bytes in the result buffer. + * @return Number of bytes in result buffer. + **/ + auto ResultSize() const { return _resultSize; } + + private: + bool _ok; + uint32_t _requestStatus; + int32_t _totalHitCount; + int32_t _resultSize; + }; + + /** + * High-level method that may be used to fetch a document in a + * single method call and save the content to the given file. + * + * @return FetchStatus object which can be queried for status. + * @param url the url to fetch. + * @param file where to save the fetched document. If this parameter + * is NULL, the content will be read and then discarded. + **/ + FetchStatus Fetch(const char *url, std::ostream *file = NULL); +}; + diff --git a/fbench/src/util/timer.cpp b/fbench/src/util/timer.cpp new file mode 100644 index 00000000000..c96a52a1d70 --- /dev/null +++ b/fbench/src/util/timer.cpp @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "timer.h" +#include <stdio.h> +#include <thread> + +Timer::Timer() + : _time(), + _timespan(0), + _maxTime(0), + _running(false) +{ +} + +void +Timer::SetMax(double max) +{ + _maxTime = max; +} + +void +Timer::Start() +{ + if (_running) + return; + _running = true; + _time = clock::now(); +} + +void +Timer::Stop() +{ + if (!_running) + return; + _timespan = GetCurrent(); + _running = false; +} + +void +Timer::Clear() +{ + _running = false; + _timespan = 0; +} + +double +Timer::GetTimespan() +{ + if (_running) + Stop(); + return _timespan; +} + +double +Timer::GetRemaining() +{ + double span = GetTimespan(); + return (span < _maxTime) ? _maxTime - span : 0; +} + +double +Timer::GetCurrent() +{ + if (!_running) + return 0; + using milliseconds = std::chrono::duration<double, std::milli>; + return std::chrono::duration_cast<milliseconds>(time_point(clock::now()) - _time).count(); +} + +void +Timer::TestClass() +{ + Timer test; + + printf("*** Start Testing: class Timer ***\n"); + printf("set max time to 5 seconds, then sleep for 1...\n"); + test.SetMax(5000); + test.Start(); + std::this_thread::sleep_for(std::chrono::seconds(1)); + test.Stop(); + printf("elapsed: %f, left:%f\n", + test.GetTimespan(), test.GetRemaining()); + printf("set max time to 1 second, then sleep for 2...\n"); + test.SetMax(1000); + test.Start(); + std::this_thread::sleep_for(std::chrono::seconds(2)); + test.Stop(); + printf("elapsed: %f, left:%f\n", + test.GetTimespan(), test.GetRemaining()); + printf("*** Finished Testing: class Timer ***\n"); +} diff --git a/fbench/src/util/timer.h b/fbench/src/util/timer.h new file mode 100644 index 00000000000..2771f6e00cc --- /dev/null +++ b/fbench/src/util/timer.h @@ -0,0 +1,88 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <chrono> +#include <memory> + +/** + * This class is used to mesure time intervals, or time spans. In addition to + * simply measuring timespans, this class also has the ability to set + * a maximum timespan and use this as a reference when handling + * measured time spans. The max time span may be thought of as an + * upper limit for the time spans you are going to measure. After + * measuring a time span you may use the @ref GetRemaining and @ref + * GetOvertime methods to check how the measured time span relates to + * the maximum time span. + **/ +class Timer +{ +private: + typedef std::chrono::steady_clock clock; + typedef std::chrono::time_point<clock> time_point; + time_point _time; + double _timespan; + double _maxTime; + bool _running; + +public: + typedef std::unique_ptr<Timer> UP; + /** + * Create a new timer. + **/ + Timer(); + + /** + * Set the maximum time span. + * + * @param max the maximum time span in ms. + **/ + void SetMax(double max); + + /** + * Start the timer. This will set the start time to the current + * time. + **/ + void Start(); + + /** + * Stop the timer. This will set the measured time span to the + * difference between the current time and the start time. + **/ + void Stop(); + + /** + * Set the measured time spen to 0 ms and stop the timer if it is + * running. + **/ + void Clear(); + + /** + * Get the measured time span. If the timer is running, @ref Stop + * will be called. + * + * @return the measured time span in ms. + **/ + double GetTimespan(); + + /** + * Compare the measured time span with the maximum time span. If the + * maximum time span is greater, the difference between the maximum + * time span and the measured time span is returned. If the measured + * time span is greater, 0 is returned as there is no time remaining. + * + * @return remaining time in ms, or 0 if no time is remaining. + **/ + double GetRemaining(); + + /** + * @return time from start to current in ms + **/ + double GetCurrent(); + + /** + * Static method performing simple testing on the timer class. This + * method produces output to stdout that needs manual inspection. + **/ + static void TestClass(); +}; + |