diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /fbench |
Publish
Diffstat (limited to 'fbench')
50 files changed, 4428 insertions, 0 deletions
diff --git a/fbench/.gitignore b/fbench/.gitignore new file mode 100644 index 00000000000..069dac5c201 --- /dev/null +++ b/fbench/.gitignore @@ -0,0 +1,7 @@ +bin +dist +doc +include +lib +Makefile +Testing diff --git a/fbench/CMakeLists.txt b/fbench/CMakeLists.txt new file mode 100644 index 00000000000..d079ce70406 --- /dev/null +++ b/fbench/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_define_module( + DEPENDS + fastos + + APPS + src/fbench + src/filterfile + src/geturl + src/splitfile + src/util + + TESTS + src/test +) diff --git a/fbench/INSTALL b/fbench/INSTALL new file mode 100644 index 00000000000..2bbd9fb9fc2 --- /dev/null +++ b/fbench/INSTALL @@ -0,0 +1,35 @@ +****************** +* fbench INSTALL * +****************** + +fbench uses 'FastOS'. In the following instructions, let %FASTOS_DIR% +denote the install directory for FastOS, also let %FBENCH_DIR% denote +the install directory for fbench. A reasonable selection of install +directories would be: + %FASTOS_DIR% = '/usr/fastsearch/fastos' + %FBENCH_DIR% = '/usr/fastsearch/fbench' + +Install FastOS: +- checkout the fastos CVS module +- go to fastos/src/fastos +- ./configure --install-dir %FASTOS_DIR% [<config parameters>] + (run ./configure --help for help) +- make install + +Install fbench: +- checkout the fbench CVS module +- go to fbench/src +- ./configure --fastos-dir %FASTOS_DIR% \ + --install-dir %FBENCH_DIR% [<config parameters>] + (run ./configure --fastos-dir %FASTOS_DIR% --help for help) +- make install + +This will install all needed binaries and scripts into +%FBENCH_DIR%/bin. It will also copy the README file into +%FBENCH_DIR%. The README file contains a step by step description of +how to perform benchmarking. 
+ +NOTE: When building on Win32, FastOS may have trouble resolving paths +to directories that do not exist; If you want to use a relative path +as install directory, Win32 users must create the install directory +before running the configure script in the fbench directory. diff --git a/fbench/OWNERS b/fbench/OWNERS new file mode 100644 index 00000000000..0aa3eee0364 --- /dev/null +++ b/fbench/OWNERS @@ -0,0 +1,2 @@ +havardpe +balder diff --git a/fbench/README b/fbench/README new file mode 100644 index 00000000000..688810e0567 --- /dev/null +++ b/fbench/README @@ -0,0 +1,346 @@ +fbench - fastserver benchmarking program +---------------------------------------- + + +1 Installing fbench +------------------- + +The preferred way of running fbench is to create your own test +directory where you place all fbench executables and prepare test +files. If you have access to the fbench source, you may consult the +'INSTALL' file for information on how to install fbench. If you have a +pre-compiled distribution of fbench, simply extract the archive. The +fbench install directory should contain the following set of files: + + README + bin/fbench + bin/filterfile + bin/geturl + bin/plot.pl + bin/pretest.sh + bin/resultfilter.pl + bin/runtests.sh + bin/separate.pl + bin/splitfile + + +2 Benchmark overview +-------------------- + +fbench measures the performance of the server by running a number of +clients that send requests to the server in parallel. Each client has +its own input file containing urls that should be requested from the +server. When benchmarking fastserver, the urls contained in these +files correspond to searches. Before you may start benchmarking you +must collect the query urls to be used and distribute them into a +number of files depending on how many clients you are planning to run +in parallel. The most realistic results are obtained by using access +logs collected by fastserver itself from actual usage (AllTheWeb is a +good place to look for such logs). 
You should always collect enough +query urls to perform a single test run without having to reuse any +queries. + + +3 Preparing the test data +------------------------- + +This step assumes you have obtained some fastserver access log +files. The first step is to extract the query urls from the logs. This +is done with the 'filterfile' program. + +| usage: filterfile [-a] [-h] +| +| Read concatenated fastserver logs from stdin and write +| extracted query urls to stdout. +| +| -a : all parameters to the original query urls are preserved. +| If the -a switch is not given, only 'query' and 'type' +| parameters are kept in the extracted query urls. +| -h : print this usage information. + +You then need to split the query urls into a number of files. This is +done with the 'splitfile' program. + +| usage: splitfile [-p pattern] <numparts> [<file>] +| +| -p pattern : output name pattern ['query%03d.txt'] +| <numparts> : number of output files to generate. +| +| Reads from <file> (stdin if <file> is not given) and +| randomly distributes each line between <numpart> output +| files. The names of the output files are generated by +| combining the <pattern> with sequential numbers using +| the sprintf function. + +Since each parallel client should have its own file, you should split +the query urls into at least as many files as the number of clients +you are planning to run. + +Example: the file 'logs' contains access logs from fastserver. You +want to extract the query urls from it and save the query urls into +200 separate files (because you are planning to run 200 clients when +benchmarking). You may do the following: + +$ cat logs | bin/filterfile | bin/splitfile 200 + +This will create 200 files with names 'query000.txt', 'query001.txt', +'query002.txt' etc. You may control the filename pattern of the output +files by using the -p switch with the 'splitfile' program. + + +4 Running a single test +----------------------- + +You are now ready to begin benchmarking. 
The actual benchmarking is +done with the fbench program. fbench usage information ([] are used to +mark optional parameters and default values): + +| usage: fbench [-n numClients] [-c cycleTime] [-l limit] [-i ignoreCount] +| [-s seconds] [-q queryFilePattern] [-o outputFilePattern] +| [-r restartLimit] [-k] <hostname> <port> +| +| -n <num> : run with <num> parallel clients [10] +| -c <num> : each client will make a request each <num> milliseconds [1000] +| ('-1' -> cycle time should be twice the response time) +| -l <num> : minimum response size for successful requests [0] +| -i <num> : do not log the <num> first results. -1 means no logging [0] +| -s <num> : run the test for <num> seconds. -1 means forever [60] +| -q <str> : pattern defining input query files ['query%03d.txt'] +| (the pattern is used with sprintf to generate filenames) +| -o <str> : save query results to output files with the given pattern +| (default is not saving.) +| -r <num> : number of times to re-use each query file. -1 means no limit [-1] +| -k : disable HTTP keep-alive. +| +| <hostname> : the host you want to benchmark. +| <port> : the port to use when contacting the host. + +The only mandatory parameters are the hostname and the port of the +server you want to benchmark. If you are measuring server performance, +you should ensure that the caches are cleared between each run. This +may be done either by stopping and starting fsearch and fdispatch or +by using the geturl program to fetch '/admin?command=clear_caches' +from the http port on each fsearch and fdispatch (this requires that +you are running from a host that is known as privileged by the +fastserver nodes or that fastserver was compiled to accept all hosts +as privileged). + +| usage: geturl <host> <port> <url> + +You may clear the caches by doing: + +$ bin/geturl <host> <port> "/admin?command=clear_caches" + +This must be done for each fsearch and fdispatch http port to clear +all caches. 
+ +Example: You want to test just how well fastserver does under massive +pressure by letting 200 clients search continuously as fast as they +can (they issue new queries immediately after the results from the +previous query are obtained). Assuming you have at least 200 query +files with default filename pattern you may do the following: + +$ bin/fbench -n 200 -c 0 <host> <port> + +This will run the test over a period of 60 seconds. Use the -s option +to change the duration of the test. + +Example: You want to manually observe fastserver with a certain amount +of load. You may use fbench to produce 'background noise' by using the +-s option with argument -1, like this: + +$ bin/fbench -n 50 -c 1000 -s -1 <host> <port> + +This will start 50 clients that ask at most 1 query per second each, +giving a maximum load of 50 queries per second if the server allows +it. This test run will run forever due to the '-s -1' option given. + + +5 Understanding Benchmarking Results +------------------------------------ + +After a test run has completed, fbench outputs various test +results. This section will explain what each of these numbers means. + +'connection reuse count' This value indicates how many times HTTP + connections were reused to issue another + request. Note that this number will only be + displayed if the -k switch (disable HTTP + keep-alive) is not used. + +'clients' Echo of the -n parameter. + +'cycle time' Echo of the -c parameter. + +'lower response limit' Echo of the -l parameter. + +'skipped requests' Number of requests that were skipped by + fbench. fbench will typically skip a request + if the line containing the query url exceeds + a pre-defined limit. Skipped requests will + have minimal impact on the statistical + results. + +'failed requests' The number of failed requests. A request will be + marked as failed if an error occurred while + reading the result or if the result contained + fewer bytes than 'lower response limit'. 
+ +'successful requests' Number of successful requests. Each performed + request is counted as either successful or + failed. Skipped requests (see above) are not + performed and therefore not counted. + +'cycles not held' Number of cycles not held. The cycle time is + specified with the -c parameter. It defines + how often a client should perform a new + request. However, a client may not perform + another request before the result from the + previous request has been obtained. Whenever a + client is unable to initiate a new request + 'on time' due to not being finished with the + previous request, this value will be + increased. + +'minimum response time' The minimum response time. The response time + is measured as the time period from just + before the request is sent to the server, + till the result is obtained from the server. + +'maximum response time' The maximum response time. The response time + is measured as the time period from just + before the request is sent to the server, + till the result is obtained from the server. + +'average response time' The average response time. The response time + is measured as the time period from just + before the request is sent to the server, + till the result is obtained from the server. + +'X percentile' The X percentile of the response time samples; + a value selected such that X percent of the + response time samples are below this + value. In order to calculate percentiles, a + histogram of response times is maintained for + each client at runtime and merged after the + test run ends. If a percentile value exceeds + the upper bound of this histogram, it will be + approximated (and thus less accurate) and + marked with '(approx)'. + +'max query rate' The cycle time tells each client how often it + should perform a request. 
If a client is not + able to perform a new request on time due to + a previous outstanding request, this + increases the overtime counter, and the + client will perform the next request as soon + as the previous one is completed. The + opposite may also happen; a request may + complete in less than the cycle time. In this + case the client will wait the remaining time + of the cycle before performing another + request. The max query rate is an + extrapolated value indicating what the query + rate would be if no client would wait for the + completion of cycles, and that the average + response time would not increase. NOTE: This + number is only supplied as an inverse of the + average response time and should NEVER be + used to describe the query rate of a server. + +'actual query rate' The average number of queries per second; QPS. + +'utilization' The percentage of time used waiting for + the server to complete (successful) + requests. Note that if a request fails, the + utilization will drop since the client has + 'wasted' the time spent on the failed + request. + + +6 Running test series +--------------------- + +For more complete benchmarking you will want to combine the results +from several test runs and present them together in a graph or maybe a +spreadsheet. The perl script resultfilter.pl may be used to convert +the output from fbench into a single line of numbers. Lines of numbers +produced from several test runs may then be concatenated into the same +text file and used to plot a graph with gnuplot or imported into an +application accepting structured text files (like Excel). + +The task described above is performed by the runtests.sh script. It +runs fbench several times with varying client count and cycle +time. Between each test run, the script pretest.sh (located in the bin +directory) is run. The pretest.sh script should make sure that the +server you want to benchmark is in the same state before each of the +test runs. 
This typically means that the caches should be cleared. The +supplied pretest.sh file does nothing, and should therefore be +modified to fit your needs before you start benchmarking with the +runtests.sh script. NOTE: 'runtests.sh' must be run from the fbench +install directory in order to find the scripts and programs it depends +on. (fbench is run as 'bin/fbench' etc.). + +| usage: runtests.sh [-o] [-l] <minClients> <maxClients> <deltaClients> +| <minCycle> <maxCycle> <deltaCycle> [fbench options] <hostname> <port> +| +| The number of clients varies from <minClients> to <maxClients> with +| <deltaClients> increments. For each client count, the cycle time will +| vary in the same way according to <minCycle>, <maxCycle> and <deltaCycle>. +| fbench is run with each combination of client count and cycle time, and +| the result output is filtered with the 'resultfilter.pl' script. +| If you want to save the results you should redirect stdout to a file. +| +| -o : change the order in which the tests are performed so that client +| count varies for each cycle time. +| -l : output a blank line between test subseries. If -o is not specified this +| will output a blank line between test series using different client count. +| If -o was specified this will output blank lines between test series +| using different cycle time. +| +| [fbench options] <hostname> <port>: These arguments are passed to fbench. +| There are 2 things to remember: first; do not specify either of the -n +| or -c options since they will override the values for client count and +| cycle time generated by this script. secondly; make sure you specify +| the correct host and port number. See the fbench usage (run fbench +| without parameters) for more info on how to invoke fbench. + +Example: You want to see how well fastserver performs with varying +client count and cycle time. 
Assume that you have already prepared 200 +query files and that you have edited the 'pretest.sh' script to make +it clear all fsearch and fdispatch caches. To test with client count +from 10 to 200 with intervals of 10 clients and cycle time from 0 to +5000 milliseconds with 500 ms intervals you may do the following: + +$ bin/runtests.sh 10 200 10 0 5000 500 <host> <port> + +The duration of each test run will be 60 seconds (the default). This +may be a little short. You will also get all results written directly +to your console. Say you want to run each test run for 5 minutes and +you want to collect the results in the file 'result.txt'. You may +then do the following: + +$ bin/runtests.sh 10 200 10 0 5000 500 -s 300 <host> <port> > result.txt + +The '-s 300' option will be given to fbench causing each test run to +have a duration of 300 seconds = 5 minutes. The standard output is +simply redirected to a file to collect the results for future use. + +The perl utility scripts separate.pl and plot.pl may be used to create +graphs using gnuplot. + +| usage: separate.pl <sepcol> +| Separate a tabular numeric file into chunks using a blank +| line whenever the value in column 'sepcol' changes. + +| usage: plot.pl [-h] [-x] <plotno> +| Plot the contents of 'result.txt'. +| -h This help +| -x Output to X11 window (default PS-file 'graph.ps') +| plotno: 1: Response Time Percentiles by NumCli +| 2: Rate by NumCli +| 3: Response Time Percentiles by Rate + +Note that the separate.pl script does the same thing as the -l option +of runtests.sh; it inserts blank lines into the result to let gnuplot +interpret each chunk as a separate dataseries. diff --git a/fbench/build/buildspec.xml b/fbench/build/buildspec.xml new file mode 100644 index 00000000000..eb33ba8416e --- /dev/null +++ b/fbench/build/buildspec.xml @@ -0,0 +1,41 @@ +<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
--> +<BuildSpecification> + <Owner> + <OwnerName>Haavard Pettersen</OwnerName> + <OwnerEmail>Havard.Pettersen@fast.no</OwnerEmail> + </Owner> + + <Dependencies> + <dep package="common/fastos" version="1.5.5.1" /> + </Dependencies> + + <PreBuild> + <configure path="src"> + <parameter value="--fastos-dir ${fbuild_install_dir}/fastos" /> + <parameter value="--install-dir ${fbuild_install_dir}/fbench" /> + </configure> + </PreBuild> + + <Build> + <make path="src" target="bootstrap"/> + </Build> + + <PostBuild> + </PostBuild> + + <Test> + </Test> + + <Install> + <make path="src" target="install" /> + </Install> + + <Dist> + <tar target="${fbuild_dist_dir}/${fbuild_build_title}-${fbuild_arch}-${fbuild_os}-${fbuild_os_version}" + basepath="${fbuild_install_dir}" compress="yes"> + <addfile path="fbench" /> + </tar> + + </Dist> + +</BuildSpecification> diff --git a/fbench/src/.gitignore b/fbench/src/.gitignore new file mode 100644 index 00000000000..ada67ed181d --- /dev/null +++ b/fbench/src/.gitignore @@ -0,0 +1,8 @@ +Makefile.ini +config_command.sh +fbench.dsp +filterfile.dsp +geturl.dsp +project.dsw +splitfile.dsp +util.dsp diff --git a/fbench/src/fbench/.gitignore b/fbench/src/fbench/.gitignore new file mode 100644 index 00000000000..91da6fc795c --- /dev/null +++ b/fbench/src/fbench/.gitignore @@ -0,0 +1,5 @@ +.depend +Makefile +fbench +fbench.ilk +fbench.pdb diff --git a/fbench/src/fbench/CMakeLists.txt b/fbench/src/fbench/CMakeLists.txt new file mode 100644 index 00000000000..f9f2ad7e1d9 --- /dev/null +++ b/fbench/src/fbench/CMakeLists.txt @@ -0,0 +1,11 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(fbench_app + SOURCES + client.cpp + fbench.cpp + OUTPUT_NAME fbench + INSTALL bin + DEPENDS + fbench_util + fastos +) diff --git a/fbench/src/fbench/client.cpp b/fbench/src/fbench/client.cpp new file mode 100644 index 00000000000..efaafab6e87 --- /dev/null +++ b/fbench/src/fbench/client.cpp @@ -0,0 +1,186 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <util/timer.h> +#include <util/httpclient.h> +#include <util/filereader.h> +#include "client.h" + +Client::Client(ClientArguments *args) + : _args(args), + _status(new ClientStatus()), + _reqTimer(new Timer()), + _cycleTimer(new Timer()), + _masterTimer(new Timer()), + _http(new HTTPClient(_args->_hostname, _args->_port, + _args->_keepAlive, _args->_headerBenchmarkdataCoverage, + _args->_extraHeaders, _args->_authority)), + _reader(new FileReader()), + _output(), + _linebufsize(args->_maxLineSize), + _linebuf(new char[_linebufsize]), + _stop(false), + _done(false), + _thread() +{ + assert(args != NULL); + _cycleTimer->SetMax(_args->_cycle); +} + +Client::~Client() +{ + delete [] _linebuf; +} + +void Client::runMe(Client * me) { + me->run(); +} + +void +Client::run() +{ + char filename[1024]; + char timestr[64]; + int linelen; + /// int reslen; + + std::this_thread::sleep_for(std::chrono::milliseconds(_args->_delay)); + + // open query file + snprintf(filename, 1024, _args->_filenamePattern, _args->_myNum); + if (!_reader->Open(filename)) { + printf("Client %d: ERROR: could not open file '%s' [read mode]\n", + _args->_myNum, filename); + _status->SetError("Could not open query file."); + return; + } + if (_args->_outputPattern != NULL) { + snprintf(filename, 1024, _args->_outputPattern, _args->_myNum); + _output = std::make_unique<std::ofstream>(filename, std::ofstream::out | std::ofstream::binary); + if (_output->fail()) { + printf("Client %d: ERROR: could not open file '%s' [write mode]\n", + _args->_myNum, 
filename); + _status->SetError("Could not open output file."); + return; + } + } + if (_output) + _output->write(FBENCH_DELIMITER + 1, strlen(FBENCH_DELIMITER) - 1); + + if (_args->_ignoreCount == 0) + _masterTimer->Start(); + + // Start reading from offset + if ( _args->_singleQueryFile ) + _reader->SetFilePos(_args->_queryfileOffset); + + // run queries + while (!_stop) { + + _cycleTimer->Start(); + + linelen = _reader->ReadLine(_linebuf, _linebufsize); + + // Read maximum to _queryfileOffsetEnd + if ( _args->_singleQueryFile && _reader->GetBufPos() >= _args->_queryfileBytes ) { + _reader->SetFilePos(_args->_queryfileOffset); + } + + if (linelen < 0) { + _reader->Reset(); + // Start reading from offset + if ( _args->_singleQueryFile ) { + _reader->SetFilePos(_args->_queryfileOffset); + } + + linelen = _reader->ReadLine(_linebuf, _linebufsize); + if (linelen < 0) { + fprintf(stderr, "Client %d: ERROR: could not read any lines from '%s'\n", + _args->_myNum, filename); + _status->SetError("Could not read any lines from query file."); + break; + } + if (_args->_restartLimit == 0) { + break; + } else if (_args->_restartLimit > 0) { + _args->_restartLimit--; + } + } + if (linelen < _linebufsize) { + if (_output) { + _output->write("URL: ", strlen("URL: ")); + _output->write(_linebuf, linelen); + _output->write("\n\n", 2); + } + if (linelen + (int)_args->_queryStringToAppend.length() < _linebufsize) { + strcat(_linebuf, _args->_queryStringToAppend.c_str()); + } + _reqTimer->Start(); + auto fetch_status = _http->Fetch(_linebuf, _output.get()); + _reqTimer->Stop(); + _status->AddRequestStatus(fetch_status.RequestStatus()); + if (fetch_status.Ok() && fetch_status.TotalHitCount() == 0) + ++_status->_zeroHitQueries; + if (_output) { + if (!fetch_status.Ok()) { + _output->write("\nFBENCH: URL FETCH FAILED!\n", + strlen("\nFBENCH: URL FETCH FAILED!\n")); + _output->write(FBENCH_DELIMITER + 1, strlen(FBENCH_DELIMITER) - 1); + } else { + sprintf(timestr, "\nTIME USED: %0.4f 
s\n", + _reqTimer->GetTimespan() / 1000.0); + _output->write(timestr, strlen(timestr)); + _output->write(FBENCH_DELIMITER + 1, strlen(FBENCH_DELIMITER) - 1); + } + } + if (fetch_status.ResultSize() >= _args->_byteLimit) { + if (_args->_ignoreCount == 0) + _status->ResponseTime(_reqTimer->GetTimespan()); + } else { + if (_args->_ignoreCount == 0) + _status->RequestFailed(); + } + } else { + if (_args->_ignoreCount == 0) + _status->SkippedRequest(); + } + _cycleTimer->Stop(); + if (_args->_cycle < 0) { + std::this_thread::sleep_for(std::chrono::milliseconds(int(_reqTimer->GetTimespan()))); + } else { + if (_cycleTimer->GetRemaining() > 0) { + std::this_thread::sleep_for(std::chrono::milliseconds(int(_cycleTimer->GetRemaining()))); + } else { + if (_args->_ignoreCount == 0) + _status->OverTime(); + } + } + if (_args->_ignoreCount > 0) { + _args->_ignoreCount--; + if (_args->_ignoreCount == 0) + _masterTimer->Start(); + } + // Update current time span to calculate Q/s + _status->SetRealTime(_masterTimer->GetCurrent()); + } + _masterTimer->Stop(); + _status->SetRealTime(_masterTimer->GetTimespan()); + _status->SetReuseCount(_http->GetReuseCount()); + printf("."); + fflush(stdout); + _done = true; +} + +void Client::stop() { + _stop = true; +} + +bool Client::done() { + return _done; +} + +void Client::start() { + _thread = std::thread(Client::runMe, this); +} + +void Client::join() { + _thread.join(); +} diff --git a/fbench/src/fbench/client.h b/fbench/src/fbench/client.h new file mode 100644 index 00000000000..4e78d2d6adc --- /dev/null +++ b/fbench/src/fbench/client.h @@ -0,0 +1,199 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <util/clientstatus.h> +#include <fstream> +#include <atomic> +#include <thread> + +#define FBENCH_DELIMITER "\n[--xxyyzz--FBENCH_MAGIC_DELIMITER--zzyyxx--]\n" + +/** + * This struct contains arguments used to control a single client. 
+ * Each client runs in a separate thread. This struct do not own the + * strings it references. + **/ +struct ClientArguments +{ + /** + * Sequential number identifying this client. + **/ + int _myNum; + + /** + * The total number of clients controlled by the parent fbench + * application + **/ + int _totNum; + + /** + * Pattern that combined with the client number will become the name + * of the file containing the urls this client should request. + **/ + const char *_filenamePattern; + + /** + * Pattern that combined with the client number will become the name + * of the file this client should dump url content to. If this + * pattern is set to NULL no output file is generated. + **/ + const char *_outputPattern; + + /** + * The server the client should fetch urls from. + **/ + const char *_hostname; + + /** + * The server port where the webserver is running. + **/ + int _port; + + /** + * The minimum number of milliseconds between two requests from this + * client. + **/ + long _cycle; + + /** + * Number of milliseconds to wait before making the first request. + * This will be different for different clients and helps distribute + * the requests. + **/ + long _delay; + + /** + * Number of requests that should be made before we start logging + * response times. This is included so fbench startup slugginess + * will not affect the benchmark results. + **/ + int _ignoreCount; + + /** + * Minimum number of bytes allowed in a response for a request to be + * successful. If a response contains fewer bytes than this number, + * the request will be logged as a failure even if no errors + * occurred. + **/ + int _byteLimit; + + /** + * Number of times this client is allowed to re-use the urls in the + * input query file. + **/ + int _restartLimit; + + /** + * Max line size in the input query data. Longer lines than this + * will be skipped. + **/ + int _maxLineSize; + + /** + * Indicate wether keep-alive connections should be enabled for this + * client. 
+ **/ + bool _keepAlive; + + /** + * Indicate whether to add benchmark data coverage headers + **/ + bool _headerBenchmarkdataCoverage; + + uint64_t _queryfileOffset; + uint64_t _queryfileBytes; + bool _singleQueryFile; + std::string _queryStringToAppend; + std::string _extraHeaders; + std::string _authority; + + ClientArguments(int myNum, int totNum, + const char *filenamePattern, + const char *outputPattern, + const char *hostname, int port, + long cycle, long delay, + int ignoreCount, int byteLimit, + int restartLimit, int maxLineSize, + bool keepAlive, bool headerBenchmarkdataCoverage, + uint64_t queryfileOffset, uint64_t queryfileBytes, bool singleQueryFile, + const std::string & queryStringToAppend, const std::string & extraHeaders, + const std::string &authority) + : _myNum(myNum), + _totNum(totNum), + _filenamePattern(filenamePattern), + _outputPattern(outputPattern), + _hostname(hostname), + _port(port), + _cycle(cycle), + _delay(delay), + _ignoreCount(ignoreCount), + _byteLimit(byteLimit), + _restartLimit(restartLimit), + _maxLineSize(maxLineSize), + _keepAlive(keepAlive), + _headerBenchmarkdataCoverage(headerBenchmarkdataCoverage), + _queryfileOffset(queryfileOffset), + _queryfileBytes(queryfileBytes), + _singleQueryFile(singleQueryFile), + _queryStringToAppend(queryStringToAppend), + _extraHeaders(extraHeaders), + _authority(authority) + { + } + +private: + ClientArguments(const ClientArguments &); + ClientArguments &operator=(const ClientArguments &); +}; + +/** + * This class implements a single test client. The clients are run in + * separate threads to simulate several simultanious users. The + * operation of a client is controlled through an instance of the + * @ref ClientArguments class. 
+ **/ +class Client +{ +private: + std::unique_ptr<ClientArguments> _args; + std::unique_ptr<ClientStatus> _status; + Timer::UP _reqTimer; + Timer::UP _cycleTimer; + Timer::UP _masterTimer; + std::unique_ptr<HTTPClient> _http; + std::unique_ptr<FileReader> _reader; + std::unique_ptr<std::ofstream> _output; + int _linebufsize; + char *_linebuf; + std::atomic<bool> _stop; + std::atomic<bool> _done; + std::thread _thread; + + Client(const Client &); + Client &operator=(const Client &); + static void runMe(Client * client); + void run(); + +public: + typedef std::unique_ptr<Client> UP; + /** + * The client arguments given to this method becomes the + * responsibility of the client. + **/ + Client(ClientArguments *args); + + /** + * Delete objects owned by this client, including the client arguments. + **/ + ~Client(); + + /** + * @return A struct containing status info for this client. + **/ + const ClientStatus & GetStatus() { return *_status; } + void start(); + void stop(); + bool done(); + void join(); +}; + diff --git a/fbench/src/fbench/description.html b/fbench/src/fbench/description.html new file mode 100644 index 00000000000..3b5eabb86d3 --- /dev/null +++ b/fbench/src/fbench/description.html @@ -0,0 +1,2 @@ +<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> +The actual benchmarking program. diff --git a/fbench/src/fbench/fbench.cpp b/fbench/src/fbench/fbench.cpp new file mode 100644 index 00000000000..5e53573b1b3 --- /dev/null +++ b/fbench/src/fbench/fbench.cpp @@ -0,0 +1,484 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <util/timer.h> +#include <util/httpclient.h> +#include <util/filereader.h> +#include "client.h" +#include "fbench.h" +#include <thread> + +#include <cmath> + +sig_atomic_t exitSignal = 0; + +FBench::FBench() + : _clients(), + _ignoreCount(0), + _cycle(0), + _filenamePattern(NULL), + _outputPattern(NULL), + _byteLimit(0), + _restartLimit(0), + _maxLineSize(0), + _keepAlive(true), + _headerBenchmarkdataCoverage(false), + _seconds(60), + _singleQueryFile(false) +{ +} + +FBench::~FBench() +{ + _clients.clear(); + free(_filenamePattern); + free(_outputPattern); +} + +void +FBench::InitBenchmark(int numClients, int ignoreCount, int cycle, + const char *filenamePattern, const char *outputPattern, + int byteLimit, int restartLimit, int maxLineSize, + bool keepAlive, bool headerBenchmarkdataCoverage, int seconds, + bool singleQueryFile, const std::string & queryStringToAppend, const std::string & extraHeaders, + const std::string &authority) +{ + _clients.resize(numClients); + _ignoreCount = ignoreCount; + _cycle = cycle; + + free(_filenamePattern); + _filenamePattern = strdup(filenamePattern); + free(_outputPattern); + _outputPattern = (outputPattern == NULL) ? + NULL : strdup(outputPattern); + _queryStringToAppend = queryStringToAppend; + _extraHeaders = extraHeaders; + _authority = authority; + _byteLimit = byteLimit; + _restartLimit = restartLimit; + _maxLineSize = maxLineSize; + _keepAlive = keepAlive; + _headerBenchmarkdataCoverage = headerBenchmarkdataCoverage; + _seconds = seconds; + _singleQueryFile = singleQueryFile; +} + +void +FBench::CreateClients() +{ + int spread = (_cycle > 1) ? 
_cycle : 1; + + int i(0); + for(auto & client : _clients) { + client = std::make_unique<Client>( + new ClientArguments(i, _clients.size(), _filenamePattern, + _outputPattern, _hostnames[i % _hostnames.size()].c_str(), + _ports[i % _ports.size()], _cycle, + random() % spread, _ignoreCount, + _byteLimit, _restartLimit, _maxLineSize, + _keepAlive, _headerBenchmarkdataCoverage, + _queryfileOffset[i % _queryfileOffset.size()], + _queryfileOffset[i+1 % _queryfileOffset.size()]-_queryfileOffset[i % _queryfileOffset.size()], + _singleQueryFile, _queryStringToAppend, _extraHeaders, _authority)); + ++i; + } +} + +bool +FBench::ClientsDone() +{ + bool done(true); + for (auto & client : _clients) { + if ( ! client->done() ) { + return false; + } + } + return done; +} + +void +FBench::StartClients() +{ + printf("Starting clients...\n"); + for (auto & client : _clients) { + client->start(); + } +} + +void +FBench::StopClients() +{ + printf("Stopping clients"); + for (auto & client : _clients) { + client->stop(); + } + printf("\nClients stopped.\n"); + for (auto & client : _clients) { + client->join(); + } + printf("\nClients Joined.\n"); +} + +void +FBench::PrintSummary() +{ + ClientStatus status; + + double maxRate = 0; + double actualRate = 0; + + int realNumClients = 0; + + int i = 0; + for (auto & client : _clients) { + if (client->GetStatus()._error) { + printf("Client %d: %s => discarding client results.\n", + i, client->GetStatus()._errorMsg.c_str()); + } else { + status.Merge(client->GetStatus()); + ++realNumClients; + } + ++i; + } + double avg = status.GetAverage(); + + maxRate = (avg > 0) ? realNumClients * 1000.0 / avg : 0; + actualRate = (status._realTime > 0) ? 
+ realNumClients * 1000.0 * status._requestCnt / status._realTime : 0; + + double p25 = status.GetPercentile(25); + double p50 = status.GetPercentile(50); + double p75 = status.GetPercentile(75); + double p90 = status.GetPercentile(90); + double p95 = status.GetPercentile(95); + double p99 = status.GetPercentile(99); + + if (_keepAlive) { + printf("*** HTTP keep-alive statistics ***\n"); + printf("connection reuse count -- %" PRId64 "\n", status._reuseCnt); + } + printf("***************** Benchmark Summary *****************\n"); + printf("clients: %8ld\n", _clients.size()); + printf("ran for: %8d seconds\n", _seconds); + printf("cycle time: %8d ms\n", _cycle); + printf("lower response limit: %8d bytes\n", _byteLimit); + printf("skipped requests: %8ld\n", status._skipCnt); + printf("failed requests: %8ld\n", status._failCnt); + printf("successful requests: %8ld\n", status._requestCnt); + printf("cycles not held: %8ld\n", status._overtimeCnt); + printf("minimum response time: %8.2f ms\n", status._minTime); + printf("maximum response time: %8.2f ms\n", status._maxTime); + printf("average response time: %8.2f ms\n", status.GetAverage()); + if (p25 > status._timetable.size() / status._timetableResolution - 1) + printf("25 percentile: %8.2f ms (approx)\n", p25); + else printf("25 percentile: %8.2f ms\n", p25); + if (p50 > status._timetable.size() / status._timetableResolution - 1) + printf("50 percentile: %8.2f ms (approx)\n", p50); + else printf("50 percentile: %8.2f ms\n", p50); + if (p75 > status._timetable.size() / status._timetableResolution - 1) + printf("75 percentile: %8.2f ms (approx)\n", p75); + else printf("75 percentile: %8.2f ms\n", p75); + if (p90 > status._timetable.size() / status._timetableResolution - 1) + printf("90 percentile: %8.2f ms (approx)\n", p90); + else printf("90 percentile: %8.2f ms\n", p90); + if (p95 > status._timetable.size() / status._timetableResolution - 1) + printf("95 percentile: %8.2f ms (approx)\n", p95); + else printf("95 
percentile: %8.2f ms\n", p95); + if (p99 > status._timetable.size() / status._timetableResolution - 1) + printf("99 percentile: %8.2f ms (approx)\n", p99); + else printf("99 percentile: %8.2f ms\n", p99); + printf("actual query rate: %8.2f Q/s\n", actualRate); + printf("utilization: %8.2f %%\n", + (maxRate > 0) ? 100 * (actualRate / maxRate) : 0); + printf("zero hit queries: %8ld\n", status._zeroHitQueries); + printf("http request status breakdown:\n"); + for (const auto& entry : status._requestStatusDistribution) + printf(" %8u : %8u \n", entry.first, entry.second); + + fflush(stdout); +} + +void +FBench::Usage() +{ + printf("usage: fbench [-H extraHeader] [-a queryStringToAppend ] [-n numClients] [-c cycleTime] [-l limit] [-i ignoreCount]\n"); + printf(" [-s seconds] [-q queryFilePattern] [-o outputFilePattern]\n"); + printf(" [-r restartLimit] [-m maxLineSize] [-k] <hostname> <port>\n\n"); + printf(" -H <str> : append extra header to each get request.\n"); + printf(" -A <str> : assign autority. <str> should be hostname:port format. Overrides Host: header sent.\n"); + printf(" -a <str> : append string to each query\n"); + printf(" -n <num> : run with <num> parallel clients [10]\n"); + printf(" -c <num> : each client will make a request each <num> milliseconds [1000]\n"); + printf(" ('-1' -> cycle time should be twice the response time)\n"); + printf(" -l <num> : minimum response size for successful requests [0]\n"); + printf(" -i <num> : do not log the <num> first results. -1 means no logging [0]\n"); + printf(" -s <num> : run the test for <num> seconds. -1 means forever [60]\n"); + printf(" -q <str> : pattern defining input query files ['query%%03d.txt']\n"); + printf(" (the pattern is used with sprintf to generate filenames)\n"); + printf(" -o <str> : save query results to output files with the given pattern\n"); + printf(" (default is not saving.)\n"); + printf(" -r <num> : number of times to re-use each query file. 
-1 means no limit [-1]\n"); + printf(" -m <num> : max line size in input query files [8192].\n"); + printf(" Can not be less than the minimum [1024].\n"); + printf(" -p <num> : print summary every <num> seconds.\n"); + printf(" -k : disable HTTP keep-alive.\n"); + printf(" -y : write data on coverage to output file (must used with -x).\n"); + printf(" -z : use single query file to be distributed between clients.\n\n"); + printf(" <hostname> : the host you want to benchmark.\n"); + printf(" <port> : the port to use when contacting the host.\n\n"); + printf("Several hostnames and ports can be listed\n"); + printf("This is distributed in round-robin manner to clients\n"); +} + +void +FBench::Exit() +{ + StopClients(); + printf("\n"); + PrintSummary(); + exit(0); +} + +int +FBench::Main(int argc, char *argv[]) +{ + // parameters with default values. + int numClients = 10; + int cycleTime = 1000; + int byteLimit = 0; + int ignoreCount = 0; + int seconds = 60; + int maxLineSize = 128 * 1024; + const int minLineSize = 1024; + + const char *queryFilePattern = "query%03d.txt"; + const char *outputFilePattern = NULL; + std::string queryStringToAppend; + std::string extraHeaders; + + int restartLimit = -1; + bool keepAlive = true; + bool headerBenchmarkdataCoverage = false; + + bool singleQueryFile = false; + std::string authority; + + int printInterval = 0; + + // parse options and override defaults. 
+ int idx; + char opt; + const char *arg; + bool optError; + + idx = 1; + optError = false; + while((opt = GetOpt(argc, argv, "H:A:a:n:c:l:i:s:q:o:r:m:p:kxyz", arg, idx)) != -1) { + switch(opt) { + case 'A': + authority = arg; + break; + case 'H': + extraHeaders += std::string(arg) + "\r\n"; + if (strncmp(arg, "Host:", 5) == 0) { + fprintf(stderr, "Do not override 'Host:' header, use -A option instead\n"); + return -1; + } + break; + case 'a': + queryStringToAppend = std::string(arg); + break; + case 'n': + numClients = atoi(arg); + break; + case 'c': + cycleTime = atoi(arg); + break; + case 'l': + byteLimit = atoi(arg); + break; + case 'i': + ignoreCount = atoi(arg); + break; + case 's': + seconds = atoi(arg); + break; + case 'q': + queryFilePattern = arg; + break; + case 'o': + outputFilePattern = arg; + break; + case 'r': + restartLimit = atoi(arg); + break; + case 'm': + maxLineSize = atoi(arg); + if (maxLineSize < minLineSize) { + maxLineSize = minLineSize; + } + break; + case 'p': + printInterval = atoi(arg); + if (printInterval < 0) + optError = true; + break; + case 'k': + keepAlive = false; + break; + case 'x': + // consuming x for backwards compability. This turned on header benchmark data + // but this is now always on. 
+ break; + case 'y': + headerBenchmarkdataCoverage = true; + break; + case 'z': + singleQueryFile = true; + break; + default: + optError = true; + break; + } + } + + if ( argc < (idx + 2) || optError) { + Usage(); + return -1; + } + // Hostname/port must be in pair + int args = (argc - idx); + if (args % 2 != 0) { + fprintf(stderr, "Not equal number of hostnames and ports\n"); + return -1; + } + + short hosts = args / 2; + + for (int i=0; i<hosts; ++i) + { + _hostnames.push_back(std::string(argv[idx+2*i])); + int port = atoi(argv[idx+2*i+1]); + if (port == 0) { + fprintf(stderr, "Not a valid port:\t%s\n", argv[idx+2*i+1]); + return -1; + } + _ports.push_back(port); + } + + // Find offset for each client if shared query file + _queryfileOffset.push_back(0); + if (singleQueryFile) { + // Open file to find offsets, with pattern as if client 0 + char filename[1024]; + snprintf(filename, 1024, queryFilePattern, 0); + queryFilePattern = filename; + FileReader reader; + if (!reader.Open(queryFilePattern)) { + fprintf(stderr, "ERROR: could not open file '%s' [read mode]\n", + queryFilePattern); + return -1; + } + + uint64_t totalSize = reader.GetFileSize(); + uint64_t perClient = totalSize / numClients; + + for (int i=1; i<numClients; ++i) { + /** Start each client with some offset, adjusted to next newline + **/ + FileReader r; + r.Open(queryFilePattern); + uint64_t clientOffset = std::max(i*perClient, _queryfileOffset.back() ); + uint64_t newline = r.FindNewline(clientOffset) + 1; + _queryfileOffset.push_back(newline); + } + + // Add pos to end of file + _queryfileOffset.push_back( totalSize+1 ); + + + // Print offset of clients + /* + printf("%6s%14s%15s", "Client", "Offset", "Bytes\n"); + for (unsigned int i =0; i< _queryfileOffset.size()-1; ++i) + printf("%6d%14ld%14ld\n", i, _queryfileOffset[i], _queryfileOffset[i+1]-_queryfileOffset[i]); + */ + } + + InitBenchmark(numClients, ignoreCount, cycleTime, + queryFilePattern, outputFilePattern, + byteLimit, restartLimit, 
maxLineSize, + keepAlive, + headerBenchmarkdataCoverage, seconds, + singleQueryFile, queryStringToAppend, extraHeaders, + authority); + + CreateClients(); + StartClients(); + + if (seconds < 0) { + unsigned int secondCount = 0; + while (!ClientsDone()) { + if (exitSignal) { + _seconds = secondCount; + Exit(); + } + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + if (printInterval != 0 && ++secondCount % printInterval == 0) { + printf("\nRuntime: %d sec\n", secondCount); + PrintSummary(); + } + } + } else if (seconds > 0) { + // Timer to compansate for work load on PrintSummary() + Timer sleepTimer; + sleepTimer.SetMax(1000); + + for (;seconds > 0 && !ClientsDone(); seconds--) { + if (exitSignal) { + _seconds = _seconds - seconds; + Exit(); + } + std::this_thread::sleep_for(std::chrono::milliseconds(int(sleepTimer.GetRemaining()))); + sleepTimer.Start(); + + if (seconds % 60 == 0) { + printf("[dummydate]: PROGRESS: fbench: Seconds left %d\n", seconds); + } + + if (printInterval != 0 && seconds % printInterval == 0) { + printf("\nRuntime: %d sec\n", _seconds - seconds); + PrintSummary(); + } + + sleepTimer.Stop(); + } + } + + StopClients(); + PrintSummary(); + return 0; +} + +void sighandler(int sig) +{ + if (sig == SIGINT) { + exitSignal = 1; + } +} + +int +main(int argc, char** argv) +{ + + struct sigaction act; + + act.sa_handler = sighandler; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + + sigaction(SIGINT, &act, NULL); + sigaction(SIGPIPE, &act, NULL); + + FBench myApp; + return myApp.Main(argc, argv); +} diff --git a/fbench/src/fbench/fbench.h b/fbench/src/fbench/fbench.h new file mode 100644 index 00000000000..cf0782f9e36 --- /dev/null +++ b/fbench/src/fbench/fbench.h @@ -0,0 +1,74 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +/** + * This is the application class of the fbench program. 
It controls + * the operation of the test clients and collects overall results. + * The functionallity of the Main method is split into several helper + * methods for more clarity in the source. + **/ +class FBench +{ +private: + std::vector<Client::UP> _clients; + int _numClients; + int _ignoreCount; + int _cycle; + std::vector<std::string> _hostnames; + std::vector<int> _ports; + char *_filenamePattern; + char *_outputPattern; + int _byteLimit; + int _restartLimit; + int _maxLineSize; + bool _keepAlive; + bool _headerBenchmarkdataCoverage; + int _seconds; + std::vector<uint64_t> _queryfileOffset; + int _numberOfQueries; + bool _singleQueryFile; + std::string _queryStringToAppend; + std::string _extraHeaders; + std::string _authority; + + void InitBenchmark(int numClients, int ignoreCount, int cycle, + const char *filenamePattern, const char *outputPattern, + int byteLimit, int restartLimit, int maxLineSize, + bool keepAlive, bool headerBenchmarkdataCoverage, int seconds, + bool singleQueryFile, const std::string & queryStringToAppend, const std::string & extraHeaders, + const std::string &authority); + + void CreateClients(); + void StartClients(); + void StopClients(); + bool ClientsDone(); + void PrintSummary(); + + FBench(const FBench &); + FBench &operator=(const FBench &); + +public: + FBench(); + ~FBench(); + + /** + * Exit + **/ + void Exit(); + + /** + * Usage statement. + */ + void Usage(); + + /** + * Application entry point. + **/ + int Main(int argc, char *argv[]); +}; + +/** + * Run fbench as a FastOS application. 
+ **/ +int main(int argc, char** argv); + diff --git a/fbench/src/filterfile/.gitignore b/fbench/src/filterfile/.gitignore new file mode 100644 index 00000000000..097e544e83d --- /dev/null +++ b/fbench/src/filterfile/.gitignore @@ -0,0 +1,5 @@ +.depend +Makefile +filterfile +filterfile.ilk +filterfile.pdb diff --git a/fbench/src/filterfile/CMakeLists.txt b/fbench/src/filterfile/CMakeLists.txt new file mode 100644 index 00000000000..42984155cc0 --- /dev/null +++ b/fbench/src/filterfile/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(fbench_filterfile_app + SOURCES + filterfile.cpp + OUTPUT_NAME filterfile + INSTALL bin + DEPENDS + fbench_util +) diff --git a/fbench/src/filterfile/description.html b/fbench/src/filterfile/description.html new file mode 100644 index 00000000000..1efa0063328 --- /dev/null +++ b/fbench/src/filterfile/description.html @@ -0,0 +1,2 @@ +<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> +Program used to extract query urls from fastserver logs. diff --git a/fbench/src/filterfile/filterfile.cpp b/fbench/src/filterfile/filterfile.cpp new file mode 100644 index 00000000000..e72b5b5c02d --- /dev/null +++ b/fbench/src/filterfile/filterfile.cpp @@ -0,0 +1,162 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <util/filereader.h> +#include <iostream> +#include <string.h> +#include <assert.h> + +/** + * Extract query urls from web logs. The filterfile application reads + * concatenated web logs from stdin and writes all query urls found in + * the input to stdout. Urls beginning with '/cgi-bin/search?' are + * assumed to be query urls. Only the 'query' and 'type' parameters + * are kept in the output. 
+ **/ + +int +main(int argc, char** argv) +{ + bool showUsage = false; + bool allowAllParams = false; + int bufsize = 10240; + + // parse options and override defaults. + int optIdx; + char opt; + const char *arg; + bool optError; + + optIdx = 1; + optError = false; + while((opt = GetOpt(argc, argv, "ahm:", arg, optIdx)) != -1) { + switch(opt) { + case 'a': + allowAllParams = true; + break; + case 'h': + showUsage = true; + break; + case 'm': + bufsize = atoi(arg); + if (bufsize < 10240) { + bufsize = 10240; + } + break; + default: + optError = true; + break; + } + } + + if (optError || showUsage) { + printf("usage: filterfile [-a] [-h] [-m maxLineSize]\n\n"); + printf("Read concatenated fastserver logs from stdin and write\n"); + printf("extracted query urls to stdout.\n\n"); + printf(" -a : all parameters to the original query urls are preserved.\n"); + printf(" If the -a switch is not given, only 'query' and 'type'\n"); + printf(" parameters are kept in the extracted query urls.\n"); + printf(" -h : print this usage information.\n"); + printf(" -m <num> : max line size for input/output lines.\n"); + printf(" Can not be less than the default [10240]\n"); + return -1; + } + + const char *beginToken = "GET "; + int beginTokenlen = strlen(beginToken); + + const char *endToken = " HTTP/"; + + //const char *prefix = "/cgi-bin/search?"; + const char *prefix = "/?"; + int prefixlen = strlen(prefix); + + //const char *trigger = "/cgi-bin/"; + const char *trigger = ""; + int triggerlen = strlen(trigger); + + // open input and output (should never fail) + FileReader *reader = new FileReader(); + if (!reader->OpenStdin()) { + printf("could not open stdin! 
(strange)\n"); + delete reader; + return -1; + } + std::ostream & file = std::cout; + + // filter the input + char *line = new char[bufsize]; + assert(line != NULL); + int res; + char *tmp; + char *url; + int startIdx; + int endIdx; + int idx; + int outIdx; + char *buf = new char[bufsize]; + assert(buf != NULL); + int state; // 0=expect param name, 1=copy, 2=skip + bool gotQuery; + memcpy(buf, prefix, prefixlen); + while ((res = reader->ReadLine(line, bufsize - 1)) >= 0) { + + // find field beginning + tmp = strstr(line, beginToken); + startIdx = (tmp != NULL) ? (tmp - line) + beginTokenlen : 0; + + // find url beginning + url = strstr(line + startIdx, trigger); + if (url == NULL) + continue; // CONTINUE + + // find field end + tmp = strstr(line + startIdx, endToken); + if (tmp == NULL) + tmp = strstr(line + startIdx, "\""); + endIdx = (tmp != NULL) ? (tmp - line) : strlen(line); + + // find params + idx = (url - line) + triggerlen; + while (idx < endIdx && line[idx++] != '?'); + if (idx >= endIdx) + continue; // CONTINUE + + outIdx = prefixlen; + state = 0; // expect param name + gotQuery = false; + while(idx < endIdx) { + switch (state) { + case 0: + state = ((strncmp(line + idx, "query=", 6) == 0 + && (gotQuery = true)) || + allowAllParams || + strncmp(line + idx, "type=", 5) == 0) ? 
1 : 2; + break; + case 1: + buf[outIdx++] = line[idx]; + case 2: // FALLTHROUGH + if (line[idx++] == '&') + state = 0; + break; + } + } + if (!gotQuery) + continue; // CONTINUE + + if (buf[outIdx - 1] == '&') + outIdx--; + buf[outIdx++] = '\n'; + buf[outIdx] = '\0'; + if (!file.write(buf, outIdx)) { + reader->Close(); + delete reader; + delete [] line; + delete [] buf; + return -1; + } + } + reader->Close(); + delete reader; + delete [] line; + delete [] buf; + return 0; +} diff --git a/fbench/src/geturl/.gitignore b/fbench/src/geturl/.gitignore new file mode 100644 index 00000000000..b615c53e087 --- /dev/null +++ b/fbench/src/geturl/.gitignore @@ -0,0 +1,5 @@ +.depend +Makefile +geturl +geturl.ilk +geturl.pdb diff --git a/fbench/src/geturl/CMakeLists.txt b/fbench/src/geturl/CMakeLists.txt new file mode 100644 index 00000000000..c5e69018e5d --- /dev/null +++ b/fbench/src/geturl/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(fbench_geturl_app + SOURCES + geturl.cpp + OUTPUT_NAME geturl + INSTALL bin + DEPENDS + fbench_util + fastos +) diff --git a/fbench/src/geturl/description.html b/fbench/src/geturl/description.html new file mode 100644 index 00000000000..d38e29dc805 --- /dev/null +++ b/fbench/src/geturl/description.html @@ -0,0 +1,2 @@ +<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> +Program used to fetch the content of an URL. diff --git a/fbench/src/geturl/geturl.cpp b/fbench/src/geturl/geturl.cpp new file mode 100644 index 00000000000..667db264156 --- /dev/null +++ b/fbench/src/geturl/geturl.cpp @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <util/httpclient.h> +#include <iostream> + +int +main(int argc, char** argv) +{ + if (argc != 4) { + printf("usage: geturl <host> <port> <url>\n"); + return -1; + } + + HTTPClient client(argv[1], atoi(argv[2]), false, false); + if (!client.Fetch(argv[3], &std::cout).Ok()) { + fprintf(stderr, "geturl: could not fetch 'http://%s:%d%s'\n", + argv[1], atoi(argv[2]), argv[3]); + return -1; + } + return 0; +} diff --git a/fbench/src/splitfile/.gitignore b/fbench/src/splitfile/.gitignore new file mode 100644 index 00000000000..681674c8928 --- /dev/null +++ b/fbench/src/splitfile/.gitignore @@ -0,0 +1,5 @@ +.depend +Makefile +splitfile +splitfile.ilk +splitfile.pdb diff --git a/fbench/src/splitfile/CMakeLists.txt b/fbench/src/splitfile/CMakeLists.txt new file mode 100644 index 00000000000..94c8c5681ff --- /dev/null +++ b/fbench/src/splitfile/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(fbench_splitfile_app + SOURCES + splitfile.cpp + OUTPUT_NAME splitfile + INSTALL bin + DEPENDS + fbench_util +) diff --git a/fbench/src/splitfile/description.html b/fbench/src/splitfile/description.html new file mode 100644 index 00000000000..b38cb5e8f65 --- /dev/null +++ b/fbench/src/splitfile/description.html @@ -0,0 +1,2 @@ +<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> +Program used to split query url files. diff --git a/fbench/src/splitfile/splitfile.cpp b/fbench/src/splitfile/splitfile.cpp new file mode 100644 index 00000000000..001e6c4ed5c --- /dev/null +++ b/fbench/src/splitfile/splitfile.cpp @@ -0,0 +1,114 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <util/filereader.h> +#include <fstream> +#include <vector> +#include <memory> + +/** + * Split a text file randomly in a number of parts. 
Process an input + * file (or stdin) line by line, writing each line out to a randomly + * chosen output file. The output files are numbered using a counter + * and a filename pattern. + **/ + +int +main(int argc, char** argv) +{ + // parameters with default values. + const char *pattern = "query%03d.txt"; + int linebufsize = 10240; + + // parse options and override defaults. + int idx; + char opt; + const char *arg; + bool optError; + + idx = 1; + optError = false; + while((opt = GetOpt(argc, argv, "p:m:", arg, idx)) != -1) { + switch(opt) { + case 'p': + pattern = arg; + break; + case 'm': + linebufsize = atoi(arg); + if (linebufsize < 10240) { + linebufsize = 10240; + } + break; + default: + optError = true; + break; + } + } + + if (argc < (idx + 1) || argc > (idx + 2) || optError) { + printf("usage: splitfile [-p pattern] [-m maxLineSize] <numparts> [<file>]\n\n"); + printf(" -p pattern : output name pattern ['query%%03d.txt']\n"); + printf(" -m <num> : max line size for input/output lines.\n"); + printf(" Can not be less than the default [10240]\n"); + printf(" <numparts> : number of output files to generate.\n\n"); + printf("Reads from <file> (stdin if <file> is not given) and\n"); + printf("randomly distributes each line between <numpart> output\n"); + printf("files. The names of the output files are generated by\n"); + printf("combining the <pattern> with sequential numbers using\n"); + printf("the sprintf function.\n"); + return -1; + } + + int outcnt = atoi(argv[idx]); + if (outcnt < 1) { + printf("too few output files!\n"); + return -1; + } + + int i; + int res; + std::vector<char> linebuf(linebufsize); + char filename[1024]; + std::unique_ptr<FileReader> input = std::make_unique<FileReader>(); + std::vector<std::unique_ptr<std::ostream>> output; + + if (argc > (idx + 1)) { + if (!input->Open(argv[idx + 1])) { + printf("could not open input file!\n"); + return -1; + } + } else { + if (!input->OpenStdin()) { + printf("could not open stdin! 
(strange)\n"); + return -1; + } + } + + // open output files + output.reserve(outcnt); + for (i = 0; i < outcnt; i++) { + snprintf(filename, 1024, pattern, i); + output.emplace_back(std::make_unique<std::ofstream>(filename, std::ofstream::out | std::ofstream::binary | std::ofstream::trunc)); + if (! output.back()) { + printf("could not open output file: %s\n", filename); + input->Close(); + return -1; + } + } + + // split file + while ((res = input->ReadLine(&linebuf[0], linebufsize - 1)) >= 0) { + if (res < linebufsize - 1) { + linebuf[res] = '\n'; + linebuf[res + 1] = '\0'; // just in case + i = random() % outcnt; + if (!output[i]->write(&linebuf[0], res + 1)) { + printf("error writing to file '%d'\n", i); + } + } else { + printf("line too long, skipping...\n"); + } + } + + // close files + input->Close(); + return 0; +} diff --git a/fbench/src/test/.gitignore b/fbench/src/test/.gitignore new file mode 100644 index 00000000000..9cdf9d7f383 --- /dev/null +++ b/fbench/src/test/.gitignore @@ -0,0 +1,14 @@ +*.core +.depend +clientstatus +filereader +filereader_clean.txt +filereader_messy.txt +geturl +httpclient +httpclient_splitstring +Makefile +fbench_clientstatus_app +fbench_filereader_app +fbench_httpclient_app +fbench_httpclient_splitstring_app diff --git a/fbench/src/test/CMakeLists.txt b/fbench/src/test/CMakeLists.txt new file mode 100644 index 00000000000..2ea472ad8e5 --- /dev/null +++ b/fbench/src/test/CMakeLists.txt @@ -0,0 +1,30 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(fbench_httpclient_splitstring_app + SOURCES + httpclient_splitstring.cpp + DEPENDS + fbench_util + fastos +) +vespa_add_test(NAME fbench_httpclient_splitstring_app COMMAND fbench_httpclient_splitstring_app) +vespa_add_executable(fbench_httpclient_app + SOURCES + httpclient.cpp + DEPENDS + fbench_util + fastos +) +vespa_add_executable(fbench_filereader_app + SOURCES + filereader.cpp + DEPENDS + fbench_util +) +vespa_add_test(NAME fbench_filereader_app COMMAND fbench_filereader_app) +vespa_add_executable(fbench_clientstatus_app + SOURCES + clientstatus.cpp + DEPENDS + fbench_util +) +vespa_add_test(NAME fbench_clientstatus_app COMMAND fbench_clientstatus_app) diff --git a/fbench/src/test/README b/fbench/src/test/README new file mode 100644 index 00000000000..bd0d510840a --- /dev/null +++ b/fbench/src/test/README @@ -0,0 +1,4 @@ +Before making the tests you should do a 'make bootstrap' in the +'../src' directory. This is to ensure that all needed files are +present (testing of classes in program directories require direct +linking with the object files). diff --git a/fbench/src/test/clientstatus.cpp b/fbench/src/test/clientstatus.cpp new file mode 100644 index 00000000000..6bed6657699 --- /dev/null +++ b/fbench/src/test/clientstatus.cpp @@ -0,0 +1,61 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <util/timer.h> +#include <util/httpclient.h> +#include <util/filereader.h> +#include <fbench/client.h> + +int +main(int argc, char **argv) +{ + (void) argc; + (void) argv; + + ClientStatus *status = new ClientStatus; + + printf("adding response time: %d\n", 0); + status->ResponseTime(0); + printf("adding response time: %d\n", 1000); + status->ResponseTime(1000); + printf("adding response time: %d\n", 2000); + status->ResponseTime(2000); + printf("adding response time: %d\n", 3000); + status->ResponseTime(3000); + printf("adding response time: %d\n", 4000); + status->ResponseTime(4000); + printf("adding response time: %d\n", 5000); + status->ResponseTime(5000); + printf("adding response time: %d\n", 6000); + status->ResponseTime(6000); + printf("adding response time: %d\n", 7000); + status->ResponseTime(7000); + printf("adding response time: %d\n", 8000); + status->ResponseTime(8000); + printf("adding response time: %d\n", 9000); + status->ResponseTime(9000); + printf("adding response time: %d\n", 10000); + status->ResponseTime(10000); + + printf(" 0%% percentile: %8.2f\n", status->GetPercentile(0)); + printf(" 5%% percentile: %8.2f\n", status->GetPercentile(5)); + printf(" 10%% percentile: %8.2f\n", status->GetPercentile(10)); + printf(" 15%% percentile: %8.2f\n", status->GetPercentile(15)); + printf(" 20%% percentile: %8.2f\n", status->GetPercentile(20)); + printf(" 25%% percentile: %8.2f\n", status->GetPercentile(25)); + printf(" 30%% percentile: %8.2f\n", status->GetPercentile(30)); + printf(" 35%% percentile: %8.2f\n", status->GetPercentile(35)); + printf(" 40%% percentile: %8.2f\n", status->GetPercentile(40)); + printf(" 45%% percentile: %8.2f\n", status->GetPercentile(45)); + printf(" 50%% percentile: %8.2f\n", status->GetPercentile(50)); + printf(" 55%% percentile: %8.2f\n", status->GetPercentile(55)); + printf(" 60%% percentile: %8.2f\n", status->GetPercentile(60)); + printf(" 65%% percentile: %8.2f\n", status->GetPercentile(65)); + printf(" 70%% 
percentile: %8.2f\n", status->GetPercentile(70)); + printf(" 75%% percentile: %8.2f\n", status->GetPercentile(75)); + printf(" 80%% percentile: %8.2f\n", status->GetPercentile(80)); + printf(" 85%% percentile: %8.2f\n", status->GetPercentile(85)); + printf(" 90%% percentile: %8.2f\n", status->GetPercentile(90)); + printf(" 95%% percentile: %8.2f\n", status->GetPercentile(95)); + printf("100%% percentile: %8.2f\n", status->GetPercentile(100)); + + delete status; +} diff --git a/fbench/src/test/filereader.cpp b/fbench/src/test/filereader.cpp new file mode 100644 index 00000000000..01bfba0693a --- /dev/null +++ b/fbench/src/test/filereader.cpp @@ -0,0 +1,96 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <util/filereader.h> +#include <string.h> +#include <assert.h> + +int +main(int argc, char **argv) +{ + (void) argc; + (void) argv; + + // write test file with messy newlines. + std::ofstream file("filereader_messy.txt", std::ofstream::out | std::ofstream::binary | std::ofstream::trunc); + if (!file) { + printf("can't open 'filereader_messy.txt' for writing!\n"); + return -1; + } + const char *l1 = "a line with only newline\n"; + const char *l2 = "a line with only return\r"; + const char *l3 = "a line with newline return\n\r"; + const char *l4 = "a line with return newline\r\n"; + const char *l5 = "2 empty lines with newline\n"; + const char *l6 = "\n"; + const char *l7 = "\n"; + const char *l8 = "2 empty lines with return\r"; + const char *l9 = "\r"; + const char *l10 = "\r"; + const char *l11 = "2 empty lines with newline return\n\r"; + const char *l12 = "\n\r"; + const char *l13 = "\n\r"; + const char *l14 = "2 empty lines with return newline\r\n"; + const char *l15 = "\r\n"; + const char *l16 = "\r\n"; + const char *l17 = "file ends here x"; + file.write(l1, strlen(l1)); + file.write(l2, strlen(l2)); + file.write(l3, strlen(l3)); + file.write(l4, strlen(l4)); + file.write(l5, 
strlen(l5)); + file.write(l6, strlen(l6)); + file.write(l7, strlen(l7)); + file.write(l8, strlen(l8)); + file.write(l9, strlen(l9)); + file.write(l10, strlen(l10)); + file.write(l11, strlen(l11)); + file.write(l12, strlen(l12)); + file.write(l13, strlen(l13)); + file.write(l14, strlen(l14)); + file.write(l15, strlen(l15)); + file.write(l16, strlen(l16)); + file.write(l17, strlen(l17)); + file.close(); + + // convert file to use only '\n' as newlines. + FileReader *reader = new FileReader(); + if (!reader->Open("filereader_messy.txt")) { + printf("can't open 'filereader_messy.txt' for reading!\n"); + delete reader; + return -1; + } + file = std::ofstream("filereader_clean.txt", std::ofstream::out | std::ofstream::binary | std::ofstream::trunc); + if (!file) { + printf("can't open 'filereader_clean.txt' for writing!\n"); + reader->Close(); + delete reader; + return -1; + } + int res; + int buflen = 10240; + char buf[buflen]; + while ((res = reader->ReadLine(buf, buflen - 1)) >= 0) { + // printf("len=%d, content:>%s<\n", res, buf); + buf[res] = '\n'; + buf[res + 1] = '\0'; + file.write(buf, strlen(buf)); + } + file.close(); + reader->Close(); + delete reader; + + printf("Please confirm that 'filereader_clean.txt' is equal to\n"); + printf("'filereader_messy.txt' except that all line separators have\n"); + printf("been replaced by a single '\\n' character (hex 0a).\n"); + FileReader verify; + assert(verify.Open("filereader_messy.txt")); + assert(verify.ReadLine(buf, buflen - 1) == ssize_t(strlen(l1)-1)); + assert(memcmp(l1, buf, strlen(l1)-1) == 0); + assert(verify.ReadLine(buf, buflen - 1) == ssize_t(strlen(l2)-1)); + assert(memcmp(l2, buf, strlen(l2)-1) == 0); + while ((res = verify.ReadLine(buf, buflen - 1)) >= 0) { + printf("len=%d, content:>%s<\n", res, buf); + } + verify.Reset(); + assert(verify.ReadLine(buf, buflen - 1) == ssize_t(strlen(l1)-1)); + assert(memcmp(l1, buf, strlen(l1)-1) == 0); +} diff --git a/fbench/src/test/httpclient.cpp 
b/fbench/src/test/httpclient.cpp new file mode 100644 index 00000000000..9c1800d0003 --- /dev/null +++ b/fbench/src/test/httpclient.cpp @@ -0,0 +1,54 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <util/httpclient.h> +#include <iostream> +#include <thread> + +int +main(int argc, char **argv) +{ + if (argc < 4) { + printf("usage: httpclient <host> <port> <url> [keep-alive]\n"); + return 1; + } + + HTTPClient *client; + ssize_t len; + + if(argc == 4) { + client = new HTTPClient(argv[1], atoi(argv[2]), false, true); + } else { + client = new HTTPClient(argv[1], atoi(argv[2]), true, true); + } + + std::ostream * output = & std::cout; + + if ((len = client->Fetch(argv[3], output).ResultSize()) >= 0) { + printf("SUCCESS!\n"); + printf("LENGTH: %ld\n", len); + } else { + printf("ERROR: could not fetch URL content.\n"); + } + if ((len = client->Fetch(argv[3], output).ResultSize()) >= 0) { + printf("SUCCESS!\n"); + printf("LENGTH: %ld\n", len); + } else { + printf("ERROR: could not fetch URL content.\n"); + } + + std::this_thread::sleep_for(std::chrono::seconds(20)); + + if ((len = client->Fetch(argv[3], output).ResultSize()) >= 0) { + printf("SUCCESS!\n"); + printf("LENGTH: %ld\n", len); + } else { + printf("ERROR: could not fetch URL content.\n"); + } + if ((len = client->Fetch(argv[3], output).ResultSize()) >= 0) { + printf("SUCCESS!\n"); + printf("LENGTH: %ld\n", len); + } else { + printf("ERROR: could not fetch URL content.\n"); + } + printf("REUSE COUNT: %" PRIu64 "\n", client->GetReuseCount()); + return 0; +} diff --git a/fbench/src/test/httpclient_splitstring.cpp b/fbench/src/test/httpclient_splitstring.cpp new file mode 100644 index 00000000000..4e893d37db4 --- /dev/null +++ b/fbench/src/test/httpclient_splitstring.cpp @@ -0,0 +1,58 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <util/httpclient.h> + +class DebugHTTPClient : public HTTPClient +{ +public: + DebugHTTPClient(const char* server, int port, bool keepAlive) + : HTTPClient(server, port, keepAlive, true) {} + + static void SplitLineTest(const char *input); + static void DebugSplitLine(); +}; + +void +DebugHTTPClient::SplitLineTest(const char *input) +{ + char str[1024]; + char *rest; + int argc; + char *argv[5]; + int i; + + memcpy(str, input, strlen(input) + 1); + printf("*** TEST HTTPClient::SplitString ***\n"); + printf("string:'%s'\n", str); + rest = str; + while (rest != NULL) { + rest = SplitString(rest, argc, argv, 5); + printf("argc:'%d'\n", argc); + printf("rest:'%s'\n", (rest == NULL) ? "NULL" : rest); + for(i = 0; i < argc; i++) { + printf(" %d:'%s'\n", i, argv[i]); + } + } +} + +void +DebugHTTPClient::DebugSplitLine() +{ + SplitLineTest("This is a test"); + SplitLineTest("This is exactly five words"); + SplitLineTest("five words with traling space "); + SplitLineTest(" This\t is \ta \t harder\ttest "); + SplitLineTest("SingleWord"); + SplitLineTest("\t\t \t\tSingleWordWithSpacesAround \t\t "); + SplitLineTest("just all too many parts baby "); + SplitLineTest("many many words does this long fancy string contain " + ", and they all must be tokenized by split line"); +} + +int +main(int argc, char **argv) +{ + (void) argc; + (void) argv; + DebugHTTPClient::DebugSplitLine(); +} diff --git a/fbench/src/util/.gitignore b/fbench/src/util/.gitignore new file mode 100644 index 00000000000..316ace34e7b --- /dev/null +++ b/fbench/src/util/.gitignore @@ -0,0 +1,3 @@ +.depend +Makefile +util.lib diff --git a/fbench/src/util/CMakeLists.txt b/fbench/src/util/CMakeLists.txt new file mode 100644 index 00000000000..9efca452cea --- /dev/null +++ b/fbench/src/util/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
// ---------------------------------------------------------------------
// fbench/src/util/clientstatus.h
// ---------------------------------------------------------------------
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include <stdint.h>
#include <map>
#include <string>
#include <vector>

/**
 * This is a helper struct that is used by the @ref Client class to
 * aggregate runtime statistics. It is also used to record warnings
 * and errors.
 **/
struct ClientStatus
{
    /**
     * Indicates whether a fatal error has occurred.
     **/
    bool _error;

    /**
     * Message explaining the error indicated by _error.
     **/
    std::string _errorMsg;

    /**
     * The number of requests that have been skipped.
     **/
    long _skipCnt;

    /**
     * The number of requests that have failed.
     **/
    long _failCnt;

    /**
     * The number of requests that had response time greater than the
     * cycle time.
     **/
    long _overtimeCnt;

    /**
     * Total response time for all requests.
     **/
    double _totalTime;

    /**
     * Real time passed. This is used to calculate the actual query
     * rate.
     **/
    double _realTime;

    /**
     * Total number of (successful) requests. Overtime requests are
     * included, but failed or skipped ones are not.
     **/
    long _requestCnt;

    /**
     * Resolution of timetable. A resolution of 1 means each entry in
     * the timetable is 1 millisecond. A resolution of 10 means each
     * entry is 1/10th of a millisecond.
     **/
    const int _timetableResolution;

    /**
     * Table where _timetable[i] is the number of requests whose
     * response time, multiplied by the resolution and rounded to the
     * nearest integer, equals i.
     **/
    std::vector<int> _timetable;

    /**
     * Number of requests with response time too large for the
     * timetable, i.e. greater than or equal to _timetable.size()
     * divided by _timetableResolution milliseconds.
     **/
    long _higherCnt;

    /**
     * The minimum response time measured.
     **/
    double _minTime;

    /**
     * The maximum response time measured.
     **/
    double _maxTime;

    /**
     * Connection reuse count. Tells us how many requests were made
     * without having to open a new connection. If keep-alive is not
     * enabled, this will always be 0.
     **/
    uint64_t _reuseCnt;

    /**
     * The number of zero hit queries
     **/
    long _zeroHitQueries;

    /**
     * The request status distribution. Key=Status, Value=Count.
     **/
    std::map<uint32_t, uint32_t> _requestStatusDistribution;

    ClientStatus();
    ~ClientStatus();

    // Not copyable: statistics are combined explicitly through Merge().
    ClientStatus(const ClientStatus &) = delete;
    ClientStatus &operator=(const ClientStatus &) = delete;

    /**
     * Notify that an error occurred and set an error message describing
     * the error. The client should only call this method once right
     * before exiting due to a fatal error.
     *
     * @param errorMsg A string explaining the error. NULL is stored as
     *                 an empty message.
     **/
    void SetError(const char* errorMsg);

    /**
     * Notify that a request was skipped. Long requests (measured in
     * bytes) will be skipped due to internal buffer limitations. This
     * should happen very rarely.
     **/
    void SkippedRequest() { _skipCnt++; }

    /**
     * Notify that a request failed. This should be called when the
     * client could not establish a connection to the server or a read
     * error occurred while fetching the response.
     **/
    void RequestFailed() { _failCnt++; }

    /**
     * Notify that the cycle time could not be held. This typically
     * indicates that either the server response time is longer than the
     * cycle time or that your thread/socket libraries are unable to
     * handle the number of clients currently running.
     **/
    void OverTime() { _overtimeCnt++; }

    /**
     * This method is used to register response times measured by the
     * client. Response times should only be registered for successful
     * requests. Negative and NaN values are ignored.
     *
     * @param ms Response time measured in milliseconds.
     **/
    void ResponseTime(double ms);

    /**
     * Set real time passed while benchmarking.
     *
     * @param ms time passed while benchmarking (in milliseconds)
     **/
    void SetRealTime(double ms) { _realTime = ms; }

    /**
     * Set connection reuse count.
     *
     * @param cnt connection reuse count
     **/
    void SetReuseCount(uint64_t cnt) { _reuseCnt = cnt; }

    /**
     * Add request status to request status distribution.
     *
     * @param status The status to insert
     **/
    void AddRequestStatus(uint32_t status);

    /**
     * Merge the info held by 'status' into the info held by this
     * struct. Note that the error flag and error messages are ignored. If
     * you do not want to use data held by a status struct with an error
     * you should check the error flag before merging.
     *
     * @param status The ClientStatus that should be merged into this one.
     **/
    void Merge(const ClientStatus & status);

    /**
     * @return the minimum response time.
     **/
    double GetMin();

    /**
     * @return the maximum response time.
     **/
    double GetMax();

    /**
     * @return the average response time, or 0 if no requests were registered.
     **/
    double GetAverage();

    /**
     * @return The 50 percent percentile (aka median).
     **/
    double GetMedian() { return GetPercentile(50); }

    /**
     * This method calculates a response time that separates the 'percent'
     * percent fastest requests from the (100 - 'percent') percent slowest
     * requests. A single request may be classified by comparing the
     * request response time with the percentile returned by this
     * method. The result is interpolated between the two timetable
     * entries surrounding the requested rank. If the requested rank lies
     * among the requests counted in _higherCnt (response time outside the
     * timetable), the value is approximated by assuming that those
     * requests are uniformly distributed between the end of the
     * timetable and the maximum response time.
     *
     * @return the calculated percentile in milliseconds, or 0 if no
     *         requests have been registered.
     * @param percent percent of requests that should have response time lower
     *        than the percentile to be calculated by this method. Values
     *        outside the range [0,100] are clamped to that range.
     **/
    double GetPercentile(double percent);
};

// ---------------------------------------------------------------------
// fbench/src/util/clientstatus.cpp
// ---------------------------------------------------------------------
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
// (the struct declared above is what the source file obtains from "clientstatus.h")
#include <stdio.h>
#include <string.h>
#include <math.h>

ClientStatus::ClientStatus()
    : _error(false),
      _errorMsg(),
      _skipCnt(0),
      _failCnt(0),
      _overtimeCnt(0),
      _totalTime(0),
      _realTime(0),
      _requestCnt(0),
      _timetableResolution(10),
      _timetable(10240 * _timetableResolution, 0),
      _higherCnt(0),
      _minTime(0),
      _maxTime(0),
      _reuseCnt(0),
      _zeroHitQueries(0),
      _requestStatusDistribution()
{
}

ClientStatus::~ClientStatus()
{
}

void
ClientStatus::SetError(const char *errorMsg)
{
    _error = true;
    // assigning a NULL pointer to std::string is undefined behaviour
    _errorMsg = (errorMsg != NULL) ? errorMsg : "";
}

void
ClientStatus::ResponseTime(double ms)
{
    if (!(ms >= 0)) return; // negative or NaN; should never happen.
    if (ms > _maxTime)
        _maxTime = ms;
    if (ms < _minTime || _requestCnt == 0)
        _minTime = ms;
    _totalTime += ms;

    // Decide on the bucket while the value is still a double: converting
    // a value that does not fit in size_t would be undefined behaviour.
    const double slot = ms * _timetableResolution + 0.5;
    if (slot >= (double)_timetable.size())
        _higherCnt++;
    else
        _timetable[(size_t)slot]++;
    _requestCnt++;
}

void
ClientStatus::AddRequestStatus(uint32_t status)
{
    // operator[] value-initializes a missing counter to 0
    ++_requestStatusDistribution[status];
}

void
ClientStatus::Merge(const ClientStatus & status)
{
    if (_timetable.size() != status._timetable.size()) {
        printf("ClientStatus::Merge() : incompatible data structures!\n");
        return;
    }

    if (_maxTime < status._maxTime)
        _maxTime = status._maxTime;
    // _minTime is only meaningful for a side that has registered requests
    if ((_requestCnt == 0) ||
        (_minTime > status._minTime && status._requestCnt > 0))
        _minTime = status._minTime;
    _skipCnt        += status._skipCnt;
    _failCnt        += status._failCnt;
    _overtimeCnt    += status._overtimeCnt;
    _totalTime      += status._totalTime;
    _realTime       += status._realTime;
    _requestCnt     += status._requestCnt;
    for (size_t i = 0; i < _timetable.size(); i++)
        _timetable[i] += status._timetable[i];
    _higherCnt      += status._higherCnt;
    _reuseCnt       += status._reuseCnt;
    _zeroHitQueries += status._zeroHitQueries;

    for (const auto& entry : status._requestStatusDistribution)
        _requestStatusDistribution[entry.first] += entry.second;
}

double
ClientStatus::GetMin()
{
    return _minTime;
}

double
ClientStatus::GetMax()
{
    return _maxTime;
}

double
ClientStatus::GetAverage()
{
    return (_requestCnt == 0) ?
        0 : _totalTime / ((double)_requestCnt);
}

double
ClientStatus::GetPercentile(double percent)
{
    if (percent < 0.0) percent = 0.0;
    if (percent > 100.0) percent = 100.0;
    if (_requestCnt == 0)
        return 0.0;   // nothing registered; same value the full scan yields

    // The percentile is interpolated between the requests with (0-based)
    // rank t1 and t2 in the sorted sequence of response times.
    const double target = ((double)(_requestCnt - 1)) * (percent / 100.0);
    const long   t1     = (long)floor(target);
    const long   t2     = (long)ceil(target);
    const double k      = ceil(target) - target;

    // Walk the timetable until the accumulated count exceeds 'rank'.
    // 'idx' becomes -1 when the rank lies beyond the end of the table.
    auto advance = [this](int &idx, long &cnt, long rank) {
        while (idx >= 0 && cnt <= rank) {
            if (idx + 1 < int(_timetable.size()))
                cnt += _timetable[++idx];
            else
                idx = -1;
        }
    };

    // Timetable-scaled estimate for a rank that falls among the requests
    // counted in _higherCnt.
    auto overflowEstimate = [this](long rank) -> double {
        const double top = _maxTime * _timetableResolution;
        if (_higherCnt < 2)
            return top;
        // use uniform distribution for approximation
        const double tableEnd = (double)_timetable.size();
        return (((double)(rank - (_requestCnt - _higherCnt))) / ((double)(_higherCnt - 1)))
            * (top - tableEnd) + tableEnd;
    };

    long cnt = _timetable[0];
    int  i1  = 0;
    advance(i1, cnt, t1);
    int  i2  = i1;           // t2 >= t1, so continue from where t1 stopped
    advance(i2, cnt, t2);

    const double val1 = (i1 >= 0) ? (double)i1 : overflowEstimate(t1);
    const double val2 = (i2 >= 0) ? (double)i2 : overflowEstimate(t2);
    return (k * val1 + (1 - k) * val2) / _timetableResolution;
}
// ---------------------------------------------------------------------
// fbench/src/util/filereader.h
// ---------------------------------------------------------------------
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include <stdint.h>
#include <sys/types.h>
#include <fstream>
#include <memory>
#include <vector>

/**
 * Wrapper around getopt(3) that keeps the parser position in the
 * caller-supplied 'optionIndex' instead of relying on the global optind.
 *
 * @return the value returned by getopt.
 * @param argc argument count, as passed to main.
 * @param argv argument vector, as passed to main.
 * @param optionsString the getopt option specification.
 * @param optionArgument set to the argument of the parsed option (optarg).
 * @param optionIndex in: index to continue parsing from; out: index of
 *                    the next argument to be parsed.
 **/
int GetOpt (int argc, char *argv[], const char *optionsString,
            const char* &optionArgument,
            int &optionIndex);

/**
 * This is a wrapper class for std::ifstream that may be used when
 * reading line based text files. An internal buffer is used to
 * improve performance.
 **/
class FileReader
{
private:
    std::unique_ptr<std::ifstream> _backing;  // owned file; empty when reading stdin
    std::istream                  *_file;     // stream currently read from
    int                            _bufsize;  // capacity of _buf
    std::vector<char>              _buf;      // read-ahead buffer
    int                            _bufused;  // number of valid bytes in _buf
    int                            _bufpos;   // index of next unread byte in _buf

    /**
     * Fill the internal buffer with data from the currently open file.
     **/
    void FillBuffer();

    /**
     * Discard all buffered data; the next read refills the buffer from
     * the current position of the underlying stream.
     **/
    void ResetBuffer() { _bufused = 0; _bufpos = 0; }

public:

    /**
     * Creates a reader that reads from standard input until @ref Open is
     * called. An internal buffer of 1 MiB (1024*1024 bytes) is also
     * created.
     **/
    FileReader();

    /**
     * Frees memory used by the underlying file and the internal buffer.
     **/
    ~FileReader();

    FileReader(const FileReader &) = delete;
    FileReader &operator=(const FileReader &) = delete;

    /**
     * Read a single byte from the currently open input file. You should
     * call @ref Open before calling this method. The internal buffer is
     * used to reduce the number of reads performed on the underlying
     * file.
     *
     * @return the read byte or -1 if EOF was reached or an error occurred.
     **/
    int ReadByte()
    {
        if(_bufpos == _bufused)
            FillBuffer();
        return (_bufused > _bufpos) ? _buf[_bufpos++] & 0x0ff : -1;
    }

    /**
     * Open a file for reading. Any previously buffered data is discarded.
     *
     * @return success(true)/failure(false)
     * @param filename the name of the file to open.
     **/
    bool Open(const char *filename);

    /**
     * Open the standard input for reading. A file opened earlier with
     * @ref Open is released and buffered data is discarded.
     *
     * @return success(true)/failure(false)
     **/
    bool OpenStdin();

    /**
     * Reset the file pointer and flush the internal buffer. The next
     * read operation will apply to the beginning of the file.
     *
     * @return success(true)/failure(false)
     **/
    bool Reset();

    /**
     * Works like Reset(), but sets the file pointer to 'pos'.
     *
     * @return success(true)/failure(false)
     **/
    bool SetFilePos(int64_t pos);

    /**
     * Determine the size of the file. The read position and the buffered
     * data are left untouched.
     *
     * @return size of file in bytes, or -1 if the stream is not seekable.
     **/
    int64_t GetFileSize();

    /**
     * @returns _bufpos
     **/
    uint64_t GetBufPos() const { return _bufpos; }

    /**
     * @returns offset of next newline from pos
     **/
    uint64_t FindNewline(int64_t pos);

    /**
     * Read the next line of text from the currently open file into
     * 'buf'. If the line is longer than ('bufsize' - 1), the first
     * ('bufsize' - 1) bytes will be placed in 'buf' and the true length
     * of the line will be returned. The string placed in 'buf' will be
     * terminated with a null character. Newline characters will be
     * discarded. A line is terminated by either '\n', '\r', "\r\n",
     * "\n\r" or EOF. This method uses @ref ReadByte to read single
     * bytes from the file. If 'bufsize' is 0, nothing is written to
     * 'buf' (which may then be NULL) and the line is just skipped.
     *
     * @return the actual length of the next line, or -1 if no line was read.
     * @param buf where to put the line.
     * @param bufsize the length of buf.
     **/
    ssize_t ReadLine(char *buf, size_t bufsize);

    /**
     * Close the file.
     **/
    void Close();
};

// ---------------------------------------------------------------------
// fbench/src/util/filereader.cpp
// ---------------------------------------------------------------------
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
// (the class declared above is what the source file obtains from "filereader.h")
#include <iostream>
#include <unistd.h>

int GetOpt (int argc, char *argv[], const char *optionsString,
            const char* &optionArgument,
            int &optionIndex)
{
    optind = optionIndex;       // resume where the caller left off

    int rc = getopt(argc, argv, optionsString);
    optionArgument = optarg;
    optionIndex = optind;
    return rc;
}

FileReader::FileReader()
    : _backing(),
      _file(&std::cin),
      _bufsize(1024*1024),
      _buf(_bufsize),
      _bufused(0),
      _bufpos(0)
{
}

FileReader::~FileReader()
{
}

bool
FileReader::Open(const char *filename)
{
    _backing = std::make_unique<std::ifstream>(filename);
    _file = _backing.get();
    ResetBuffer();              // bytes buffered from a previous stream are stale
    return (bool)*_file;
}

bool
FileReader::OpenStdin()
{
    _backing.reset();           // release a file opened earlier, if any
    _file = &std::cin;
    ResetBuffer();
    return true;
}

bool
FileReader::Reset()
{
    _file->clear();             // a read that hit EOF leaves eofbit/failbit set
    _file->seekg(0);
    ResetBuffer();              // otherwise the next read returns pre-seek data
    return bool(*_file);
}

bool
FileReader::SetFilePos(int64_t pos)
{
    _file->clear();             // seekg fails while failbit is set
    _file->seekg(pos);
    ResetBuffer();              // drop data buffered from the old position
    return bool(*_file);
}

int64_t
FileReader::GetFileSize()
{
    _file->clear();
    const std::istream::pos_type here = _file->tellg();
    _file->seekg(0, std::istream::end);
    const int64_t size = _file->tellg();
    if (here != std::istream::pos_type(-1)) {
        // go back so the stream position still matches the buffered data
        _file->seekg(here);
    }
    if (!*_file) {
        _file->clear();         // a non-seekable stream must stay readable
    }
    return size;
}

uint64_t
FileReader::FindNewline(int64_t pos)
{
    char buf[100];
    SetFilePos(pos);
    ssize_t len = ReadLine(buf, 100);
    // NOTE(review): ReadLine has already consumed the line terminator, so
    // this skips one additional buffered byte, and 'len' is -1 when 'pos'
    // is at EOF. Kept as in the original; confirm against the callers.
    ++_bufpos;

    return pos+len;
}

void
FileReader::FillBuffer()
{
    _file->read(&_buf[0], _bufsize);
    _bufused = static_cast<int>(_file->gcount()); // 0 at EOF or on error
    _bufpos = 0;
}

ssize_t
FileReader::ReadLine(char *buf, size_t bufsize)
{
    size_t len = 0;
    int    c   = ReadByte();

    if (c == -1)
        return -1;
    while (c != -1 && c != '\n' && c != '\r') {
        if (len + 1 < bufsize)  // keep room for the terminator; safe for bufsize 0
            buf[len] = c;
        len++;
        c = ReadByte();
    }
    // Peek at the next byte to swallow the second half of "\n\r" / "\r\n".
    if (_bufpos == _bufused)
        FillBuffer();
    if ((_bufused > _bufpos) &&
        ((c == '\n' && _buf[_bufpos] == '\r') ||
         (c == '\r' && _buf[_bufpos] == '\n')))
        _bufpos++;
    if (len < bufsize)
        buf[len] = '\0';          // terminate string
    else if (bufsize > 0)
        buf[bufsize - 1] = '\0';  // terminate (truncated) string
    return len;
}

void
FileReader::Close()
{
    if (_backing) {
        _backing->close();
    }
}
+#include "httpclient.h" + +#define FETCH_BUFLEN 5120 +#define FIXED_REQ_MAX 256 + + +HTTPClient::ConnCloseReader +HTTPClient::ConnCloseReader::_instance; + +HTTPClient::ContentLengthReader +HTTPClient::ContentLengthReader::_instance; + +HTTPClient::ChunkedReader +HTTPClient::ChunkedReader::_instance; + + +HTTPClient::HTTPClient(const char *hostname, int port, + bool keepAlive, bool headerBenchmarkdataCoverage, + const std::string & extraHeaders, const std::string &authority) + : _socket(new FastOS_Socket()), + _hostname(hostname), + _port(port), + _keepAlive(keepAlive), + _headerBenchmarkdataCoverage(headerBenchmarkdataCoverage), + _extraHeaders(extraHeaders), + _authority(authority), + _reuseCount(0), + _bufsize(10240), + _buf(new char[_bufsize]), + _bufused(0), + _bufpos(0), + _headerinfo(), + _isOpen(false), + _httpVersion(0), + _requestStatus(0), + _totalHitCount(-1), + _connectionCloseGiven(false), + _contentLengthGiven(false), + _chunkedEncodingGiven(false), + _keepAliveGiven(false), + _contentLength(0), + _chunkSeq(0), + _chunkLeft(0), + _dataRead(0), + _dataDone(false), + _reader(NULL) +{ + _socket->SetAddressByHostName(port, hostname); + if (_authority == "") { + char tmp[1024]; + snprintf(tmp, 1024, "%s:%d", hostname, port); + _authority = tmp; + } +} + +ssize_t +HTTPClient::FillBuffer() { + _bufused = _socket->Read(_buf, _bufsize); // may be -1 + _bufpos = 0; + return _bufused; +} + +HTTPClient::~HTTPClient() +{ + if (_socket) + _socket->Close(); + delete [] _buf; +} + +ssize_t +HTTPClient::ReadLine(char *buf, size_t bufsize) +{ + size_t len = 0; + int lastC = 0; + int c = ReadByte(); + + if (c == -1) + return -1; + while (c != '\n' && c != -1) { + if (len + 1 < bufsize) + buf[len] = c; + len++; + lastC = c; + c = ReadByte(); + } + if (lastC == '\r') + len--; + if (len < bufsize) + buf[len] = '\0'; // terminate string + else if (bufsize > 0) + buf[bufsize - 1] = '\0'; // terminate string + return len; +} + +bool +HTTPClient::Connect(const char *url) +{ 
+ char tmp[4096]; + char *req = NULL; + uint32_t req_max = 0; + uint32_t url_len = strlen(url); + uint32_t host_len = _hostname.size(); + + // Add additional headers + std::string headers = _extraHeaders; + + // this is always requested to get robust info on total hit count. + headers += "X-Yahoo-Vespa-Benchmarkdata: true\r\n"; + + if ( _headerBenchmarkdataCoverage ) { + headers += "X-Yahoo-Vespa-Benchmarkdata-Coverage: true\r\n"; + } + + if (url_len + host_len + headers.length() + FIXED_REQ_MAX < sizeof(tmp)) { + req = tmp; + req_max = sizeof(tmp); + } else { + req_max = url_len + host_len + headers.length() + FIXED_REQ_MAX; + req = new char[req_max]; + assert(req != NULL); + } + + if (headers.length() > 0) { + headers += "\r\n"; + } + // create request + if(_keepAlive) { + snprintf(req, req_max, + "GET %s HTTP/1.1\r\n" + "Host: %s\r\n" + "User-Agent: fbench/4.2.10\r\n" + "%s" + "\r\n", + url, _authority.c_str(), headers.c_str()); + } else { + snprintf(req, req_max, + "GET %s HTTP/1.1\r\n" + "Host: %s\r\n" + "Connection: close\r\n" + "User-Agent: fbench/4.2.10\r\n" + "%s" + "\r\n", + url, _authority.c_str(), headers.c_str()); + } + + // try to reuse connection if keep-alive is enabled + if (_keepAlive + && _socket->IsOpened() + && _socket->Write(req, strlen(req)) == (ssize_t)strlen(req) + && FillBuffer() > 0) { + + // DEBUG + // printf("Socket Connection reused!\n"); + _reuseCount++; + if (req != tmp) { + delete [] req; + } + return true; + } else { + _socket->Close(); + ResetBuffer(); + } + + // try to open new connection to server + if (_socket->SetSoBlocking(true) + && _socket->Connect() + && _socket->SetNoDelay(true) + && _socket->SetSoLinger(false, 0) + && _socket->Write(req, strlen(req)) == (ssize_t)strlen(req)) { + + // DEBUG + // printf("New Socket connection!\n"); + if (req != tmp) { + delete [] req; + } + return true; + } else { + _socket->Close(); + } + + // DEBUG + // printf("Connect FAILED!\n"); + if (req != tmp) { + delete [] req; + } + return false; 
+} + +char * +HTTPClient::SplitString(char *input, int &argc, char **argv, int maxargs) +{ + for (argc = 0, argv[0] = input; *input != '\0'; input++) + if (*input == '\t' || *input == ' ') { + *input = '\0'; + if (*(argv[argc]) != '\0' && ++argc >= maxargs) + return (input + 1); // INCOMPLETE + argv[argc] = (input + 1); + } + if (*(argv[argc]) != '\0') + argc++; + return NULL; // COMPLETE +} + +bool +HTTPClient::ReadHTTPHeader() +{ + int lineLen; + char line[4096]; + int argc; + char *argv[32]; + int i; + + // clear HTTP header flags + _connectionCloseGiven = false; + _contentLengthGiven = false; + _chunkedEncodingGiven = false; + _keepAliveGiven = false; + + // read and split status line + if ((lineLen = ReadLine(line, 4096)) <= 0) + return false; + SplitString(line, argc, argv, 32); + + // parse status line + if (argc >= 2) { + if (strncmp(argv[0], "HTTP/", 5) != 0) + return false; + _httpVersion = (strncmp(argv[0], "HTTP/1.0", 8) == 0) ? + 0 : 1; + _requestStatus = atoi(argv[1]); + } else { + return false; + } + + // DEBUG + // printf("HTTP: version: 1.%d\n", _httpVersion); + // printf("HTTP: status: %d\n", _requestStatus); + + // read and parse rest of header + while((lineLen = ReadLine(line, 4096)) > 0) { + + // DEBUG + // printf("HTTP-Header: '%s'\n", line); + + if (strncmp(line, "X-Yahoo-Vespa-", strlen("X-Yahoo-Vespa")) == 0) { + const auto benchmark_data = std::string(line + 14); + + auto strpos = benchmark_data.find("TotalHitCount:"); + if (strpos != std::string::npos) { + _totalHitCount = atoi(benchmark_data.substr(14).c_str()); + } + + // Make sure to have enough memory in _headerinfo + _headerinfo += benchmark_data; + _headerinfo += "\n"; + } + + SplitString(line, argc, argv, 32); + if (argc > 1) { + if (strcasecmp(argv[0], "connection:") == 0) { + for(i = 1; i < argc; i++) { + // DEBUG + // printf("HTTP: Connection: '%s'\n", argv[i]); + + if (strcasecmp(argv[i], "keep-alive") == 0) { + _keepAliveGiven = true; + + // DEBUG + // printf("HTTP: connection 
keep-alive given\n"); + } + if (strcasecmp(argv[i], "close") == 0) { + _connectionCloseGiven = true; + + // DEBUG + // printf("HTTP: connection close given\n"); + } + } + } + if (strcasecmp(argv[0], "content-length:") == 0) { + _contentLengthGiven = true; + _contentLength = atoi(argv[1]); + + // DEBUG + // printf("HTTP: content length : %d\n", _contentLength); + } + if (strcasecmp(argv[0], "transfer-encoding:") == 0 + && strcasecmp(argv[1], "chunked") == 0) { + _chunkedEncodingGiven = true; + + // DEBUG + // printf("HTTP: chunked encoding given\n"); + } + } + } + return (lineLen == 0); +} + +bool +HTTPClient::ReadChunkHeader() +{ + int lineLen; + char numStr[10]; + char c; + int i; + + if (_chunkSeq++ > 0 && ReadLine(NULL, 0) != 0) + return false; // no CRLF(/LF) after data block + + assert(_chunkLeft == 0); + if (ReadLine(numStr, 10) <= 0) + return false; // chunk length not found + for (i = 0; i < 10; i++) { + c = numStr[i]; + if (c >= 'a' && c <= 'f') + c = c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + c = c - 'A' + 10; + else if (c >= '0' && c <= '9') + c = c - '0'; + else + break; + if (i >= 8) // can't handle chunks this big + return false; + _chunkLeft = (_chunkLeft << 4) + c; + } + + // DEBUG + // printf("CHUNK: Length: %d\n", _chunkLeft); + + if (_chunkLeft == 0) { + while ((lineLen = ReadLine(NULL, 0)) > 0); // skip trailer + if (lineLen < 0) + return false; // data error + _dataDone = true; // got last chunk + } + return true; +} + +bool +HTTPClient::Open(const char *url) +{ + if (_isOpen) + Close(); + + ResetBuffer(); + _dataRead = 0; + _dataDone = false; + _isOpen = Connect(url); + if(!_isOpen || !ReadHTTPHeader()) { + Close(); + return false; + } + if(_chunkedEncodingGiven) { + _chunkSeq = 0; + _chunkLeft = 0; + + // DEBUG + // printf("READER = Chunked\n"); + _reader = ChunkedReader::GetInstance(); + } else if(_contentLengthGiven) { + + // DEBUG + // printf("READER = ContentLength\n"); + _reader = ContentLengthReader::GetInstance(); + } else { + + 
// DEBUG + // printf("READER = ConnClose\n"); + _reader = ConnCloseReader::GetInstance(); + } + return true; +} + +ssize_t +HTTPClient::ConnCloseReader::Read(HTTPClient &client, + void *buf, size_t len) +{ + size_t fromBuffer = 0; + ssize_t res = 0; + ssize_t readRes; + + if (client._bufused > client._bufpos) { // data in buffer ? + fromBuffer = (((size_t)(client._bufused - client._bufpos)) > len) ? + len : client._bufused - client._bufpos; + memcpy(buf, client._buf + client._bufpos, fromBuffer); + client._bufpos += fromBuffer; + client._dataRead += fromBuffer; + res = fromBuffer; + } + if ((len - fromBuffer) > (len >> 1)) { + readRes = client._socket->Read(static_cast<char *>(buf) + + fromBuffer, len - fromBuffer); + if (readRes < 0) { + client.Close(); + return -1; + } + if (readRes == 0) + client._dataDone = true; + client._dataRead += readRes; + res += readRes; + } + return res; +} + +ssize_t +HTTPClient::ContentLengthReader::Read(HTTPClient &client, + void *buf, size_t len) +{ + size_t fromBuffer = 0; + ssize_t res = 0; + ssize_t readLen; + ssize_t readRes; + + if (client._bufused > client._bufpos) { // data in buffer ? + fromBuffer = (((size_t)(client._bufused - client._bufpos)) > len) ? + len : client._bufused - client._bufpos; + memcpy(buf, client._buf + client._bufpos, fromBuffer); + client._bufpos += fromBuffer; + client._dataRead += fromBuffer; + res = fromBuffer; + if (client._dataRead >= client._contentLength) { + client._dataDone = true; + return res; + } + } + if ((len - fromBuffer) > (len >> 1)) { + readLen = (len - fromBuffer + < client._contentLength - client._dataRead) ? 
+ len - fromBuffer : client._contentLength - client._dataRead; + readRes = client._socket->Read(static_cast<char *>(buf) + + fromBuffer, readLen); + if (readRes < 0) { + client.Close(); + return -1; + } + client._dataRead += readRes; + res += readRes; + if (client._dataRead >= client._contentLength) { + client._dataDone = true; + return res; + } + if (readRes == 0) { // data lost because server closed connection + client.Close(); + return -1; + } + } + return res; +} + +ssize_t +HTTPClient::ChunkedReader::Read(HTTPClient &client, + void *buf, size_t len) +{ + size_t fromBuffer = 0; + ssize_t res = 0; + + while ((len - res) > (len >> 1)) { + if (client._chunkLeft == 0) { + if (!client.ReadChunkHeader()) { + client.Close(); + return -1; + } + if (client._dataDone) + return res; + } + if (client._bufused == client._bufpos) { + if (client.FillBuffer() <= 0) { + client.Close(); + return -1; + } + } + fromBuffer = ((len - res) < ((size_t)(client._bufused - client._bufpos))) ? + len - res : client._bufused - client._bufpos; + fromBuffer = (client._chunkLeft < fromBuffer) ? + client._chunkLeft : fromBuffer; + memcpy(static_cast<char *>(buf) + res, client._buf + client._bufpos, fromBuffer); + client._bufpos += fromBuffer; + client._dataRead += fromBuffer; + client._chunkLeft -= fromBuffer; + res += fromBuffer; + } + return res; +} + +ssize_t +HTTPClient::Read(void *buf, size_t len) +{ + if (!_isOpen) + return -1; + if (_dataDone) + return 0; + return _reader->Read(*this, buf, len); +} + +bool +HTTPClient::Close() +{ + if (!_isOpen) + return true; + + _isOpen = false; + return (!_keepAlive + || _connectionCloseGiven + || !_dataDone + || (_httpVersion == 0 && !_keepAliveGiven)) ? + _socket->Close() : true; +} + +HTTPClient::FetchStatus +HTTPClient::Fetch(const char *url, std::ostream *file) +{ + size_t buflen = FETCH_BUFLEN; + char buf[FETCH_BUFLEN]; // NB: ensure big enough thread stack. 
+ ssize_t readRes = 0; + ssize_t written = 0; + + if (!Open(url)) { + return FetchStatus(false, _requestStatus, _totalHitCount, 0); + } + + // Write headerinfo + if (file) { + file->write(_headerinfo.c_str(), _headerinfo.length()); + if (file->fail()) { + Close(); + return FetchStatus(false, _requestStatus, _totalHitCount, 0); + } + file->write("\r\n", 2); + // Reset header data. + _headerinfo = ""; + } + + while((readRes = Read(buf, buflen)) > 0) { + if(file != NULL) { + if (!file->write(buf, readRes)) { + Close(); + return FetchStatus(false, _requestStatus, _totalHitCount, written); + } + } + written += readRes; + } + Close(); + + return FetchStatus(_requestStatus == 200 && readRes == 0 && _totalHitCount >= 0, + _requestStatus, + _totalHitCount, + written); +} diff --git a/fbench/src/util/httpclient.h b/fbench/src/util/httpclient.h new file mode 100644 index 00000000000..e69a10346cd --- /dev/null +++ b/fbench/src/util/httpclient.h @@ -0,0 +1,335 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <ostream> +#include <memory> +#include <vespa/fastos/socket.h> + +/** + * This class implements a HTTP client that may be used to fetch + * documents from a HTTP server. It uses the HTTP 1.1 protocol, but in + * order to keep the external interface simple, it does not support + * request pipelining. + **/ +class HTTPClient +{ +private: + HTTPClient(const HTTPClient &); + HTTPClient &operator=(const HTTPClient &); + +protected: + + /** + * abstract superclass of classes used to handle reading of URL + * content depending on how the content length may be determined. + **/ + class ReaderInterface + { + public: + ReaderInterface() {} + virtual ~ReaderInterface() {} + + /** + * This method is called by the @ref HTTPClient::Read(char *, + * size_t) method in order to read from the URL in the appropriate + * way. + * + * @return bytes read or -1 on failure. 
+ * @param client the client object doing the read. + * @param buf where to store the incoming data. + * @param len length of buf. + **/ + virtual ssize_t Read(HTTPClient &client, void *buf, size_t len) = 0; + }; + friend class HTTPClient::ReaderInterface; + + /** + * Class used to handle reading of URL content when content length + * is indicated by the server closing the connection. + **/ + class ConnCloseReader : public ReaderInterface + { + private: + static ConnCloseReader _instance; + public: + ConnCloseReader() {} + virtual ~ConnCloseReader() {} + static ReaderInterface *GetInstance() { return &_instance; } + ssize_t Read(HTTPClient &client, void *buf, size_t len); + }; + friend class HTTPClient::ConnCloseReader; + + /** + * Class used to handle reading of URL content when content length + * is given by a Content-Length header value. + **/ + class ContentLengthReader : public ReaderInterface + { + private: + static ContentLengthReader _instance; + public: + ContentLengthReader() {} + virtual ~ContentLengthReader() {} + static ReaderInterface *GetInstance() { return &_instance; } + ssize_t Read(HTTPClient &client, void *buf, size_t len); + }; + friend class HTTPClient::ContentLengthReader; + + /** + * Class used to handle reading of URL content sent with chunked + * transfer encoding. 
+ **/ + class ChunkedReader : public ReaderInterface + { + private: + static ChunkedReader _instance; + public: + ChunkedReader() {} + virtual ~ChunkedReader() {} + static ReaderInterface *GetInstance() { return &_instance; } + ssize_t Read(HTTPClient &client, void *buf, size_t len); + }; + friend class HTTPClient::ChunkedReader; + + std::unique_ptr<FastOS_Socket> _socket; + std::string _hostname; + int _port; + bool _keepAlive; + bool _headerBenchmarkdataCoverage; + std::string _extraHeaders; + std::string _authority; + uint64_t _reuseCount; + + size_t _bufsize; + char *_buf; + ssize_t _bufused; + ssize_t _bufpos; + + std::string _headerinfo; + unsigned int _headerinfoPos; + + bool _isOpen; + unsigned int _httpVersion; + unsigned int _requestStatus; + int _totalHitCount; + bool _connectionCloseGiven; + bool _contentLengthGiven; + bool _chunkedEncodingGiven; + bool _keepAliveGiven; + unsigned int _contentLength; + + unsigned int _chunkSeq; // chunk sequence number + unsigned int _chunkLeft; // bytes left of current chunk + unsigned int _dataRead; // total bytes read from URL + bool _dataDone; // all URL content read ? + ReaderInterface *_reader; // handles core URL reading + + + /** + * Discard all data currently present in the internal buffer. + **/ + void ResetBuffer() + { + _bufpos = 0; + _bufused = 0; + } + + /** + * Fill the internal buffer with data from the url we are connected + * to. + * + * @return the number of bytes put into the buffer or -1 on fail. + **/ + ssize_t FillBuffer(); + + /** + * Return the next byte from the data stream we are reading. + * + * @return next byte from the data stream or -1 on EOF/ERROR + **/ + int ReadByte() + { + if (_bufpos == _bufused) + FillBuffer(); + return (_bufused > _bufpos) ? _buf[_bufpos++] & 0x0ff : -1; + } + + /** + * Connect to the given url. 
+ * + * @return success(true)/failure(false) + * @param url the url you want to connect to + **/ + bool Connect(const char *url); + + /** + * Read the next line of text from the data stream into 'buf'. If + * the line is longer than ('bufsize' - 1), the first ('bufsize' - + * 1) bytes will be placed in buf (the rest of the line will be + * discarded), and the true length of the line will be returned. The + * string placed in buf will be terminated with a null + * character. Newline characters will be discarded. A line is + * terminated by either '\n', "\r\n" or EOF (EOF - connection + * closed) + * + * @return the actual length of the next line, or -1 if no line was read. + * @param buf where to put the line. + * @param bufsize the length of buf. + **/ + ssize_t ReadLine(char *buf, size_t bufsize); + + /** + * Split a string into parts by inserting null characters into the + * string and index the parts by putting pointers to them in the + * argument array given. Only non-empty parts will be indexed in the + * argument array. + * + * @return NULL(complete split)/rest of string(incomplete split) + * @param input the null-terminated input string. + * @param argc the number of parts found. + * @param argv the argument array. + * @param maxargs the size of 'argv'. + **/ + static char *SplitString(char *input, int &argc, char **argv, + int maxargs); + + /** + * Read and parse the HTTP Header. + * + * @return success(true)/failure(fail) + **/ + bool ReadHTTPHeader(); + + /** + * Read and parse a chunk header. Only used with chunked encoding. + * + * @return success(true)/failure(fail) + **/ + bool ReadChunkHeader(); + +public: + + /** + * Create a HTTP client that may be used to fetch documents from the + * given host. + * + * @param hostname the host you want to fetch documents from. + * @param port the TCP port to use when contacting the host. + * @param keepAlive flag indicating if keep-alive should be enabled. 
+ **/ + HTTPClient(const char *hostname, int port, bool keepAlive, + bool headerBenchmarkdataCoverage, const std::string & extraHeaders="", const std::string &authority = ""); + + /** + * Disconnect from server and free memory. + **/ + ~HTTPClient(); + + /** + * This method may be used to obtain information about how many + * times a physical connection has been reused to send an additional + * HTTP request. Note that connections may only be reused if + * keep-alive is enabled. + * + * @return connection reuse count + **/ + uint64_t GetReuseCount() const + { + return _reuseCount; + } + + /** + * Connect to the given url and read the response HTTP header. Note + * that this method will fail if the host returns a status code + * other than 200. This is done in order to make the interface as + * simple as possible. + * + * @return success(true)/failure(false) + * @param url the url you want to connect to + **/ + bool Open(const char *url); + + /** + * Read data from the url we are currently connected to. This method + * should be called repeatedly until it returns 0 in order to + * completely read the URL content. If @ref Close is called before + * all URL content is read the physical connection will be closed + * even if keepAlive is enabled. + * + * @return bytes read or -1 on failure. + * @param buf where to store the incoming data. + * @param len length of buf. + **/ + ssize_t Read(void *buf, size_t len); + + /** + * Close the connection to the url we are currently reading + * from. Will also close the physical connection if keepAlive is not + * enabled or if all the url content was not read. This is done + * because skipping will probably be more expencive than creating a + * new connection. + * + * @return success(true)/failure(false) + **/ + bool Close(); + + /** + * Class that provides status about the executed fetch method. + **/ + class FetchStatus final + { + public: + /** + * Create a status for the executed fetch. 
+ * + * @param requestStatus The status from the HTTP server. + * @param totalHitCount The total number of hits. + * @param resultSize The number of bytes in result. + **/ + FetchStatus(bool ok, uint32_t requestStatus, int32_t totalHitCount, int32_t resultSize) : + _ok(ok), + _requestStatus(requestStatus), + _totalHitCount(totalHitCount), + _resultSize(resultSize) + {} + /** + * Query if the operation was successful. + * @return Status of operation. + **/ + auto Ok() const { return _ok; } + /** + Query HTTP request status. + @return HTTP request status. + **/ + auto RequestStatus() const { return _requestStatus; } + /** + * Query total hit count. Returns -1 if the total hit count + * could not be found. + * @return Total hit count for query. + **/ + auto TotalHitCount() const { return _totalHitCount; } + /** + * Query the number of bytes in the result buffer. + * @return Number of bytes in result buffer. + **/ + auto ResultSize() const { return _resultSize; } + + private: + bool _ok; + uint32_t _requestStatus; + int32_t _totalHitCount; + int32_t _resultSize; + }; + + /** + * High-level method that may be used to fetch a document in a + * single method call and save the content to the given file. + * + * @return FetchStatus object which can be queried for status. + * @param url the url to fetch. + * @param file where to save the fetched document. If this parameter + * is NULL, the content will be read and then discarded. + **/ + FetchStatus Fetch(const char *url, std::ostream *file = NULL); +}; + diff --git a/fbench/src/util/timer.cpp b/fbench/src/util/timer.cpp new file mode 100644 index 00000000000..c96a52a1d70 --- /dev/null +++ b/fbench/src/util/timer.cpp @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include "timer.h" +#include <stdio.h> +#include <thread> + +Timer::Timer() + : _time(), + _timespan(0), + _maxTime(0), + _running(false) +{ +} + +void +Timer::SetMax(double max) +{ + _maxTime = max; +} + +void +Timer::Start() +{ + if (_running) + return; + _running = true; + _time = clock::now(); +} + +void +Timer::Stop() +{ + if (!_running) + return; + _timespan = GetCurrent(); + _running = false; +} + +void +Timer::Clear() +{ + _running = false; + _timespan = 0; +} + +double +Timer::GetTimespan() +{ + if (_running) + Stop(); + return _timespan; +} + +double +Timer::GetRemaining() +{ + double span = GetTimespan(); + return (span < _maxTime) ? _maxTime - span : 0; +} + +double +Timer::GetCurrent() +{ + if (!_running) + return 0; + using milliseconds = std::chrono::duration<double, std::milli>; + return std::chrono::duration_cast<milliseconds>(time_point(clock::now()) - _time).count(); +} + +void +Timer::TestClass() +{ + Timer test; + + printf("*** Start Testing: class Timer ***\n"); + printf("set max time to 5 seconds, then sleep for 1...\n"); + test.SetMax(5000); + test.Start(); + std::this_thread::sleep_for(std::chrono::seconds(1)); + test.Stop(); + printf("elapsed: %f, left:%f\n", + test.GetTimespan(), test.GetRemaining()); + printf("set max time to 1 second, then sleep for 2...\n"); + test.SetMax(1000); + test.Start(); + std::this_thread::sleep_for(std::chrono::seconds(2)); + test.Stop(); + printf("elapsed: %f, left:%f\n", + test.GetTimespan(), test.GetRemaining()); + printf("*** Finished Testing: class Timer ***\n"); +} diff --git a/fbench/src/util/timer.h b/fbench/src/util/timer.h new file mode 100644 index 00000000000..2771f6e00cc --- /dev/null +++ b/fbench/src/util/timer.h @@ -0,0 +1,88 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <chrono> +#include <memory> + +/** + * This class is used to mesure time intervals, or time spans. 
In addition to + * simply measuring timespans, this class also has the ability to set + * a maximum timespan and use this as a reference when handling + * measured time spans. The max time span may be thought of as an + * upper limit for the time spans you are going to measure. After + * measuring a time span you may use the @ref GetRemaining and @ref + * GetOvertime methods to check how the measured time span relates to + * the maximum time span. + **/ +class Timer +{ +private: + typedef std::chrono::steady_clock clock; + typedef std::chrono::time_point<clock> time_point; + time_point _time; + double _timespan; + double _maxTime; + bool _running; + +public: + typedef std::unique_ptr<Timer> UP; + /** + * Create a new timer. + **/ + Timer(); + + /** + * Set the maximum time span. + * + * @param max the maximum time span in ms. + **/ + void SetMax(double max); + + /** + * Start the timer. This will set the start time to the current + * time. + **/ + void Start(); + + /** + * Stop the timer. This will set the measured time span to the + * difference between the current time and the start time. + **/ + void Stop(); + + /** + * Set the measured time spen to 0 ms and stop the timer if it is + * running. + **/ + void Clear(); + + /** + * Get the measured time span. If the timer is running, @ref Stop + * will be called. + * + * @return the measured time span in ms. + **/ + double GetTimespan(); + + /** + * Compare the measured time span with the maximum time span. If the + * maximum time span is greater, the difference between the maximum + * time span and the measured time span is returned. If the measured + * time span is greater, 0 is returned as there is no time remaining. + * + * @return remaining time in ms, or 0 if no time is remaining. + **/ + double GetRemaining(); + + /** + * @return time from start to current in ms + **/ + double GetCurrent(); + + /** + * Static method performing simple testing on the timer class. 
This + * method produces output to stdout that needs manual inspection. + **/ + static void TestClass(); +}; + diff --git a/fbench/util/fbench-formatter.py b/fbench/util/fbench-formatter.py new file mode 100755 index 00000000000..3c7eeca2bb1 --- /dev/null +++ b/fbench/util/fbench-formatter.py @@ -0,0 +1,391 @@ +#!/usr/bin/python +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +"""Usage: read.py [options] [fbench output file] + +Will read from stdin if no file name is given + +Wildcards: + %d : any digits + * : any string + . : any char + +Example: + fbench-formatter.py file%d directory/file + cat filename | fbench-formatter.py + +Options: + -h, --help show this help + -d, --dir=<string> search directory [default: current directory] + -n, --depth=<int> search depth for subfolders [default: no limit] + -f show file list + + -w give output as html + -s give output as minimal tab seperated list + (headers is written to stderr) + -c give output as comma seperated list + (headers is written to stderr) + + + -t, --tag=<string> set tag to output (use with -s) +""" +from math import sqrt +import os +import sys +import getopt +import re +from sets import Set + +delimer = "[--xxyyzz--FBENCH_MAGIC_DELIMITER--zzyyxx--]" +urlFailStr = "FBENCH: URL FETCH FAILED!"; +attributelist = ["NumHits", "NumFastHits", "TotalHitCount", "QueryHits", "QueryOffset", "NumErrors", "SearchTime", "AttributeFetchTime", "FillTime", "DocsSearched", "NodesSearched", "FullCoverage"] +timeAttributes = ['SearchTime', 'AttributeFetchTime', 'FillTime'] + + +# Init +acc = {} +avg = {} +max_d = {} +min_d = {} + +for i in attributelist: + acc[i] = 0 + avg[i] = 0.0 + max_d[i] = 0 + min_d[i] = sys.maxint + +entries = 0 +fail = 0 + +timeArray = list() +thisTime = 0 +totalTime = 0 + +zeroHits = 0 + +# Global options +_filelist = 0 +_output = 0 +_dir = "." 
+_depth = 0 + +_tag = "" +_useTag = 0 + +def usage(): + print >> sys.stderr, __doc__ + +def abort(message): + print >> sys.stderr, message + "\n" + usage() + sys.exit(2) + +def main(argv): + try: + opts, args = getopt.getopt(argv, "h:d:n:t:fwsc", ["help", "dir=", "depth=", "tag="]) + except getopt.GetoptError: + usage() + sys.exit(2) + + global _output + + for opt, arg in opts: + if opt in ("-h", "--help"): + abort("") + elif opt in ("-d", "--dir="): + global _dir + _dir = arg + elif opt in ("-n", "--depth="): + global _depth + try: + _depth = int(arg) + except: + abort("Depth must be an integer") + elif opt == "-f": + global _filelist + _filelist = 1 + elif opt == "-w": + _output = 1 + elif opt == "-s": + _output = 2 + elif opt == "-c": + _output = 3 + elif opt in ("-t", "--tag"): + global _tag, _useTag + _useTag = 1 + _tag = arg + + + # Get file patterns + files = Set() + stdin = 1 + + for argument in args: + + stdin = 0 + + # Regex is translated into emacs-format + filepattern = re.sub('[0-9]*%d', '[0-9]+', argument) + + # Get list of all matching files + if (_depth == 0): + cmd = "find %s -regex '.*/%s'" % (_dir, filepattern) + else: + cmd = "find %s -regex '.*/%s' -maxdepth %d" % (_dir, filepattern, _depth) + fi = os.popen(cmd) + + list = fi.readlines() + for i in list: + files.add( i.strip() ) + if len(list) == 0: + print >> sys.stderr, "\"%s\" does not match any files" % filepattern + + # Exit if no files or stdin + if len(files) == 0 and stdin == 0: + print >> sys.stderr, "No matching files found" + sys.exit(1) + + # Print filenames + if _filelist != 0: + print "Files: " + print files + print "" + + # Print number of files + if _filelist != 0: + print >> sys.stderr, "Processing %d files..." % len(files) + + # Parse all files + for file in files: + parsefile(file) + + if stdin == 1: + print >> sys.stderr, "Processing stdin..." 
+ parsefile("-") + + calculate() + printResult() + +def parsefile(filename): + global zeroHits, entries, fail, timeArray, thisTime, acc, min_d, max_d + + if filename == "-": + file = sys.stdin + else: + file = open(filename, "r") + + valid = 0 + + for rawline in file: + # Skip empty lines + if (rawline == ""): + continue + + line = rawline.strip() + + # Deliminer + if (line == delimer): + if valid == 1: + entries += 1 + timeArray.append(thisTime) + thisTime = 0 + valid = 0 + continue + + if (line == urlFailStr): + fail += 1 + entries += 1 + continue + + # Split line at ':' + match = line.split(':') + if len(match) < 2: + continue + + name = match[0].strip() + valueStr = match[1].strip() + + if ( name in attributelist ): + valid = 1 + print name + + # Extract info from header + value = int(valueStr) + acc[name] += value + + if (value == 0 and name == "TotalHitCount"): + zeroHits += 1 + + if (name in timeAttributes): + thisTime += value + + # Find min/max + if value < min_d[name]: + min_d[name] = value + + if value > max_d[name]: + max_d[name] = value + + file.close() + +def calculate(): + + global avg, avgTime, Sn, totalTime, timeArray + + successes = entries - fail + + # Calculate average values + if successes == 0: + print "Could not find any successfully runned queries" + print "Make sure benchmarkdata reporting is activated" + sys.exit(1); + + for entry in acc.keys(): + avg[entry] = float(acc[entry]) / successes + + # Calculate average total time + totalTime = 0 + for i in timeAttributes: + totalTime += acc[i] + avgTime = float(totalTime) / float(successes) + + # Calculate standard deviation + Sn = 0.0 + for sample in timeArray[1:]: + Sn += ( float(sample)-avgTime )**2 + Sn = sqrt( Sn / successes ) + +def printResult(): + if _output == 0: + printDefault() + elif _output == 1: + printHtml() + elif _output == 2: + printSimple() + else: + printCommaSeperated() + +def printDefault(): + # Ordinary printing + print "%21s\t%14s\t%10s\t%6s\t%6s" % ("NAME", "TOTAL", 
"AVG", "MIN", "MAX") + for entry in acc.keys(): + print "%21s:\t%14d\t%10.2f\t%6d\t%6d" % (entry, acc[entry], avg[entry], min_d[entry], max_d[entry]) + print "" + print "%21s:\t%14.3f\t%10.2f\t%6d\t%6d" % ( "Search+Fill+AttrFetch", totalTime, avgTime, min(timeArray), max(timeArray) ) + print "%21s:\t%14.3f" % ( "Standard deviation", Sn) + print "%21s:\t%14d" % ( "Number of requests", entries) + print "%21s:\t%14d" % ( "successful requests", entries-fail) + print "%21s:\t%14d" % ( "failed requests", fail) + + print "%21s:\t%14d" % ( "zero hit requests", zeroHits) + +def printHtml(): + + # HTML printing + print "<html>" + print " <head>" + print " <title=\"Fbench\">" + print " </head>" + print " <body>" + + print " <table>" + print " <tr>" + print " <th align='left'>Name</th>" + print " <th>Total</th>" + print " <th>Avg</th>" + print " <th>Min</th>" + print " <th>Max</th>" + print " </tr>" + for entry in acc.keys(): + print " <tr>" + print " <td>%s</td>" % entry + print " <td align='right'>%d</td>" % acc[entry] + print " <td align='right'>%.2f</td>" % avg[entry] + print " <td align='right'>%d</td>" % min_d[entry] + print " <td align='right'>%d</td>" % max_d[entry] + print " </tr>" + print " </table>" + + print " <table>" + print " <tr>" + print " <th align='left'>Average time</th>" + print " <td align='right'>%.3f ms </td>" % avgTime + print " </tr>" + print " <th align='left'>Standard deviation</th>" + print " <td align='right'>%.3f</td>" % Sn + print " </tr>" + print " </tr>" + print " <th align='left'>Number of requests</th>" + print " <td align='right'>%d</td>" % entries + print " </tr>" + print " </tr>" + print " <th align='left'>Number of successful requests</th>" + print " <td align='right'>%d</td>" % entries - fail + print " </tr>" + print " </tr>" + print " <th align='left'>Number of failed requests</th>" + print " <td align='right'>%d</td>" % fail + print " </tr>" + print " </tr>" + print " <th align='left'>Number of zero hit requests</th>" + print " <td 
align='right'>%d</td>" % zeroHits + print " </tr>" + print " </table>" + print " </body>" + +def printSimple(): + # Minimal print + printHeader = "" + for entry in acc.keys(): + printHeader += entry + '\t' + printHeader += "NumRequests\t" + printHeader += "NumSuccess\t" + printHeader += "NumFailed\t" + printHeader += "ZeroHitRequests\t" + printHeader += "TotalTime\t" + if _useTag: + printHeader += "Tag" + print >> sys.stderr, printHeader + + printtext = "" + for entry in acc.keys(): + printtext += str(acc[entry]) + '\t' + printtext += str(entries) + '\t' + printtext += str(entries-fail) + '\t' + printtext += str(fail) + '\t' + printtext += str(zeroHits) + '\t' + printtext += str(totalTime) + '\t' + if _useTag: + printtext += _tag + print printtext + +def printCommaSeperated(): + printHeader = "" + for entry in acc.keys(): + printHeader += entry + ',' + printHeader += "NumRequests," + printHeader += "NumSuccess," + printHeader += "NumFailed," + printHeader += "ZeroHitRequests," + if _useTag: + printHeader += "TotalTime," + printHeader += "Tag" + else: + printHeader += "TotalTime" + print >> sys.stderr, printHeader + + printtext = "" + for entry in acc.keys(): + printtext += str(acc[entry]) + ',' + printtext += str(entries) + ',' + printtext += str(entries-fail) + ',' + printtext += str(fail) + ',' + printtext += str(zeroHits) + ',' + if _useTag: + printtext += str(totalTime) + ',' + printtext += _tag + else: + printtext += str(totalTime) + print printtext + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/fbench/util/plot.pl b/fbench/util/plot.pl new file mode 100755 index 00000000000..78964b170b0 --- /dev/null +++ b/fbench/util/plot.pl @@ -0,0 +1,82 @@ +#!/usr/bin/perl -s +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +# TODO +# - parameter for input and output file name +# - more graphs + +sub usage { + die qq{usage: plot.pl [-h] [-x] <plotno> <format> +Plot the contents of 'result.txt' to 'graph.<format>'. + -h This help + -x Output to X11 window + plotno: 1: Response Time Percentiles by NumCli + 2: Rate by NumCli + 3: Response Time Percentiles by Rate + format: png (default), ps +}; +} + +$plotno = shift || die usage; +$term = shift || "png"; + +if ($h) { + usage; +} + +# setup the output +if ($x) { + # X11 output + open(PLOTSCRIPT, "| gnuplot -persist"); + print PLOTSCRIPT "set term X11\n"; + +} else { + open(PLOTSCRIPT, "| gnuplot"); + if ("$term" eq "ps") { + print PLOTSCRIPT "set term postscript\n"; + print PLOTSCRIPT "set output \"graph.ps\"\n"; + } + else { + print PLOTSCRIPT "set term png transparent small medium enhanced\n"; + print PLOTSCRIPT "set output \"graph.png\"\n"; + } +} +select(PLOTSCRIPT); + + + +# choose the graph +if ($plotno == 1) { + # Cli Percentile + print qq{ +set data style lines +set title "Response Time Percentiles by NumCli" +set xlabel "Number of clients" +set ylabel "Response time (msec)" +set key left top +plot 'result.txt' using 1:10 title "max", 'result.txt' using 1:17 title "99 %", 'result.txt' using 1:16 title "95 %", 'result.txt' using 1:15 title "90 %", 'result.txt' using 1:14 title "75 %", 'result.txt' using 1:13 title "50 %", 'result.txt' using 1:12 title "25 %", 'result.txt' using 1:9 title "min" + }; + +} elsif ($plotno == 2) { + # Cli Rate + print qq{ +set data style lines +set title "Rate by NumCli" +set xlabel "Number of clients" +set ylabel "Rate (queries/sec)" +set nokey +plot 'result.txt' using 1:18 + }; +} elsif ($plotno == 3) { + # Rate Percentile + print qq{ +set data style lines +set title "Response Time Percentiles by Rate" +set xlabel "Rate (queries/sec)" +set ylabel "Response time (msec)" +set key left top +plot 'result.txt' using 18:17 title "99 %", 'result.txt' using 18:16 title "95 %", 'result.txt' using 18:15 title 
"90 %", 'result.txt' using 18:14 title "75 %", 'result.txt' using 18:13 title "50 %", 'result.txt' using 18:12 title "25 %" + }; +} + +close(PLOTSCRIPT); diff --git a/fbench/util/pretest.sh b/fbench/util/pretest.sh new file mode 100755 index 00000000000..3292c56c22a --- /dev/null +++ b/fbench/util/pretest.sh @@ -0,0 +1,23 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +# +# This script will be run by the 'runtests.sh' script before +# each individual test run. It will typically use the 'geturl' +# program to clear the fsearch and fdispatch caches. +# + +# do not produce any output, log error messages to 'pretest.err' +exec > /dev/null 2>>pretest.err + +# +# Clear fsearch and fdispatch caches. hostX and portX should be +# replaced with real host names and port numbers referring to the http +# daemons of the fsearch and fdispatch programs you are benchmarking. +# +#bin/geturl host1 port1 "/admin?command=clear_caches" +#bin/geturl host2 port2 "/admin?command=clear_caches" +#bin/geturl host3 port3 "/admin?command=clear_caches" +#bin/geturl host4 port4 "/admin?command=clear_caches" +#bin/geturl host5 port5 "/admin?command=clear_caches" +#... diff --git a/fbench/util/resultfilter.pl b/fbench/util/resultfilter.pl new file mode 100755 index 00000000000..a49496cc27b --- /dev/null +++ b/fbench/util/resultfilter.pl @@ -0,0 +1,14 @@ +#!/usr/bin/perl +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +# This script converts an fbench summary report read from stdin to a +# single line containing only the numerical values written to +# stdout. 
+
+while(<>) {
+    chomp();
+    if(/:\s*([-+]?[\d.]+)/) {    # keep only the number that follows a colon
+        print $1, " ";           # values are emitted space-separated on one line
+    }
+}
+print "\n";
diff --git a/fbench/util/runtests.sh b/fbench/util/runtests.sh
new file mode 100755
index 00000000000..58b72ae1f86
--- /dev/null
+++ b/fbench/util/runtests.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# Runs fbench once for every combination of client count and cycle time
+# in the requested ranges, printing one filtered result line per run.
+
+opt_o=false
+opt_l=false
+
+opt_error=false
+
+while getopts "ol" option; do
+    case $option in
+        o) opt_o=true;;
+        l) opt_l=true;;
+        # getopts reports an unknown option as '?'. The pattern must stay
+        # unquoted: a quoted "*" would only match a literal asterisk.
+        *) opt_error=true;;
+    esac
+done
+
+shift $((OPTIND - 1))
+if [ $# -lt 8 ] || [ "$opt_error" = "true" ]; then
+    echo "usage: runtests.sh [-o] [-l] <minClients> <maxClients> <deltaClients>"
+    echo " <minCycle> <maxCycle> <deltaCycle> [fbench options] <hostname> <port>"
+    echo ""
+    echo "The number of clients varies from <minClients> to <maxClients> with"
+    echo "<deltaClients> increments. For each client count, the cycle time will"
+    echo "vary in the same way according to <minCycle>, <maxCycle> and <deltaCycle>."
+    echo "fbench is run with each combination of client count and cycle time, and"
+    echo "the result output is filtered with the 'resultfilter.pl' script."
+    echo "If you want to save the results you should redirect stdout to a file."
+    echo ""
+    echo " -o : change the order in which the tests are performed so that client"
+    echo " count varies for each cycle time."
+    echo " -l : output a blank line between test subseries. If -o is not specified this"
+    echo " will output a blank line between test series using different client count."
+    echo " If -o was specified this will output blank lines between test series"
+    echo " using different cycle time."
+    echo ""
+    echo "[fbench options] <hostname> <port>: These arguments are passed to fbench."
+    echo " There are 2 things to remember: first; do not specify either of the -n"
+    echo " or -c options since they will override the values for client count and"
+    echo " cycle time generated by this script. secondly; make sure you specify"
+    echo " the correct host and port number. See the fbench usage (run fbench"
+    echo " without parameters) for more info on how to invoke fbench."
+    exit 1
+fi
+
+minClients=$1; shift
+maxClients=$1; shift
+deltaClients=$1; shift
+minCycle=$1; shift
+maxCycle=$1; shift
+deltaCycle=$1; shift
+
+# A non-numeric delta makes the test itself fail with status 2; negating the
+# whole command reports that as an error too instead of silently continuing.
+if ! [ "$deltaClients" -gt 0 ] 2>/dev/null; then
+    echo "error: deltaClients must be greater than 0 !"
+    exit 1
+fi
+
+if ! [ "$deltaCycle" -gt 0 ] 2>/dev/null; then
+    echo "error: deltaCycle must be greater than 0 !"
+    exit 1
+fi
+
+echo "# fbench results collected by 'runtests.sh'."
+echo "#"
+echo "#1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20"
+echo "#clients duration cycle lowlimit skip fail ok overtime min max avg 25% 50% 75% 90% 95% 99% rate util zerohit"
+echo "#--------------------------------------------------------------------------------------------------"
+
+# "$@" is quoted so that the remaining fbench arguments reach fbench exactly
+# as given, even when one of them contains whitespace.
+if [ "$opt_o" = "true" ]; then
+    cycle=$minCycle
+    while [ "$cycle" -le "$maxCycle" ]; do
+        clients=$minClients
+        while [ "$clients" -le "$maxClients" ]; do
+            test -f pretest.sh && ./pretest.sh > /dev/null 2>&1
+            fbench -n "$clients" -c "$cycle" "$@" | resultfilter.pl
+            clients=$((clients + deltaClients))
+        done
+        [ "$opt_l" = "true" ] && echo ""
+        cycle=$((cycle + deltaCycle))
+    done
+else
+    clients=$minClients
+    while [ "$clients" -le "$maxClients" ]; do
+        cycle=$minCycle
+        while [ "$cycle" -le "$maxCycle" ]; do
+            test -f pretest.sh && ./pretest.sh > /dev/null 2>&1
+            fbench -n "$clients" -c "$cycle" "$@" | resultfilter.pl
+            cycle=$((cycle + deltaCycle))
+        done
+        [ "$opt_l" = "true" ] && echo ""
+        clients=$((clients + deltaClients))
+    done
+fi
diff --git a/fbench/util/separate.pl b/fbench/util/separate.pl
new file mode 100755
index 00000000000..429ea4d0e37
--- /dev/null
+++ b/fbench/util/separate.pl
@@ -0,0 +1,39 @@
+#!/usr/bin/perl
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# Copies a tabular numeric file from the named files (or stdin) to stdout,
+# printing a blank line whenever the value in column 'sepcol' (0-based)
+# differs from the previous data line. Lines starting with '#' are passed
+# through untouched.
+
+use strict;
+use warnings;
+
+my $sepcol = shift;
+
+if (!defined $sepcol || $sepcol eq "") {
+    die qq{usage: separate.pl <sepcol>
+ Separate a tabular numeric file into chunks using a blank
+ line whenever the value in column 'sepcol' changes.
+};
+}
+
+# undef means "no data line seen yet", so no real column value can collide
+# with the sentinel.
+my $oldval;
+
+while (my $line = <>) {
+    if ($line =~ /^#/) {
+        print $line;
+        next;
+    }
+    chomp $line;
+    my @vals = split ' ', $line;
+    # A missing column compares as 0, which is how undef compared numerically.
+    my $newval = $vals[$sepcol] // 0;
+    if (!defined $oldval || $newval != $oldval) {
+        print "\n";
+        $oldval = $newval;
+    }
+    print "@vals\n";
+}
|