summaryrefslogtreecommitdiffstats
path: root/fbench/util
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
committerJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
commit72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /fbench/util
Publish
Diffstat (limited to 'fbench/util')
-rwxr-xr-xfbench/util/fbench-formatter.py391
-rwxr-xr-xfbench/util/plot.pl82
-rwxr-xr-xfbench/util/pretest.sh23
-rwxr-xr-xfbench/util/resultfilter.pl14
-rwxr-xr-xfbench/util/runtests.sh92
-rwxr-xr-xfbench/util/separate.pl29
6 files changed, 631 insertions, 0 deletions
diff --git a/fbench/util/fbench-formatter.py b/fbench/util/fbench-formatter.py
new file mode 100755
index 00000000000..3c7eeca2bb1
--- /dev/null
+++ b/fbench/util/fbench-formatter.py
@@ -0,0 +1,391 @@
+#!/usr/bin/python
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+"""Usage: fbench-formatter.py [options] [fbench output file]
+
+Will read from stdin if no file name is given
+
+Wildcards:
+ %d : any digits
+ * : any string
+ . : any char
+
+Example:
+ fbench-formatter.py file%d directory/file
+ cat filename | fbench-formatter.py
+
+Options:
+ -h, --help show this help
+ -d, --dir=<string> search directory [default: current directory]
+ -n, --depth=<int> search depth for subfolders [default: no limit]
+ -f show file list
+
+ -w give output as html
+ -s give output as minimal tab separated list
+ (headers are written to stderr)
+ -c give output as comma separated list
+ (headers are written to stderr)
+
+
+ -t, --tag=<string> set tag to output (use with -s)
+"""
+from math import sqrt
+import os
+import sys
+import getopt
+import re
+from sets import Set
+
+delimer = "[--xxyyzz--FBENCH_MAGIC_DELIMITER--zzyyxx--]"
+urlFailStr = "FBENCH: URL FETCH FAILED!";
+attributelist = ["NumHits", "NumFastHits", "TotalHitCount", "QueryHits", "QueryOffset", "NumErrors", "SearchTime", "AttributeFetchTime", "FillTime", "DocsSearched", "NodesSearched", "FullCoverage"]
+timeAttributes = ['SearchTime', 'AttributeFetchTime', 'FillTime']
+
+
+# Init
+acc = {}
+avg = {}
+max_d = {}
+min_d = {}
+
+for i in attributelist:
+ acc[i] = 0
+ avg[i] = 0.0
+ max_d[i] = 0
+ min_d[i] = sys.maxint
+
+entries = 0
+fail = 0
+
+timeArray = list()
+thisTime = 0
+totalTime = 0
+
+zeroHits = 0
+
+# Global options
+_filelist = 0
+_output = 0
+_dir = "."
+_depth = 0
+
+_tag = ""
+_useTag = 0
+
+def usage():
+ print >> sys.stderr, __doc__
+
+def abort(message):
+ print >> sys.stderr, message + "\n"
+ usage()
+ sys.exit(2)
+
+def main(argv):
+ try:
+ opts, args = getopt.getopt(argv, "h:d:n:t:fwsc", ["help", "dir=", "depth=", "tag="])
+ except getopt.GetoptError:
+ usage()
+ sys.exit(2)
+
+ global _output
+
+ for opt, arg in opts:
+ if opt in ("-h", "--help"):
+ abort("")
+ elif opt in ("-d", "--dir="):
+ global _dir
+ _dir = arg
+ elif opt in ("-n", "--depth="):
+ global _depth
+ try:
+ _depth = int(arg)
+ except:
+ abort("Depth must be an integer")
+ elif opt == "-f":
+ global _filelist
+ _filelist = 1
+ elif opt == "-w":
+ _output = 1
+ elif opt == "-s":
+ _output = 2
+ elif opt == "-c":
+ _output = 3
+ elif opt in ("-t", "--tag"):
+ global _tag, _useTag
+ _useTag = 1
+ _tag = arg
+
+
+ # Get file patterns
+ files = Set()
+ stdin = 1
+
+ for argument in args:
+
+ stdin = 0
+
+ # Regex is translated into emacs-format
+ filepattern = re.sub('[0-9]*%d', '[0-9]+', argument)
+
+ # Get list of all matching files
+ if (_depth == 0):
+ cmd = "find %s -regex '.*/%s'" % (_dir, filepattern)
+ else:
+ cmd = "find %s -regex '.*/%s' -maxdepth %d" % (_dir, filepattern, _depth)
+ fi = os.popen(cmd)
+
+ list = fi.readlines()
+ for i in list:
+ files.add( i.strip() )
+ if len(list) == 0:
+ print >> sys.stderr, "\"%s\" does not match any files" % filepattern
+
+ # Exit if no files or stdin
+ if len(files) == 0 and stdin == 0:
+ print >> sys.stderr, "No matching files found"
+ sys.exit(1)
+
+ # Print filenames
+ if _filelist != 0:
+ print "Files: "
+ print files
+ print ""
+
+ # Print number of files
+ if _filelist != 0:
+ print >> sys.stderr, "Processing %d files..." % len(files)
+
+ # Parse all files
+ for file in files:
+ parsefile(file)
+
+ if stdin == 1:
+ print >> sys.stderr, "Processing stdin..."
+ parsefile("-")
+
+ calculate()
+ printResult()
+
+def parsefile(filename):
+ global zeroHits, entries, fail, timeArray, thisTime, acc, min_d, max_d
+
+ if filename == "-":
+ file = sys.stdin
+ else:
+ file = open(filename, "r")
+
+ valid = 0
+
+ for rawline in file:
+ # Skip empty lines
+ if (rawline == ""):
+ continue
+
+ line = rawline.strip()
+
+ # Deliminer
+ if (line == delimer):
+ if valid == 1:
+ entries += 1
+ timeArray.append(thisTime)
+ thisTime = 0
+ valid = 0
+ continue
+
+ if (line == urlFailStr):
+ fail += 1
+ entries += 1
+ continue
+
+ # Split line at ':'
+ match = line.split(':')
+ if len(match) < 2:
+ continue
+
+ name = match[0].strip()
+ valueStr = match[1].strip()
+
+ if ( name in attributelist ):
+ valid = 1
+ print name
+
+ # Extract info from header
+ value = int(valueStr)
+ acc[name] += value
+
+ if (value == 0 and name == "TotalHitCount"):
+ zeroHits += 1
+
+ if (name in timeAttributes):
+ thisTime += value
+
+ # Find min/max
+ if value < min_d[name]:
+ min_d[name] = value
+
+ if value > max_d[name]:
+ max_d[name] = value
+
+ file.close()
+
+def calculate():
+
+ global avg, avgTime, Sn, totalTime, timeArray
+
+ successes = entries - fail
+
+ # Calculate average values
+ if successes == 0:
+ print "Could not find any successfully runned queries"
+ print "Make sure benchmarkdata reporting is activated"
+ sys.exit(1);
+
+ for entry in acc.keys():
+ avg[entry] = float(acc[entry]) / successes
+
+ # Calculate average total time
+ totalTime = 0
+ for i in timeAttributes:
+ totalTime += acc[i]
+ avgTime = float(totalTime) / float(successes)
+
+ # Calculate standard deviation
+ Sn = 0.0
+ for sample in timeArray[1:]:
+ Sn += ( float(sample)-avgTime )**2
+ Sn = sqrt( Sn / successes )
+
+def printResult():
+ if _output == 0:
+ printDefault()
+ elif _output == 1:
+ printHtml()
+ elif _output == 2:
+ printSimple()
+ else:
+ printCommaSeperated()
+
+def printDefault():
+ # Ordinary printing
+ print "%21s\t%14s\t%10s\t%6s\t%6s" % ("NAME", "TOTAL", "AVG", "MIN", "MAX")
+ for entry in acc.keys():
+ print "%21s:\t%14d\t%10.2f\t%6d\t%6d" % (entry, acc[entry], avg[entry], min_d[entry], max_d[entry])
+ print ""
+ print "%21s:\t%14.3f\t%10.2f\t%6d\t%6d" % ( "Search+Fill+AttrFetch", totalTime, avgTime, min(timeArray), max(timeArray) )
+ print "%21s:\t%14.3f" % ( "Standard deviation", Sn)
+ print "%21s:\t%14d" % ( "Number of requests", entries)
+ print "%21s:\t%14d" % ( "successful requests", entries-fail)
+ print "%21s:\t%14d" % ( "failed requests", fail)
+
+ print "%21s:\t%14d" % ( "zero hit requests", zeroHits)
+
+def printHtml():
+
+ # HTML printing
+ print "<html>"
+ print " <head>"
+ print " <title=\"Fbench\">"
+ print " </head>"
+ print " <body>"
+
+ print " <table>"
+ print " <tr>"
+ print " <th align='left'>Name</th>"
+ print " <th>Total</th>"
+ print " <th>Avg</th>"
+ print " <th>Min</th>"
+ print " <th>Max</th>"
+ print " </tr>"
+ for entry in acc.keys():
+ print " <tr>"
+ print " <td>%s</td>" % entry
+ print " <td align='right'>%d</td>" % acc[entry]
+ print " <td align='right'>%.2f</td>" % avg[entry]
+ print " <td align='right'>%d</td>" % min_d[entry]
+ print " <td align='right'>%d</td>" % max_d[entry]
+ print " </tr>"
+ print " </table>"
+
+ print " <table>"
+ print " <tr>"
+ print " <th align='left'>Average time</th>"
+ print " <td align='right'>%.3f ms </td>" % avgTime
+ print " </tr>"
+ print " <th align='left'>Standard deviation</th>"
+ print " <td align='right'>%.3f</td>" % Sn
+ print " </tr>"
+ print " </tr>"
+ print " <th align='left'>Number of requests</th>"
+ print " <td align='right'>%d</td>" % entries
+ print " </tr>"
+ print " </tr>"
+ print " <th align='left'>Number of successful requests</th>"
+ print " <td align='right'>%d</td>" % entries - fail
+ print " </tr>"
+ print " </tr>"
+ print " <th align='left'>Number of failed requests</th>"
+ print " <td align='right'>%d</td>" % fail
+ print " </tr>"
+ print " </tr>"
+ print " <th align='left'>Number of zero hit requests</th>"
+ print " <td align='right'>%d</td>" % zeroHits
+ print " </tr>"
+ print " </table>"
+ print " </body>"
+
+def printSimple():
+ # Minimal print
+ printHeader = ""
+ for entry in acc.keys():
+ printHeader += entry + '\t'
+ printHeader += "NumRequests\t"
+ printHeader += "NumSuccess\t"
+ printHeader += "NumFailed\t"
+ printHeader += "ZeroHitRequests\t"
+ printHeader += "TotalTime\t"
+ if _useTag:
+ printHeader += "Tag"
+ print >> sys.stderr, printHeader
+
+ printtext = ""
+ for entry in acc.keys():
+ printtext += str(acc[entry]) + '\t'
+ printtext += str(entries) + '\t'
+ printtext += str(entries-fail) + '\t'
+ printtext += str(fail) + '\t'
+ printtext += str(zeroHits) + '\t'
+ printtext += str(totalTime) + '\t'
+ if _useTag:
+ printtext += _tag
+ print printtext
+
+def printCommaSeperated():
+ printHeader = ""
+ for entry in acc.keys():
+ printHeader += entry + ','
+ printHeader += "NumRequests,"
+ printHeader += "NumSuccess,"
+ printHeader += "NumFailed,"
+ printHeader += "ZeroHitRequests,"
+ if _useTag:
+ printHeader += "TotalTime,"
+ printHeader += "Tag"
+ else:
+ printHeader += "TotalTime"
+ print >> sys.stderr, printHeader
+
+ printtext = ""
+ for entry in acc.keys():
+ printtext += str(acc[entry]) + ','
+ printtext += str(entries) + ','
+ printtext += str(entries-fail) + ','
+ printtext += str(fail) + ','
+ printtext += str(zeroHits) + ','
+ if _useTag:
+ printtext += str(totalTime) + ','
+ printtext += _tag
+ else:
+ printtext += str(totalTime)
+ print printtext
+
+# Run as a script: hand the command line arguments (without the program name) to main()
+if __name__ == "__main__":
+ main(sys.argv[1:])
diff --git a/fbench/util/plot.pl b/fbench/util/plot.pl
new file mode 100755
index 00000000000..78964b170b0
--- /dev/null
+++ b/fbench/util/plot.pl
@@ -0,0 +1,82 @@
+#!/usr/bin/perl -s
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# TODO
+# - parameter for input and output file name
+# - more graphs
+
+# Print the usage text on stderr and terminate the script.
+sub usage {
+    my $text = <<'END_OF_USAGE';
+usage: plot.pl [-h] [-x] <plotno> <format>
+Plot the contents of 'result.txt' to 'graph.<format>'.
+ -h This help
+ -x Output to X11 window
+ plotno: 1: Response Time Percentiles by NumCli
+ 2: Rate by NumCli
+ 3: Response Time Percentiles by Rate
+ format: png (default), ps
+END_OF_USAGE
+    die $text;
+}
+
+# $h and $x are set by perl's -s switch parsing (see the #! line).
+# Honour -h before complaining about missing arguments.
+usage() if $h;
+
+my $plotno = shift(@ARGV) || usage();
+my $term = shift(@ARGV) || "png";
+
+# Only the three graphs below exist; refuse anything else instead of
+# producing an empty graph
+usage() unless grep { $plotno == $_ } 1, 2, 3;
+
+# choose the graph
+my $commands;
+if ($plotno == 1) {
+    # Cli Percentile
+    $commands = qq{
+set style data lines
+set title "Response Time Percentiles by NumCli"
+set xlabel "Number of clients"
+set ylabel "Response time (msec)"
+set key left top
+plot 'result.txt' using 1:10 title "max", 'result.txt' using 1:17 title "99 %", 'result.txt' using 1:16 title "95 %", 'result.txt' using 1:15 title "90 %", 'result.txt' using 1:14 title "75 %", 'result.txt' using 1:13 title "50 %", 'result.txt' using 1:12 title "25 %", 'result.txt' using 1:9 title "min"
+};
+} elsif ($plotno == 2) {
+    # Cli Rate
+    $commands = qq{
+set style data lines
+set title "Rate by NumCli"
+set xlabel "Number of clients"
+set ylabel "Rate (queries/sec)"
+unset key
+plot 'result.txt' using 1:18
+};
+} else {
+    # Rate Percentile
+    $commands = qq{
+set style data lines
+set title "Response Time Percentiles by Rate"
+set xlabel "Rate (queries/sec)"
+set ylabel "Response time (msec)"
+set key left top
+plot 'result.txt' using 18:17 title "99 %", 'result.txt' using 18:16 title "95 %", 'result.txt' using 18:15 title "90 %", 'result.txt' using 18:14 title "75 %", 'result.txt' using 18:13 title "50 %", 'result.txt' using 18:12 title "25 %"
+};
+}
+
+# start gnuplot; -persist keeps the X11 window open after gnuplot exits
+my @gnuplot = $x ? ("gnuplot", "-persist") : ("gnuplot");
+open(my $plotscript, "|-", @gnuplot)
+    or die "plot.pl: cannot run gnuplot: $!\n";
+
+# setup the output
+if ($x) {
+    # X11 output
+    print $plotscript "set term X11\n";
+} elsif ($term eq "ps") {
+    print $plotscript "set term postscript\n";
+    print $plotscript "set output \"graph.ps\"\n";
+} else {
+    print $plotscript "set term png transparent small medium enhanced\n";
+    print $plotscript "set output \"graph.png\"\n";
+}
+
+print $plotscript $commands;
+
+# closing a pipe waits for gnuplot and reports a failed exit status
+close($plotscript)
+    or die "plot.pl: gnuplot failed (status $?)\n";
diff --git a/fbench/util/pretest.sh b/fbench/util/pretest.sh
new file mode 100755
index 00000000000..3292c56c22a
--- /dev/null
+++ b/fbench/util/pretest.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#
+# This script will be run by the 'runtests.sh' script before
+# each individual test run. It will typically use the 'geturl'
+# program to clear the fsearch and fdispatch caches.
+#
+
+# do not produce any output, log error messages to 'pretest.err'
+exec > /dev/null 2>>pretest.err
+
+#
+# Clear fsearch and fdispatch caches. hostX and portX should be
+# replaced with real host names and port numbers referring to the http
+# daemons of the fsearch and fdispatch programs you are benchmarking.
+#
+#bin/geturl host1 port1 "/admin?command=clear_caches"
+#bin/geturl host2 port2 "/admin?command=clear_caches"
+#bin/geturl host3 port3 "/admin?command=clear_caches"
+#bin/geturl host4 port4 "/admin?command=clear_caches"
+#bin/geturl host5 port5 "/admin?command=clear_caches"
+#...
diff --git a/fbench/util/resultfilter.pl b/fbench/util/resultfilter.pl
new file mode 100755
index 00000000000..a49496cc27b
--- /dev/null
+++ b/fbench/util/resultfilter.pl
@@ -0,0 +1,14 @@
+#!/usr/bin/perl
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# This script converts an fbench summary report read from stdin to a
+# single line containing only the numerical values written to
+# stdout.
+
+# For every input line that has a ':' followed by a numeric value, print
+# that value and a space; finish the single output line with a newline.
+while (my $line = <>) {
+    chomp $line;
+    next unless $line =~ /:\s*([-+]?[\d.]+)/;
+    print "$1 ";
+}
+print "\n";
diff --git a/fbench/util/runtests.sh b/fbench/util/runtests.sh
new file mode 100755
index 00000000000..58b72ae1f86
--- /dev/null
+++ b/fbench/util/runtests.sh
@@ -0,0 +1,92 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# Option flags; opt_error is set when an unknown option is given
+opt_o=false
+opt_l=false
+
+opt_error=false
+
+while getopts "ol" option; do
+    case $option in
+        o) opt_o=true;;
+        l) opt_l=true;;
+        # getopts sets $option to '?' for an unknown option. The catch-all
+        # pattern must be unquoted: a quoted "*" only matches a literal '*'.
+        *) opt_error=true;;
+    esac
+done
+
+shift $(($OPTIND - 1))
+if [ $# -lt 8 ] || [ "$opt_error" = "true" ]; then
+ echo "usage: runtests.sh [-o] [-l] <minClients> <maxClients> <deltaClients>"
+ echo " <minCycle> <maxCycle> <deltaCycle> [fbench options] <hostname> <port>"
+ echo ""
+ echo "The number of clients varies from <minClients> to <maxClients> with"
+ echo "<deltaClients> increments. For each client count, the cycle time will"
+ echo "vary in the same way according to <minCycle>, <maxCycle> and <deltaCycle>."
+ echo "fbench is run with each combination of client count and cycle time, and"
+ echo "the result output is filtered with the 'resultfilter.pl' script."
+ echo "If you want to save the results you should redirect stdout to a file."
+ echo ""
+ echo " -o : change the order in which the tests are performed so that client"
+ echo " count varies for each cycle time."
+ echo " -l : output a blank line between test subseries. If -o is not specified this"
+ echo " will output a blank line between test series using different client count."
+ echo " If -o was specified this will output blank lines between test series"
+ echo " using different cycle time."
+ echo ""
+ echo "[fbench options] <hostname> <port>: These arguments are passed to fbench."
+ echo " There are 2 things to remenber: first; do not specify either of the -n"
+ echo " or -c options since they will override the values for client count and"
+ echo " cycle time generated by this script. secondly; make sure you specify"
+ echo " the correct host and port number. See the fbench usage (run fbench"
+ echo " without parameters) for more info on how to invoke fbench."
+ exit 1
+fi
+
+# Take the six range values; the remaining arguments are passed to fbench
+minClients=$1; shift
+maxClients=$1; shift
+deltaClients=$1; shift
+minCycle=$1; shift
+maxCycle=$1; shift
+deltaCycle=$1; shift
+
+# The increments must be positive integers, otherwise the loops below never
+# advance. '! [ ... ]' is also true when the test itself fails because the
+# value is not a number, so such values are rejected as well.
+if ! [ "$deltaClients" -gt 0 ] 2>/dev/null; then
+    echo "error: deltaClients must be greater than 0 !"
+    exit 1
+fi
+
+if ! [ "$deltaCycle" -gt 0 ] 2>/dev/null; then
+    echo "error: deltaCycle must be greater than 0 !"
+    exit 1
+fi
+
+# Header describing the columns of the result lines
+echo "# fbench results collected by 'runtests.sh'."
+echo "#"
+echo "#1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20"
+echo "#clients duration cycle lowlimit skip fail ok overtime min max avg 25% 50% 75% 90% 95% 99% rate util zerohit"
+echo "#--------------------------------------------------------------------------------------------------"
+
+# Run fbench for every combination of client count and cycle time. With -o
+# the cycle time is the outer loop, otherwise the client count is.
+# "$@" is quoted so that the fbench arguments are passed on unchanged, even
+# when they contain whitespace or wildcard characters.
+if [ "$opt_o" = "true" ]; then
+    cycle=$minCycle
+    while [ "$cycle" -le "$maxCycle" ]; do
+        clients=$minClients
+        while [ "$clients" -le "$maxClients" ]; do
+            # optional hook, run before each individual test
+            test -f pretest.sh && ./pretest.sh > /dev/null 2>&1
+            fbench -n "$clients" -c "$cycle" "$@" | resultfilter.pl
+            clients=$(($clients + $deltaClients))
+        done
+        [ "$opt_l" = "true" ] && echo ""
+        cycle=$(($cycle + $deltaCycle))
+    done
+else
+    clients=$minClients
+    while [ "$clients" -le "$maxClients" ]; do
+        cycle=$minCycle
+        while [ "$cycle" -le "$maxCycle" ]; do
+            # optional hook, run before each individual test
+            test -f pretest.sh && ./pretest.sh > /dev/null 2>&1
+            fbench -n "$clients" -c "$cycle" "$@" | resultfilter.pl
+            cycle=$(($cycle + $deltaCycle))
+        done
+        [ "$opt_l" = "true" ] && echo ""
+        clients=$(($clients + $deltaClients))
+    done
+fi
diff --git a/fbench/util/separate.pl b/fbench/util/separate.pl
new file mode 100755
index 00000000000..429ea4d0e37
--- /dev/null
+++ b/fbench/util/separate.pl
@@ -0,0 +1,29 @@
+#!/usr/bin/perl
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# Index of the column to watch, taken from the first command line argument
+my $sepcol = shift;
+
+# Without a column there is nothing to do: show the usage and stop
+if ($sepcol eq "") {
+    die <<'END_OF_USAGE';
+usage: separate.pl <sepcol>
+ Separate a tabular numeric file into chunks using a blank
+ line whenever the value in column 'sepcol' changes.
+END_OF_USAGE
+}
+
+# Value of the watched column on the previous data line; the start value is
+# presumably one that real data is not expected to contain
+my $previous = -2;
+
+while (my $line = <>) {
+    # Comment lines are copied unchanged
+    if ($line =~ /^#/) {
+        print $line;
+        next;
+    }
+
+    chomp $line;
+    my @fields = split ' ', $line;
+    my $current = $fields[$sepcol];
+
+    # A changed value starts a new chunk
+    if ($current != $previous) {
+        print "\n";
+        $previous = $current;
+    }
+    print "@fields\n";
+}