From 72231250ed81e10d66bfe70701e64fa5fe50f712 Mon Sep 17 00:00:00 2001
From: Jon Bratseth
Date: Wed, 15 Jun 2016 23:09:44 +0200
Subject: Publish

---
 fbench/util/fbench-formatter.py | 411 ++++++++++++++++++++++++++++++++++++++++
 fbench/util/plot.pl             |  90 +++++++++
 fbench/util/pretest.sh          |  23 +++
 fbench/util/resultfilter.pl     |  14 ++
 fbench/util/runtests.sh         | 105 ++++++++++
 fbench/util/separate.pl         |  31 +++
 6 files changed, 674 insertions(+)
 create mode 100755 fbench/util/fbench-formatter.py
 create mode 100755 fbench/util/plot.pl
 create mode 100755 fbench/util/pretest.sh
 create mode 100755 fbench/util/resultfilter.pl
 create mode 100755 fbench/util/runtests.sh
 create mode 100755 fbench/util/separate.pl

(limited to 'fbench/util')

diff --git a/fbench/util/fbench-formatter.py b/fbench/util/fbench-formatter.py
new file mode 100755
index 00000000000..3c7eeca2bb1
--- /dev/null
+++ b/fbench/util/fbench-formatter.py
@@ -0,0 +1,411 @@
+#!/usr/bin/python
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+"""Usage: fbench-formatter.py [options] [fbench output file]
+
+Will read from stdin if no file name is given
+
+Wildcards:
+  %d : any digits
+  *  : any string
+  .  : any char
+
+Example:
+  fbench-formatter.py file%d directory/file
+  cat filename | fbench-formatter.py
+
+Options:
+  -h, --help       show this help
+  -d, --dir=       search directory [default: current directory]
+  -n, --depth=     search depth for subfolders [default: no limit]
+  -f               show file list
+
+  -w               give output as html
+  -s               give output as minimal tab seperated list
+                   (headers is written to stderr)
+  -c               give output as comma seperated list
+                   (headers is written to stderr)
+
+
+  -t, --tag=       set tag to output (use with -s or -c)
+"""
+from math import sqrt
+import os
+import sys
+import getopt
+import re
+import subprocess
+from sets import Set
+
+# Line that ends the data of one request in the input (see parsefile)
+delimer = "[--xxyyzz--FBENCH_MAGIC_DELIMITER--zzyyxx--]"
+# Line that is counted as one failed request (see parsefile)
+urlFailStr = "FBENCH: URL FETCH FAILED!"
+# Names of the "name: value" lines that are collected
+attributelist = ["NumHits", "NumFastHits", "TotalHitCount", "QueryHits", "QueryOffset", "NumErrors", "SearchTime", "AttributeFetchTime", "FillTime", "DocsSearched", "NodesSearched", "FullCoverage"]
+# Attributes whose values add up to the total time of a request
+timeAttributes = ['SearchTime', 'AttributeFetchTime', 'FillTime']
+
+
+# Init
+acc = {}      # sum of all values, per attribute
+avg = {}      # average per successful request, per attribute
+max_d = {}    # largest value seen, per attribute
+min_d = {}    # smallest value seen, per attribute
+
+for i in attributelist:
+    acc[i] = 0
+    avg[i] = 0.0
+    max_d[i] = 0
+    min_d[i] = sys.maxint
+
+entries = 0   # number of requests, successful and failed
+fail = 0      # number of failed requests
+
+timeArray = list()   # total time of each successful request
+thisTime = 0         # total time of the request being parsed
+totalTime = 0
+
+zeroHits = 0  # number of requests with a TotalHitCount of 0
+
+# Global options
+_filelist = 0
+_output = 0   # 0: default, 1: html, 2: tab separated, 3: comma separated
+_dir = "."
+_depth = 0    # 0: no limit
+
+_tag = ""
+_useTag = 0
+
+def usage():
+    """Print the usage text (the module docstring) to stderr."""
+    print >> sys.stderr, __doc__
+
+def abort(message):
+    """Print message and the usage text to stderr and exit with status 2."""
+    print >> sys.stderr, message + "\n"
+    usage()
+    sys.exit(2)
+
+def findfiles(filepattern):
+    """Return the paths below _dir whose name matches filepattern.
+
+    find(1) is started with an argument list instead of a shell command
+    line, so directories and patterns containing spaces or quotes work.
+    """
+    cmd = ["find", _dir]
+    if _depth != 0:
+        # -maxdepth is an option and has to precede tests such as -regex
+        cmd += ["-maxdepth", str(_depth)]
+    cmd += ["-regex", ".*/" + filepattern]
+    output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0]
+    return [path for path in output.splitlines() if path != ""]
+
+def main(argv):
+    """Parse the options in argv, read the input and print the result.
+
+    argv is the command line without the program name.
+    """
+    global _output, _dir, _depth, _filelist, _tag, _useTag
+
+    try:
+        # -h takes no argument, so it must not be followed by ':'
+        opts, args = getopt.getopt(argv, "hd:n:t:fwsc", ["help", "dir=", "depth=", "tag="])
+    except getopt.GetoptError:
+        usage()
+        sys.exit(2)
+
+    for opt, arg in opts:
+        # getopt reports long options without the trailing '='
+        if opt in ("-h", "--help"):
+            abort("")
+        elif opt in ("-d", "--dir"):
+            _dir = arg
+        elif opt in ("-n", "--depth"):
+            try:
+                _depth = int(arg)
+            except ValueError:
+                abort("Depth must be an integer")
+        elif opt == "-f":
+            _filelist = 1
+        elif opt == "-w":
+            _output = 1
+        elif opt == "-s":
+            _output = 2
+        elif opt == "-c":
+            _output = 3
+        elif opt in ("-t", "--tag"):
+            _useTag = 1
+            _tag = arg
+
+
+    # Get file patterns
+    files = Set()
+    stdin = 1
+
+    for argument in args:
+
+        stdin = 0
+
+        # Regex is translated into emacs-format
+        filepattern = re.sub('[0-9]*%d', '[0-9]+', argument)
+
+        # Get list of all matching files
+        matches = findfiles(filepattern)
+        for path in matches:
+            files.add(path)
+        if len(matches) == 0:
+            print >> sys.stderr, "\"%s\" does not match any files" % filepattern
+
+    # Exit if no files or stdin
+    if len(files) == 0 and stdin == 0:
+        print >> sys.stderr, "No matching files found"
+        sys.exit(1)
+
+    # Print filenames
+    if _filelist != 0:
+        print "Files: "
+        print files
+        print ""
+
+    # Print number of files
+    if _filelist != 0:
+        print >> sys.stderr, "Processing %d files..." % len(files)
+
+    # Parse all files
+    for name in files:
+        parsefile(name)
+
+    if stdin == 1:
+        print >> sys.stderr, "Processing stdin..."
+        parsefile("-")
+
+    calculate()
+    printResult()
+
+def parsefile(filename):
+    """Add the data of one fbench output file to the global statistics.
+
+    filename is the path of the file, or "-" to read stdin.  Every
+    "name: value" line whose name is in attributelist is summed into acc
+    and tracked in min_d and max_d.  A delimiter line ends a request.
+    """
+    global zeroHits, entries, fail, timeArray, thisTime, acc, min_d, max_d
+
+    if filename == "-":
+        infile = sys.stdin
+    else:
+        infile = open(filename, "r")
+
+    valid = 0
+
+    try:
+        for rawline in infile:
+            line = rawline.strip()
+
+            # Skip empty lines
+            if line == "":
+                continue
+
+            # Deliminer
+            if line == delimer:
+                if valid == 1:
+                    entries += 1
+                    timeArray.append(thisTime)
+                    thisTime = 0
+                    valid = 0
+                continue
+
+            if line == urlFailStr:
+                fail += 1
+                entries += 1
+                continue
+
+            # Split line at ':'
+            match = line.split(':')
+            if len(match) < 2:
+                continue
+
+            name = match[0].strip()
+            if name not in attributelist:
+                continue
+
+            # Extract info from header; skip values that are not integers
+            try:
+                value = int(match[1].strip())
+            except ValueError:
+                continue
+
+            valid = 1
+            acc[name] += value
+
+            if value == 0 and name == "TotalHitCount":
+                zeroHits += 1
+
+            if name in timeAttributes:
+                thisTime += value
+
+            # Find min/max
+            if value < min_d[name]:
+                min_d[name] = value
+
+            if value > max_d[name]:
+                max_d[name] = value
+
+        # Count a last request that is not followed by a delimiter; its
+        # values are already part of acc
+        if valid == 1:
+            entries += 1
+            timeArray.append(thisTime)
+            thisTime = 0
+    finally:
+        # stdin is not ours to close
+        if infile is not sys.stdin:
+            infile.close()
+
+def calculate():
+    """Compute avg, avgTime, totalTime and the standard deviation Sn.
+
+    Exits with status 1 when there is no successful request.
+    """
+    global avg, avgTime, Sn, totalTime, timeArray
+
+    successes = entries - fail
+
+    # Calculate average values
+    if successes == 0:
+        print >> sys.stderr, "Could not find any successfully runned queries"
+        print >> sys.stderr, "Make sure benchmarkdata reporting is activated"
+        sys.exit(1)
+
+    for entry in attributelist:
+        avg[entry] = float(acc[entry]) / successes
+        # An attribute that never occurred still has the initial sentinel
+        if min_d[entry] == sys.maxint:
+            min_d[entry] = 0
+
+    # Calculate average total time
+    totalTime = 0
+    for i in timeAttributes:
+        totalTime += acc[i]
+    avgTime = float(totalTime) / float(successes)
+
+    # Calculate standard deviation over all samples, including the first
+    Sn = 0.0
+    for sample in timeArray:
+        Sn += ( float(sample)-avgTime )**2
+    Sn = sqrt( Sn / successes )
+
+def printResult():
+    """Print the result in the format selected by _output."""
+    if _output == 0:
+        printDefault()
+    elif _output == 1:
+        printHtml()
+    elif _output == 2:
+        printSimple()
+    else:
+        printCommaSeperated()
+
+def printDefault():
+    """Print the result as an aligned, human readable table."""
+    # Ordinary printing
+    print "%21s\t%14s\t%10s\t%6s\t%6s" % ("NAME", "TOTAL", "AVG", "MIN", "MAX")
+    for entry in attributelist:
+        print "%21s:\t%14d\t%10.2f\t%6d\t%6d" % (entry, acc[entry], avg[entry], min_d[entry], max_d[entry])
+    print ""
+    print "%21s:\t%14.3f\t%10.2f\t%6d\t%6d" % ( "Search+Fill+AttrFetch", totalTime, avgTime, min(timeArray), max(timeArray) )
+    print "%21s:\t%14.3f" % ( "Standard deviation", Sn)
+    print "%21s:\t%14d" % ( "Number of requests", entries)
+    print "%21s:\t%14d" % ( "successful requests", entries-fail)
+    print "%21s:\t%14d" % ( "failed requests", fail)
+
+    print "%21s:\t%14d" % ( "zero hit requests", zeroHits)
+
+def printHtml():
+    """Print the result as an HTML page holding two tables."""
+
+    # HTML printing
+    print "<html>"
+    print "  <head>"
+    print "    <title>fbench results</title>"
+    print "  </head>"
+    print "  <body>"
+
+    print "    <table>"
+    print "      <tr>"
+    print "        <th>Name</th>"
+    print "        <th>Total</th>"
+    print "        <th>Avg</th>"
+    print "        <th>Min</th>"
+    print "        <th>Max</th>"
+    print "      </tr>"
+    for entry in attributelist:
+        print "      <tr>"
+        print "        <td>%s</td>" % entry
+        print "        <td>%d</td>" % acc[entry]
+        print "        <td>%.2f</td>" % avg[entry]
+        print "        <td>%d</td>" % min_d[entry]
+        print "        <td>%d</td>" % max_d[entry]
+        print "      </tr>"
+    print "    </table>"
+
+    # The values are formatted before printing; "..." % entries - fail
+    # would apply '%' first and then fail on the subtraction
+    summary = [
+        ("Average time", "%.3f ms" % avgTime),
+        ("Standard deviation", "%.3f" % Sn),
+        ("Number of requests", "%d" % entries),
+        ("Number of successful requests", "%d" % (entries - fail)),
+        ("Number of failed requests", "%d" % fail),
+        ("Number of zero hit requests", "%d" % zeroHits),
+    ]
+    print "    <table>"
+    for label, value in summary:
+        print "      <tr>"
+        print "        <td>%s</td>" % label
+        print "        <td>%s</td>" % value
+        print "      </tr>"
+    print "    </table>"
+    print "  </body>"
+    print "</html>"
+
+def resultColumns():
+    """Return the (header, value) pairs shared by the -s and -c formats.
+
+    The attributes come in the order of attributelist, so the column
+    order does not depend on the ordering of the acc dictionary.
+    """
+    columns = [(name, str(acc[name])) for name in attributelist]
+    columns.append(("NumRequests", str(entries)))
+    columns.append(("NumSuccess", str(entries - fail)))
+    columns.append(("NumFailed", str(fail)))
+    columns.append(("ZeroHitRequests", str(zeroHits)))
+    columns.append(("TotalTime", str(totalTime)))
+    return columns
+
+def printSimple():
+    """Print the values tab separated on stdout, the headers on stderr.
+
+    Both lines end with a tab after TotalTime, followed by the tag column
+    when a tag is set.
+    """
+    # Minimal print
+    columns = resultColumns()
+    printHeader = '\t'.join([header for header, value in columns]) + '\t'
+    printtext = '\t'.join([value for header, value in columns]) + '\t'
+    if _useTag:
+        printHeader += "Tag"
+        printtext += _tag
+    print >> sys.stderr, printHeader
+    print printtext
+
+def printCommaSeperated():
+    """Print the values comma separated on stdout, the headers on stderr."""
+    columns = resultColumns()
+    if _useTag:
+        columns.append(("Tag", _tag))
+    print >> sys.stderr, ','.join([header for header, value in columns])
+    print ','.join([value for header, value in columns])
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/fbench/util/plot.pl b/fbench/util/plot.pl
new file mode 100755
index 00000000000..78964b170b0
--- /dev/null
+++ b/fbench/util/plot.pl
@@ -0,0 +1,90 @@
+#!/usr/bin/perl -s
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# TODO
+# - parameter for input and output file name
+# - more graphs
+
+# Print the usage text to stderr and exit with an error status.
+sub usage {
+    die qq{usage: plot.pl [-h] [-x] <plotno> [<format>]
+Plot the contents of 'result.txt' to 'graph.<format>'.
+  -h       This help
+  -x       Output to X11 window
+  plotno:  1: Response Time Percentiles by NumCli
+           2: Rate by NumCli
+           3: Response Time Percentiles by Rate
+  format:  png (default), ps
+};
+}
+
+# The -s switch on the first line makes perl store -h and -x in $h and $x.
+if ($h) {
+    usage();
+}
+
+$plotno = shift || usage();
+$term = shift || "png";
+
+# Reject unknown plot numbers before gnuplot is started, so that no empty
+# graph file is written.
+usage() unless $plotno =~ /^[123]$/;
+
+# setup the output
+if ($x) {
+    # X11 output
+    open(PLOTSCRIPT, "| gnuplot -persist") or die "cannot start gnuplot: $!\n";
+    print PLOTSCRIPT "set term X11\n";
+
+} else {
+    open(PLOTSCRIPT, "| gnuplot") or die "cannot start gnuplot: $!\n";
+    if ("$term" eq "ps") {
+        print PLOTSCRIPT "set term postscript\n";
+        print PLOTSCRIPT "set output \"graph.ps\"\n";
+    }
+    else {
+        print PLOTSCRIPT "set term png transparent small medium enhanced\n";
+        print PLOTSCRIPT "set output \"graph.png\"\n";
+    }
+}
+select(PLOTSCRIPT);
+
+
+
+# choose the graph
+# 'set style data' and 'unset key' replace the deprecated spellings
+# 'set data style' and 'set nokey'.
+if ($plotno == 1) {
+    # Cli Percentile
+    print qq{
+set style data lines
+set title "Response Time Percentiles by NumCli"
+set xlabel "Number of clients"
+set ylabel "Response time (msec)"
+set key left top
+plot 'result.txt' using 1:10 title "max", 'result.txt' using 1:17 title "99 %", 'result.txt' using 1:16 title "95 %", 'result.txt' using 1:15 title "90 %", 'result.txt' using 1:14 title "75 %", 'result.txt' using 1:13 title "50 %", 'result.txt' using 1:12 title "25 %", 'result.txt' using 1:9 title "min"
+    };
+
+} elsif ($plotno == 2) {
+    # Cli Rate
+    print qq{
+set style data lines
+set title "Rate by NumCli"
+set xlabel "Number of clients"
+set ylabel "Rate (queries/sec)"
+unset key
+plot 'result.txt' using 1:18
+    };
+} elsif ($plotno == 3) {
+    # Rate Percentile
+    print qq{
+set style data lines
+set title "Response Time Percentiles by Rate"
+set xlabel "Rate (queries/sec)"
+set ylabel "Response time (msec)"
+set key left top
+plot 'result.txt' using 18:17 title "99 %", 'result.txt' using 18:16 title "95 %", 'result.txt' using 18:15 title "90 %", 'result.txt' using 18:14 title "75 %", 'result.txt' using 18:13 title "50 %", 'result.txt' using 18:12 title "25 %"
+    };
+}
+
+close(PLOTSCRIPT);
diff --git a/fbench/util/pretest.sh b/fbench/util/pretest.sh
new file mode 100755
index 00000000000..3292c56c22a
--- /dev/null
+++ b/fbench/util/pretest.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#
+# This script will be run by the 'runtests.sh' script before
+# each individual test run. It will typically use the 'geturl'
+# program to clear the fsearch and fdispatch caches.
+#
+
+# do not produce any output, log error messages to 'pretest.err'
+exec > /dev/null 2>>pretest.err
+
+#
+# Clear fsearch and fdispatch caches. hostX and portX should be
+# replaced with real host names and port numbers referring to the http
+# daemons of the fsearch and fdispatch programs you are benchmarking.
+#
+#bin/geturl host1 port1 "/admin?command=clear_caches"
+#bin/geturl host2 port2 "/admin?command=clear_caches"
+#bin/geturl host3 port3 "/admin?command=clear_caches"
+#bin/geturl host4 port4 "/admin?command=clear_caches"
+#bin/geturl host5 port5 "/admin?command=clear_caches"
+#...
diff --git a/fbench/util/resultfilter.pl b/fbench/util/resultfilter.pl
new file mode 100755
index 00000000000..a49496cc27b
--- /dev/null
+++ b/fbench/util/resultfilter.pl
@@ -0,0 +1,14 @@
+#!/usr/bin/perl
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# This script converts an fbench summary report read from stdin to a
+# single line containing only the numerical values written to
+# stdout.
+
+while(<>) {
+    chomp();
+    if(/:\s*([-+]?[\d.]+)/) {
+        print $1, " ";
+    }
+}
+print "\n";
diff --git a/fbench/util/runtests.sh b/fbench/util/runtests.sh
new file mode 100755
index 00000000000..58b72ae1f86
--- /dev/null
+++ b/fbench/util/runtests.sh
@@ -0,0 +1,105 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+opt_o=false
+opt_l=false
+
+opt_error=false
+
+while getopts "ol" option; do
+  case $option in
+    "o") opt_o=true;;
+    "l") opt_l=true;;
+    # getopts reports an unknown option as '?'; the pattern must not be
+    # quoted, or it would only match a literal '*'
+    *) opt_error=true;;
+  esac
+done
+
+shift $(($OPTIND - 1))
+if [ $# -lt 8 ] || [ "$opt_error" = "true" ]; then
+  # stdout is meant to be redirected to the result file, so the usage
+  # text is written to stderr
+  exec >&2
+  echo "usage: runtests.sh [-o] [-l] <minClients> <maxClients> <deltaClients>"
+  echo "       <minCycle> <maxCycle> <deltaCycle> [fbench options] <hostname> <port>"
+  echo ""
+  echo "The number of clients varies from <minClients> to <maxClients> with"
+  echo "<deltaClients> increments. For each client count, the cycle time will"
+  echo "vary in the same way according to <minCycle>, <maxCycle> and <deltaCycle>."
+  echo "fbench is run with each combination of client count and cycle time, and"
+  echo "the result output is filtered with the 'resultfilter.pl' script."
+  echo "If you want to save the results you should redirect stdout to a file."
+  echo ""
+  echo " -o : change the order in which the tests are performed so that client"
+  echo "      count varies for each cycle time."
+  echo " -l : output a blank line between test subseries. If -o is not specified this"
+  echo "      will output a blank line between test series using different client count."
+  echo "      If -o was specified this will output blank lines between test series"
+  echo "      using different cycle time."
+  echo ""
+  echo "[fbench options] : These arguments are passed to fbench."
+  echo "  There are 2 things to remenber: first; do not specify either of the -n"
+  echo "  or -c options since they will override the values for client count and"
+  echo "  cycle time generated by this script. secondly; make sure you specify"
+  echo "  the correct host and port number. See the fbench usage (run fbench"
+  echo "  without parameters) for more info on how to invoke fbench."
+  exit 1
+fi
+
+minClients=$1; shift
+maxClients=$1; shift
+deltaClients=$1; shift
+minCycle=$1; shift
+maxCycle=$1; shift
+deltaCycle=$1; shift
+
+if [ ! "$deltaClients" -gt 0 ]; then
+  echo "error: deltaClients must be greater than 0 !" >&2
+  exit 1
+fi
+
+if [ ! "$deltaCycle" -gt 0 ]; then
+  echo "error: deltaCycle must be greater than 0 !" >&2
+  exit 1
+fi
+
+# Run the optional pretest script and one fbench run, printing one result line.
+# $1: client count, $2: cycle time, remaining arguments: fbench options
+run_test() {
+  rt_clients=$1; shift
+  rt_cycle=$1; shift
+  test -f pretest.sh && ./pretest.sh > /dev/null 2>&1
+  # "$@" keeps fbench arguments that contain whitespace intact
+  fbench -n "$rt_clients" -c "$rt_cycle" "$@" | resultfilter.pl
+}
+
+echo "# fbench results collected by 'runtests.sh'."
+echo "#"
+echo "#1       2        3     4        5    6    7  8        9   10  11  12  13  14  15  16  17  18   19   20"
+echo "#clients duration cycle lowlimit skip fail ok overtime min max avg 25% 50% 75% 90% 95% 99% rate util zerohit"
+echo "#--------------------------------------------------------------------------------------------------"
+
+if [ "$opt_o" = "true" ]; then
+  cycle=$minCycle
+  while [ ! "$cycle" -gt "$maxCycle" ]; do
+    clients=$minClients
+    while [ ! "$clients" -gt "$maxClients" ]; do
+      run_test "$clients" "$cycle" "$@"
+      clients=$(($clients + $deltaClients))
+    done
+    [ "$opt_l" = "true" ] && echo ""
+    cycle=$(($cycle + $deltaCycle))
+  done
+else
+  clients=$minClients
+  while [ ! "$clients" -gt "$maxClients" ]; do
+    cycle=$minCycle
+    while [ ! "$cycle" -gt "$maxCycle" ]; do
+      run_test "$clients" "$cycle" "$@"
+      cycle=$(($cycle + $deltaCycle))
+    done
+    [ "$opt_l" = "true" ] && echo ""
+    clients=$(($clients + $deltaClients))
+  done
+fi
diff --git a/fbench/util/separate.pl b/fbench/util/separate.pl
new file mode 100755
index 00000000000..429ea4d0e37
--- /dev/null
+++ b/fbench/util/separate.pl
@@ -0,0 +1,31 @@
+#!/usr/bin/perl
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# Index (starting at 0) of the column whose value changes start a new chunk
+$sepcol = shift;
+
+if (!defined($sepcol) || $sepcol eq "") {
+    die qq{usage: separate.pl <sepcol>
+    Separate a tabular numeric file into chunks using a blank
+    line whenever the value in column 'sepcol' changes.
+};
+}
+
+$oldval = -2;
+$newval = -2;
+
+while (<>) {
+    if (/^#/) {
+        # comment lines are copied unchanged
+        print;
+    } else {
+        chomp;
+        @vals = split;
+        $newval = $vals[$sepcol];
+        if ($newval != $oldval) {
+            print "\n";
+            $oldval = $newval;
+        }
+        print "@vals\n";
+    }
+}
-- 
cgit v1.2.3