Publish

author: Jon Bratseth <bratseth@yahoo-inc.com> 2016-06-15 23:09:44 +0200
committer: Jon Bratseth <bratseth@yahoo-inc.com> 2016-06-15 23:09:44 +0200
commit: 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree: 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /fbench/util
6 files changed, 631 insertions, 0 deletions
diff --git a/fbench/util/fbench-formatter.py b/fbench/util/fbench-formatter.py
new file mode 100755
index 00000000000..3c7eeca2bb1
--- /dev/null
+++ b/fbench/util/fbench-formatter.py
@@ -0,0 +1,391 @@
+#!/usr/bin/python
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+"""Usage: fbench-formatter.py [options] [fbench output file]
+
+Will read from stdin if no file name is given
+
+Wildcards:
+    %d : any digits
+     * : any string
+     . : any char
+
+Example:
+    fbench-formatter.py file%d directory/file
+    cat filename | fbench-formatter.py
+
+Options:
+    -h, --help              show this help
+    -d, --dir=<string>      search directory [default: current directory]
+    -n, --depth=<int>       search depth for subfolders [default: no limit]
+    -f                      show file list
+    
+    -w                      give output as html
+    -s                      give output as minimal tab separated list
+                            (headers are written to stderr)
+    -c                      give output as comma separated list
+                            (headers are written to stderr)
+
+
+    -t, --tag=<string>      set tag to output (use with -s or -c)
+"""
+from math import sqrt
+import os
+import sys
+import getopt
+import re
+from sets import Set
+
+# Line that closes one request's block of output in the parsed input
+# (compared against every stripped input line in parsefile).
+delimer = "[--xxyyzz--FBENCH_MAGIC_DELIMITER--zzyyxx--]"
+# Line that marks a request as failed.
+urlFailStr = "FBENCH: URL FETCH FAILED!"
+# Names of the "name: value" lines that are collected from the input.
+attributelist = [
+    "NumHits", "NumFastHits", "TotalHitCount", "QueryHits", "QueryOffset",
+    "NumErrors", "SearchTime", "AttributeFetchTime", "FillTime",
+    "DocsSearched", "NodesSearched", "FullCoverage",
+]
+# The attributes whose values are added up into the per-request time.
+timeAttributes = ['SearchTime', 'AttributeFetchTime', 'FillTime']
+
+
+# Per-attribute statistics: running sum, average, largest and smallest value.
+acc = {}
+avg = {}
+max_d = {}
+min_d = {}
+
+for attribute in attributelist:
+    acc[attribute], avg[attribute] = 0, 0.0
+    max_d[attribute], min_d[attribute] = 0, sys.maxint
+
+# Request counters: all requests, failed requests, requests with zero hits.
+entries = 0
+fail = 0
+zeroHits = 0
+
+# Time bookkeeping: one summed time per completed request, the time of the
+# request currently being parsed, and the grand total filled in by calculate().
+timeArray = list()
+thisTime = 0
+totalTime = 0
+
+# Global options
+_dir = "."       # search directory given to find
+_depth = 0       # search depth, 0 means no limit
+_filelist = 0    # non-zero: show the list of matched files
+_output = 0      # 0: default table, 1: html, 2: tab separated, 3: comma separated
+
+_tag = ""        # tag text appended to the -s / -c output
+_useTag = 0      # non-zero once a tag has been given
+
+def usage():
+    """Write the module docstring, which holds the usage text, to stderr."""
+    sys.stderr.write("%s\n" % __doc__)
+
+def abort(message):
+    """Report message and the usage text on stderr, then exit with status 2."""
+    # The message is followed by one empty line before the usage text.
+    sys.stderr.write(message + "\n\n")
+    usage()
+    raise SystemExit(2)
+
+def main(argv):
+    """Parse the command line, collect the input files and print the statistics.
+
+    argv is the argument list without the program name.  Every positional
+    argument is a file name pattern that is looked up below the search
+    directory with find(1); when there are no positional arguments the
+    fbench output is read from stdin instead.  Exits with status 2 on a
+    command line error and with status 1 when no pattern matched a file.
+    """
+    # Imported locally so this function does not rely on the file's import block.
+    import subprocess
+
+    global _output, _dir, _depth, _filelist, _tag, _useTag
+
+    try:
+        # "h" takes no argument; "h:" would make a bare -h a usage error
+        # and let "-h -f" swallow the following option.
+        opts, args = getopt.getopt(argv, "hd:n:t:fwsc", ["help", "dir=", "depth=", "tag="])
+    except getopt.GetoptError:
+        usage()
+        sys.exit(2)
+
+    for opt, arg in opts:
+        if opt in ("-h", "--help"):
+            abort("")
+        # getopt reports long options without the trailing "=", so the
+        # names compared here must not carry it either.
+        elif opt in ("-d", "--dir"):
+            _dir = arg
+        elif opt in ("-n", "--depth"):
+            try:
+                _depth = int(arg)
+            except ValueError:
+                abort("Depth must be an integer")
+        elif opt == "-f":
+            _filelist = 1
+        elif opt == "-w":
+            _output = 1
+        elif opt == "-s":
+            _output = 2
+        elif opt == "-c":
+            _output = 3
+        elif opt in ("-t", "--tag"):
+            _useTag = 1
+            _tag = arg
+
+    # Get file patterns
+    files = Set()
+    stdin = 1
+
+    for argument in args:
+
+        stdin = 0
+
+        # Translate the "%d" wildcard into the regex syntax used by find -regex
+        filepattern = re.sub('[0-9]*%d', '[0-9]+', argument)
+
+        # Run find with an argument list instead of a shell command line, so
+        # that quotes or shell metacharacters in the directory or pattern are
+        # passed through literally.  -maxdepth goes before the test, which is
+        # where find expects its global options.
+        cmd = ["find", _dir]
+        if _depth != 0:
+            cmd += ["-maxdepth", str(_depth)]
+        cmd += ["-regex", ".*/" + filepattern]
+
+        try:
+            finder = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+            matches = finder.communicate()[0].splitlines()
+        except OSError:
+            print >> sys.stderr, "Could not run find to look up \"%s\"" % filepattern
+            matches = []
+
+        for match in matches:
+            files.add(match.strip())
+        if len(matches) == 0:
+            print >> sys.stderr, "\"%s\" does not match any files" % filepattern
+
+    # Exit if no files or stdin
+    if len(files) == 0 and stdin == 0:
+        print >> sys.stderr, "No matching files found"
+        sys.exit(1)
+
+    if _filelist != 0:
+        # Print filenames
+        print "Files: "
+        print files
+        print ""
+
+        # Print number of files
+        print >> sys.stderr, "Processing %d files..." % len(files)
+
+    # Parse all files
+    for filename in files:
+        parsefile(filename)
+
+    if stdin == 1:
+        print >> sys.stderr, "Processing stdin..."
+        parsefile("-")
+
+    calculate()
+    printResult()
+
+def parsefile(filename):
+    """Accumulate the statistics of one fbench output stream into the globals.
+
+    filename is the path of the file to read, or "-" for stdin.  Lines of
+    the form "name: value" whose name is in attributelist are summed into
+    acc and tracked in min_d / max_d.  A delimiter line closes the current
+    request: if it contained at least one known attribute it is counted in
+    entries and its summed time is appended to timeArray.  A line equal to
+    urlFailStr counts the request as both an entry and a failure.
+    """
+    global zeroHits, entries, fail, timeArray, thisTime, acc, min_d, max_d
+
+    if filename == "-":
+        infile = sys.stdin
+    else:
+        infile = open(filename, "r")
+
+    # Set to 1 once the current request has produced a known attribute.
+    valid = 0
+
+    try:
+        for rawline in infile:
+            line = rawline.strip()
+
+            # Skip empty lines (tested after stripping: a raw line still
+            # carries its newline and is never equal to "")
+            if not line:
+                continue
+
+            # Delimiter: end of one request
+            if line == delimer:
+                if valid == 1:
+                    entries += 1
+                    timeArray.append(thisTime)
+                    thisTime = 0
+                    valid = 0
+                continue
+
+            if line == urlFailStr:
+                fail += 1
+                entries += 1
+                continue
+
+            # Split line at ':'
+            match = line.split(':')
+            if len(match) < 2:
+                continue
+
+            name = match[0].strip()
+            if name not in attributelist:
+                continue
+
+            valueStr = match[1].strip()
+            try:
+                value = int(valueStr)
+            except ValueError:
+                # A malformed value must not abort the whole run.
+                print >> sys.stderr, "Ignoring non-numeric value for %s: %r" % (name, valueStr)
+                continue
+
+            valid = 1
+            acc[name] += value
+
+            if value == 0 and name == "TotalHitCount":
+                zeroHits += 1
+
+            if name in timeAttributes:
+                thisTime += value
+
+            # Find min/max
+            if value < min_d[name]:
+                min_d[name] = value
+
+            if value > max_d[name]:
+                max_d[name] = value
+    finally:
+        # Release the file even when reading fails; stdin is left open.
+        if infile is not sys.stdin:
+            infile.close()
+
+def calculate():
+    """Derive the averages and the standard deviation from the parsed totals.
+
+    Fills avg for every attribute and sets the globals totalTime (sum of
+    the time attributes), avgTime (totalTime per successful request) and
+    Sn (standard deviation of the per-request times).  Prints a message
+    and exits with status 1 when there is no successful request.
+    """
+    global avg, avgTime, Sn, totalTime, timeArray
+
+    successes = entries - fail
+
+    # Without a successful request every average would divide by zero.
+    if successes == 0:
+        print "Could not find any successfully runned queries"
+        print "Make sure benchmarkdata reporting is activated"
+        sys.exit(1)
+
+    # Calculate average values
+    for entry in acc:
+        avg[entry] = float(acc[entry]) / successes
+
+    # Calculate average total time
+    totalTime = 0
+    for name in timeAttributes:
+        totalTime += acc[name]
+    avgTime = float(totalTime) / float(successes)
+
+    # Calculate standard deviation over every sample.  The first sample used
+    # to be skipped although it is part of avgTime and of the divisor.
+    squares = 0.0
+    for sample in timeArray:
+        squares += (float(sample) - avgTime) ** 2
+    Sn = sqrt(squares / successes)
+
+def printResult():
+    """Print the statistics in the format selected by the _output option."""
+    # 0: default table, 1: html, 2: tab separated; anything else: comma separated.
+    printers = {0: printDefault, 1: printHtml, 2: printSimple}
+    printer = printers.get(_output, printCommaSeperated)
+    printer()
+
+def printDefault():
+    """Print the statistics as an aligned, tab separated table on stdout."""
+    attribute_row = "%21s:\t%14d\t%10.2f\t%6d\t%6d"
+    counter_row = "%21s:\t%14d"
+
+    print "%21s\t%14s\t%10s\t%6s\t%6s" % ("NAME", "TOTAL", "AVG", "MIN", "MAX")
+    for name in acc:
+        print attribute_row % (name, acc[name], avg[name], min_d[name], max_d[name])
+    print ""
+
+    # Summed time attributes, with the smallest and largest per-request time.
+    fastest, slowest = min(timeArray), max(timeArray)
+    print "%21s:\t%14.3f\t%10.2f\t%6d\t%6d" % ("Search+Fill+AttrFetch", totalTime, avgTime, fastest, slowest)
+    print "%21s:\t%14.3f" % ("Standard deviation", Sn)
+
+    counters = (
+        ("Number of requests", entries),
+        ("successful requests", entries - fail),
+        ("failed requests", fail),
+        ("zero hit requests", zeroHits),
+    )
+    for label, number in counters:
+        print counter_row % (label, number)
+
+def printHtml():
+    """Print the statistics as an HTML document on stdout.
+
+    The document holds one table with total, average, minimum and maximum
+    of every attribute, followed by a table with the summary figures.
+    """
+    print "<html>"
+    print "  <head>"
+    print "    <title>Fbench</title>"
+    print "  </head>"
+    print "  <body>"
+
+    print "    <table>"
+    print "      <tr>"
+    print "        <th align='left'>Name</th>"
+    print "        <th>Total</th>"
+    print "        <th>Avg</th>"
+    print "        <th>Min</th>"
+    print "        <th>Max</th>"
+    print "      </tr>"
+    for entry in acc:
+        print "      <tr>"
+        print "        <td>%s</td>" % entry
+        print "        <td align='right'>%d</td>" % acc[entry]
+        print "        <td align='right'>%.2f</td>" % avg[entry]
+        print "        <td align='right'>%d</td>" % min_d[entry]
+        print "        <td align='right'>%d</td>" % max_d[entry]
+        print "      </tr>"
+    print "    </table>"
+
+    # Values are formatted up front.  "entries - fail" is parenthesised
+    # because % binds tighter than -, which made the unparenthesised form
+    # subtract an int from a string.
+    summary = (
+        ("Average time", "%.3f ms " % avgTime),
+        ("Standard deviation", "%.3f" % Sn),
+        ("Number of requests", "%d" % entries),
+        ("Number of successful requests", "%d" % (entries - fail)),
+        ("Number of failed requests", "%d" % fail),
+        ("Number of zero hit requests", "%d" % zeroHits),
+    )
+
+    print "    <table>"
+    for label, text in summary:
+        # Every row gets its own opening and closing tag.
+        print "      <tr>"
+        print "        <th align='left'>%s</th>" % label
+        print "        <td align='right'>%s</td>" % text
+        print "      </tr>"
+    print "    </table>"
+    print "  </body>"
+    print "</html>"
+
+def printSimple():
+    """Print the totals as one tab separated line on stdout.
+
+    The matching header line goes to stderr.  Every column, including the
+    last fixed one, is followed by a tab; the tag is appended after that
+    when one was given.
+    """
+    names = list(acc.keys())
+
+    columns = names + ["NumRequests", "NumSuccess", "NumFailed", "ZeroHitRequests", "TotalTime"]
+    header = "".join([column + '\t' for column in columns])
+    if _useTag:
+        header += "Tag"
+    print >> sys.stderr, header
+
+    values = [acc[name] for name in names]
+    values += [entries, entries - fail, fail, zeroHits, totalTime]
+    text = "".join([str(value) + '\t' for value in values])
+    if _useTag:
+        text += _tag
+    print text
+
+def printCommaSeperated():
+    """Print the totals as one comma separated line on stdout.
+
+    The matching header line goes to stderr.  The tag becomes an extra
+    last column when one was given.
+    """
+    names = list(acc.keys())
+
+    columns = names + ["NumRequests", "NumSuccess", "NumFailed", "ZeroHitRequests", "TotalTime"]
+    values = [str(acc[name]) for name in names]
+    values += [str(entries), str(entries - fail), str(fail), str(zeroHits), str(totalTime)]
+
+    if _useTag:
+        columns.append("Tag")
+        values.append(_tag)
+
+    print >> sys.stderr, ",".join(columns)
+    print ",".join(values)
+
+# Run only when executed as a script; the program name is not passed to main().
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/fbench/util/plot.pl b/fbench/util/plot.pl
new file mode 100755
index 00000000000..78964b170b0
--- /dev/null
+++ b/fbench/util/plot.pl
@@ -0,0 +1,82 @@
+#!/usr/bin/perl -s
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# TODO
+# 	- parameter for input and output file name
+#	- more graphs
+
+# Print the usage text through die, which terminates the script.
+sub usage {
+  die qq{usage: plot.pl [-h] [-x] <plotno> <format>
+Plot the contents of 'result.txt' to 'graph.<format>'.
+	-h	This help
+	-x	Output to X11 window
+	plotno:	1: Response Time Percentiles by NumCli
+		2: Rate by NumCli
+		3: Response Time Percentiles by Rate
+	format:	png (default), ps
+};
+}
+
+# Positional arguments: the plot number is mandatory, the format defaults to png.
+$plotno = shift || die usage;
+$term = shift || "png";
+
+# $h and $x are set from the -h and -x switches by perl's -s option on the #! line.
+if ($h) {
+  usage;
+}
+
+# setup the output
+if ($x) {
+  # X11 output
+  open(PLOTSCRIPT, "| gnuplot -persist") or die "plot.pl: cannot start gnuplot: $!\n";
+  print PLOTSCRIPT "set term X11\n";
+
+} else {
+  open(PLOTSCRIPT, "| gnuplot") or die "plot.pl: cannot start gnuplot: $!\n";
+  if ("$term" eq "ps") {
+    print PLOTSCRIPT "set term postscript\n";
+    print PLOTSCRIPT "set output \"graph.ps\"\n";
+  }
+  else {
+    # Any format other than "ps" is plotted as png.
+    print PLOTSCRIPT "set term png transparent small medium enhanced\n";
+    print PLOTSCRIPT "set output \"graph.png\"\n";
+  }
+}
+# From here on a plain print writes to the gnuplot pipe.
+select(PLOTSCRIPT);
+
+
+
+# choose the graph
+# NOTE(review): "set data style lines" is old gnuplot syntax; confirm that the
+# gnuplot version in use still accepts it.
+if ($plotno == 1) {
+  # Cli Percentile
+  print qq{
+set data style lines
+set title "Response Time Percentiles by NumCli"
+set xlabel "Number of clients"
+set ylabel "Response time (msec)"
+set key left top
+plot 'result.txt' using 1:10 title "max", 'result.txt' using 1:17 title "99 %", 'result.txt' using 1:16 title "95 %", 'result.txt' using 1:15 title "90 %", 'result.txt' using 1:14 title "75 %", 'result.txt' using 1:13 title "50 %", 'result.txt' using 1:12 title "25 %", 'result.txt' using 1:9 title "min"
+  };
+
+} elsif ($plotno == 2) {
+  # Cli Rate
+  print qq{
+set data style lines
+set title "Rate by NumCli"
+set xlabel "Number of clients"
+set ylabel "Rate (queries/sec)"
+set nokey
+plot 'result.txt' using 1:18
+  };
+} elsif ($plotno == 3) {
+  # Rate Percentile
+  print qq{
+set data style lines
+set title "Response Time Percentiles by Rate"
+set xlabel "Rate (queries/sec)"
+set ylabel "Response time (msec)"
+set key left top
+plot 'result.txt' using 18:17 title "99 %", 'result.txt' using 18:16 title "95 %", 'result.txt' using 18:15 title "90 %", 'result.txt' using 18:14 title "75 %", 'result.txt' using 18:13 title "50 %", 'result.txt' using 18:12 title "25 %"
+  };
+} else {
+  # Nothing was plotted; say so instead of silently leaving an empty graph.
+  warn "plot.pl: unknown plot number '$plotno' (expected 1, 2 or 3)\n";
+}
+
+# Closing the pipe waits for gnuplot and reports a failed run.
+close(PLOTSCRIPT) or warn "plot.pl: gnuplot did not finish successfully\n";
diff --git a/fbench/util/pretest.sh b/fbench/util/pretest.sh
new file mode 100755
index 00000000000..3292c56c22a
--- /dev/null
+++ b/fbench/util/pretest.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#
+# This script will be run by the 'runtests.sh' script before
+# each individual test run. It will typically use the 'geturl'
+# program to clear the fsearch and fdispatch caches.
+#
+
+# Do not produce any output: from here on stdout is discarded and
+# error messages are appended to 'pretest.err'.
+exec > /dev/null 2>>pretest.err
+
+#
+# Clear fsearch and fdispatch caches. hostX and portX should be
+# replaced with real host names and port numbers referring to the http
+# daemons of the fsearch and fdispatch programs you are benchmarking.
+#
+#bin/geturl host1 port1 "/admin?command=clear_caches"
+#bin/geturl host2 port2 "/admin?command=clear_caches"
+#bin/geturl host3 port3 "/admin?command=clear_caches"
+#bin/geturl host4 port4 "/admin?command=clear_caches"
+#bin/geturl host5 port5 "/admin?command=clear_caches"
+#...
diff --git a/fbench/util/resultfilter.pl b/fbench/util/resultfilter.pl
new file mode 100755
index 00000000000..a49496cc27b
--- /dev/null
+++ b/fbench/util/resultfilter.pl
@@ -0,0 +1,14 @@
+#!/usr/bin/perl
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# This script converts an fbench summary report read from stdin to a
+# single line containing only the numerical values written to
+# stdout.
+
+# Emit the first number that follows a ':' on each input line, separated
+# by single spaces, and end the collected values with one newline.
+while (my $line = <>) {
+    chomp $line;
+    next unless $line =~ /:\s*([-+]?[\d.]+)/;
+    print "$1 ";
+}
+print "\n";
diff --git a/fbench/util/runtests.sh b/fbench/util/runtests.sh
new file mode 100755
index 00000000000..58b72ae1f86
--- /dev/null
+++ b/fbench/util/runtests.sh
@@ -0,0 +1,92 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# Option flags: -o swaps the loop order, -l prints a blank line between subseries.
+opt_o=false
+opt_l=false
+
+opt_error=false
+
+while getopts "ol" option; do
+    case $option in
+        "o") opt_o=true;;
+        "l") opt_l=true;;
+        # Unquoted catch-all: getopts reports an unknown option as '?',
+        # which a quoted "*" pattern would never match.
+        *) opt_error=true;;
+    esac
+done
+
+shift $(($OPTIND - 1))
+if [ $# -lt 8 ] || [ "$opt_error" = "true" ]; then
+    echo "usage: runtests.sh [-o] [-l] <minClients> <maxClients> <deltaClients>"
+    echo "         <minCycle> <maxCycle> <deltaCycle> [fbench options] <hostname> <port>"
+    echo ""
+    echo "The number of clients varies from <minClients> to <maxClients> with"
+    echo "<deltaClients> increments. For each client count, the cycle time will"
+    echo "vary in the same way according to <minCycle>, <maxCycle> and <deltaCycle>."
+    echo "fbench is run with each combination of client count and cycle time, and"
+    echo "the result output is filtered with the 'resultfilter.pl' script."
+    echo "If you want to save the results you should redirect stdout to a file."
+    echo ""
+    echo " -o : change the order in which the tests are performed so that client"
+    echo "      count varies for each cycle time."
+    echo " -l : output a blank line between test subseries. If -o is not specified this"
+    echo "      will output a blank line between test series using different client count."
+    echo "      If -o was specified this will output blank lines between test series"
+    echo "      using different cycle time."
+    echo ""
+    echo "[fbench options] <hostname> <port>: These arguments are passed to fbench."
+    echo "  There are 2 things to remenber: first; do not specify either of the -n"
+    echo "  or -c options since they will override the values for client count and"
+    echo "  cycle time generated by this script. secondly; make sure you specify"
+    echo "  the correct host and port number. See the fbench usage (run fbench"
+    echo "  without parameters) for more info on how to invoke fbench."
+    exit 1
+fi
+
+# The first six arguments control the loops; what remains is passed to fbench.
+minClients=$1; shift
+maxClients=$1; shift
+deltaClients=$1; shift
+minCycle=$1; shift
+maxCycle=$1; shift
+deltaCycle=$1; shift
+
+# A step that is not positive would make the loops below run forever.
+if [ ! "$deltaClients" -gt 0 ]; then
+    echo "error: deltaClients must be greater than 0 !"
+    exit 1
+fi
+
+if [ ! "$deltaCycle" -gt 0 ]; then
+    echo "error: deltaCycle must be greater than 0 !"
+    exit 1
+fi
+
+echo "# fbench results collected by 'runtests.sh'."
+echo "#"
+echo "#1       2        3     4        5    6    7  8        9   10  11  12  13  14  15  16  17  18      19   20"
+echo "#clients duration cycle lowlimit skip fail ok overtime min max avg 25% 50% 75% 90% 95% 99% rate util zerohit"
+echo "#--------------------------------------------------------------------------------------------------"
+
+if [ "$opt_o" = "true" ]; then
+    # Outer loop over cycle time, inner loop over client count.
+    cycle=$minCycle
+    while [ ! "$cycle" -gt "$maxCycle" ]; do
+        clients=$minClients
+        while [ ! "$clients" -gt "$maxClients" ]; do
+            test -f pretest.sh && ./pretest.sh > /dev/null 2>&1
+            # "$@" keeps every remaining argument as one word for fbench.
+            fbench -n "$clients" -c "$cycle" "$@" | resultfilter.pl
+            clients=$(($clients + $deltaClients))
+        done
+        [ "$opt_l" = "true" ] && echo ""
+        cycle=$(($cycle + $deltaCycle))
+    done
+else
+    # Outer loop over client count, inner loop over cycle time.
+    clients=$minClients
+    while [ ! "$clients" -gt "$maxClients" ]; do
+        cycle=$minCycle
+        while [ ! "$cycle" -gt "$maxCycle" ]; do
+            test -f pretest.sh && ./pretest.sh > /dev/null 2>&1
+            # "$@" keeps every remaining argument as one word for fbench.
+            fbench -n "$clients" -c "$cycle" "$@" | resultfilter.pl
+            cycle=$(($cycle + $deltaCycle))
+        done
+        [ "$opt_l" = "true" ] && echo ""
+        clients=$(($clients + $deltaClients))
+    done
+fi
diff --git a/fbench/util/separate.pl b/fbench/util/separate.pl
new file mode 100755
index 00000000000..429ea4d0e37
--- /dev/null
+++ b/fbench/util/separate.pl
@@ -0,0 +1,29 @@
+#!/usr/bin/perl
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# Index of the column to watch; used directly as an index into the split
+# fields, so the first column is 0.
+my $sepcol = shift;
+
+die qq{usage: separate.pl <sepcol>
+       Separate a tabular numeric file into chunks using a blank
+       line whenever the value in column 'sepcol' changes.
+} if $sepcol eq "";
+
+# Value of the watched column on the previous data line; -2 is the start value.
+my $previous = -2;
+
+while (my $line = <>) {
+  # Lines starting with '#' are copied through unchanged.
+  if ($line =~ /^#/) {
+    print $line;
+    next;
+  }
+
+  chomp $line;
+  my @fields = split ' ', $line;
+  my $current = $fields[$sepcol];
+
+  # Numeric comparison: start a new chunk when the watched value changes.
+  if ($current != $previous) {
+    print "\n";
+    $previous = $current;
+  }
+  print "@fields\n";
+}
author	Jon Bratseth <bratseth@yahoo-inc.com>	2016-06-15 23:09:44 +0200
committer	Jon Bratseth <bratseth@yahoo-inc.com>	2016-06-15 23:09:44 +0200
commit	72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree	2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /fbench/util