diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /fbench/util/fbench-formatter.py |
Publish
Diffstat (limited to 'fbench/util/fbench-formatter.py')
-rwxr-xr-x | fbench/util/fbench-formatter.py | 391 |
1 files changed, 391 insertions, 0 deletions
diff --git a/fbench/util/fbench-formatter.py b/fbench/util/fbench-formatter.py new file mode 100755 index 00000000000..3c7eeca2bb1 --- /dev/null +++ b/fbench/util/fbench-formatter.py @@ -0,0 +1,391 @@ +#!/usr/bin/python +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +"""Usage: read.py [options] [fbench output file] + +Will read from stdin if no file name is given + +Wildcards: + %d : any digits + * : any string + . : any char + +Example: + fbench-formatter.py file%d directory/file + cat filename | fbench-formatter.py + +Options: + -h, --help show this help + -d, --dir=<string> search directory [default: current directory] + -n, --depth=<int> search depth for subfolders [default: no limit] + -f show file list + + -w give output as html + -s give output as minimal tab seperated list + (headers is written to stderr) + -c give output as comma seperated list + (headers is written to stderr) + + + -t, --tag=<string> set tag to output (use with -s) +""" +from math import sqrt +import os +import sys +import getopt +import re +from sets import Set + +delimer = "[--xxyyzz--FBENCH_MAGIC_DELIMITER--zzyyxx--]" +urlFailStr = "FBENCH: URL FETCH FAILED!"; +attributelist = ["NumHits", "NumFastHits", "TotalHitCount", "QueryHits", "QueryOffset", "NumErrors", "SearchTime", "AttributeFetchTime", "FillTime", "DocsSearched", "NodesSearched", "FullCoverage"] +timeAttributes = ['SearchTime', 'AttributeFetchTime', 'FillTime'] + + +# Init +acc = {} +avg = {} +max_d = {} +min_d = {} + +for i in attributelist: + acc[i] = 0 + avg[i] = 0.0 + max_d[i] = 0 + min_d[i] = sys.maxint + +entries = 0 +fail = 0 + +timeArray = list() +thisTime = 0 +totalTime = 0 + +zeroHits = 0 + +# Global options +_filelist = 0 +_output = 0 +_dir = "." +_depth = 0 + +_tag = "" +_useTag = 0 + +def usage(): + print >> sys.stderr, __doc__ + +def abort(message): + print >> sys.stderr, message + "\n" + usage() + sys.exit(2) + +def main(argv): + try: + opts, args = getopt.getopt(argv, "h:d:n:t:fwsc", ["help", "dir=", "depth=", "tag="]) + except getopt.GetoptError: + usage() + sys.exit(2) + + global _output + + for opt, arg in opts: + if opt in ("-h", "--help"): + abort("") + elif opt in ("-d", "--dir="): + global _dir + _dir = arg + elif opt in ("-n", "--depth="): + global _depth + try: + _depth = int(arg) + except: + abort("Depth must be an integer") + elif opt == "-f": + global _filelist + _filelist = 1 + elif opt == "-w": + _output = 1 + elif opt == "-s": + _output = 2 + elif opt == "-c": + _output = 3 + elif opt in ("-t", "--tag"): + global _tag, _useTag + _useTag = 1 + _tag = arg + + + # Get file patterns + files = Set() + stdin = 1 + + for argument in args: + + stdin = 0 + + # Regex is translated into emacs-format + filepattern = re.sub('[0-9]*%d', '[0-9]+', argument) + + # Get list of all matching files + if (_depth == 0): + cmd = "find %s -regex '.*/%s'" % (_dir, filepattern) + else: + cmd = "find %s -regex '.*/%s' -maxdepth %d" % (_dir, filepattern, _depth) + fi = os.popen(cmd) + + list = fi.readlines() + for i in list: + files.add( i.strip() ) + if len(list) == 0: + print >> sys.stderr, "\"%s\" does not match any files" % filepattern + + # Exit if no files or stdin + if len(files) == 0 and stdin == 0: + print >> sys.stderr, "No matching files found" + sys.exit(1) + + # Print filenames + if _filelist != 0: + print "Files: " + print files + print "" + + # Print number of files + if _filelist != 0: + print >> sys.stderr, "Processing %d files..." % len(files) + + # Parse all files + for file in files: + parsefile(file) + + if stdin == 1: + print >> sys.stderr, "Processing stdin..." + parsefile("-") + + calculate() + printResult() + +def parsefile(filename): + global zeroHits, entries, fail, timeArray, thisTime, acc, min_d, max_d + + if filename == "-": + file = sys.stdin + else: + file = open(filename, "r") + + valid = 0 + + for rawline in file: + # Skip empty lines + if (rawline == ""): + continue + + line = rawline.strip() + + # Deliminer + if (line == delimer): + if valid == 1: + entries += 1 + timeArray.append(thisTime) + thisTime = 0 + valid = 0 + continue + + if (line == urlFailStr): + fail += 1 + entries += 1 + continue + + # Split line at ':' + match = line.split(':') + if len(match) < 2: + continue + + name = match[0].strip() + valueStr = match[1].strip() + + if ( name in attributelist ): + valid = 1 + print name + + # Extract info from header + value = int(valueStr) + acc[name] += value + + if (value == 0 and name == "TotalHitCount"): + zeroHits += 1 + + if (name in timeAttributes): + thisTime += value + + # Find min/max + if value < min_d[name]: + min_d[name] = value + + if value > max_d[name]: + max_d[name] = value + + file.close() + +def calculate(): + + global avg, avgTime, Sn, totalTime, timeArray + + successes = entries - fail + + # Calculate average values + if successes == 0: + print "Could not find any successfully runned queries" + print "Make sure benchmarkdata reporting is activated" + sys.exit(1); + + for entry in acc.keys(): + avg[entry] = float(acc[entry]) / successes + + # Calculate average total time + totalTime = 0 + for i in timeAttributes: + totalTime += acc[i] + avgTime = float(totalTime) / float(successes) + + # Calculate standard deviation + Sn = 0.0 + for sample in timeArray[1:]: + Sn += ( float(sample)-avgTime )**2 + Sn = sqrt( Sn / successes ) + +def printResult(): + if _output == 0: + printDefault() + elif _output == 1: + printHtml() + elif _output == 2: + printSimple() + else: + printCommaSeperated() + +def printDefault(): + # Ordinary printing + print "%21s\t%14s\t%10s\t%6s\t%6s" % ("NAME", "TOTAL", "AVG", "MIN", "MAX") + for entry in acc.keys(): + print "%21s:\t%14d\t%10.2f\t%6d\t%6d" % (entry, acc[entry], avg[entry], min_d[entry], max_d[entry]) + print "" + print "%21s:\t%14.3f\t%10.2f\t%6d\t%6d" % ( "Search+Fill+AttrFetch", totalTime, avgTime, min(timeArray), max(timeArray) ) + print "%21s:\t%14.3f" % ( "Standard deviation", Sn) + print "%21s:\t%14d" % ( "Number of requests", entries) + print "%21s:\t%14d" % ( "successful requests", entries-fail) + print "%21s:\t%14d" % ( "failed requests", fail) + + print "%21s:\t%14d" % ( "zero hit requests", zeroHits) + +def printHtml(): + + # HTML printing + print "<html>" + print " <head>" + print " <title=\"Fbench\">" + print " </head>" + print " <body>" + + print " <table>" + print " <tr>" + print " <th align='left'>Name</th>" + print " <th>Total</th>" + print " <th>Avg</th>" + print " <th>Min</th>" + print " <th>Max</th>" + print " </tr>" + for entry in acc.keys(): + print " <tr>" + print " <td>%s</td>" % entry + print " <td align='right'>%d</td>" % acc[entry] + print " <td align='right'>%.2f</td>" % avg[entry] + print " <td align='right'>%d</td>" % min_d[entry] + print " <td align='right'>%d</td>" % max_d[entry] + print " </tr>" + print " </table>" + + print " <table>" + print " <tr>" + print " <th align='left'>Average time</th>" + print " <td align='right'>%.3f ms </td>" % avgTime + print " </tr>" + print " <th align='left'>Standard deviation</th>" + print " <td align='right'>%.3f</td>" % Sn + print " </tr>" + print " </tr>" + print " <th align='left'>Number of requests</th>" + print " <td align='right'>%d</td>" % entries + print " </tr>" + print " </tr>" + print " <th align='left'>Number of successful requests</th>" + print " <td align='right'>%d</td>" % entries - fail + print " </tr>" + print " </tr>" + print " <th align='left'>Number of failed requests</th>" + print " <td align='right'>%d</td>" % fail + print " </tr>" + print " </tr>" + print " <th align='left'>Number of zero hit requests</th>" + print " <td align='right'>%d</td>" % zeroHits + print " </tr>" + print " </table>" + print " </body>" + +def printSimple(): + # Minimal print + printHeader = "" + for entry in acc.keys(): + printHeader += entry + '\t' + printHeader += "NumRequests\t" + printHeader += "NumSuccess\t" + printHeader += "NumFailed\t" + printHeader += "ZeroHitRequests\t" + printHeader += "TotalTime\t" + if _useTag: + printHeader += "Tag" + print >> sys.stderr, printHeader + + printtext = "" + for entry in acc.keys(): + printtext += str(acc[entry]) + '\t' + printtext += str(entries) + '\t' + printtext += str(entries-fail) + '\t' + printtext += str(fail) + '\t' + printtext += str(zeroHits) + '\t' + printtext += str(totalTime) + '\t' + if _useTag: + printtext += _tag + print printtext + +def printCommaSeperated(): + printHeader = "" + for entry in acc.keys(): + printHeader += entry + ',' + printHeader += "NumRequests," + printHeader += "NumSuccess," + printHeader += "NumFailed," + printHeader += "ZeroHitRequests," + if _useTag: + printHeader += "TotalTime," + printHeader += "Tag" + else: + printHeader += "TotalTime" + print >> sys.stderr, printHeader + + printtext = "" + for entry in acc.keys(): + printtext += str(acc[entry]) + ',' + printtext += str(entries) + ',' + printtext += str(entries-fail) + ',' + printtext += str(fail) + ',' + printtext += str(zeroHits) + ',' + if _useTag: + printtext += str(totalTime) + ',' + printtext += _tag + else: + printtext += str(totalTime) + print printtext + +if __name__ == "__main__": + main(sys.argv[1:]) |