searchlib/src/tests/attribute/benchmark/benchmarkplotter.rb


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134

# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
require 'rexml/document'

# Writes a gnuplot command file and runs gnuplot on it to render a PNG graph.
#
# plot_data    - path of the data file; column 1 is plotted as x and each
#                following column (2, 3, ...) as one series
# plot_png     - path of the PNG file gnuplot is told to produce
# title        - title of the graph
# xlabel       - label of the x axis
# ylabel       - label of the y axis
# graph_titles - legend titles, one per series, in data column order
#
# The command file is always "plot_graph.cmd" in the current directory and is
# overwritten on every call. The gnuplot command line and whatever gnuplot
# prints on stdout are echoed to stdout.
def plot_graph(plot_data, plot_png, title, xlabel, ylabel, graph_titles)
  # Series number i (0-based) lives in data column i + 2; column 1 holds x.
  plots = graph_titles.each_with_index.map do |graph_title, index|
    "\"#{plot_data}\" using 1:#{index + 2} title \"#{graph_title}\" with linespoints"
  end

  # No trailing newline after the plot statement, same as the text built before.
  plot_cmd = [
    "set terminal png",
    "set output \"#{plot_png}\"",
    "set title \"#{title}\"",
    "set xlabel \"#{xlabel}\"",
    "set ylabel \"#{ylabel}\"",
    "plot #{plots.join(', ')}"
  ].join("\n")

  # File.write closes the handle even when writing fails.
  File.write("plot_graph.cmd", plot_cmd)

  cmd = "gnuplot plot_graph.cmd"
  puts cmd
  puts `#{cmd}`
end

# Builds a gnuplot data file with one row per document count and one value
# column per percentage.
#
# num_docs    - document counts; each one becomes a row whose first column is
#               the count itself
# percentages - factors multiplied with the document count to get the value
#               substituted for "#V" in the input file name
# input       - input file name template; the first "#N" is replaced by the
#               document count and the first "#V" by count * percentage
# output      - path of the data file to write
# xml_getter  - name of a method that is called (via send) with the XML root
#               element of an input file and returns the value to record
#
# A file that REXML cannot parse is reported on stdout and recorded as 0.
# Other errors (for example a missing input file) propagate to the caller.
# Returns nil.
#
# NOTE(review): when a percentage is a Float, count * percentage is a Float
# and is substituted in its Float form (e.g. "500.0") - confirm that this
# matches how the log files are named.
def extract_alpha(num_docs, percentages, input, output, xml_getter)
  # Block form closes the output file even if an error propagates.
  File.open(output, "w") do |plot_data|
    num_docs.each do |num|
      values = percentages.map do |prc|
        unique = num * prc
        filename = input.sub("#N", "#{num}").sub("#V", "#{unique}")
        begin
          # The document is parsed completely inside the block, so the input
          # handle can be closed as soon as the root element is available.
          xml_root = File.open(filename) { |log| REXML::Document.new(log).root }
          send(xml_getter, xml_root)
        rescue REXML::ParseException
          puts "Could not parse file: #{filename}"
          0
        end
      end
      # Every column, including the last one, is followed by a single space.
      plot_data.write("#{num} " + values.map { |value| "#{value} " }.join + "\n")
    end
  end
  nil
end

# Builds a gnuplot data file with one row per document count and one value
# column per entry of num_threads, for a single fixed percentage.
#
# num_docs    - document counts; each one becomes a row whose first column is
#               the count itself
# percentage  - factor multiplied with the document count to get the value
#               substituted for "#V" in the input file name
# num_threads - values substituted for "#S" in the input file name; one
#               column is written per entry
# input       - input file name template; the first "#N", "#V" and "#S" are
#               replaced by the count, count * percentage and the thread
#               value respectively
# output      - path of the data file to write
# xml_getter  - name of a method that is called (via send) with the XML root
#               element of an input file and returns the value to record
#
# A file that REXML cannot parse is reported on stdout and recorded as 0.
# Other errors (for example a missing input file) propagate to the caller.
# Returns nil.
#
# NOTE(review): when percentage is a Float, count * percentage is a Float and
# is substituted in its Float form (e.g. "5000.0") - confirm that this matches
# how the log files are named.
def extract_beta(num_docs, percentage, num_threads, input, output, xml_getter)
  # Block form closes the output file even if an error propagates.
  File.open(output, "w") do |plot_data|
    num_docs.each do |num|
      unique = num * percentage
      values = num_threads.map do |thread|
        filename = input.sub("#N", "#{num}").sub("#V", "#{unique}").sub("#S", "#{thread}")
        begin
          # The document is parsed completely inside the block, so the input
          # handle can be closed as soon as the root element is available.
          xml_root = File.open(filename) { |log| REXML::Document.new(log).root }
          send(xml_getter, xml_root)
        rescue REXML::ParseException
          puts "Could not parse file: #{filename}"
          0
        end
      end
      # Every column, including the last one, is followed by a single space.
      plot_data.write("#{num} " + values.map { |value| "#{value} " }.join + "\n")
    end
  end
  nil
end

# Returns the text of the "throughput" element found under the first
# "update" child of xml_root whose id attribute is '0'.
# Raises NoMethodError if either element is missing.
def xml_getter_update_0_throughput(xml_root)
  update_node = xml_root.elements["update[@id='0']"]
  update_node.elements["throughput"].text
end

# Returns the text of the "search-throughput" element found under the
# "total-searcher-summary" child of xml_root.
# Raises NoMethodError if either element is missing.
def xml_getter_search_throughput(xml_root)
  summary = xml_root.elements["total-searcher-summary"]
  summary.elements["search-throughput"].text
end

# Returns the text of the "throughput" element found under the
# "updater-summary" child of xml_root.
# Raises NoMethodError if either element is missing.
def xml_getter_updater_thread_throughput(xml_root)
  xml_root.elements["updater-summary"].elements["throughput"].text
end


# Attribute vector names to process. Further names that were commented out of
# this list: "mv-num-new", "sv-string-new", "mv-string-new", "sv-num-old",
# "mv-num-old", "sv-string-old", "mv-string-old".
vectors = ["mv-num-new"]
num_docs = [500000, 1000000, 2000000, 4000000, 8000000, 16000000]
unique_percentages = [0.001, 0.01, 0.05, 0.20, 0.50]
num_threads = [1, 2, 4, 8, 16]

# Input log name templates. "#AV" is replaced by the vector name below; "#N",
# "#V" and "#S" are replaced by the extract functions.
inputs = [
  "03-27-full/#AV-n#N-v#V-p2-r1-s1-q1000.log",
  "03-27-full/#AV-n#N-v#V-s#S-q100-b.log"
]

# Legend titles: index 0 has one title per unique percentage, index 1 has one
# title per searcher thread count.
graph_titles = [
  unique_percentages.map { |percentage| "#{percentage * 100} % uniques" },
  num_threads.map { |thread| "#{thread} searcher thread(s)" }
]

# For every vector: extract four data files from the benchmark logs and render
# each of them as a PNG graph, in the order update speed, search speed,
# multi-thread search speed, multi-thread update speed.
vectors.each do |vector|
  alpha_input = inputs[0].sub("#AV", vector)
  beta_input = inputs[1].sub("#AV", vector)
  docs_label = "Number of documents"

  # Update throughput, one series per unique percentage.
  stem = "#{vector}-update-speed"
  extract_alpha(num_docs, unique_percentages, alpha_input, "#{stem}.dat",
                :xml_getter_update_0_throughput)
  plot_graph("#{stem}.dat", "#{stem}.png",
             "Update speed when applying 1M updates",
             docs_label, "Updates per/sec", graph_titles[0])

  # Search throughput, one series per unique percentage.
  stem = "#{vector}-search-speed"
  extract_alpha(num_docs, unique_percentages, alpha_input, "#{stem}.dat",
                :xml_getter_search_throughput)
  plot_graph("#{stem}.dat", "#{stem}.png",
             "Search speed with 1 searcher thread",
             docs_label, "Queries per/sec", graph_titles[0])

  # Search throughput at a fixed 0.01 factor, one series per thread count.
  stem = "#{vector}-search-speed-multiple"
  extract_beta(num_docs, 0.01, num_threads, beta_input, "#{stem}.dat",
               :xml_getter_search_throughput)
  plot_graph("#{stem}.dat", "#{stem}.png",
             "Search speed with 1 update thread and X searcher threads",
             docs_label, "Queries per/sec", graph_titles[1])

  # Updater throughput at a fixed 0.01 factor, one series per thread count.
  stem = "#{vector}-update-speed-multiple"
  extract_beta(num_docs, 0.01, num_threads, beta_input, "#{stem}.dat",
               :xml_getter_updater_thread_throughput)
  plot_graph("#{stem}.dat", "#{stem}.png",
             "Update speed with 1 update thread and X searcher threads",
             docs_label, "Updates per/sec", graph_titles[1])
end