aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/tests/attribute/benchmark
diff options
context:
space:
mode:
author: Arne Juul <arnej@yahoo-inc.com> 2016-06-27 21:07:09 +0200
committer: Arne Juul <arnej@yahoo-inc.com> 2016-06-27 21:07:09 +0200
commit: 1e87e2eb9bc2d43c24df818d3e44b0b50cec7520 (patch)
tree: 8975a986131560d390b3c20215de6fe9d992a4af /searchlib/src/tests/attribute/benchmark
parent: 4c7621236ba903006bd0919941f975545815cf01 (diff)
split attribute tests into directories
* these tests would clash when run in parallel
Diffstat (limited to 'searchlib/src/tests/attribute/benchmark')
-rw-r--r-- searchlib/src/tests/attribute/benchmark/CMakeLists.txt 8
-rw-r--r-- searchlib/src/tests/attribute/benchmark/attributebenchmark.cpp 678
-rw-r--r-- searchlib/src/tests/attribute/benchmark/attributebenchmark.rb 22
-rw-r--r-- searchlib/src/tests/attribute/benchmark/benchmarkplotter.rb 134
4 files changed, 842 insertions, 0 deletions
diff --git a/searchlib/src/tests/attribute/benchmark/CMakeLists.txt b/searchlib/src/tests/attribute/benchmark/CMakeLists.txt
new file mode 100644
index 00000000000..156d90ac1bd
--- /dev/null
+++ b/searchlib/src/tests/attribute/benchmark/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Executable built from attributebenchmark.cpp, with searchlib as its only declared dependency.
+vespa_add_executable(searchlib_attributebenchmark_app
+ SOURCES
+ attributebenchmark.cpp
+ DEPENDS
+ searchlib
+)
+# Registers the executable as a test of the same name; it is run without arguments
+# and the BENCHMARK keyword is passed on to vespa_add_test.
+vespa_add_test(NAME searchlib_attributebenchmark_app COMMAND searchlib_attributebenchmark_app BENCHMARK)
diff --git a/searchlib/src/tests/attribute/benchmark/attributebenchmark.cpp b/searchlib/src/tests/attribute/benchmark/attributebenchmark.cpp
new file mode 100644
index 00000000000..88446ef71f7
--- /dev/null
+++ b/searchlib/src/tests/attribute/benchmark/attributebenchmark.cpp
@@ -0,0 +1,678 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "attributesearcher.h"
+#include "attributeupdater.h"
+#include <vespa/searchlib/util/randomgenerator.h>
+#include "runnable.h"
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/searchlib/attribute/attrvector.h>
+#include <vespa/vespalib/util/sync.h>
+#include <iostream>
+#include <fstream>
+#include <vespa/log/log.h>
+
+LOG_SETUP("attributebenchmark");
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+
+using vespalib::Monitor;
+using vespalib::MonitorGuard;
+using std::shared_ptr;
+
+typedef std::vector<uint32_t> NumVector;
+typedef std::vector<vespalib::string> StringVector;
+typedef AttributeVector::SP AttributePtr;
+typedef AttributeVector::DocId DocId;
+typedef search::attribute::Config AttrConfig;
+using search::attribute::BasicType;
+using search::attribute::CollectionType;
+
+namespace search {
+
+/**
+ * Command line application that benchmarks an attribute vector.
+ *
+ * Main() parses the command line into a Config, creates the requested
+ * attribute and runs the populate, update and search steps. All results are
+ * written as XML to stdout.
+ */
+class AttributeBenchmark : public FastOS_Application
+{
+private:
+    /** Holder for every benchmark parameter; printXML() dumps them to stdout. */
+    class Config {
+    public:
+        vespalib::string _attribute;   // name of the attribute to benchmark
+        uint32_t _numDocs;
+        uint32_t _numUpdates;
+        uint32_t _numValues;
+        uint32_t _numSearchers;        // number of searcher threads
+        uint32_t _numQueries;          // queries per searcher thread
+        bool _searchersOnly;           // false => an updater thread runs while searching
+        bool _validate;
+        uint32_t _populateRuns;
+        uint32_t _updateRuns;
+        uint32_t _commitFreq;
+        uint32_t _minValueCount;
+        uint32_t _maxValueCount;
+        uint32_t _minStringLen;
+        uint32_t _maxStringLen;
+        uint32_t _seed;                // seed for the random generator
+        bool _writeAttribute;          // save the attribute to disk when done
+        int64_t _rangeStart;
+        int64_t _rangeEnd;
+        int64_t _rangeDelta;
+        bool _rangeSearch;
+        uint32_t _prefixLength;
+        bool _prefixSearch;
+
+        /** Zero / false for everything except _searchersOnly, which starts out true. */
+        Config()
+            : _attribute(""),
+              _numDocs(0),
+              _numUpdates(0),
+              _numValues(0),
+              _numSearchers(0),
+              _numQueries(0),
+              _searchersOnly(true),
+              _validate(false),
+              _populateRuns(0),
+              _updateRuns(0),
+              _commitFreq(0),
+              _minValueCount(0),
+              _maxValueCount(0),
+              _minStringLen(0),
+              _maxStringLen(0),
+              _seed(0),
+              _writeAttribute(false),
+              _rangeStart(0),
+              _rangeEnd(0),
+              _rangeDelta(0),
+              _rangeSearch(false),
+              _prefixLength(0),
+              _prefixSearch(false)
+        {
+        }
+        void printXML() const;
+    };
+
+    /**
+     * Records getrusage() samples relative to a baseline taken by reset().
+     */
+    class Resource {
+    private:
+        std::vector<struct rusage> _usages;   // one entry per saveUsage() call
+        struct rusage _reset;                 // baseline sample
+
+    public:
+        Resource() : _usages(), _reset() { reset(); }
+
+        /** Takes a new baseline sample. */
+        void reset() { getrusage(0, &_reset); }
+
+        /** Samples usage now and stores the difference against the baseline. */
+        void saveUsage() {
+            struct rusage current;
+            getrusage(0, &current);
+            _usages.push_back(computeDifference(_reset, current));
+        }
+
+        /**
+         * Prints the most recently saved sample; user and system time are
+         * printed in milliseconds. Requires at least one saveUsage() call.
+         */
+        void printLastXML(uint32_t opCount) {
+            (void) opCount;
+            const struct rusage & last = _usages.back();
+            std::cout << "<ru_utime>" << last.ru_utime.tv_sec * 1000 + last.ru_utime.tv_usec / 1000
+                      << "</ru_utime>" << std::endl;
+            std::cout << "<ru_stime>" << last.ru_stime.tv_sec * 1000 + last.ru_stime.tv_usec / 1000
+                      << "</ru_stime>" << std::endl;
+            std::cout << "<ru_nvcsw>" << last.ru_nvcsw << "</ru_nvcsw>" << std::endl;
+            std::cout << "<ru_nivcsw>" << last.ru_nivcsw << "</ru_nivcsw>" << std::endl;
+        }
+        static struct rusage computeDifference(struct rusage & first, struct rusage & second);
+    };
+
+    FastOS_ThreadPool * _threadPool;   // owned; created in Main(), released in the destructor
+    Config _config;
+    RandomGenerator _rndGen;
+
+    void init(const Config & config);
+    void usage();
+
+    // benchmark helper methods
+    void addDocs(const AttributePtr & ptr, uint32_t numDocs);
+    template <typename Vector, typename T, typename BT>
+    void benchmarkPopulate(const AttributePtr & ptr, const std::vector<T> & values, uint32_t id);
+    template <typename Vector, typename T, typename BT>
+    void benchmarkUpdate(const AttributePtr & ptr, const std::vector<T> & values, uint32_t id);
+
+    template <typename T>
+    std::vector<vespalib::string> prepareForPrefixSearch(const std::vector<T> & values) const;
+    template <typename T>
+    void benchmarkSearch(const AttributePtr & ptr, const std::vector<T> & values);
+    template <typename Vector, typename T, typename BT>
+    void benchmarkSearchWithUpdater(const AttributePtr & ptr,
+                                    const std::vector<T> & values);
+
+    template <typename Vector, typename T, typename BT>
+    void benchmarkAttribute(const AttributePtr & ptr, const std::vector<T> & values);
+
+    // Numeric Attribute
+    void benchmarkNumeric(const AttributePtr & ptr);
+
+    // String Attribute
+    void benchmarkString(const AttributePtr & ptr);
+
+
+public:
+    AttributeBenchmark() : _threadPool(NULL), _config(), _rndGen() {}
+    ~AttributeBenchmark() {
+        // deleting a null pointer is a no-op, so no explicit check is needed
+        delete _threadPool;
+    }
+    int Main();
+};
+
+
+/**
+ * Writes the configuration to stdout as a <config> element with one child
+ * element per printed setting. _writeAttribute is not part of the output.
+ */
+void
+AttributeBenchmark::Config::printXML() const
+{
+    std::cout << "<config>" << std::endl;
+    std::cout << "<attribute>" << _attribute << "</attribute>" << std::endl;
+    std::cout << "<num-docs>" << _numDocs << "</num-docs>" << std::endl;
+    std::cout << "<num-updates>" << _numUpdates << "</num-updates>" << std::endl;
+    std::cout << "<num-values>" << _numValues << "</num-values>" << std::endl;
+    std::cout << "<num-searchers>" << _numSearchers << "</num-searchers>" << std::endl;
+    std::cout << "<num-queries>" << _numQueries << "</num-queries>" << std::endl;
+    std::cout << "<searchers-only>" << (_searchersOnly ? "true" : "false") << "</searchers-only>" << std::endl;
+    std::cout << "<validate>" << (_validate ? "true" : "false") << "</validate>" << std::endl;
+    std::cout << "<populate-runs>" << _populateRuns << "</populate-runs>" << std::endl;
+    std::cout << "<update-runs>" << _updateRuns << "</update-runs>" << std::endl;
+    std::cout << "<commit-freq>" << _commitFreq << "</commit-freq>" << std::endl;
+    std::cout << "<min-value-count>" << _minValueCount << "</min-value-count>" << std::endl;
+    std::cout << "<max-value-count>" << _maxValueCount << "</max-value-count>" << std::endl;
+    std::cout << "<min-string-len>" << _minStringLen << "</min-string-len>" << std::endl;
+    std::cout << "<max-string-len>" << _maxStringLen << "</max-string-len>" << std::endl;
+    std::cout << "<seed>" << _seed << "</seed>" << std::endl;
+    std::cout << "<range-start>" << _rangeStart << "</range-start>" << std::endl;
+    std::cout << "<range-end>" << _rangeEnd << "</range-end>" << std::endl;
+    std::cout << "<range-delta>" << _rangeDelta << "</range-delta>" << std::endl;
+    std::cout << "<range-search>" << (_rangeSearch ? "true" : "false") << "</range-search>" << std::endl;
+    // The closing tag must match the opening tag, otherwise the output is not well-formed XML.
+    std::cout << "<prefix-length>" << _prefixLength << "</prefix-length>" << std::endl;
+    std::cout << "<prefix-search>" << (_prefixSearch ? "true" : "false") << "</prefix-search>" << std::endl;
+    std::cout << "</config>" << std::endl;
+}
+
+/**
+ * Takes a copy of the given configuration and seeds the random generator
+ * with the configured seed.
+ */
+void
+AttributeBenchmark::init(const Config & config)
+{
+    _config = config;
+    const uint32_t seed = _config._seed;
+    _rndGen.srand(seed);
+}
+
+
+//-----------------------------------------------------------------------------
+// Benchmark helper methods
+//-----------------------------------------------------------------------------
+/**
+ * Adds numDocs documents to the attribute and asserts that they received
+ * the doc ids [0, numDocs - 1], i.e. that the attribute was empty before.
+ */
+void
+AttributeBenchmark::addDocs(const AttributePtr & ptr, uint32_t numDocs)
+{
+    DocId firstId;
+    DocId finalId;
+    const bool added = ptr->addDocs(firstId, finalId, numDocs);
+    assert(added);
+    (void) added;   // silences the unused warning when asserts are compiled out
+    assert(firstId == 0);
+    assert(finalId + 1 == numDocs);
+    assert(ptr->getNumDocs() == numDocs);
+}
+
+/**
+ * Runs one populate pass over the attribute with an AttributeUpdater and
+ * prints its status inside a <populate id='...'> element.
+ *
+ * @param ptr    attribute to populate
+ * @param values values handed to the updater
+ * @param id     run number, used as the id attribute in the output
+ */
+template <typename Vector, typename T, typename BT>
+void
+AttributeBenchmark::benchmarkPopulate(const AttributePtr & ptr, const std::vector<T> & values, uint32_t id)
+{
+    typedef AttributeUpdater<Vector, T, BT> Updater;
+
+    std::cout << "<!-- Populate " << _config._numDocs << " documents -->" << std::endl;
+    Updater populator(ptr, values, _rndGen, _config._validate, _config._commitFreq,
+                      _config._minValueCount, _config._maxValueCount);
+    populator.populate();
+
+    std::cout << "<populate id='" << id << "'>" << std::endl;
+    populator.getStatus().printXML();
+    std::cout << "</populate>" << std::endl;
+
+    if (!_config._validate) {
+        return;
+    }
+    std::cout << "<!-- All " << populator.getValidator().getTotalCnt()
+              << " asserts passed -->" << std::endl;
+}
+
+/**
+ * Applies the configured number of updates with an AttributeUpdater and
+ * prints its status inside an <update id='...'> element.
+ *
+ * @param ptr    attribute to update
+ * @param values values handed to the updater
+ * @param id     run number, used as the id attribute in the output
+ */
+template <typename Vector, typename T, typename BT>
+void
+AttributeBenchmark::benchmarkUpdate(const AttributePtr & ptr, const std::vector<T> & values, uint32_t id)
+{
+    typedef AttributeUpdater<Vector, T, BT> Updater;
+
+    std::cout << "<!-- Apply " << _config._numUpdates << " updates -->" << std::endl;
+    Updater applier(ptr, values, _rndGen, _config._validate, _config._commitFreq,
+                    _config._minValueCount, _config._maxValueCount);
+    applier.update(_config._numUpdates);
+
+    std::cout << "<update id='" << id << "'>" << std::endl;
+    applier.getStatus().printXML();
+    std::cout << "</update>" << std::endl;
+
+    if (!_config._validate) {
+        return;
+    }
+    std::cout << "<!-- All " << applier.getValidator().getTotalCnt()
+              << " asserts passed -->" << std::endl;
+}
+
+/**
+ * Generic fallback: value types without a specialization yield no prefix
+ * strings at all.
+ */
+template <typename T>
+std::vector<vespalib::string>
+AttributeBenchmark::prepareForPrefixSearch(const std::vector<T> & values) const
+{
+    (void) values;
+    std::vector<vespalib::string> noPrefixes;
+    return noPrefixes;
+}
+
+/**
+ * String specialization: returns, for every value, its first
+ * _config._prefixLength characters (in the same order as the input).
+ */
+template <>
+std::vector<vespalib::string>
+AttributeBenchmark::prepareForPrefixSearch(const std::vector<AttributeVector::WeightedString> & values) const
+{
+    std::vector<vespalib::string> prefixes;
+    prefixes.reserve(values.size());
+    for (const AttributeVector::WeightedString & entry : values) {
+        prefixes.push_back(entry.getValue().substr(0, _config._prefixLength));
+    }
+    return prefixes;
+}
+
+/**
+ * Starts _config._numSearchers searcher threads, waits for all of them and
+ * prints one <searcher-summary> per thread followed by a merged
+ * <total-searcher-summary>. Does nothing when no searchers are configured.
+ *
+ * The searcher type is chosen per configuration: range search takes
+ * precedence over prefix search, and plain find is the fallback.
+ */
+template <typename T>
+void
+AttributeBenchmark::benchmarkSearch(const AttributePtr & ptr, const std::vector<T> & values)
+{
+    if (_config._numSearchers == 0) {
+        return;
+    }
+
+    std::cout << "<!-- Starting " << _config._numSearchers << " searcher threads with "
+              << _config._numQueries << " queries each -->" << std::endl;
+
+    std::vector<vespalib::string> prefixStrings = prepareForPrefixSearch(values);
+
+    // Searchers are heap allocated here and deleted once their status is reported.
+    std::vector<AttributeSearcher *> searchers;
+    for (uint32_t id = 0; id < _config._numSearchers; ++id) {
+        AttributeSearcher * searcher = NULL;
+        if (_config._rangeSearch) {
+            RangeSpec spec(_config._rangeStart, _config._rangeEnd, _config._rangeDelta);
+            searcher = new AttributeRangeSearcher(id, ptr, spec, _config._numQueries);
+        } else if (_config._prefixSearch) {
+            searcher = new AttributePrefixSearcher(id, ptr, prefixStrings, _config._numQueries);
+        } else {
+            searcher = new AttributeFindSearcher<T>(id, ptr, values, _config._numQueries);
+        }
+        searchers.push_back(searcher);
+        _threadPool->NewThread(searcher);
+    }
+
+    // All threads must be done before any status is read.
+    for (uint32_t id = 0; id < searchers.size(); ++id) {
+        searchers[id]->join();
+    }
+
+    AttributeSearcherStatus totalStatus;
+    for (uint32_t id = 0; id < searchers.size(); ++id) {
+        AttributeSearcher * finished = searchers[id];
+        std::cout << "<searcher-summary id='" << id << "'>" << std::endl;
+        finished->getStatus().printXML();
+        std::cout << "</searcher-summary>" << std::endl;
+        totalStatus.merge(finished->getStatus());
+        delete finished;
+    }
+    std::cout << "<total-searcher-summary>" << std::endl;
+    totalStatus.printXML();
+    std::cout << "</total-searcher-summary>" << std::endl;
+}
+
+/**
+ * Runs the search benchmark while one updater thread is modifying the
+ * attribute, then stops the updater and prints its status inside an
+ * <updater-summary> element. Does nothing when no searchers are configured.
+ */
+template <typename Vector, typename T, typename BT>
+void
+AttributeBenchmark::benchmarkSearchWithUpdater(const AttributePtr & ptr,
+                                               const std::vector<T> & values)
+{
+    if (_config._numSearchers == 0) {
+        return;
+    }
+
+    typedef AttributeUpdaterThread<Vector, T, BT> UpdaterThread;
+
+    std::cout << "<!-- Starting 1 updater thread -->" << std::endl;
+    UpdaterThread background(ptr, values, _rndGen, _config._validate, _config._commitFreq,
+                             _config._minValueCount, _config._maxValueCount);
+    _threadPool->NewThread(&background);
+
+    benchmarkSearch(ptr, values);
+
+    // The updater lives on this stack frame, so it must be joined before returning.
+    background.stop();
+    background.join();
+
+    std::cout << "<updater-summary>" << std::endl;
+    background.getStatus().printXML();
+    std::cout << "</updater-summary>" << std::endl;
+
+    if (!_config._validate) {
+        return;
+    }
+    std::cout << "<!-- All " << background.getValidator().getTotalCnt()
+              << " asserts passed -->" << std::endl;
+}
+
+/**
+ * Runs the complete benchmark on one attribute: add documents, populate,
+ * update, search (with or without a concurrent updater), and finally close
+ * the thread pool.
+ */
+template <typename Vector, typename T, typename BT>
+void
+AttributeBenchmark::benchmarkAttribute(const AttributePtr & ptr, const std::vector<T> & values)
+{
+    addDocs(ptr, _config._numDocs);
+
+    // populate
+    for (uint32_t run = 0; run < _config._populateRuns; ++run) {
+        benchmarkPopulate<Vector, T, BT>(ptr, values, run);
+    }
+
+    // update; skipped entirely when there are no updates to apply
+    const uint32_t updateRuns = (_config._numUpdates > 0) ? _config._updateRuns : 0;
+    for (uint32_t run = 0; run < updateRuns; ++run) {
+        benchmarkUpdate<Vector, T, BT>(ptr, values, run);
+    }
+
+    // search
+    if (!_config._searchersOnly) {
+        benchmarkSearchWithUpdater<Vector, T, BT>(ptr, values);
+    } else {
+        benchmarkSearch(ptr, values);
+    }
+
+    _threadPool->Close();
+}
+
+
+//-----------------------------------------------------------------------------
+// Numeric Attribute
+//-----------------------------------------------------------------------------
+/**
+ * Generates the numeric values (and weights) and benchmarks the attribute
+ * as an IntegerAttribute.
+ *
+ * For range search the values are 0 .. _numValues-1; otherwise they are
+ * random. Weights are only attached for weighted set attributes.
+ */
+void
+AttributeBenchmark::benchmarkNumeric(const AttributePtr & ptr)
+{
+    typedef AttributeVector::WeightedInt WeightedInt;
+
+    NumVector values;
+    if (!_config._rangeSearch) {
+        _rndGen.fillRandomIntegers(values, _config._numValues);
+    } else {
+        values.reserve(_config._numValues);
+        for (uint32_t next = 0; next < _config._numValues; ++next) {
+            values.push_back(next);
+        }
+    }
+
+    // Weights are always drawn so the generator is advanced the same way for every collection type.
+    std::vector<int32_t> weights;
+    _rndGen.fillRandomIntegers(weights, _config._numValues);
+
+    std::vector<WeightedInt> weightedVector;
+    weightedVector.reserve(values.size());
+    for (size_t pos = 0; pos < values.size(); ++pos) {
+        if (ptr->hasWeightedSetType()) {
+            weightedVector.push_back(WeightedInt(values[pos], weights[pos]));
+        } else {
+            weightedVector.push_back(WeightedInt(values[pos]));
+        }
+    }
+    benchmarkAttribute<IntegerAttribute, WeightedInt, WeightedInt>(ptr, weightedVector);
+}
+
+
+//-----------------------------------------------------------------------------
+// String Attribute
+//-----------------------------------------------------------------------------
+/**
+ * Generates random strings (and weights) and benchmarks the attribute as a
+ * StringAttribute. Weights are only attached for weighted set attributes.
+ */
+void
+AttributeBenchmark::benchmarkString(const AttributePtr & ptr)
+{
+    typedef AttributeVector::WeightedString WeightedString;
+
+    StringVector strings;
+    _rndGen.fillRandomStrings(strings, _config._numValues, _config._minStringLen, _config._maxStringLen);
+
+    // Weights are always drawn so the generator is advanced the same way for every collection type.
+    std::vector<int32_t> weights;
+    _rndGen.fillRandomIntegers(weights, _config._numValues);
+
+    std::vector<WeightedString> weightedVector;
+    weightedVector.reserve(strings.size());
+    for (size_t pos = 0; pos < strings.size(); ++pos) {
+        if (ptr->hasWeightedSetType()) {
+            weightedVector.push_back(WeightedString(strings[pos], weights[pos]));
+        } else {
+            weightedVector.push_back(WeightedString(strings[pos]));
+        }
+    }
+    benchmarkAttribute<StringAttribute, WeightedString, WeightedString>(ptr, weightedVector);
+}
+
+
+//-----------------------------------------------------------------------------
+// Resource utilization
+//-----------------------------------------------------------------------------
+namespace {
+
+// Converts a timeval to microseconds. The seconds are widened to 64 bits
+// before the multiplication so it cannot overflow a narrower time_t.
+uint64_t
+timevalToMicros(const struct timeval & tv)
+{
+    return static_cast<uint64_t>(tv.tv_sec) * 1000000u + static_cast<uint64_t>(tv.tv_usec);
+}
+
+// Splits a microsecond count into the seconds / microseconds pair of a timeval.
+void
+microsToTimeval(uint64_t micros, struct timeval & tv)
+{
+    tv.tv_sec = micros / 1000000;
+    tv.tv_usec = micros % 1000000;
+}
+
+}
+
+/**
+ * Computes the resource usage between two getrusage() samples.
+ *
+ * Times and counters are reported as (second - first). The memory figures
+ * (ru_maxrss, ru_ixrss, ru_idrss, ru_isrss) are copied from the second
+ * sample unchanged.
+ *
+ * @param first  the earlier sample
+ * @param second the later sample
+ * @return the usage difference
+ */
+struct rusage
+AttributeBenchmark::Resource::computeDifference(struct rusage & first, struct rusage & second)
+{
+    // Zero-initialize so any field not assigned below has a defined value.
+    struct rusage result = {};
+
+    // utime
+    microsToTimeval(timevalToMicros(second.ru_utime) - timevalToMicros(first.ru_utime), result.ru_utime);
+
+    // stime
+    microsToTimeval(timevalToMicros(second.ru_stime) - timevalToMicros(first.ru_stime), result.ru_stime);
+
+    result.ru_maxrss = second.ru_maxrss; // - first.ru_maxrss;
+    result.ru_ixrss = second.ru_ixrss; // - first.ru_ixrss;
+    result.ru_idrss = second.ru_idrss; // - first.ru_idrss;
+    result.ru_isrss = second.ru_isrss; // - first.ru_isrss;
+    result.ru_minflt = second.ru_minflt - first.ru_minflt;
+    result.ru_majflt = second.ru_majflt - first.ru_majflt;
+    result.ru_nswap = second.ru_nswap - first.ru_nswap;
+    result.ru_inblock = second.ru_inblock - first.ru_inblock;
+    result.ru_oublock = second.ru_oublock - first.ru_oublock;
+    result.ru_msgsnd = second.ru_msgsnd - first.ru_msgsnd;
+    result.ru_msgrcv = second.ru_msgrcv - first.ru_msgrcv;
+    result.ru_nsignals = second.ru_nsignals - first.ru_nsignals;
+    result.ru_nvcsw = second.ru_nvcsw - first.ru_nvcsw;
+    result.ru_nivcsw = second.ru_nivcsw - first.ru_nivcsw;
+
+    return result;
+}
+
+
+/**
+ * Prints the command line synopsis to stdout.
+ *
+ * The attribute names listed here are exactly the ones Main() recognizes.
+ */
+void
+AttributeBenchmark::usage()
+{
+    std::cout << "usage: attributebenchmark [-n numDocs] [-u numUpdates] [-v numValues]" << std::endl;
+    std::cout << " [-s numSearchers] [-q numQueries] [-p populateRuns] [-r updateRuns]" << std::endl;
+    std::cout << " [-c commitFrequency] [-l minValueCount] [-h maxValueCount]" << std::endl;
+    std::cout << " [-i minStringLen] [-a maxStringLen] [-e seed]" << std::endl;
+    std::cout << " [-S rangeStart] [-E rangeEnd] [-D rangeDelta] [-L prefixLength]" << std::endl;
+    std::cout << " [-b (searchers with updater)] [-R (range search)] [-P (prefix search)]" << std::endl;
+    std::cout << " [-t (validate updates)] [-w (write attribute to disk)]" << std::endl;
+    std::cout << " <attribute>" << std::endl;
+    std::cout << " <attribute> : s-int32, a-int32, ws-int32" << std::endl;
+    std::cout << " s-fs-int32, a-fs-int32, ws-fs-int32" << std::endl;
+    std::cout << " s-string, a-string, ws-string" << std::endl;
+    std::cout << " s-fs-string, a-fs-string, ws-fs-string" << std::endl;
+}
+
+/**
+ * Application entry point: parses the command line, creates the requested
+ * attribute, runs the benchmark and optionally writes the attribute to disk.
+ *
+ * Output is an <attribute-benchmark> XML element on stdout.
+ *
+ * @return 0 on success; -1 when the command line is invalid (usage is
+ *         printed) or the attribute name is not recognized.
+ */
+int
+AttributeBenchmark::Main()
+{
+    // Defaults; each of them can be overridden by a command line option.
+    Config dc;
+    dc._numDocs = 50000;
+    dc._numUpdates = 50000;
+    dc._numValues = 1000;
+    dc._numSearchers = 0;
+    dc._numQueries = 1000;
+    dc._searchersOnly = true;
+    dc._validate = false;
+    dc._populateRuns = 1;
+    dc._updateRuns = 1;
+    dc._commitFreq = 1000;
+    dc._minValueCount = 0;
+    dc._maxValueCount = 20;
+    dc._minStringLen = 1;
+    dc._maxStringLen = 50;
+    dc._seed = 555;
+    dc._writeAttribute = false;
+    dc._rangeStart = 0;
+    dc._rangeEnd = 1000;
+    dc._rangeDelta = 10;
+    dc._rangeSearch = false;
+    dc._prefixLength = 2;
+    dc._prefixSearch = false;
+
+    int idx = 1;
+    char opt;
+    const char * arg;
+    bool optError = false;
+    while ((opt = GetOpt("n:u:v:s:q:p:r:c:l:h:i:a:e:S:E:D:L:bRPtw", arg, idx)) != -1) {
+        switch (opt) {
+        case 'n': dc._numDocs = atoi(arg); break;
+        case 'u': dc._numUpdates = atoi(arg); break;
+        case 'v': dc._numValues = atoi(arg); break;
+        case 's': dc._numSearchers = atoi(arg); break;
+        case 'q': dc._numQueries = atoi(arg); break;
+        case 'p': dc._populateRuns = atoi(arg); break;
+        case 'r': dc._updateRuns = atoi(arg); break;
+        case 'c': dc._commitFreq = atoi(arg); break;
+        case 'l': dc._minValueCount = atoi(arg); break;
+        case 'h': dc._maxValueCount = atoi(arg); break;
+        case 'i': dc._minStringLen = atoi(arg); break;
+        case 'a': dc._maxStringLen = atoi(arg); break;
+        case 'e': dc._seed = atoi(arg); break;
+        case 'S': dc._rangeStart = strtoll(arg, NULL, 10); break;
+        case 'E': dc._rangeEnd = strtoll(arg, NULL, 10); break;
+        case 'D': dc._rangeDelta = strtoll(arg, NULL, 10); break;
+        case 'L': dc._prefixLength = atoi(arg); break;
+        case 'b': dc._searchersOnly = false; break;
+        case 'R': dc._rangeSearch = true; break;
+        case 'P': dc._prefixSearch = true; break;
+        case 't': dc._validate = true; break;
+        case 'w': dc._writeAttribute = true; break;
+        default:
+            optError = true;
+            break;
+        }
+    }
+
+    // Exactly one positional argument (the attribute name) must remain.
+    if (_argc != (idx + 1) || optError) {
+        usage();
+        return -1;
+    }
+
+    dc._attribute = vespalib::string(_argv[idx]);
+
+    _threadPool = new FastOS_ThreadPool(256000);
+
+    std::cout << "<attribute-benchmark>" << std::endl;
+    init(dc);
+    _config.printXML();
+
+    AttributePtr ptr;
+
+    if (_config._attribute == "s-int32") {
+        std::cout << "<!-- Benchmark SingleValueNumericAttribute<int32_t> -->" << std::endl;
+        ptr = AttributeFactory::createAttribute("s-int32", AttrConfig(BasicType::INT32, CollectionType::SINGLE));
+        benchmarkNumeric(ptr);
+
+    } else if (_config._attribute == "a-int32") {
+        std::cout << "<!-- Benchmark MultiValueNumericAttribute<int32_t> (array) -->" << std::endl;
+        ptr = AttributeFactory::createAttribute("a-int32", AttrConfig(BasicType::INT32, CollectionType::ARRAY));
+        benchmarkNumeric(ptr);
+
+    } else if (_config._attribute == "ws-int32") {
+        std::cout << "<!-- Benchmark MultiValueNumericAttribute<int32_t> (wset) -->" << std::endl;
+        ptr = AttributeFactory::createAttribute("ws-int32", AttrConfig(BasicType::INT32, CollectionType::WSET));
+        benchmarkNumeric(ptr);
+
+    } else if (_config._attribute == "s-fs-int32") {
+        std::cout << "<!-- Benchmark SingleValueNumericPostingAttribute<int32_t> -->" << std::endl;
+        AttrConfig cfg(BasicType::INT32, CollectionType::SINGLE);
+        cfg.setFastSearch(true);
+        ptr = AttributeFactory::createAttribute("s-fs-int32", cfg);
+        benchmarkNumeric(ptr);
+
+    } else if (_config._attribute == "a-fs-int32") {
+        std::cout << "<!-- Benchmark MultiValueNumericPostingAttribute<int32_t> (array) -->" << std::endl;
+        AttrConfig cfg(BasicType::INT32, CollectionType::ARRAY);
+        cfg.setFastSearch(true);
+        ptr = AttributeFactory::createAttribute("a-fs-int32", cfg);
+        benchmarkNumeric(ptr);
+
+    } else if (_config._attribute == "ws-fs-int32") {
+        std::cout << "<!-- Benchmark MultiValueNumericPostingAttribute<int32_t> (wset) -->" << std::endl;
+        AttrConfig cfg(BasicType::INT32, CollectionType::WSET);
+        cfg.setFastSearch(true);
+        ptr = AttributeFactory::createAttribute("ws-fs-int32", cfg);
+        benchmarkNumeric(ptr);
+
+    } else if (_config._attribute == "s-string") {
+        std::cout << "<!-- Benchmark SingleValueStringAttribute -->" << std::endl;
+        ptr = AttributeFactory::createAttribute("s-string", AttrConfig(BasicType::STRING, CollectionType::SINGLE));
+        benchmarkString(ptr);
+
+    } else if (_config._attribute == "a-string") {
+        std::cout << "<!-- Benchmark ArrayStringAttribute (array) -->" << std::endl;
+        ptr = AttributeFactory::createAttribute("a-string", AttrConfig(BasicType::STRING, CollectionType::ARRAY));
+        benchmarkString(ptr);
+
+    } else if (_config._attribute == "ws-string") {
+        std::cout << "<!-- Benchmark WeightedSetStringAttribute (wset) -->" << std::endl;
+        ptr = AttributeFactory::createAttribute("ws-string", AttrConfig(BasicType::STRING, CollectionType::WSET));
+        benchmarkString(ptr);
+
+    } else if (_config._attribute == "s-fs-string") {
+        std::cout << "<!-- Benchmark SingleValueStringPostingAttribute (single fast search) -->" << std::endl;
+        AttrConfig cfg(BasicType::STRING, CollectionType::SINGLE);
+        cfg.setFastSearch(true);
+        ptr = AttributeFactory::createAttribute("s-fs-string", cfg);
+        benchmarkString(ptr);
+
+    } else if (_config._attribute == "a-fs-string") {
+        std::cout << "<!-- Benchmark ArrayStringPostingAttribute (array fast search) -->" << std::endl;
+        AttrConfig cfg(BasicType::STRING, CollectionType::ARRAY);
+        cfg.setFastSearch(true);
+        ptr = AttributeFactory::createAttribute("a-fs-string", cfg);
+        benchmarkString(ptr);
+
+    } else if (_config._attribute == "ws-fs-string") {
+        std::cout << "<!-- Benchmark WeightedSetStringPostingAttribute (wset fast search) -->" << std::endl;
+        AttrConfig cfg(BasicType::STRING, CollectionType::WSET);
+        cfg.setFastSearch(true);
+        ptr = AttributeFactory::createAttribute("ws-fs-string", cfg);
+        benchmarkString(ptr);
+
+    } else {
+        // No benchmark matches the requested name, so ptr is still empty.
+        // Report the problem and stop before ptr can be dereferenced below;
+        // the root element is still closed to keep stdout well-formed.
+        std::cerr << "Unknown attribute: " << _config._attribute << std::endl;
+        std::cout << "</attribute-benchmark>" << std::endl;
+        return -1;
+    }
+
+    if (dc._writeAttribute) {
+        std::cout << "<!-- Writing attribute to disk -->" << std::endl;
+        ptr->saveAs(ptr->getBaseFileName());
+    }
+
+    std::cout << "</attribute-benchmark>" << std::endl;
+
+    return 0;
+}
+}
+
+/** Process entry point; hands the command line to the benchmark application. */
+int main(int argc, char ** argv)
+{
+    search::AttributeBenchmark benchmarkApp;
+    const int exitCode = benchmarkApp.Entry(argc, argv);
+    return exitCode;
+}
+
diff --git a/searchlib/src/tests/attribute/benchmark/attributebenchmark.rb b/searchlib/src/tests/attribute/benchmark/attributebenchmark.rb
new file mode 100644
index 00000000000..44b08ec4389
--- /dev/null
+++ b/searchlib/src/tests/attribute/benchmark/attributebenchmark.rb
@@ -0,0 +1,22 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vectors = ["sv-num-new", "mv-num-new", "sv-string-new", "mv-string-new"]#, "sv-num-old", "mv-num-old", "sv-string-old", "mv-string-old"]
+num_docs = [500000, 1000000, 2000000, 4000000, 8000000, 16000000]
+unique_percent = [0.001, 0.01, 0.05, 0.20, 0.50]
+
+vectors.each do |vector|
+ num_docs.each do |num|
+ unique_percent.each do |percent|
+ unique = num * percent
+ command = "./attributebenchmark -n #{num} -u 1000000 -v #{unique} -p 2 -r 1 -s 1 -q 1000 #{vector} > 03-27-full/#{vector}-n#{num}-v#{unique}-p2-r1-s1-q1000.log 2>&1"
+ puts command
+ `#{command}`
+ s = 1
+ 5.times do
+ command = "./attributebenchmark -n #{num} -v #{unique} -p 1 -r 0 -s #{s} -q 100 -b #{vector} > 03-27-full/#{vector}-n#{num}-v#{unique}-s#{s}-q100-b.log 2>&1"
+ puts command
+ `#{command}`
+ s = s*2;
+ end
+ end
+ end
+end
diff --git a/searchlib/src/tests/attribute/benchmark/benchmarkplotter.rb b/searchlib/src/tests/attribute/benchmark/benchmarkplotter.rb
new file mode 100644
index 00000000000..d77c92c8acd
--- /dev/null
+++ b/searchlib/src/tests/attribute/benchmark/benchmarkplotter.rb
@@ -0,0 +1,134 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+require 'rexml/document'
+
+def plot_graph(plot_data, plot_png, title, xlabel, ylabel, graph_titles)
+ plot_cmd = "";
+ plot_cmd += "set terminal png\n"
+ plot_cmd += "set output \"#{plot_png}\"\n"
+ plot_cmd += "set title \"#{title}\"\n"
+ plot_cmd += "set xlabel \"#{xlabel}\"\n"
+ plot_cmd += "set ylabel \"#{ylabel}\"\n"
+ c = 2
+ plots = []
+ plot_cmd += "plot "
+ graph_titles.each do |title|
+ plots.push("\"#{plot_data}\" using 1:#{c} title \"#{title}\" with linespoints")
+ c += 1
+ end
+ plot_cmd += plots.join(", ")
+
+ plot_cmd_file = File.open("plot_graph.cmd", "w")
+ plot_cmd_file.write(plot_cmd);
+ plot_cmd_file.close
+ cmd = "gnuplot plot_graph.cmd"
+ puts cmd
+ puts `#{cmd}`
+end
+
+def extract_alpha(num_docs, percentages, input, output, xml_getter)
+ plot_data = File.open(output, "w");
+ num_docs.each do |num|
+ data_line = "#{num} "
+ percentages.each do |prc|
+ unique = num * prc
+ filename = input.sub("#N", "#{num}").sub("#V", "#{unique}")
+ value = 0
+ begin
+ xml_root = REXML::Document.new(File.open(filename)).root
+ value = send(xml_getter, xml_root)
+ rescue REXML::ParseException
+ puts "Could not parse file: #{filename}"
+ end
+ data_line += "#{value} "
+ end
+ plot_data.write(data_line + "\n")
+ end
+ plot_data.close
+end
+
+def extract_beta(num_docs, percentage, num_threads, input, output, xml_getter)
+ plot_data = File.open(output, "w");
+ num_docs.each do |num|
+ data_line = "#{num} "
+ unique = num * percentage
+ num_threads.each do |thread|
+ filename = input.sub("#N", "#{num}").sub("#V", "#{unique}").sub("#S", "#{thread}")
+ value = 0
+ begin
+ xml_root = REXML::Document.new(File.open(filename)).root
+ value = send(xml_getter, xml_root)
+ rescue REXML::ParseException
+ puts "Could not parse file: #{filename}"
+ end
+ data_line += "#{value} "
+ end
+ plot_data.write(data_line + "\n")
+ end
+ plot_data.close
+end
+
+def xml_getter_update_0_throughput(xml_root)
+ return xml_root.elements["update[@id='0']"].elements["throughput"].text
+end
+
+def xml_getter_search_throughput(xml_root)
+ return xml_root.elements["total-searcher-summary"].elements["search-throughput"].text
+end
+
+def xml_getter_updater_thread_throughput(xml_root)
+ return throughput = xml_root.elements["updater-summary"].elements["throughput"].text
+end
+
+
+vectors = ["mv-num-new"]#, "mv-num-new", "sv-string-new", "mv-string-new"]#, "sv-num-old", "mv-num-old", "sv-string-old", "mv-string-old"]
+num_docs = [500000, 1000000, 2000000, 4000000, 8000000, 16000000]
+unique_percentages = [0.001, 0.01, 0.05, 0.20, 0.50]
+num_threads = [1, 2, 4, 8, 16]
+
+inputs = ["03-27-full/#AV-n#N-v#V-p2-r1-s1-q1000.log",
+ "03-27-full/#AV-n#N-v#V-s#S-q100-b.log"]
+graph_titles = [[], []]
+unique_percentages.each do |percentage|
+ graph_titles[0].push("#{percentage * 100} % uniques")
+end
+num_threads.each do |thread|
+ graph_titles[1].push("#{thread} searcher thread(s)")
+end
+
+vectors.each do |vector|
+ extract_alpha(num_docs, unique_percentages,
+ inputs[0].sub("#AV", vector),
+ "#{vector}-update-speed.dat",
+ :xml_getter_update_0_throughput)
+ plot_graph("#{vector}-update-speed.dat",
+ "#{vector}-update-speed.png",
+ "Update speed when applying 1M updates",
+ "Number of documents", "Updates per/sec", graph_titles[0])
+
+ extract_alpha(num_docs, unique_percentages,
+ inputs[0].sub("#AV", vector),
+ "#{vector}-search-speed.dat",
+ :xml_getter_search_throughput)
+ plot_graph("#{vector}-search-speed.dat",
+ "#{vector}-search-speed.png",
+ "Search speed with 1 searcher thread",
+ "Number of documents", "Queries per/sec", graph_titles[0])
+
+ extract_beta(num_docs, 0.01, num_threads,
+ inputs[1].sub("#AV", vector),
+ "#{vector}-search-speed-multiple.dat",
+ :xml_getter_search_throughput)
+ plot_graph("#{vector}-search-speed-multiple.dat",
+ "#{vector}-search-speed-multiple.png",
+ "Search speed with 1 update thread and X searcher threads",
+ "Number of documents", "Queries per/sec", graph_titles[1])
+
+ extract_beta(num_docs, 0.01, num_threads,
+ inputs[1].sub("#AV", vector),
+ "#{vector}-update-speed-multiple.dat",
+ :xml_getter_updater_thread_throughput)
+ plot_graph("#{vector}-update-speed-multiple.dat",
+ "#{vector}-update-speed-multiple.png",
+ "Update speed with 1 update thread and X searcher threads",
+ "Number of documents", "Updates per/sec", graph_titles[1])
+end