// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include #include #include #include #include #include #include #include #include "attributesearcher.h" #include "attributeupdater.h" #include #include #include LOG_SETUP("attributebenchmark"); using std::shared_ptr; using NumVector = std::vector; using StringVector = std::vector; using AttrConfig = search::attribute::Config; using search::attribute::BasicType; using search::attribute::CollectionType; namespace search { using AttributePtr = AttributeVector::SP; using DocId = AttributeVector::DocId; class AttributeBenchmark { private: class Config { public: vespalib::string _attribute; uint32_t _numDocs; uint32_t _numUpdates; uint32_t _numValues; uint32_t _numSearchers; uint32_t _numQueries; bool _searchersOnly; bool _validate; uint32_t _populateRuns; uint32_t _updateRuns; uint32_t _commitFreq; uint32_t _minValueCount; uint32_t _maxValueCount; uint32_t _minStringLen; uint32_t _maxStringLen; uint32_t _seed; bool _writeAttribute; int64_t _rangeStart; int64_t _rangeEnd; int64_t _rangeDelta; bool _rangeSearch; uint32_t _prefixLength; bool _prefixSearch; Config() : _attribute(""), _numDocs(0), _numUpdates(0), _numValues(0), _numSearchers(0), _numQueries(0), _searchersOnly(true), _validate(false), _populateRuns(0), _updateRuns(0), _commitFreq(0), _minValueCount(0), _maxValueCount(0), _minStringLen(0), _maxStringLen(0), _seed(0), _writeAttribute(false), _rangeStart(0), _rangeEnd(0), _rangeDelta(0), _rangeSearch(false), _prefixLength(0), _prefixSearch(false) {} void printXML() const; }; class Resource { private: std::vector _usages; struct rusage _reset; public: Resource() : _usages(), _reset() { reset(); }; void reset() { getrusage(0, &_reset); } void saveUsage() { struct rusage now; getrusage(0, &now); struct rusage usage = computeDifference(_reset, now); _usages.push_back(usage); } void printLastXML(uint32_t opCount) { (void) opCount; struct rusage & usage = _usages.back(); std::cout << "" << usage.ru_utime.tv_sec * 1000 + usage.ru_utime.tv_usec / 1000 << "" << std::endl; std::cout << "" << usage.ru_stime.tv_sec * 1000 + usage.ru_stime.tv_usec / 1000 << "" << std::endl; std::cout << "" << usage.ru_nvcsw << "" << std::endl; std::cout << "" << usage.ru_nivcsw << "" << std::endl; } static struct rusage computeDifference(struct rusage & first, struct rusage & second); }; Config _config; RandomGenerator _rndGen; void init(const Config & config); void usage(); // benchmark helper methods void addDocs(const AttributePtr & ptr, uint32_t numDocs); template void benchmarkPopulate(const AttributePtr & ptr, const std::vector & values, uint32_t id); template void benchmarkUpdate(const AttributePtr & ptr, const std::vector & values, uint32_t id); template std::vector prepareForPrefixSearch(const std::vector & values) const; template void benchmarkSearch(const AttributePtr & ptr, const std::vector & values); template void benchmarkSearchWithUpdater(const AttributePtr & ptr, const std::vector & values); template void benchmarkAttribute(const AttributePtr & ptr, const std::vector & values); // Numeric Attribute void benchmarkNumeric(const AttributePtr & ptr); // String Attribute void benchmarkString(const AttributePtr & ptr); public: AttributeBenchmark() : _config(), _rndGen() {} ~AttributeBenchmark() = default; int main(int argc, char **argv); }; void AttributeBenchmark::Config::printXML() const { std::cout << "" << std::endl; std::cout << "" << _attribute << "" << std::endl; std::cout << "" << _numDocs << "" << std::endl; std::cout << "" << _numUpdates << "" << std::endl; std::cout << "" << _numValues << "" << std::endl; std::cout << "" << _numSearchers << "" << std::endl; std::cout << "" << _numQueries << "" << std::endl; std::cout << "" << (_searchersOnly ? "true" : "false") << "" << std::endl; std::cout << "" << (_validate ? "true" : "false") << "" << std::endl; std::cout << "" << _populateRuns << "" << std::endl; std::cout << "" << _updateRuns << "" << std::endl; std::cout << "" << _commitFreq << "" << std::endl; std::cout << "" << _minValueCount << "" << std::endl; std::cout << "" << _maxValueCount << "" << std::endl; std::cout << "" << _minStringLen << "" << std::endl; std::cout << "" << _maxStringLen << "" << std::endl; std::cout << "" << _seed << "" << std::endl; std::cout << "" << _rangeStart << "" << std::endl; std::cout << "" << _rangeEnd << "" << std::endl; std::cout << "" << _rangeDelta << "" << std::endl; std::cout << "" << (_rangeSearch ? "true" : "false") << "" << std::endl; std::cout << "" << _prefixLength << "" << std::endl; std::cout << "" << (_prefixSearch ? "true" : "false") << "" << std::endl; std::cout << "" << std::endl; } void AttributeBenchmark::init(const Config & config) { _config = config; _rndGen.srand(_config._seed); } //----------------------------------------------------------------------------- // Benchmark helper methods //----------------------------------------------------------------------------- void AttributeBenchmark::addDocs(const AttributePtr & ptr, uint32_t numDocs) { DocId startDoc; DocId lastDoc; bool success = ptr->addDocs(startDoc, lastDoc, numDocs); assert(success); (void) success; assert(startDoc == 0); assert(lastDoc + 1 == numDocs); assert(ptr->getNumDocs() == numDocs); } template void AttributeBenchmark::benchmarkPopulate(const AttributePtr & ptr, const std::vector & values, uint32_t id) { std::cout << "" << std::endl; AttributeUpdater updater(ptr, values, _rndGen, _config._validate, _config._commitFreq, _config._minValueCount, _config._maxValueCount); updater.populate(); std::cout << "" << std::endl; updater.getStatus().printXML(); std::cout << "" << std::endl; if (_config._validate) { std::cout << "" << std::endl; } } template void AttributeBenchmark::benchmarkUpdate(const AttributePtr & ptr, const std::vector & values, uint32_t id) { std::cout << "" << std::endl; AttributeUpdater updater(ptr, values, _rndGen, _config._validate, _config._commitFreq, _config._minValueCount, _config._maxValueCount); updater.update(_config._numUpdates); std::cout << "" << std::endl; updater.getStatus().printXML(); std::cout << "" << std::endl; if (_config._validate) { std::cout << "" << std::endl; } } template std::vector AttributeBenchmark::prepareForPrefixSearch(const std::vector & values) const { (void) values; return std::vector(); } template <> std::vector AttributeBenchmark::prepareForPrefixSearch(const std::vector & values) const { std::vector retval; retval.reserve(values.size()); for (size_t i = 0; i < values.size(); ++i) { retval.push_back(values[i].getValue().substr(0, _config._prefixLength)); } return retval; } template void AttributeBenchmark::benchmarkSearch(const AttributePtr & ptr, const std::vector & values) { std::vector searchers; if (_config._numSearchers > 0) { std::cout << "" << std::endl; std::vector prefixStrings = prepareForPrefixSearch(values); for (uint32_t i = 0; i < _config._numSearchers; ++i) { if (_config._rangeSearch) { RangeSpec spec(_config._rangeStart, _config._rangeEnd, _config._rangeDelta); searchers.push_back(new AttributeRangeSearcher(ptr, spec, _config._numQueries)); } else if (_config._prefixSearch) { searchers.push_back(new AttributePrefixSearcher(ptr, prefixStrings, _config._numQueries)); } else { searchers.push_back(new AttributeFindSearcher(ptr, values, _config._numQueries)); } searchers.back()->start(); } for (uint32_t i = 0; i < searchers.size(); ++i) { searchers[i]->join(); } AttributeSearcherStatus totalStatus; for (uint32_t i = 0; i < searchers.size(); ++i) { std::cout << "" << std::endl; searchers[i]->getStatus().printXML(); std::cout << "" << std::endl; totalStatus.merge(searchers[i]->getStatus()); delete searchers[i]; } std::cout << "" << std::endl; totalStatus.printXML(); std::cout << "" << std::endl; } } template void AttributeBenchmark::benchmarkSearchWithUpdater(const AttributePtr & ptr, const std::vector & values) { if (_config._numSearchers > 0) { std::cout << "" << std::endl; AttributeUpdaterThread updater(ptr, values, _rndGen, _config._validate, _config._commitFreq, _config._minValueCount, _config._maxValueCount); updater.start(); benchmarkSearch(ptr, values); updater.stop(); updater.join(); std::cout << "" << std::endl; updater.getStatus().printXML(); std::cout << "" << std::endl; if (_config._validate) { std::cout << "" << std::endl; } } } template void AttributeBenchmark::benchmarkAttribute(const AttributePtr & ptr, const std::vector & values) { addDocs(ptr, _config._numDocs); // populate for (uint32_t i = 0; i < _config._populateRuns; ++i) { benchmarkPopulate(ptr, values, i); } // update if (_config._numUpdates > 0) { for (uint32_t i = 0; i < _config._updateRuns; ++i) { benchmarkUpdate(ptr, values, i); } } // search if (_config._searchersOnly) { benchmarkSearch(ptr, values); } else { benchmarkSearchWithUpdater(ptr, values); } } //----------------------------------------------------------------------------- // Numeric Attribute //----------------------------------------------------------------------------- void AttributeBenchmark::benchmarkNumeric(const AttributePtr & ptr) { NumVector values; if (_config._rangeSearch) { values.reserve(_config._numValues); for (uint32_t i = 0; i < _config._numValues; ++i) { values.push_back(i); } } else { _rndGen.fillRandomIntegers(values, _config._numValues); } std::vector weights; _rndGen.fillRandomIntegers(weights, _config._numValues); std::vector weightedVector; weightedVector.reserve(values.size()); for (size_t i = 0; i < values.size(); ++i) { if (!ptr->hasWeightedSetType()) { weightedVector.push_back(AttributeVector::WeightedInt(values[i])); } else { weightedVector.push_back(AttributeVector::WeightedInt(values[i], weights[i])); } } benchmarkAttribute (ptr, weightedVector); } //----------------------------------------------------------------------------- // String Attribute //----------------------------------------------------------------------------- void AttributeBenchmark::benchmarkString(const AttributePtr & ptr) { StringVector strings; _rndGen.fillRandomStrings(strings, _config._numValues, _config._minStringLen, _config._maxStringLen); std::vector weights; _rndGen.fillRandomIntegers(weights, _config._numValues); std::vector weightedVector; weightedVector.reserve(strings.size()); for (size_t i = 0; i < strings.size(); ++i) { if (!ptr->hasWeightedSetType()) { weightedVector.push_back(AttributeVector::WeightedString(strings[i])); } else { weightedVector.push_back(AttributeVector::WeightedString(strings[i], weights[i])); } } benchmarkAttribute (ptr, weightedVector); } //----------------------------------------------------------------------------- // Resource utilization //----------------------------------------------------------------------------- struct rusage AttributeBenchmark::Resource::computeDifference(struct rusage & first, struct rusage & second) { struct rusage result; // utime uint64_t firstutime = first.ru_utime.tv_sec * 1000000 + first.ru_utime.tv_usec; uint64_t secondutime = second.ru_utime.tv_sec * 1000000 + second.ru_utime.tv_usec; uint64_t resultutime = secondutime - firstutime; result.ru_utime.tv_sec = resultutime / 1000000; result.ru_utime.tv_usec = resultutime % 1000000; // stime uint64_t firststime = first.ru_stime.tv_sec * 1000000 + first.ru_stime.tv_usec; uint64_t secondstime = second.ru_stime.tv_sec * 1000000 + second.ru_stime.tv_usec; uint64_t resultstime = secondstime - firststime; result.ru_stime.tv_sec = resultstime / 1000000; result.ru_stime.tv_usec = resultstime % 1000000; result.ru_maxrss = second.ru_maxrss; // - first.ru_maxrss; result.ru_ixrss = second.ru_ixrss; // - first.ru_ixrss; result.ru_idrss = second.ru_idrss; // - first.ru_idrss; result.ru_isrss = second.ru_isrss; // - first.ru_isrss; result.ru_minflt = second.ru_minflt - first.ru_minflt; result.ru_majflt = second.ru_majflt - first.ru_majflt; result.ru_nswap = second.ru_nswap - first.ru_nswap; result.ru_inblock = second.ru_inblock - first.ru_inblock; result.ru_oublock = second.ru_oublock - first.ru_oublock; result.ru_msgsnd = second.ru_msgsnd - first.ru_msgsnd; result.ru_msgrcv = second.ru_msgrcv - first.ru_msgrcv; result.ru_nsignals = second.ru_nsignals - first.ru_nsignals; result.ru_nvcsw = second.ru_nvcsw - first.ru_nvcsw; result.ru_nivcsw = second.ru_nivcsw - first.ru_nivcsw; return result; } void AttributeBenchmark::usage() { std::cout << "usage: attributebenchmark [-n numDocs] [-u numUpdates] [-v numValues]" << std::endl; std::cout << " [-s numSearchers] [-q numQueries] [-p populateRuns] [-r updateRuns]" << std::endl; std::cout << " [-c commitFrequency] [-l minValueCount] [-h maxValueCount]" << std::endl; std::cout << " [-i minStringLen] [-a maxStringLen] [-e seed]" << std::endl; std::cout << " [-S rangeStart] [-E rangeEnd] [-D rangeDelta] [-L prefixLength]" << std::endl; std::cout << " [-b (searchers with updater)] [-R (range search)] [-P (prefix search)]" << std::endl; std::cout << " [-t (validate updates)] [-w (write attribute to disk)]" << std::endl; std::cout << " " << std::endl; std::cout << " : s-uint32, a-uint32, ws-uint32" << std::endl; std::cout << " s-fa-uint32, a-fa-uint32, ws-fa-uint32" << std::endl; std::cout << " s-fs-uint32, a-fs-uint32, ws-fs-uint32 ws-frs-uint32" << std::endl; std::cout << " s-string, a-string, ws-string" << std::endl; std::cout << " s-fs-string, a-fs-string, ws-fs-string ws-frs-string" << std::endl; } int AttributeBenchmark::main(int argc, char **argv) { Config dc; dc._numDocs = 50000; dc._numUpdates = 50000; dc._numValues = 1000; dc._numSearchers = 0; dc._numQueries = 1000; dc._searchersOnly = true; dc._validate = false; dc._populateRuns = 1; dc._updateRuns = 1; dc._commitFreq = 1000; dc._minValueCount = 0; dc._maxValueCount = 20; dc._minStringLen = 1; dc._maxStringLen = 50; dc._seed = 555; dc._writeAttribute = false; dc._rangeStart = 0; dc._rangeEnd = 1000; dc._rangeDelta = 10; dc._rangeSearch = false; dc._prefixLength = 2; dc._prefixSearch = false; int opt; bool optError = false; while ((opt = getopt(argc, argv, "n:u:v:s:q:p:r:c:l:h:i:a:e:S:E:D:L:bRPtw")) != -1) { switch (opt) { case 'n': dc._numDocs = atoi(optarg); break; case 'u': dc._numUpdates = atoi(optarg); break; case 'v': dc._numValues = atoi(optarg); break; case 's': dc._numSearchers = atoi(optarg); break; case 'q': dc._numQueries = atoi(optarg); break; case 'p': dc._populateRuns = atoi(optarg); break; case 'r': dc._updateRuns = atoi(optarg); break; case 'c': dc._commitFreq = atoi(optarg); break; case 'l': dc._minValueCount = atoi(optarg); break; case 'h': dc._maxValueCount = atoi(optarg); break; case 'i': dc._minStringLen = atoi(optarg); break; case 'a': dc._maxStringLen = atoi(optarg); break; case 'e': dc._seed = atoi(optarg); break; case 'S': dc._rangeStart = strtoll(optarg, NULL, 10); break; case 'E': dc._rangeEnd = strtoll(optarg, NULL, 10); break; case 'D': dc._rangeDelta = strtoll(optarg, NULL, 10); break; case 'L': dc._prefixLength = atoi(optarg); break; case 'b': dc._searchersOnly = false; break; case 'R': dc._rangeSearch = true; break; case 'P': dc._prefixSearch = true; break; case 't': dc._validate = true; break; case 'w': dc._writeAttribute = true; break; default: optError = true; break; } } if (argc != (optind + 1) || optError) { usage(); return -1; } dc._attribute = vespalib::string(argv[optind]); std::cout << "" << std::endl; init(dc); _config.printXML(); AttributePtr ptr; if (_config._attribute == "s-int32") { std::cout << "" << std::endl; ptr = AttributeFactory::createAttribute("s-int32", AttrConfig(BasicType::INT32, CollectionType::SINGLE)); benchmarkNumeric(ptr); } else if (_config._attribute == "a-int32") { std::cout << "" << std::endl; ptr = AttributeFactory::createAttribute("a-int32", AttrConfig(BasicType::INT32, CollectionType::ARRAY)); benchmarkNumeric(ptr); } else if (_config._attribute == "ws-int32") { std::cout << "" << std::endl; ptr = AttributeFactory::createAttribute("ws-int32", AttrConfig(BasicType::INT32, CollectionType::WSET)); benchmarkNumeric(ptr); } else if (_config._attribute == "s-fs-int32") { std::cout << "" << std::endl; AttrConfig cfg(BasicType::INT32, CollectionType::SINGLE); cfg.setFastSearch(true); ptr = AttributeFactory::createAttribute("s-fs-int32", cfg); benchmarkNumeric(ptr); } else if (_config._attribute == "a-fs-int32") { std::cout << "" << std::endl; AttrConfig cfg(BasicType::INT32, CollectionType::ARRAY); cfg.setFastSearch(true); ptr = AttributeFactory::createAttribute("a-fs-int32", cfg); benchmarkNumeric(ptr); } else if (_config._attribute == "ws-fs-int32") { std::cout << "" << std::endl; AttrConfig cfg(BasicType::INT32, CollectionType::WSET); cfg.setFastSearch(true); ptr = AttributeFactory::createAttribute("ws-fs-int32", cfg); benchmarkNumeric(ptr); } else if (_config._attribute == "s-string") { std::cout << "" << std::endl; ptr = AttributeFactory::createAttribute("s-string", AttrConfig(BasicType::STRING, CollectionType::SINGLE)); benchmarkString(ptr); } else if (_config._attribute == "a-string") { std::cout << "" << std::endl; ptr = AttributeFactory::createAttribute("a-string", AttrConfig(BasicType::STRING, CollectionType::ARRAY)); benchmarkString(ptr); } else if (_config._attribute == "ws-string") { std::cout << "" << std::endl; ptr = AttributeFactory::createAttribute("ws-string", AttrConfig(BasicType::STRING, CollectionType::WSET)); benchmarkString(ptr); } else if (_config._attribute == "s-fs-string") { std::cout << "" << std::endl; AttrConfig cfg(BasicType::STRING, CollectionType::SINGLE); cfg.setFastSearch(true); ptr = AttributeFactory::createAttribute("s-fs-string", cfg); benchmarkString(ptr); } else if (_config._attribute == "a-fs-string") { std::cout << "" << std::endl; AttrConfig cfg(BasicType::STRING, CollectionType::ARRAY); cfg.setFastSearch(true); ptr = AttributeFactory::createAttribute("a-fs-string", cfg); benchmarkString(ptr); } else if (_config._attribute == "ws-fs-string") { std::cout << "" << std::endl; AttrConfig cfg(BasicType::STRING, CollectionType::WSET); cfg.setFastSearch(true); ptr = AttributeFactory::createAttribute("ws-fs-string", cfg); benchmarkString(ptr); } if (dc._writeAttribute) { std::cout << "" << std::endl; ptr->save(ptr->getBaseFileName()); } std::cout << "" << std::endl; return 0; } } int main(int argc, char **argv) { vespalib::SignalHandler::PIPE.ignore(); search::AttributeBenchmark myapp; return myapp.main(argc, argv); }