diff options
author | Geir Storli <geirst@verizonmedia.com> | 2019-05-08 13:38:40 +0000 |
---|---|---|
committer | Geir Storli <geirst@verizonmedia.com> | 2019-05-08 13:38:40 +0000 |
commit | cdde44ae5723a067e45abfcd7e1dd62d8c1f2612 (patch) | |
tree | 51d0d4a24d237d2ae6a2836040450b799252fde0 /searchlib/src/tests/postinglistbm | |
parent | 0678777a769aa694997cc993d3e7dbd87ca84002 (diff) |
Add support for running the posting list benchmark with different operators (direct, and, or).
Diffstat (limited to 'searchlib/src/tests/postinglistbm')
-rw-r--r-- | searchlib/src/tests/postinglistbm/postinglistbm.cpp | 33 | ||||
-rw-r--r-- | searchlib/src/tests/postinglistbm/stress_runner.cpp | 209 | ||||
-rw-r--r-- | searchlib/src/tests/postinglistbm/stress_runner.h | 11 |
3 files changed, 173 insertions, 80 deletions
diff --git a/searchlib/src/tests/postinglistbm/postinglistbm.cpp b/searchlib/src/tests/postinglistbm/postinglistbm.cpp index d910f502cb1..16b8e9cd7f5 100644 --- a/searchlib/src/tests/postinglistbm/postinglistbm.cpp +++ b/searchlib/src/tests/postinglistbm/postinglistbm.cpp @@ -34,6 +34,7 @@ private: uint32_t _rareDocFreq; uint32_t _numWordsPerClass; std::vector<std::string> _postingTypes; + StressRunner::OperatorType _operatorType; uint32_t _loops; uint32_t _skipCommonPairsRate; FakeWordSet _wordSet; @@ -62,6 +63,7 @@ usage() "[-l <numLoops>] " "[-s <stride>] " "[-t <postingType>] " + "[-o {direct, and, or}] " "[-u] " "[-w <numWordsPerClass>]\n"); } @@ -69,7 +71,7 @@ usage() void badPostingType(const std::string &postingType) { - printf("Bad posting list type: %s\n", postingType.c_str()); + printf("Bad posting list type: '%s'\n", postingType.c_str()); printf("Supported types: "); bool first = true; @@ -91,6 +93,7 @@ PostingListBM::PostingListBM() _rareDocFreq(10), _numWordsPerClass(100), _postingTypes(), + _operatorType(StressRunner::OperatorType::And), _loops(1), _skipCommonPairsRate(1), _wordSet(), @@ -113,7 +116,7 @@ PostingListBM::Main() bool hasElements = false; bool hasElementWeights = false; - while ((c = GetOpt("C:c:m:r:d:l:s:t:uw:T:q", optArg, argi)) != -1) { + while ((c = GetOpt("C:c:m:r:d:l:s:t:o:uw:T:q", optArg, argi)) != -1) { switch(c) { case 'C': _skipCommonPairsRate = atoi(optArg); @@ -129,7 +132,8 @@ PostingListBM::Main() hasElements = true; hasElementWeights = true; } else { - printf("Bad collection type: %s\n", optArg); + printf("Bad collection type: '%s'\n", optArg); + printf("Supported types: single, array, weightedSet\n"); return 1; } break; @@ -166,6 +170,22 @@ PostingListBM::Main() } while (0); _postingTypes.push_back(optArg); break; + case 'o': + { + vespalib::string operatorType(optArg); + if (operatorType == "direct") { + _operatorType = StressRunner::OperatorType::Direct; + } else if (operatorType == "and") { + _operatorType = 
StressRunner::OperatorType::And; + } else if (operatorType == "or") { + _operatorType = StressRunner::OperatorType::Or; + } else { + printf("Bad operator type: '%s'\n", operatorType.c_str()); + printf("Supported types: direct, and, or\n"); + return 1; + } + break; + } case 'u': _unpack = true; break; @@ -193,8 +213,11 @@ PostingListBM::Main() _wordSet.setupWords(_rnd, _numDocs, _commonDocFreq, _mediumDocFreq, _rareDocFreq, _numWordsPerClass); - StressRunner::run(_rnd, _wordSet, - _postingTypes, _loops, + StressRunner::run(_rnd, + _wordSet, + _postingTypes, + _operatorType, + _loops, _skipCommonPairsRate, numTasks, _stride, diff --git a/searchlib/src/tests/postinglistbm/stress_runner.cpp b/searchlib/src/tests/postinglistbm/stress_runner.cpp index 21387995883..98ace5e00a1 100644 --- a/searchlib/src/tests/postinglistbm/stress_runner.cpp +++ b/searchlib/src/tests/postinglistbm/stress_runner.cpp @@ -28,7 +28,8 @@ using namespace search::fakedata; namespace postinglistbm { -class AndStressWorker; +class StressWorker; +using StressWorkerUP = std::unique_ptr<StressWorker>; class StressMaster { private: @@ -37,16 +38,17 @@ private: StressMaster &operator=(const StressMaster &); search::Rand48 &_rnd; - unsigned int _numDocs; + uint32_t _numDocs; std::vector<std::string> _postingTypes; - unsigned int _loops; - unsigned int _skipCommonPairsRate; + StressRunner::OperatorType _operatorType; + uint32_t _loops; + uint32_t _skipCommonPairsRate; uint32_t _stride; bool _unpack; FastOS_ThreadPool *_threadPool; - std::vector<AndStressWorker *> _workers; - unsigned int _workersDone; + std::vector<StressWorkerUP> _workers; + uint32_t _workersDone; FakeWordSet &_wordSet; @@ -54,7 +56,7 @@ private: std::mutex _taskLock; std::condition_variable _taskCond; - unsigned int _taskIdx; + uint32_t _taskIdx; uint32_t _numTasks; public: @@ -67,8 +69,9 @@ public: StressMaster(search::Rand48 &rnd, FakeWordSet &wordSet, const std::vector<std::string> &postingType, - unsigned int loops, - unsigned int 
skipCommonPairsRate, + StressRunner::OperatorType operatorType, + uint32_t loops, + uint32_t skipCommonPairsRate, uint32_t numTasks, uint32_t stride, bool unpack); @@ -81,40 +84,68 @@ public: void dropPostings(); void dropTasks(); void resetTasks(); // Prepare for rerun - void setupTasks(unsigned int numTasks); + void setupTasks(uint32_t numTasks); Task *getTask(); - unsigned int getNumDocs() const { return _numDocs; } + uint32_t getNumDocs() const { return _numDocs; } bool getUnpack() const { return _unpack; } double runWorkers(const std::string &postingFormat); }; -class AndStressWorker : public FastOS_Runnable { +class StressWorker : public FastOS_Runnable { +protected: + StressMaster& _master; + uint32_t _id; + + virtual void run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack) = 0; + +public: + StressWorker(const StressWorker&) = delete; + StressWorker& operator=(const StressWorker&) = delete; + + StressWorker(StressMaster& master, uint32_t id); + virtual ~StressWorker(); + + virtual void Run(FastOS_ThreadInterface* thisThread, void* arg) override; +}; + +class DirectStressWorker : public StressWorker { private: - AndStressWorker(const AndStressWorker &); + void run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack) override; - AndStressWorker & - operator=(const AndStressWorker &); +public: + DirectStressWorker(StressMaster& master, uint32_t id); +}; + +class AndStressWorker : public StressWorker { +private: + void run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack) override; - StressMaster &_master; - unsigned int _id; public: - AndStressWorker(StressMaster &master, unsigned int id); - ~AndStressWorker(); - virtual void Run(FastOS_ThreadInterface *thisThread, void *arg) override; + AndStressWorker(StressMaster& master, uint32_t id); +}; + +class OrStressWorker : public StressWorker { +private: + void run_task(const FakePosting& f1, const 
FakePosting& f2, uint32_t doc_id_limit, bool unpack) override; + +public: + OrStressWorker(StressMaster& master, uint32_t id); }; StressMaster::StressMaster(search::Rand48 &rnd, FakeWordSet &wordSet, const std::vector<std::string> &postingTypes, - unsigned int loops, - unsigned int skipCommonPairsRate, + StressRunner::OperatorType operatorType, + uint32_t loops, + uint32_t skipCommonPairsRate, uint32_t numTasks, uint32_t stride, bool unpack) : _rnd(rnd), _numDocs(wordSet.numDocs()), _postingTypes(postingTypes), + _operatorType(operatorType), _loops(loops), _skipCommonPairsRate(skipCommonPairsRate), _stride(stride), @@ -135,16 +166,6 @@ StressMaster::StressMaster(search::Rand48 &rnd, _threadPool = new FastOS_ThreadPool(128 * 1024, 400); } -template <class C> -static void -clearPtrVector(std::vector<C> &vector) -{ - for (auto& elem : vector) { - delete elem; - } - vector.clear(); -} - StressMaster::~StressMaster() { LOG(info, "StressMaster::~StressMaster()"); @@ -152,7 +173,7 @@ StressMaster::~StressMaster() _threadPool->Close(); delete _threadPool; _threadPool = nullptr; - clearPtrVector(_workers); + _workers.clear(); dropPostings(); } @@ -235,14 +256,14 @@ StressMaster::makePostingsHelper(FPFactory *postingFactory, } void -StressMaster::setupTasks(unsigned int numTasks) +StressMaster::setupTasks(uint32_t numTasks) { - unsigned int wordclass1; - unsigned int wordclass2; - unsigned int word1idx; - unsigned int word2idx; + uint32_t wordclass1; + uint32_t wordclass2; + uint32_t word1idx; + uint32_t word2idx; - for (unsigned int i = 0; i < numTasks; ++i) { + for (uint32_t i = 0; i < numTasks; ++i) { wordclass1 = _rnd.lrand48() % _postings.size(); wordclass2 = _rnd.lrand48() % _postings.size(); while (wordclass1 == FakeWordSet::COMMON_WORD && @@ -286,7 +307,7 @@ StressMaster::run() makePostingsHelper(factory.get(), type, true, false); setupTasks(_numTasks); double totalTime = 0; - for (unsigned int loop = 0; loop < _loops; ++loop) { + for (uint32_t loop = 0; loop < 
_loops; ++loop) { totalTime += runWorkers(type); resetTasks(); } @@ -306,13 +327,19 @@ StressMaster::runWorkers(const std::string &postingFormat) tv.SetNow(); before = tv.Secs(); - unsigned int numWorkers = 8; - for (unsigned int i = 0; i < numWorkers; ++i) { - _workers.push_back(new AndStressWorker(*this, i)); + uint32_t numWorkers = 8; + for (uint32_t i = 0; i < numWorkers; ++i) { + if (_operatorType == StressRunner::OperatorType::Direct) { + _workers.push_back(std::make_unique<DirectStressWorker>(*this, i)); + } else if (_operatorType == StressRunner::OperatorType::And) { + _workers.push_back(std::make_unique<AndStressWorker>(*this, i)); + } else if (_operatorType == StressRunner::OperatorType::Or) { + _workers.push_back(std::make_unique<OrStressWorker>(*this, i)); + } } for (auto& worker : _workers) { - _threadPool->NewThread(worker); + _threadPool->NewThread(worker.get()); } { @@ -327,39 +354,25 @@ StressMaster::runWorkers(const std::string &postingFormat) "StressMaster::run() elapsed %10.6f s for workers %s format", after - before, postingFormat.c_str()); - clearPtrVector(_workers); + _workers.clear(); _workersDone = 0; return after - before; } -AndStressWorker::AndStressWorker(StressMaster &master, unsigned int id) +StressWorker::StressWorker(StressMaster& master, uint32_t id) : _master(master), _id(id) { - LOG(debug, "AndStressWorker::AndStressWorker(), id=%u", id); } -AndStressWorker::~AndStressWorker() -{ - LOG(debug, "AndStressWorker::~AndStressWorker(), id=%u", _id); -} - -void -testFakePair(const FakePosting &f1, const FakePosting &f2, uint32_t doc_id_limit, bool unpack) -{ - uint64_t scan_unpack_time = 0; - int hits = unpack ? 
- FakeMatchLoop::and_pair_posting_scan_with_unpack(f1, f2, doc_id_limit, scan_unpack_time) : - FakeMatchLoop::and_pair_posting_scan(f1, f2, doc_id_limit, scan_unpack_time); - (void) hits; -} +StressWorker::~StressWorker() = default; void -AndStressWorker::Run(FastOS_ThreadInterface *thisThread, void *arg) +StressWorker::Run(FastOS_ThreadInterface* thisThread, void* arg) { (void) thisThread; (void) arg; - LOG(debug, "AndStressWorker::Run(), id=%u", _id); + LOG(debug, "StressWorker::Run(), id=%u", _id); bool unpack = _master.getUnpack(); for (;;) { @@ -367,8 +380,54 @@ AndStressWorker::Run(FastOS_ThreadInterface *thisThread, void *arg) if (task == nullptr) { break; } - testFakePair(*task->first, *task->second, _master.getNumDocs(), - unpack); + run_task(*task->first, *task->second, _master.getNumDocs(), unpack); + } +} + +DirectStressWorker::DirectStressWorker(StressMaster& master, uint32_t id) + : StressWorker(master, id) +{ +} + +void +DirectStressWorker::run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack) +{ + if (unpack) { + FakeMatchLoop::direct_posting_scan_with_unpack(f1, doc_id_limit); + FakeMatchLoop::direct_posting_scan_with_unpack(f2, doc_id_limit); + } else { + FakeMatchLoop::direct_posting_scan(f1, doc_id_limit); + FakeMatchLoop::direct_posting_scan(f2, doc_id_limit); + } +} + +AndStressWorker::AndStressWorker(StressMaster& master, uint32_t id) + : StressWorker(master, id) +{ +} + +void +AndStressWorker::run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack) +{ + if (unpack) { + FakeMatchLoop::and_pair_posting_scan_with_unpack(f1, f2, doc_id_limit); + } else { + FakeMatchLoop::and_pair_posting_scan(f1, f2, doc_id_limit); + } +} + +OrStressWorker::OrStressWorker(StressMaster& master, uint32_t id) + : StressWorker(master, id) +{ +} + +void +OrStressWorker::run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack) +{ + if (unpack) { + 
FakeMatchLoop::or_pair_posting_scan_with_unpack(f1, f2, doc_id_limit); + } else { + FakeMatchLoop::or_pair_posting_scan(f1, f2, doc_id_limit); } } @@ -376,19 +435,23 @@ void StressRunner::run(search::Rand48 &rnd, FakeWordSet &wordSet, const std::vector<std::string> &postingTypes, - unsigned int loops, - unsigned int skipCommonPairsRate, + OperatorType operatorType, + uint32_t loops, + uint32_t skipCommonPairsRate, uint32_t numTasks, uint32_t stride, bool unpack) { LOG(debug, "StressRunner::run()"); - StressMaster master(rnd, wordSet, - postingTypes, loops, - skipCommonPairsRate, - numTasks, - stride, - unpack); + StressMaster master(rnd, + wordSet, + postingTypes, + operatorType, + loops, + skipCommonPairsRate, + numTasks, + stride, + unpack); master.run(); } diff --git a/searchlib/src/tests/postinglistbm/stress_runner.h b/searchlib/src/tests/postinglistbm/stress_runner.h index 3fc8f537fd0..d4974bc969e 100644 --- a/searchlib/src/tests/postinglistbm/stress_runner.h +++ b/searchlib/src/tests/postinglistbm/stress_runner.h @@ -15,11 +15,18 @@ namespace postinglistbm { class StressRunner { public: + enum class OperatorType { + Direct, + And, + Or + }; + static void run(search::Rand48 &rnd, search::fakedata::FakeWordSet &wordSet, const std::vector<std::string> &postingTypes, - unsigned int loops, - unsigned int skipCommonPairsRate, + OperatorType operatorType, + uint32_t loops, + uint32_t skipCommonPairsRate, uint32_t numTasks, uint32_t stride, bool unpack); |