diff options
Diffstat (limited to 'searchlib/src')
-rw-r--r-- | searchlib/src/tests/postinglistbm/CMakeLists.txt | 2 | ||||
-rw-r--r-- | searchlib/src/tests/postinglistbm/andstress.h | 34 | ||||
-rw-r--r-- | searchlib/src/tests/postinglistbm/posting_list_test.cpp | 37 | ||||
-rw-r--r-- | searchlib/src/tests/postinglistbm/postinglistbm.cpp | 75 | ||||
-rw-r--r-- | searchlib/src/tests/postinglistbm/stress_runner.cpp (renamed from searchlib/src/tests/postinglistbm/andstress.cpp) | 311 | ||||
-rw-r--r-- | searchlib/src/tests/postinglistbm/stress_runner.h | 35 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.cpp | 75 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.h | 13 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/test/fakedata/fakewordset.cpp | 31 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h | 16 |
10 files changed, 332 insertions, 297 deletions
diff --git a/searchlib/src/tests/postinglistbm/CMakeLists.txt b/searchlib/src/tests/postinglistbm/CMakeLists.txt index 479f54e8531..6e90f44726a 100644 --- a/searchlib/src/tests/postinglistbm/CMakeLists.txt +++ b/searchlib/src/tests/postinglistbm/CMakeLists.txt @@ -12,7 +12,7 @@ vespa_add_test(NAME searchlib_posting_list_test_app NO_VALGRIND COMMAND searchli vespa_add_executable(searchlib_postinglistbm_app TEST SOURCES postinglistbm.cpp - andstress.cpp + stress_runner.cpp DEPENDS searchlib_test searchlib diff --git a/searchlib/src/tests/postinglistbm/andstress.h b/searchlib/src/tests/postinglistbm/andstress.h deleted file mode 100644 index fdc99fb42ba..00000000000 --- a/searchlib/src/tests/postinglistbm/andstress.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include <string> -#include <vector> - -namespace search { -class Rand48; - -namespace fakedata { class FakeWordSet; } - -} - -namespace postinglistbm { - -class AndStress { -public: - AndStress(); - - ~AndStress(); - - void run(search::Rand48 &rnd, - search::fakedata::FakeWordSet &wordSet, - unsigned int numDocs, - unsigned int commonDocFreq, - const std::vector<std::string> &postingTypes, - unsigned int loops, - unsigned int skipCommonPairsRate, - uint32_t numTasks, - uint32_t stride, - bool unpack); -}; - -} diff --git a/searchlib/src/tests/postinglistbm/posting_list_test.cpp b/searchlib/src/tests/postinglistbm/posting_list_test.cpp index ad3410b8f92..dad21ada469 100644 --- a/searchlib/src/tests/postinglistbm/posting_list_test.cpp +++ b/searchlib/src/tests/postinglistbm/posting_list_test.cpp @@ -53,32 +53,6 @@ test_fake(const std::string& posting_type, static_cast<int>(posting->l4SkipBitSize())); validate_posting_list_for_word(*posting, word); - - uint64_t scan_time = 0; - uint64_t scan_unpack_time = 0; - int hits1 = FakeMatchLoop::single_posting_scan(*posting, word.getDocIdLimit(), scan_time); - int hits2 = FakeMatchLoop::single_posting_scan_with_unpack(*posting, word.getDocIdLimit(), scan_unpack_time); - - printf("test_fake: '%s': hits1=%d, hits2=%d, scan_time=%" PRIu64 "(ns), scan_unpack_time=%" PRIu64 "(ns)\n", - posting->getName().c_str(), hits1, hits2, scan_time, scan_unpack_time); -} - -void -test_fake_pair(const std::string& posting_type, const Schema& schema, - const FakeWord& word1, const FakeWord& word2) -{ - std::unique_ptr<FPFactory> factory(getFPFactory(posting_type, schema)); - std::vector<const FakeWord *> words; - words.push_back(&word1); - words.push_back(&word2); - factory->setup(words); - auto posting1 = factory->make(word1); - auto posting2 = factory->make(word2); - - uint64_t scan_time = 0; - int hits = FakeMatchLoop::and_pair_posting_scan(*posting1, *posting2, word1.getDocIdLimit(), scan_time); - printf("test_fake_pair: '%s' AND '%s' => %d hits, scan_time=%" PRIu64 " (ns)\n", - posting1->getName().c_str(), posting2->getName().c_str(), hits, scan_time); } struct PostingListTest : public ::testing::Test { @@ -138,15 +112,8 @@ struct PostingListTest : public ::testing::Test { test_fake(type, word_set.getSchema(), *word1); test_fake(type, word_set.getSchema(), *word2); test_fake(type, word_set.getSchema(), *word3); - } - - for (const auto& type : posting_types) { - test_fake_pair(type, word_set.getSchema(), *word1, *word3); - test_fake_pair(type, word_set.getSchema(), *word2, *word3); - } - - for (const auto& type : posting_types) { - test_fake_pair(type, word_set.getSchema(), *word4, *word5); + test_fake(type, word_set.getSchema(), *word4); + test_fake(type, word_set.getSchema(), *word5); } } diff --git a/searchlib/src/tests/postinglistbm/postinglistbm.cpp b/searchlib/src/tests/postinglistbm/postinglistbm.cpp index 0a6f99ede11..16b8e9cd7f5 100644 --- a/searchlib/src/tests/postinglistbm/postinglistbm.cpp +++ b/searchlib/src/tests/postinglistbm/postinglistbm.cpp @@ -1,6 +1,6 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "andstress.h" +#include "stress_runner.h" #include <vespa/fastos/app.h> #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/common/resultset.h> @@ -30,8 +30,11 @@ class PostingListBM : public FastOS_Application { private: uint32_t _numDocs; uint32_t _commonDocFreq; + uint32_t _mediumDocFreq; + uint32_t _rareDocFreq; uint32_t _numWordsPerClass; std::vector<std::string> _postingTypes; + StressRunner::OperatorType _operatorType; uint32_t _loops; uint32_t _skipCommonPairsRate; FakeWordSet _wordSet; @@ -54,19 +57,21 @@ usage() "[-C <skipCommonPairsRate>] " "[-T {string, array, weightedSet}] " "[-c <commonDoqFreq>] " + "[-m <mediumDoqFreq>] " + "[-r <rareDoqFreq>] " "[-d <numDocs>] " "[-l <numLoops>] " "[-s <stride>] " "[-t <postingType>] " + "[-o {direct, and, or}] " "[-u] " - "[-w <numWordsPerClass>] " - "[-q]\n"); + "[-w <numWordsPerClass>]\n"); } void badPostingType(const std::string &postingType) { - printf("Bad posting list type: %s\n", postingType.c_str()); + printf("Bad posting list type: '%s'\n", postingType.c_str()); printf("Supported types: "); bool first = true; @@ -84,8 +89,11 @@ badPostingType(const std::string &postingType) PostingListBM::PostingListBM() : _numDocs(10000000), _commonDocFreq(50000), + _mediumDocFreq(1000), + _rareDocFreq(10), _numWordsPerClass(100), _postingTypes(), + _operatorType(StressRunner::OperatorType::And), _loops(1), _skipCommonPairsRate(1), _wordSet(), @@ -107,9 +115,8 @@ PostingListBM::Main() argi = 1; bool hasElements = false; bool hasElementWeights = false; - bool quick = false; - while ((c = GetOpt("C:c:d:l:s:t:uw:T:q", optArg, argi)) != -1) { + while ((c = GetOpt("C:c:m:r:d:l:s:t:o:uw:T:q", optArg, argi)) != -1) { switch(c) { case 'C': _skipCommonPairsRate = atoi(optArg); @@ -125,13 +132,20 @@ PostingListBM::Main() hasElements = true; hasElementWeights = true; } else { - printf("Bad collection type: %s\n", optArg); + printf("Bad collection type: '%s'\n", optArg); + printf("Supported types: single, array, weightedSet\n"); return 1; } break; case 'c': _commonDocFreq = atoi(optArg); break; + case 'm': + _mediumDocFreq = atoi(optArg); + break; + case 'r': + _rareDocFreq = atoi(optArg); + break; case 'd': _numDocs = atoi(optArg); break; @@ -156,18 +170,28 @@ PostingListBM::Main() } while (0); _postingTypes.push_back(optArg); break; + case 'o': + { + vespalib::string operatorType(optArg); + if (operatorType == "direct") { + _operatorType = StressRunner::OperatorType::Direct; + } else if (operatorType == "and") { + _operatorType = StressRunner::OperatorType::And; + } else if (operatorType == "or") { + _operatorType = StressRunner::OperatorType::Or; + } else { + printf("Bad operator type: '%s'\n", operatorType.c_str()); + printf("Supported types: direct, and, or\n"); + return 1; + } + break; + } case 'u': _unpack = true; break; case 'w': _numWordsPerClass = atoi(optArg); break; - case 'q': - quick = true; - _numDocs = 36000; - _commonDocFreq = 10000; - _numWordsPerClass = 5; - break; default: usage(); return 1; @@ -182,23 +206,22 @@ PostingListBM::Main() _wordSet.setupParams(hasElements, hasElementWeights); uint32_t numTasks = 40000; - if (quick) { - numTasks = 40; - } - + if (_postingTypes.empty()) { _postingTypes = getPostingTypes(); } - _wordSet.setupWords(_rnd, _numDocs, _commonDocFreq, _numWordsPerClass); - - AndStress andstress; - andstress.run(_rnd, _wordSet, - _numDocs, _commonDocFreq, _postingTypes, _loops, - _skipCommonPairsRate, - numTasks, - _stride, - _unpack); + _wordSet.setupWords(_rnd, _numDocs, _commonDocFreq, _mediumDocFreq, _rareDocFreq, _numWordsPerClass); + + StressRunner::run(_rnd, + _wordSet, + _postingTypes, + _operatorType, + _loops, + _skipCommonPairsRate, + numTasks, + _stride, + _unpack); return 0; } diff --git a/searchlib/src/tests/postinglistbm/andstress.cpp b/searchlib/src/tests/postinglistbm/stress_runner.cpp index adca7892464..98ace5e00a1 100644 --- a/searchlib/src/tests/postinglistbm/andstress.cpp +++ b/searchlib/src/tests/postinglistbm/stress_runner.cpp @@ -1,6 +1,6 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "andstress.h" +#include "stress_runner.h" #include <vespa/fastos/thread.h> #include <vespa/fastos/time.h> @@ -19,7 +19,7 @@ #include <vector> #include <vespa/log/log.h> -LOG_SETUP(".andstress"); +LOG_SETUP(".stress_runner"); using search::fef::TermFieldMatchData; using search::fef::TermFieldMatchDataArray; @@ -28,27 +28,27 @@ using namespace search::fakedata; namespace postinglistbm { -class AndStressWorker; +class StressWorker; +using StressWorkerUP = std::unique_ptr<StressWorker>; -class AndStressMaster { +class StressMaster { private: - AndStressMaster(const AndStressMaster &); + StressMaster(const StressMaster &); - AndStressMaster & - operator=(const AndStressMaster &); + StressMaster &operator=(const StressMaster &); search::Rand48 &_rnd; - unsigned int _numDocs; - unsigned int _commonDocFreq; + uint32_t _numDocs; std::vector<std::string> _postingTypes; - unsigned int _loops; - unsigned int _skipCommonPairsRate; + StressRunner::OperatorType _operatorType; + uint32_t _loops; + uint32_t _skipCommonPairsRate; uint32_t _stride; bool _unpack; FastOS_ThreadPool *_threadPool; - std::vector<AndStressWorker *> _workers; - unsigned int _workersDone; + std::vector<StressWorkerUP> _workers; + uint32_t _workersDone; FakeWordSet &_wordSet; @@ -56,7 +56,7 @@ private: std::mutex _taskLock; std::condition_variable _taskCond; - unsigned int _taskIdx; + uint32_t _taskIdx; uint32_t _numTasks; public: @@ -66,18 +66,17 @@ private: std::vector<Task> _tasks; public: - AndStressMaster(search::Rand48 &rnd, - FakeWordSet &wordSet, - unsigned int numDocs, - unsigned int commonDocFreq, - const std::vector<std::string> &postingType, - unsigned int loops, - unsigned int skipCommonPairsRate, - uint32_t numTasks, - uint32_t stride, - bool unpack); - - ~AndStressMaster(); + StressMaster(search::Rand48 &rnd, + FakeWordSet &wordSet, + const std::vector<std::string> &postingType, + StressRunner::OperatorType operatorType, + uint32_t loops, + uint32_t skipCommonPairsRate, + uint32_t numTasks, + uint32_t stride, + bool unpack); + + ~StressMaster(); void run(); void makePostingsHelper(FPFactory *postingFactory, const std::string &postingFormat, @@ -85,50 +84,68 @@ public: void dropPostings(); void dropTasks(); void resetTasks(); // Prepare for rerun - void setupTasks(unsigned int numTasks); + void setupTasks(uint32_t numTasks); Task *getTask(); - unsigned int getNumDocs() const { return _numDocs; } + uint32_t getNumDocs() const { return _numDocs; } bool getUnpack() const { return _unpack; } double runWorkers(const std::string &postingFormat); }; +class StressWorker : public FastOS_Runnable { +protected: + StressMaster& _master; + uint32_t _id; -class AndStressWorker : public FastOS_Runnable { + virtual void run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack) = 0; + +public: + StressWorker(const StressWorker&) = delete; + StressWorker& operator=(const StressWorker&) = delete; + + StressWorker(StressMaster& master, uint32_t id); + virtual ~StressWorker(); + + virtual void Run(FastOS_ThreadInterface* thisThread, void* arg) override; +}; + +class DirectStressWorker : public StressWorker { private: - AndStressWorker(const AndStressWorker &); + void run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack) override; + +public: + DirectStressWorker(StressMaster& master, uint32_t id); +}; - AndStressWorker & - operator=(const AndStressWorker &); +class AndStressWorker : public StressWorker { +private: + void run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack) override; - AndStressMaster &_master; - unsigned int _id; public: - AndStressWorker(AndStressMaster &master, unsigned int id); - ~AndStressWorker(); - virtual void Run(FastOS_ThreadInterface *thisThread, void *arg) override; + AndStressWorker(StressMaster& master, uint32_t id); }; -template <class P> -FakePosting * -makePosting(FakeWord &fw) -{ - return new P(fw); -} +class OrStressWorker : public StressWorker { +private: + void run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack) override; -AndStressMaster::AndStressMaster(search::Rand48 &rnd, - FakeWordSet &wordSet, - unsigned int numDocs, - unsigned int commonDocFreq, - const std::vector<std::string> &postingTypes, - unsigned int loops, - unsigned int skipCommonPairsRate, - uint32_t numTasks, - uint32_t stride, - bool unpack) +public: + OrStressWorker(StressMaster& master, uint32_t id); +}; + + +StressMaster::StressMaster(search::Rand48 &rnd, + FakeWordSet &wordSet, + const std::vector<std::string> &postingTypes, + StressRunner::OperatorType operatorType, + uint32_t loops, + uint32_t skipCommonPairsRate, + uint32_t numTasks, + uint32_t stride, + bool unpack) : _rnd(rnd), - _numDocs(numDocs), - _commonDocFreq(commonDocFreq), + _numDocs(wordSet.numDocs()), _postingTypes(postingTypes), + _operatorType(operatorType), _loops(loops), _skipCommonPairsRate(skipCommonPairsRate), _stride(stride), @@ -144,34 +161,24 @@ AndStressMaster::AndStressMaster(search::Rand48 &rnd, _numTasks(numTasks), _tasks() { - LOG(info, "AndStressMaster::AndStressMaster"); + LOG(info, "StressMaster::StressMaster()"); _threadPool = new FastOS_ThreadPool(128 * 1024, 400); } -template <class C> -static void -clearPtrVector(std::vector<C> &vector) +StressMaster::~StressMaster() { - for (auto& elem : vector) { - delete elem; - } - vector.clear(); -} - -AndStressMaster::~AndStressMaster() -{ - LOG(info, "AndStressMaster::~AndStressMaster"); + LOG(info, "StressMaster::~StressMaster()"); _threadPool->Close(); delete _threadPool; _threadPool = nullptr; - clearPtrVector(_workers); + _workers.clear(); dropPostings(); } void -AndStressMaster::dropPostings() +StressMaster::dropPostings() { for (auto& posting : _postings) { posting.clear(); @@ -180,14 +187,14 @@ AndStressMaster::dropPostings() } void -AndStressMaster::dropTasks() +StressMaster::dropTasks() { _tasks.clear(); _taskIdx = 0; } void -AndStressMaster::resetTasks() +StressMaster::resetTasks() { _taskIdx = 0; } @@ -223,9 +230,9 @@ makeSomePostings(FPFactory *postingFactory, } void -AndStressMaster::makePostingsHelper(FPFactory *postingFactory, - const std::string &postingFormat, - bool validate, bool verbose) +StressMaster::makePostingsHelper(FPFactory *postingFactory, + const std::string &postingFormat, + bool validate, bool verbose) { FastOS_Time tv; double before; @@ -243,20 +250,20 @@ AndStressMaster::makePostingsHelper(FPFactory *postingFactory, tv.SetNow(); after = tv.Secs(); LOG(info, - "AndStressMaster::makePostingsHelper elapsed %10.6f s for %s format", + "StressMaster::makePostingsHelper() elapsed %10.6f s for %s format", after - before, postingFormat.c_str()); } void -AndStressMaster::setupTasks(unsigned int numTasks) +StressMaster::setupTasks(uint32_t numTasks) { - unsigned int wordclass1; - unsigned int wordclass2; - unsigned int word1idx; - unsigned int word2idx; + uint32_t wordclass1; + uint32_t wordclass2; + uint32_t word1idx; + uint32_t word2idx; - for (unsigned int i = 0; i < numTasks; ++i) { + for (uint32_t i = 0; i < numTasks; ++i) { wordclass1 = _rnd.lrand48() % _postings.size(); wordclass2 = _rnd.lrand48() % _postings.size(); while (wordclass1 == FakeWordSet::COMMON_WORD && @@ -273,8 +280,8 @@ AndStressMaster::setupTasks(unsigned int numTasks) } } -AndStressMaster::Task * -AndStressMaster::getTask() +StressMaster::Task * +StressMaster::getTask() { Task *result = nullptr; std::lock_guard<std::mutex> taskGuard(_taskLock); @@ -291,20 +298,20 @@ AndStressMaster::getTask() } void -AndStressMaster::run() +StressMaster::run() { - LOG(info, "AndStressMaster::run"); + LOG(info, "StressMaster::run()"); for (const auto& type : _postingTypes) { std::unique_ptr<FPFactory> factory(getFPFactory(type, _wordSet.getSchema())); makePostingsHelper(factory.get(), type, true, false); setupTasks(_numTasks); double totalTime = 0; - for (unsigned int loop = 0; loop < _loops; ++loop) { + for (uint32_t loop = 0; loop < _loops; ++loop) { totalTime += runWorkers(type); resetTasks(); } - LOG(info, "AndStressMaster::average run elapsed %10.6f s for workers %s format", + LOG(info, "StressMaster::average run elapsed %10.6f s for workers %s format", totalTime / _loops, type.c_str()); dropPostings(); } @@ -312,7 +319,7 @@ AndStressMaster::run() } double -AndStressMaster::runWorkers(const std::string &postingFormat) +StressMaster::runWorkers(const std::string &postingFormat) { FastOS_Time tv; double before; @@ -320,13 +327,19 @@ AndStressMaster::runWorkers(const std::string &postingFormat) tv.SetNow(); before = tv.Secs(); - unsigned int numWorkers = 8; - for (unsigned int i = 0; i < numWorkers; ++i) { - _workers.push_back(new AndStressWorker(*this, i)); + uint32_t numWorkers = 8; + for (uint32_t i = 0; i < numWorkers; ++i) { + if (_operatorType == StressRunner::OperatorType::Direct) { + _workers.push_back(std::make_unique<DirectStressWorker>(*this, i)); + } else if (_operatorType == StressRunner::OperatorType::And) { + _workers.push_back(std::make_unique<AndStressWorker>(*this, i)); + } else if (_operatorType == StressRunner::OperatorType::Or) { + _workers.push_back(std::make_unique<OrStressWorker>(*this, i)); + } } for (auto& worker : _workers) { - _threadPool->NewThread(worker); + _threadPool->NewThread(worker.get()); } { @@ -338,83 +351,107 @@ AndStressMaster::runWorkers(const std::string &postingFormat) tv.SetNow(); after = tv.Secs(); LOG(info, - "AndStressMaster::run elapsed %10.6f s for workers %s format", + "StressMaster::run() elapsed %10.6f s for workers %s format", after - before, postingFormat.c_str()); - clearPtrVector(_workers); + _workers.clear(); _workersDone = 0; return after - before; } -AndStressWorker::AndStressWorker(AndStressMaster &master, unsigned int id) +StressWorker::StressWorker(StressMaster& master, uint32_t id) : _master(master), _id(id) { - LOG(debug, "AndStressWorker::AndStressWorker, id=%u", id); } -AndStressWorker::~AndStressWorker() -{ - LOG(debug, "AndStressWorker::~AndStressWorker, id=%u", _id); -} +StressWorker::~StressWorker() = default; void -testFakePair(const FakePosting &f1, const FakePosting &f2, uint32_t doc_id_limit, bool unpack) -{ - uint64_t scan_unpack_time = 0; - int hits = unpack ? - FakeMatchLoop::and_pair_posting_scan_with_unpack(f1, f2, doc_id_limit, scan_unpack_time) : - FakeMatchLoop::and_pair_posting_scan(f1, f2, doc_id_limit, scan_unpack_time); - (void) hits; -} - -void -AndStressWorker::Run(FastOS_ThreadInterface *thisThread, void *arg) +StressWorker::Run(FastOS_ThreadInterface* thisThread, void* arg) { (void) thisThread; (void) arg; - LOG(debug, "AndStressWorker::Run, id=%u", _id); + LOG(debug, "StressWorker::Run(), id=%u", _id); bool unpack = _master.getUnpack(); for (;;) { - AndStressMaster::Task *task = _master.getTask(); + StressMaster::Task *task = _master.getTask(); if (task == nullptr) { break; } - testFakePair(*task->first, *task->second, _master.getNumDocs(), - unpack); + run_task(*task->first, *task->second, _master.getNumDocs(), unpack); } } -AndStress::AndStress() +DirectStressWorker::DirectStressWorker(StressMaster& master, uint32_t id) + : StressWorker(master, id) { - LOG(debug, "Andstress::AndStress"); } -AndStress::~AndStress() +void +DirectStressWorker::run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack) { - LOG(debug, "Andstress::~AndStress"); + if (unpack) { + FakeMatchLoop::direct_posting_scan_with_unpack(f1, doc_id_limit); + FakeMatchLoop::direct_posting_scan_with_unpack(f2, doc_id_limit); + } else { + FakeMatchLoop::direct_posting_scan(f1, doc_id_limit); + FakeMatchLoop::direct_posting_scan(f2, doc_id_limit); + } +} + +AndStressWorker::AndStressWorker(StressMaster& master, uint32_t id) + : StressWorker(master, id) +{ +} + +void +AndStressWorker::run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack) +{ + if (unpack) { + FakeMatchLoop::and_pair_posting_scan_with_unpack(f1, f2, doc_id_limit); + } else { + FakeMatchLoop::and_pair_posting_scan(f1, f2, doc_id_limit); + } +} + +OrStressWorker::OrStressWorker(StressMaster& master, uint32_t id) + : StressWorker(master, id) +{ +} + +void +OrStressWorker::run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack) +{ + if (unpack) { + FakeMatchLoop::or_pair_posting_scan_with_unpack(f1, f2, doc_id_limit); + } else { + FakeMatchLoop::or_pair_posting_scan(f1, f2, doc_id_limit); + } } void -AndStress::run(search::Rand48 &rnd, - FakeWordSet &wordSet, - unsigned int numDocs, - unsigned int commonDocFreq, - const std::vector<std::string> &postingTypes, - unsigned int loops, - unsigned int skipCommonPairsRate, - uint32_t numTasks, - uint32_t stride, - bool unpack) +StressRunner::run(search::Rand48 &rnd, + FakeWordSet &wordSet, + const std::vector<std::string> &postingTypes, + OperatorType operatorType, + uint32_t loops, + uint32_t skipCommonPairsRate, + uint32_t numTasks, + uint32_t stride, + bool unpack) { - LOG(debug, "Andstress::run"); - AndStressMaster master(rnd, wordSet, - numDocs, commonDocFreq, postingTypes, loops, - skipCommonPairsRate, - numTasks, - stride, - unpack); + LOG(debug, "StressRunner::run()"); + StressMaster master(rnd, + wordSet, + postingTypes, + operatorType, + loops, + skipCommonPairsRate, + numTasks, + stride, + unpack); master.run(); } diff --git a/searchlib/src/tests/postinglistbm/stress_runner.h b/searchlib/src/tests/postinglistbm/stress_runner.h new file mode 100644 index 00000000000..d4974bc969e --- /dev/null +++ b/searchlib/src/tests/postinglistbm/stress_runner.h @@ -0,0 +1,35 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <string> +#include <vector> + +namespace search { +class Rand48; + +namespace fakedata { class FakeWordSet; } + +} + +namespace postinglistbm { + +class StressRunner { +public: + enum class OperatorType { + Direct, + And, + Or + }; + + static void run(search::Rand48 &rnd, + search::fakedata::FakeWordSet &wordSet, + const std::vector<std::string> &postingTypes, + OperatorType operatorType, + uint32_t loops, + uint32_t skipCommonPairsRate, + uint32_t numTasks, + uint32_t stride, + bool unpack); +}; + +} diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.cpp index 282a3e07a0e..ef4edf86c59 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.cpp @@ -4,10 +4,14 @@ #include "fakeposting.h" #include <vespa/fastos/timestamp.h> #include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/queryeval/andsearch.h> +#include <vespa/searchlib/queryeval/orsearch.h> #include <vespa/searchlib/queryeval/searchiterator.h> using search::fef::TermFieldMatchData; using search::fef::TermFieldMatchDataArray; +using search::queryeval::AndSearch; +using search::queryeval::OrSearch; using search::queryeval::SearchIterator; namespace search::fakedata { @@ -32,14 +36,14 @@ public: ~IteratorState() {} SearchIterator& itr() { return *_itr; } + SearchIterator* release() { return _itr.release(); } }; template <bool do_unpack> int -do_single_posting_scan(SearchIterator& itr, uint32_t doc_id_limit, uint64_t& elapsed_time_ns) +do_match_loop(SearchIterator& itr, uint32_t doc_id_limit) { uint32_t hits = 0; - uint64_t time_before = fastos::ClockSystem::now(); itr.initFullRange(); uint32_t doc_id = itr.getDocId(); while (doc_id < doc_id_limit) { @@ -55,82 +59,59 @@ do_single_posting_scan(SearchIterator& itr, uint32_t doc_id_limit, uint64_t& ela ++doc_id; } } - uint64_t time_after = fastos::ClockSystem::now(); - elapsed_time_ns = time_after - time_before; return hits; } } int -FakeMatchLoop::single_posting_scan(const FakePosting& posting, uint32_t doc_id_limit, uint64_t& elapsed_time_ns) +FakeMatchLoop::direct_posting_scan(const FakePosting& posting, uint32_t doc_id_limit) { IteratorState state(posting); - return do_single_posting_scan<false>(state.itr(), doc_id_limit, elapsed_time_ns); + return do_match_loop<false>(state.itr(), doc_id_limit); } int -FakeMatchLoop::single_posting_scan_with_unpack(const FakePosting& posting, uint32_t doc_id_limit, uint64_t& elapsed_time_ns) +FakeMatchLoop::direct_posting_scan_with_unpack(const FakePosting& posting, uint32_t doc_id_limit) { IteratorState state(posting); - return do_single_posting_scan<true>(state.itr(), doc_id_limit, elapsed_time_ns); + return do_match_loop<true>(state.itr(), doc_id_limit); } -namespace { - -template <bool do_unpack> int -do_and_pair_posting_scan(SearchIterator& itr1, SearchIterator& itr2, - uint32_t doc_id_limit, uint64_t& elapsed_time_ns) +FakeMatchLoop::and_pair_posting_scan(const FakePosting& posting_1, const FakePosting& posting_2, uint32_t doc_id_limit) { - uint32_t hits = 0; - uint64_t time_before = fastos::ClockSystem::now(); - itr1.initFullRange(); - itr2.initFullRange(); - uint32_t doc_id = itr1.getDocId(); - while (doc_id < doc_id_limit) { - if (itr1.seek(doc_id)) { - if (itr2.seek(doc_id)) { - ++hits; - if (do_unpack) { - itr1.unpack(doc_id); - itr2.unpack(doc_id); - } - ++doc_id; - } else if (doc_id < itr2.getDocId()) { - doc_id = itr2.getDocId(); - } else { - ++doc_id; - } - } else if (doc_id < itr1.getDocId()) { - doc_id = itr1.getDocId(); - } else { - ++doc_id; - } - } - uint64_t time_after = fastos::ClockSystem::now(); - elapsed_time_ns = time_after - time_before; - return hits; + IteratorState state_1(posting_1); + IteratorState state_2(posting_2); + std::unique_ptr<SearchIterator> iterator(AndSearch::create({state_1.release(), state_2.release()}, true)); + return do_match_loop<false>(*iterator, doc_id_limit); } +int +FakeMatchLoop::and_pair_posting_scan_with_unpack(const FakePosting& posting_1, const FakePosting& posting_2, uint32_t doc_id_limit) +{ + IteratorState state_1(posting_1); + IteratorState state_2(posting_2); + std::unique_ptr<SearchIterator> iterator(AndSearch::create({state_1.release(), state_2.release()}, true)); + return do_match_loop<true>(*iterator, doc_id_limit); } int -FakeMatchLoop::and_pair_posting_scan(const FakePosting& posting_1, const FakePosting& posting_2, - uint32_t doc_id_limit, uint64_t& elapsed_time_ns) +FakeMatchLoop::or_pair_posting_scan(const FakePosting& posting_1, const FakePosting& posting_2, uint32_t doc_id_limit) { IteratorState state_1(posting_1); IteratorState state_2(posting_2); - return do_and_pair_posting_scan<false>(state_1.itr(), state_2.itr(), doc_id_limit, elapsed_time_ns); + std::unique_ptr<SearchIterator> iterator(OrSearch::create({state_1.release(), state_2.release()}, true)); + return do_match_loop<false>(*iterator, doc_id_limit); } int -FakeMatchLoop::and_pair_posting_scan_with_unpack(const FakePosting& posting_1, const FakePosting& posting_2, - uint32_t doc_id_limit, uint64_t& elapsed_time_ns) +FakeMatchLoop::or_pair_posting_scan_with_unpack(const FakePosting& posting_1, const FakePosting& posting_2, uint32_t doc_id_limit) { IteratorState state_1(posting_1); IteratorState state_2(posting_2); - return do_and_pair_posting_scan<true>(state_1.itr(), state_2.itr(), doc_id_limit, elapsed_time_ns); + std::unique_ptr<SearchIterator> iterator(OrSearch::create({state_1.release(), state_2.release()}, true)); + return do_match_loop<true>(*iterator, doc_id_limit); } } diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.h b/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.h index 7bd98f1b4ca..f24e4019846 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.h +++ b/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.h @@ -13,13 +13,14 @@ class FakePosting; */ class FakeMatchLoop { public: - static int single_posting_scan(const FakePosting& posting, uint32_t doc_id_limit, uint64_t& elapsed_time_ns); - static int single_posting_scan_with_unpack(const FakePosting& posting, uint32_t doc_id_limit, uint64_t& elapsed_time_ns); + static int direct_posting_scan(const FakePosting& posting, uint32_t doc_id_limit); + static int direct_posting_scan_with_unpack(const FakePosting& posting, uint32_t doc_id_limit); - static int and_pair_posting_scan(const FakePosting& posting_1, const FakePosting& posting_2, - uint32_t doc_id_limit, uint64_t& elapsed_time_ns); - static int and_pair_posting_scan_with_unpack(const FakePosting& posting_1, const FakePosting& posting_2, - uint32_t doc_id_limit, uint64_t& elapsed_time_ns); + static int and_pair_posting_scan(const FakePosting& posting_1, const FakePosting& posting_2, uint32_t doc_id_limit); + static int and_pair_posting_scan_with_unpack(const FakePosting& posting_1, const FakePosting& posting_2, uint32_t doc_id_limit); + + static int or_pair_posting_scan(const FakePosting& posting_1, const FakePosting& posting_2, uint32_t doc_id_limit); + static int or_pair_posting_scan_with_unpack(const FakePosting& posting_1, const FakePosting& posting_2, uint32_t doc_id_limit); }; } diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.cpp index 5c87bf88e9c..09c8e86d979 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.cpp @@ -31,7 +31,8 @@ applyDocIdBiasToVector(FakeWordVector& words, uint32_t docIdBias) FakeWordSet::FakeWordSet() : _words(NUM_WORDCLASSES), _schema(), - _fieldsParams() + _fieldsParams(), + _numDocs(0) { setupParams(false, false); } @@ -40,7 +41,8 @@ FakeWordSet::FakeWordSet(bool hasElements, bool hasElementWeights) : _words(NUM_WORDCLASSES), _schema(), - _fieldsParams() + _fieldsParams(), + _numDocs(0) { setupParams(hasElements, hasElementWeights); } @@ -75,9 +77,20 @@ FakeWordSet::setupParams(bool hasElements, void FakeWordSet::setupWords(search::Rand48 &rnd, - unsigned int numDocs, - unsigned int commonDocFreq, - unsigned int numWordsPerWordClass) + uint32_t numDocs, + uint32_t commonDocFreq, + uint32_t numWordsPerWordClass) +{ + setupWords(rnd, numDocs, commonDocFreq, 1000, 10, numWordsPerWordClass); +} + +void +FakeWordSet::setupWords(search::Rand48 &rnd, + uint32_t numDocs, + uint32_t commonDocFreq, + uint32_t mediumDocFreq, + uint32_t rareDocFreq, + uint32_t numWordsPerWordClass) { std::string common = "common"; std::string medium = "medium"; @@ -86,11 +99,13 @@ FakeWordSet::setupWords(search::Rand48 &rnd, double before; double after; + _numDocs = numDocs; + LOG(info, "enter setupWords"); tv.SetNow(); before = tv.Secs(); uint32_t packedIndex = _fieldsParams.size() - 1; - for (unsigned int i = 0; i < numWordsPerWordClass; ++i) { + for (uint32_t i = 0; i < numWordsPerWordClass; ++i) { std::ostringstream vi; vi << (i + 1); @@ -99,12 +114,12 @@ FakeWordSet::setupWords(search::Rand48 &rnd, _fieldsParams[packedIndex], packedIndex)); - _words[MEDIUM_WORD].push_back(std::make_unique<FakeWord>(numDocs, 1000, 500, + _words[MEDIUM_WORD].push_back(std::make_unique<FakeWord>(numDocs, mediumDocFreq, mediumDocFreq / 2, medium + vi.str(), rnd, _fieldsParams[packedIndex], packedIndex)); - _words[RARE_WORD].push_back(std::make_unique<FakeWord>(numDocs, 10, 5, + _words[RARE_WORD].push_back(std::make_unique<FakeWord>(numDocs, rareDocFreq, rareDocFreq / 2, rare + vi.str(), rnd, _fieldsParams[packedIndex], packedIndex)); diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h index 0b7ee4db6fe..d404c664a34 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h @@ -32,6 +32,7 @@ private: std::vector<FakeWordVector> _words; Schema _schema; std::vector<PosOccFieldsParams> _fieldsParams; + uint32_t _numDocs; public: FakeWordSet(); @@ -45,9 +46,16 @@ public: bool hasElementWeights); void setupWords(search::Rand48 &rnd, - unsigned int numDocs, - unsigned int commonDocFreq, - unsigned int numWordsPerWordClass); + uint32_t numDocs, + uint32_t commonDocFreq, + uint32_t numWordsPerWordClass); + + void setupWords(search::Rand48 &rnd, + uint32_t numDocs, + uint32_t commonDocFreq, + uint32_t mediumDocFreq, + uint32_t rareDocFreq, + uint32_t numWordsPerWordClass); const std::vector<FakeWordVector>& words() const { return _words; } @@ -69,6 +77,8 @@ public: return _schema; } + uint32_t numDocs() const { return _numDocs; } + void addDocIdBias(uint32_t docIdBias); }; |