diff options
author | Geir Storli <geirst@verizonmedia.com> | 2019-05-06 14:32:26 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-05-06 14:32:26 +0200 |
commit | c98ce8216c6a7076b0a7b94fe360b5bb6f679cf6 (patch) | |
tree | f60543dabb4d3e20a21a528f2e58713a7e0910ab | |
parent | c32390cfb821886c8c50a847ea7673a191322f4f (diff) | |
parent | 5c7a56c51a65636f026d534e52ec428902df0ffe (diff) |
Merge pull request #9283 from vespa-engine/geirst/posting-list-benchmark-refactor
Geirst/posting list benchmark refactor
6 files changed, 302 insertions, 371 deletions
diff --git a/searchlib/src/tests/postinglistbm/andstress.cpp b/searchlib/src/tests/postinglistbm/andstress.cpp index 15e56c2d4ef..7152e2a3981 100644 --- a/searchlib/src/tests/postinglistbm/andstress.cpp +++ b/searchlib/src/tests/postinglistbm/andstress.cpp @@ -2,19 +2,20 @@ #include "andstress.h" +#include <vespa/fastos/thread.h> +#include <vespa/fastos/time.h> #include <vespa/searchlib/common/bitvector.h> +#include <vespa/searchlib/test/fakedata/fake_match_loop.h> +#include <vespa/searchlib/test/fakedata/fakeegcompr64filterocc.h> +#include <vespa/searchlib/test/fakedata/fakefilterocc.h> +#include <vespa/searchlib/test/fakedata/fakeposting.h> #include <vespa/searchlib/test/fakedata/fakeword.h> #include <vespa/searchlib/test/fakedata/fakewordset.h> -#include <vespa/searchlib/test/fakedata/fakeposting.h> -#include <vespa/searchlib/test/fakedata/fakefilterocc.h> -#include <vespa/searchlib/test/fakedata/fakeegcompr64filterocc.h> -#include <vespa/searchlib/test/fakedata/fakezcfilterocc.h> #include <vespa/searchlib/test/fakedata/fakezcbfilterocc.h> +#include <vespa/searchlib/test/fakedata/fakezcfilterocc.h> #include <vespa/searchlib/test/fakedata/fpfactory.h> -#include <vespa/fastos/thread.h> -#include <vespa/fastos/time.h> -#include <mutex> #include <condition_variable> +#include <mutex> #include <vector> #include <vespa/log/log.h> @@ -29,8 +30,7 @@ namespace postinglistbm { class AndStressWorker; -class AndStressMaster -{ +class AndStressMaster { private: AndStressMaster(const AndStressMaster &); @@ -60,9 +60,11 @@ private: uint32_t _numTasks; public: - typedef std::pair<FakePosting *, FakePosting *> Task; + using Task = std::pair<FakePosting *, FakePosting *>; + private: std::vector<Task> _tasks; + public: AndStressMaster(search::Rand48 &rnd, FakeWordSet &wordSet, @@ -91,8 +93,7 @@ public: }; -class AndStressWorker : public FastOS_Runnable -{ +class AndStressWorker : public FastOS_Runnable { private: AndStressWorker(const AndStressWorker &); @@ -107,7 +108,6 @@ public: virtual void Run(FastOS_ThreadInterface *thisThread, void *arg) override; }; - template <class P> FakePosting * makePosting(FakeWord &fw) @@ -115,7 +115,6 @@ makePosting(FakeWord &fw) return new P(fw); } - AndStressMaster::AndStressMaster(search::Rand48 &rnd, FakeWordSet &wordSet, unsigned int numDocs, @@ -134,7 +133,7 @@ AndStressMaster::AndStressMaster(search::Rand48 &rnd, _skipCommonPairsRate(skipCommonPairsRate), _stride(stride), _unpack(unpack), - _threadPool(NULL), + _threadPool(nullptr), _workers(), _workersDone(0), _wordSet(wordSet), @@ -152,35 +151,34 @@ AndStressMaster::AndStressMaster(search::Rand48 &rnd, template <class C> static void -clearPtrVector(std::vector<C> &v) +clearPtrVector(std::vector<C> &vector) { - for (unsigned int i = 0; i < v.size(); ++i) - delete v[i]; - v.clear(); + for (auto& elem : vector) { + delete elem; + } + vector.clear(); } - AndStressMaster::~AndStressMaster() { LOG(info, "AndStressMaster::~AndStressMaster"); _threadPool->Close(); delete _threadPool; - _threadPool = NULL; + _threadPool = nullptr; clearPtrVector(_workers); dropPostings(); } - void AndStressMaster::dropPostings() { - for (unsigned int i = 0; i < _postings.size(); ++i) - _postings[i].clear(); + for (auto& posting : _postings) { + posting.clear(); + } dropTasks(); } - void AndStressMaster::dropTasks() { @@ -188,39 +186,39 @@ AndStressMaster::dropTasks() _taskIdx = 0; } - void AndStressMaster::resetTasks() { _taskIdx = 0; } - static void makeSomePostings(FPFactory *postingFactory, - std::vector<FakeWord *> &w, - std::vector<FakePosting::SP> &p, + std::vector<FakeWord *> &words, + std::vector<FakePosting::SP> &postings, uint32_t stride, bool validate, bool verbose) { - for (unsigned int i = 0; i < w.size(); ++i) { - FakePosting::SP np(postingFactory->make(*w[i])); + for (const auto& word : words) { + auto posting = postingFactory->make(*word); if (validate) { TermFieldMatchData md; TermFieldMatchDataArray tfmda; tfmda.add(&md); - std::unique_ptr<SearchIterator> sb(np->createIterator(tfmda)); - if (np->hasWordPositions()) { - if (stride != 0) - w[i]->validate(sb.get(), tfmda, stride, verbose); - else - w[i]->validate(sb.get(), tfmda, verbose); - } else - w[i]->validate(sb.get(), verbose); + std::unique_ptr<SearchIterator> iterator(posting->createIterator(tfmda)); + if (posting->hasWordPositions()) { + if (stride != 0) { + word->validate(iterator.get(), tfmda, stride, verbose); + } else { + word->validate(iterator.get(), tfmda, verbose); + } + } else { + word->validate(iterator.get(), verbose); + } } - p.push_back(np); + postings.push_back(posting); } } @@ -236,7 +234,7 @@ AndStressMaster::makePostingsHelper(FPFactory *postingFactory, tv.SetNow(); before = tv.Secs(); postingFactory->setup(_wordSet); - for (unsigned int i = 0; i < _wordSet._words.size(); ++i) + for (size_t i = 0; i < _wordSet._words.size(); ++i) makeSomePostings(postingFactory, _wordSet._words[i], _postings[i], _stride, @@ -250,7 +248,6 @@ AndStressMaster::makePostingsHelper(FPFactory *postingFactory, postingFormat.c_str()); } - void AndStressMaster::setupTasks(unsigned int numTasks) { @@ -276,19 +273,19 @@ AndStressMaster::setupTasks(unsigned int numTasks) } } - AndStressMaster::Task * AndStressMaster::getTask() { - Task *result = NULL; + Task *result = nullptr; std::lock_guard<std::mutex> taskGuard(_taskLock); if (_taskIdx < _tasks.size()) { result = &_tasks[_taskIdx]; ++_taskIdx; } else { _workersDone++; - if (_workersDone == _workers.size()) + if (_workersDone == _workers.size()) { _taskCond.notify_all(); + } } return result; } @@ -298,26 +295,22 @@ AndStressMaster::run() { LOG(info, "AndStressMaster::run"); - std::vector<std::string>::const_iterator pti; - std::vector<std::string>::const_iterator ptie = _postingTypes.end() ; - - for (pti = _postingTypes.begin(); pti != ptie; ++pti) { - std::unique_ptr<FPFactory> ff(getFPFactory(*pti, _wordSet.getSchema())); - makePostingsHelper(ff.get(), *pti, true, false); + for (const auto& type : _postingTypes) { + std::unique_ptr<FPFactory> factory(getFPFactory(type, _wordSet.getSchema())); + makePostingsHelper(factory.get(), type, true, false); setupTasks(_numTasks); double totalTime = 0; for (unsigned int loop = 0; loop < _loops; ++loop) { - totalTime += runWorkers(*pti); + totalTime += runWorkers(type); resetTasks(); } LOG(info, "AndStressMaster::average run elapsed %10.6f s for workers %s format", - totalTime / _loops, pti->c_str()); + totalTime / _loops, type.c_str()); dropPostings(); } FastOS_Thread::Sleep(250); } - double AndStressMaster::runWorkers(const std::string &postingFormat) { @@ -328,15 +321,19 @@ AndStressMaster::runWorkers(const std::string &postingFormat) tv.SetNow(); before = tv.Secs(); unsigned int numWorkers = 8; - for (unsigned int i = 0; i < numWorkers; ++i) + for (unsigned int i = 0; i < numWorkers; ++i) { _workers.push_back(new AndStressWorker(*this, i)); + } + + for (auto& worker : _workers) { + _threadPool->NewThread(worker); + } - for (unsigned int i = 0; i < _workers.size(); ++i) - _threadPool->NewThread(_workers[i]); { std::unique_lock<std::mutex> taskGuard(_taskLock); - while (_workersDone < _workers.size()) + while (_workersDone < _workers.size()) { _taskCond.wait(taskGuard); + } } tv.SetNow(); after = tv.Secs(); @@ -349,7 +346,6 @@ AndStressMaster::runWorkers(const std::string &postingFormat) return after - before; } - AndStressWorker::AndStressWorker(AndStressMaster &master, unsigned int id) : _master(master), _id(id) @@ -362,100 +358,14 @@ AndStressWorker::~AndStressWorker() LOG(debug, "AndStressWorker::~AndStressWorker, id=%u", _id); } - -static int -highLevelAndPairPostingScan(SearchIterator &sb1, - SearchIterator &sb2, - uint32_t numDocs, uint64_t *cycles) -{ - uint32_t hits = 0; - uint64_t before = fastos::ClockSystem::now(); - sb1.initFullRange(); - sb2.initFullRange(); - uint32_t docId = sb1.getDocId(); - while (docId < numDocs) { - if (sb1.seek(docId)) { - if (sb2.seek(docId)) { - ++hits; - ++docId; - } else if (docId < sb2.getDocId()) - docId = sb2.getDocId(); - else - ++docId; - } else if (docId < sb1.getDocId()) - docId= sb1.getDocId(); - else - ++docId; - } - uint64_t after = fastos::ClockSystem::now(); - *cycles = after - before; - return hits; -} - - -static int -highLevelAndPairPostingScanUnpack(SearchIterator &sb1, - SearchIterator &sb2, - uint32_t numDocs, - uint64_t *cycles) -{ - uint32_t hits = 0; - uint64_t before = fastos::ClockSystem::now(); - sb1.initFullRange(); - sb2.initFullRange(); - uint32_t docId = sb1.getDocId(); - while (docId < numDocs) { - if (sb1.seek(docId)) { - if (sb2.seek(docId)) { - ++hits; - sb1.unpack(docId); - sb2.unpack(docId); - ++docId; - } else if (docId < sb2.getDocId()) - docId = sb2.getDocId(); - else - ++docId; - } else if (docId < sb1.getDocId()) - docId= sb1.getDocId(); - else - ++docId; - } - uint64_t after = fastos::ClockSystem::now(); - *cycles = after - before; - return hits; -} - void -testFakePair(FakePosting &f1, FakePosting &f2, unsigned int numDocs, - bool unpack) +testFakePair(const FakePosting &f1, const FakePosting &f2, uint32_t doc_id_limit, bool unpack) { - TermFieldMatchData md1; - TermFieldMatchDataArray tfmda1; - tfmda1.add(&md1); - std::unique_ptr<SearchIterator> sb1(f1.createIterator(tfmda1)); - - TermFieldMatchData md2; - TermFieldMatchDataArray tfmda2; - tfmda1.add(&md2); - std::unique_ptr<SearchIterator> sb2(f2.createIterator(tfmda2)); - - int hits = 0; - uint64_t scanUnpackTime = 0; - if (unpack) - hits = highLevelAndPairPostingScanUnpack(*sb1.get(), *sb2.get(), - numDocs, &scanUnpackTime); - else - hits = highLevelAndPairPostingScan(*sb1.get(), *sb2.get(), - numDocs, &scanUnpackTime); -#if 0 - printf("Fakepair %s AND %s => %d hits, %" PRIu64 " cycles\n", - f1.getName().c_str(), - f2.getName().c_str(), - hits, - scanUnpackTime); -#else - (void)hits; -#endif + uint64_t scan_unpack_time = 0; + int hits = unpack ? + FakeMatchLoop::and_pair_posting_scan_with_unpack(f1, f2, doc_id_limit, scan_unpack_time) : + FakeMatchLoop::and_pair_posting_scan(f1, f2, doc_id_limit, scan_unpack_time); + (void) hits; } void @@ -468,20 +378,19 @@ AndStressWorker::Run(FastOS_ThreadInterface *thisThread, void *arg) bool unpack = _master.getUnpack(); for (;;) { AndStressMaster::Task *task = _master.getTask(); - if (task == NULL) + if (task == nullptr) { break; + } testFakePair(*task->first, *task->second, _master.getNumDocs(), unpack); } } - AndStress::AndStress() { LOG(debug, "Andstress::AndStress"); } - AndStress::~AndStress() { LOG(debug, "Andstress::~AndStress"); diff --git a/searchlib/src/tests/postinglistbm/andstress.h b/searchlib/src/tests/postinglistbm/andstress.h index 07e344f3549..fdc99fb42ba 100644 --- a/searchlib/src/tests/postinglistbm/andstress.h +++ b/searchlib/src/tests/postinglistbm/andstress.h @@ -1,8 +1,8 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once -#include <vector> #include <string> +#include <vector> namespace search { class Rand48; @@ -13,24 +13,22 @@ namespace fakedata { class FakeWordSet; } namespace postinglistbm { -class AndStress -{ +class AndStress { public: AndStress(); ~AndStress(); - void - run(search::Rand48 &rnd, - search::fakedata::FakeWordSet &wordSet, - unsigned int numDocs, - unsigned int commonDocFreq, - const std::vector<std::string> &postingTypes, - unsigned int loops, - unsigned int skipCommonPairsRate, - uint32_t numTasks, - uint32_t stride, - bool unpack); + void run(search::Rand48 &rnd, + search::fakedata::FakeWordSet &wordSet, + unsigned int numDocs, + unsigned int commonDocFreq, + const std::vector<std::string> &postingTypes, + unsigned int loops, + unsigned int skipCommonPairsRate, + uint32_t numTasks, + uint32_t stride, + bool unpack); }; } diff --git a/searchlib/src/tests/postinglistbm/postinglistbm.cpp b/searchlib/src/tests/postinglistbm/postinglistbm.cpp index 41d94bf7186..02fbe4878ba 100644 --- a/searchlib/src/tests/postinglistbm/postinglistbm.cpp +++ b/searchlib/src/tests/postinglistbm/postinglistbm.cpp @@ -1,15 +1,16 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "andstress.h" +#include <vespa/fastos/app.h> #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/common/resultset.h> -#include <vespa/searchlib/util/rand48.h> -#include "andstress.h" -#include <vespa/searchlib/test/fakedata/fakeword.h> +#include <vespa/searchlib/index/docidandfeatures.h> +#include <vespa/searchlib/test/fakedata/fake_match_loop.h> #include <vespa/searchlib/test/fakedata/fakeposting.h> +#include <vespa/searchlib/test/fakedata/fakeword.h> #include <vespa/searchlib/test/fakedata/fakewordset.h> #include <vespa/searchlib/test/fakedata/fpfactory.h> -#include <vespa/searchlib/index/docidandfeatures.h> -#include <vespa/fastos/app.h> +#include <vespa/searchlib/util/rand48.h> #include <vespa/log/log.h> @@ -29,8 +30,7 @@ void FastS_block_usr2() {} namespace postinglistbm { -class PostingListBM : public FastOS_Application -{ +class PostingListBM : public FastOS_Application { private: bool _verbose; uint32_t _numDocs; @@ -42,24 +42,24 @@ private: FakeWordSet _wordSet; uint32_t _stride; bool _unpack; + public: search::Rand48 _rnd; private: - void Usage(); + void usage(); void badPostingType(const std::string &postingType); void testFake(const std::string &postingType, const Schema &schema, - const FakeWord &fw); + const FakeWord &word); public: PostingListBM(); ~PostingListBM(); int Main() override; }; - void -PostingListBM::Usage() +PostingListBM::usage() { printf("postinglistbm " "[-C <skipCommonPairsRate>] " @@ -74,29 +74,24 @@ PostingListBM::Usage() "[-v]\n"); } - void PostingListBM::badPostingType(const std::string &postingType) { printf("Bad posting list type: %s\n", postingType.c_str()); printf("Supported types: "); - std::vector<std::string> postingTypes = getPostingTypes(); - std::vector<std::string>::const_iterator pti; - std::vector<std::string>::const_iterator ptie = postingTypes.end(); bool first = true; - - for (pti = postingTypes.begin(); pti != ptie; ++pti) { - if (first) + for (const auto& type : getPostingTypes()) { + if (first) { first = false; - else + } else { printf(", "); - printf("%s", pti->c_str()); + } + printf("%s", type.c_str()); } printf("\n"); } - PostingListBM::PostingListBM() : _verbose(false), _numDocs(10000000), @@ -112,171 +107,55 @@ PostingListBM::PostingListBM() { } +PostingListBM::~PostingListBM() = default; -PostingListBM::~PostingListBM() -{ -} - - -static int -highLevelSinglePostingScan(SearchIterator &sb, uint32_t numDocs, uint64_t *cycles) -{ - uint32_t hits = 0; - uint64_t before = fastos::ClockSystem::now(); - sb.initFullRange(); - uint32_t docId = sb.getDocId(); - while (docId < numDocs) { - if (sb.seek(docId)) { - ++hits; - ++docId; - } else if (docId < sb.getDocId()) - docId= sb.getDocId(); - else - ++docId; - } - uint64_t after = fastos::ClockSystem::now(); - *cycles = after - before; - return hits; -} - - -static int -highLevelSinglePostingScanUnpack(SearchIterator &sb, - uint32_t numDocs, uint64_t *cycles) -{ - uint32_t hits = 0; - uint64_t before = fastos::ClockSystem::now(); - sb.initFullRange(); - uint32_t docId = sb.getDocId(); - while (docId < numDocs) { - if (sb.seek(docId)) { - ++hits; - sb.unpack(docId); - ++docId; - } else if (docId < sb.getDocId()) - docId= sb.getDocId(); - else - ++docId; - } - uint64_t after = fastos::ClockSystem::now(); - *cycles = after - before; - return hits; -} - - -static int -highLevelAndPairPostingScan(SearchIterator &sb1, - SearchIterator &sb2, - uint32_t numDocs, uint64_t *cycles) +void +validate_posting_for_word(const FakePosting& posting, const FakeWord& word, bool verbose) { - uint32_t hits = 0; - uint64_t before = fastos::ClockSystem::now(); - sb1.initFullRange(); - sb2.initFullRange(); - uint32_t docId = sb1.getDocId(); - while (docId < numDocs) { - if (sb1.seek(docId)) { - if (sb2.seek(docId)) { - ++hits; - ++docId; - } else if (docId < sb2.getDocId()) - docId = sb2.getDocId(); - else - ++docId; - } else if (docId < sb1.getDocId()) - docId= sb1.getDocId(); - else - ++docId; - } - uint64_t after = fastos::ClockSystem::now(); - *cycles = after - before; - return hits; -} - + TermFieldMatchData md; + TermFieldMatchDataArray tfmda; + tfmda.add(&md); -static int -highLevelAndPairPostingScanUnpack(SearchIterator &sb1, - SearchIterator &sb2, - uint32_t numDocs, - uint64_t *cycles) -{ - uint32_t hits = 0; - uint64_t before = fastos::ClockSystem::now(); - sb1.initFullRange(); - sb1.initFullRange(); - uint32_t docId = sb1.getDocId(); - while (docId < numDocs) { - if (sb1.seek(docId)) { - if (sb2.seek(docId)) { - ++hits; - sb1.unpack(docId); - sb2.unpack(docId); - ++docId; - } else if (docId < sb2.getDocId()) - docId = sb2.getDocId(); - else - ++docId; - } else if (docId < sb1.getDocId()) - docId= sb1.getDocId(); - else - ++docId; + std::unique_ptr<SearchIterator> iterator(posting.createIterator(tfmda)); + if (posting.hasWordPositions()) { + word.validate(iterator.get(), tfmda, verbose); + } else { + word.validate(iterator.get(), verbose); } - uint64_t after = fastos::ClockSystem::now(); - *cycles = after - before; - return hits; } - void PostingListBM::testFake(const std::string &postingType, const Schema &schema, - const FakeWord &fw) + const FakeWord &word) { - std::unique_ptr<FPFactory> ff(getFPFactory(postingType, schema)); - std::vector<const FakeWord *> v; - v.push_back(&fw); - ff->setup(v); - FakePosting::SP f(ff->make(fw)); + auto posting_factory = getFPFactory(postingType, schema); + std::vector<const FakeWord *> words; + words.push_back(&word); + posting_factory->setup(words); + auto posting = posting_factory->make(word); printf("%s.bitsize=%d+%d+%d+%d+%d\n", - f->getName().c_str(), - static_cast<int>(f->bitSize()), - static_cast<int>(f->l1SkipBitSize()), - static_cast<int>(f->l2SkipBitSize()), - static_cast<int>(f->l3SkipBitSize()), - static_cast<int>(f->l4SkipBitSize())); - TermFieldMatchData md; - TermFieldMatchDataArray tfmda; - tfmda.add(&md); + posting->getName().c_str(), + static_cast<int>(posting->bitSize()), + static_cast<int>(posting->l1SkipBitSize()), + static_cast<int>(posting->l2SkipBitSize()), + static_cast<int>(posting->l3SkipBitSize()), + static_cast<int>(posting->l4SkipBitSize())); + + validate_posting_for_word(*posting, word, _verbose); - std::unique_ptr<SearchIterator> sb(f->createIterator(tfmda)); - if (f->hasWordPositions()) - fw.validate(sb.get(), tfmda, _verbose); - else - fw.validate(sb.get(), _verbose); uint64_t scanTime = 0; uint64_t scanUnpackTime = 0; - TermFieldMatchData md2; - TermFieldMatchDataArray tfmda2; - tfmda2.add(&md2); - - std::unique_ptr<SearchIterator> sb2(f->createIterator(tfmda2)); - int hits1 = highLevelSinglePostingScan(*sb2.get(), fw.getDocIdLimit(), - &scanTime); - TermFieldMatchData md3; - TermFieldMatchDataArray tfmda3; - tfmda3.add(&md3); - - std::unique_ptr<SearchIterator> sb3(f->createIterator(tfmda3)); - int hits2 = highLevelSinglePostingScanUnpack(*sb3.get(), fw.getDocIdLimit(), - &scanUnpackTime); + int hits1 = FakeMatchLoop::single_posting_scan(*posting, word.getDocIdLimit(), scanTime); + int hits2 = FakeMatchLoop::single_posting_scan_with_unpack(*posting, word.getDocIdLimit(), scanUnpackTime); + printf("testFake '%s' hits1=%d, hits2=%d, scanTime=%" PRIu64 ", scanUnpackTime=%" PRIu64 "\n", - f->getName().c_str(), + posting->getName().c_str(), hits1, hits2, scanTime, scanUnpackTime); } - void testFakePair(const std::string &postingType, const Schema &schema, @@ -291,24 +170,10 @@ testFakePair(const std::string &postingType, FakePosting::SP f1(ff->make(fw1)); FakePosting::SP f2(ff->make(fw2)); - TermFieldMatchData md1; - TermFieldMatchDataArray tfmda1; - tfmda1.add(&md1); - std::unique_ptr<SearchIterator> sb1(f1->createIterator(tfmda1)); - - TermFieldMatchData md2; - TermFieldMatchDataArray tfmda2; - tfmda1.add(&md2); - std::unique_ptr<SearchIterator> sb2(f2->createIterator(tfmda2)); - - int hits = 0; uint64_t scanUnpackTime = 0; - if (unpack) - hits = highLevelAndPairPostingScanUnpack(*sb1.get(), *sb2.get(), - fw1.getDocIdLimit(), &scanUnpackTime); - else - hits = highLevelAndPairPostingScan(*sb1.get(), *sb2.get(), - fw1.getDocIdLimit(), &scanUnpackTime); + int hits = unpack ? + FakeMatchLoop::and_pair_posting_scan_with_unpack(*f1, *f2, fw1.getDocIdLimit(), scanUnpackTime) : + FakeMatchLoop::and_pair_posting_scan(*f1, *f2, fw1.getDocIdLimit(), scanUnpackTime); printf("Fakepair %s AND %s => %d hits, %" PRIu64 " cycles\n", f1->getName().c_str(), f2->getName().c_str(), @@ -316,7 +181,6 @@ testFakePair(const std::string &postingType, scanUnpackTime); } - int PostingListBM::Main() { @@ -374,7 +238,7 @@ PostingListBM::Main() CollectionType::SINGLE); schema.addIndexField(indexField); std::unique_ptr<FPFactory> ff(getFPFactory(optArg, schema)); - if (ff.get() == NULL) { + if (ff.get() == nullptr) { badPostingType(optArg); return 1; } @@ -397,13 +261,13 @@ PostingListBM::Main() _numWordsPerClass = 5; break; default: - Usage(); + usage(); return 1; } } if (_commonDocFreq > _numDocs) { - Usage(); + usage(); return 1; } @@ -437,24 +301,23 @@ PostingListBM::Main() "word5", word4, w4w5od, _rnd, _wordSet.getFieldsParams(), _wordSet.getPackedIndex()); - if (_postingTypes.empty()) + if (_postingTypes.empty()) { _postingTypes = getPostingTypes(); - std::vector<std::string>::const_iterator pti; - std::vector<std::string>::const_iterator ptie = _postingTypes.end() ; + } - for (pti = _postingTypes.begin(); pti != ptie; ++pti) { - testFake(*pti, _wordSet.getSchema(), word1); - testFake(*pti, _wordSet.getSchema(), word2); - testFake(*pti, _wordSet.getSchema(), word3); + for (const auto& type : _postingTypes) { + testFake(type, _wordSet.getSchema(), word1); + testFake(type, _wordSet.getSchema(), word2); + testFake(type, _wordSet.getSchema(), word3); } - for (pti = _postingTypes.begin(); pti != ptie; ++pti) { - testFakePair(*pti, _wordSet.getSchema(), false, word1, word3); - testFakePair(*pti, _wordSet.getSchema(), false, word2, word3); + for (const auto& type : _postingTypes) { + testFakePair(type, _wordSet.getSchema(), false, word1, word3); + testFakePair(type, _wordSet.getSchema(), false, word2, word3); } - for (pti = _postingTypes.begin(); pti != ptie; ++pti) { - testFakePair(*pti, _wordSet.getSchema(), false, word4, word5); + for (const auto& type : _postingTypes) { + testFakePair(type, _wordSet.getSchema(), false, word4, word5); } if (doandstress) { @@ -472,16 +335,14 @@ PostingListBM::Main() return 0; } -} // namespace postinglistbm +} int main(int argc, char **argv) { postinglistbm::PostingListBM app; - setvbuf(stdout, NULL, _IOLBF, 32768); + setvbuf(stdout, nullptr, _IOLBF, 32768); app._rnd.srand48(32); return app.Entry(argc, argv); - - return 0; } diff --git a/searchlib/src/vespa/searchlib/test/fakedata/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/fakedata/CMakeLists.txt index 7a86003e9f6..275fa505e9d 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/test/fakedata/CMakeLists.txt @@ -1,6 +1,7 @@ # Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(searchlib_test_fakedata OBJECT SOURCES + fake_match_loop.cpp fakeword.cpp fakewordset.cpp fakeposting.cpp diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.cpp new file mode 100644 index 00000000000..282a3e07a0e --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.cpp @@ -0,0 +1,137 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "fake_match_loop.h" +#include "fakeposting.h" +#include <vespa/fastos/timestamp.h> +#include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/queryeval/searchiterator.h> + +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataArray; +using search::queryeval::SearchIterator; + +namespace search::fakedata { + +namespace { + +class IteratorState { +private: + TermFieldMatchData _md; + TermFieldMatchDataArray _tfmda; + std::unique_ptr<SearchIterator> _itr; + +public: + IteratorState(const FakePosting& posting) + : _md(), + _tfmda(), + _itr() + { + _tfmda.add(&_md); + _itr.reset(posting.createIterator(_tfmda)); + } + ~IteratorState() {} + + SearchIterator& itr() { return *_itr; } +}; + +template <bool do_unpack> +int +do_single_posting_scan(SearchIterator& itr, uint32_t doc_id_limit, uint64_t& elapsed_time_ns) +{ + uint32_t hits = 0; + uint64_t time_before = fastos::ClockSystem::now(); + itr.initFullRange(); + uint32_t doc_id = itr.getDocId(); + while (doc_id < doc_id_limit) { + if (itr.seek(doc_id)) { + ++hits; + if (do_unpack) { + itr.unpack(doc_id); + } + ++doc_id; + } else if (doc_id < itr.getDocId()) { + doc_id = itr.getDocId(); + } else { + ++doc_id; + } + } + uint64_t time_after = fastos::ClockSystem::now(); + elapsed_time_ns = time_after - time_before; + return hits; +} + +} + +int +FakeMatchLoop::single_posting_scan(const FakePosting& posting, uint32_t doc_id_limit, uint64_t& elapsed_time_ns) +{ + IteratorState state(posting); + return do_single_posting_scan<false>(state.itr(), doc_id_limit, elapsed_time_ns); +} + +int +FakeMatchLoop::single_posting_scan_with_unpack(const FakePosting& posting, uint32_t doc_id_limit, uint64_t& elapsed_time_ns) +{ + IteratorState state(posting); + return do_single_posting_scan<true>(state.itr(), doc_id_limit, elapsed_time_ns); +} + +namespace { + +template <bool do_unpack> +int +do_and_pair_posting_scan(SearchIterator& itr1, SearchIterator& itr2, + uint32_t doc_id_limit, uint64_t& elapsed_time_ns) +{ + uint32_t hits = 0; + uint64_t time_before = fastos::ClockSystem::now(); + itr1.initFullRange(); + itr2.initFullRange(); + uint32_t doc_id = itr1.getDocId(); + while (doc_id < doc_id_limit) { + if (itr1.seek(doc_id)) { + if (itr2.seek(doc_id)) { + ++hits; + if (do_unpack) { + itr1.unpack(doc_id); + itr2.unpack(doc_id); + } + ++doc_id; + } else if (doc_id < itr2.getDocId()) { + doc_id = itr2.getDocId(); + } else { + ++doc_id; + } + } else if (doc_id < itr1.getDocId()) { + doc_id = itr1.getDocId(); + } else { + ++doc_id; + } + } + uint64_t time_after = fastos::ClockSystem::now(); + elapsed_time_ns = time_after - time_before; + return hits; +} + +} + +int +FakeMatchLoop::and_pair_posting_scan(const FakePosting& posting_1, const FakePosting& posting_2, + uint32_t doc_id_limit, uint64_t& elapsed_time_ns) +{ + IteratorState state_1(posting_1); + IteratorState state_2(posting_2); + return do_and_pair_posting_scan<false>(state_1.itr(), state_2.itr(), doc_id_limit, elapsed_time_ns); +} + +int +FakeMatchLoop::and_pair_posting_scan_with_unpack(const FakePosting& posting_1, const FakePosting& posting_2, + uint32_t doc_id_limit, uint64_t& elapsed_time_ns) +{ + IteratorState state_1(posting_1); + IteratorState state_2(posting_2); + return do_and_pair_posting_scan<true>(state_1.itr(), state_2.itr(), doc_id_limit, elapsed_time_ns); +} + +} + diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.h b/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.h new file mode 100644 index 00000000000..7bd98f1b4ca --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.h @@ -0,0 +1,25 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <cstdint> + +namespace search::fakedata { + +class FakePosting; + +/** + * Implementations of fake match loops used for testing and benchmarking. + */ +class FakeMatchLoop { +public: + static int single_posting_scan(const FakePosting& posting, uint32_t doc_id_limit, uint64_t& elapsed_time_ns); + static int single_posting_scan_with_unpack(const FakePosting& posting, uint32_t doc_id_limit, uint64_t& elapsed_time_ns); + + static int and_pair_posting_scan(const FakePosting& posting_1, const FakePosting& posting_2, + uint32_t doc_id_limit, uint64_t& elapsed_time_ns); + static int and_pair_posting_scan_with_unpack(const FakePosting& posting_1, const FakePosting& posting_2, + uint32_t doc_id_limit, uint64_t& elapsed_time_ns); +}; + +} |