aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeir Storli <geirst@verizonmedia.com>2019-05-06 14:32:26 +0200
committerGitHub <noreply@github.com>2019-05-06 14:32:26 +0200
commitc98ce8216c6a7076b0a7b94fe360b5bb6f679cf6 (patch)
treef60543dabb4d3e20a21a528f2e58713a7e0910ab
parentc32390cfb821886c8c50a847ea7673a191322f4f (diff)
parent5c7a56c51a65636f026d534e52ec428902df0ffe (diff)
Merge pull request #9283 from vespa-engine/geirst/posting-list-benchmark-refactor
Geirst/posting list benchmark refactor
-rw-r--r--searchlib/src/tests/postinglistbm/andstress.cpp217
-rw-r--r--searchlib/src/tests/postinglistbm/andstress.h26
-rw-r--r--searchlib/src/tests/postinglistbm/postinglistbm.cpp267
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.cpp137
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.h25
6 files changed, 302 insertions, 371 deletions
diff --git a/searchlib/src/tests/postinglistbm/andstress.cpp b/searchlib/src/tests/postinglistbm/andstress.cpp
index 15e56c2d4ef..7152e2a3981 100644
--- a/searchlib/src/tests/postinglistbm/andstress.cpp
+++ b/searchlib/src/tests/postinglistbm/andstress.cpp
@@ -2,19 +2,20 @@
#include "andstress.h"
+#include <vespa/fastos/thread.h>
+#include <vespa/fastos/time.h>
#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/test/fakedata/fake_match_loop.h>
+#include <vespa/searchlib/test/fakedata/fakeegcompr64filterocc.h>
+#include <vespa/searchlib/test/fakedata/fakefilterocc.h>
+#include <vespa/searchlib/test/fakedata/fakeposting.h>
#include <vespa/searchlib/test/fakedata/fakeword.h>
#include <vespa/searchlib/test/fakedata/fakewordset.h>
-#include <vespa/searchlib/test/fakedata/fakeposting.h>
-#include <vespa/searchlib/test/fakedata/fakefilterocc.h>
-#include <vespa/searchlib/test/fakedata/fakeegcompr64filterocc.h>
-#include <vespa/searchlib/test/fakedata/fakezcfilterocc.h>
#include <vespa/searchlib/test/fakedata/fakezcbfilterocc.h>
+#include <vespa/searchlib/test/fakedata/fakezcfilterocc.h>
#include <vespa/searchlib/test/fakedata/fpfactory.h>
-#include <vespa/fastos/thread.h>
-#include <vespa/fastos/time.h>
-#include <mutex>
#include <condition_variable>
+#include <mutex>
#include <vector>
#include <vespa/log/log.h>
@@ -29,8 +30,7 @@ namespace postinglistbm {
class AndStressWorker;
-class AndStressMaster
-{
+class AndStressMaster {
private:
AndStressMaster(const AndStressMaster &);
@@ -60,9 +60,11 @@ private:
uint32_t _numTasks;
public:
- typedef std::pair<FakePosting *, FakePosting *> Task;
+ using Task = std::pair<FakePosting *, FakePosting *>;
+
private:
std::vector<Task> _tasks;
+
public:
AndStressMaster(search::Rand48 &rnd,
FakeWordSet &wordSet,
@@ -91,8 +93,7 @@ public:
};
-class AndStressWorker : public FastOS_Runnable
-{
+class AndStressWorker : public FastOS_Runnable {
private:
AndStressWorker(const AndStressWorker &);
@@ -107,7 +108,6 @@ public:
virtual void Run(FastOS_ThreadInterface *thisThread, void *arg) override;
};
-
template <class P>
FakePosting *
makePosting(FakeWord &fw)
@@ -115,7 +115,6 @@ makePosting(FakeWord &fw)
return new P(fw);
}
-
AndStressMaster::AndStressMaster(search::Rand48 &rnd,
FakeWordSet &wordSet,
unsigned int numDocs,
@@ -134,7 +133,7 @@ AndStressMaster::AndStressMaster(search::Rand48 &rnd,
_skipCommonPairsRate(skipCommonPairsRate),
_stride(stride),
_unpack(unpack),
- _threadPool(NULL),
+ _threadPool(nullptr),
_workers(),
_workersDone(0),
_wordSet(wordSet),
@@ -152,35 +151,34 @@ AndStressMaster::AndStressMaster(search::Rand48 &rnd,
template <class C>
static void
-clearPtrVector(std::vector<C> &v)
+clearPtrVector(std::vector<C> &vector)
{
- for (unsigned int i = 0; i < v.size(); ++i)
- delete v[i];
- v.clear();
+ for (auto& elem : vector) {
+ delete elem;
+ }
+ vector.clear();
}
-
AndStressMaster::~AndStressMaster()
{
LOG(info, "AndStressMaster::~AndStressMaster");
_threadPool->Close();
delete _threadPool;
- _threadPool = NULL;
+ _threadPool = nullptr;
clearPtrVector(_workers);
dropPostings();
}
-
void
AndStressMaster::dropPostings()
{
- for (unsigned int i = 0; i < _postings.size(); ++i)
- _postings[i].clear();
+ for (auto& posting : _postings) {
+ posting.clear();
+ }
dropTasks();
}
-
void
AndStressMaster::dropTasks()
{
@@ -188,39 +186,39 @@ AndStressMaster::dropTasks()
_taskIdx = 0;
}
-
void
AndStressMaster::resetTasks()
{
_taskIdx = 0;
}
-
static void
makeSomePostings(FPFactory *postingFactory,
- std::vector<FakeWord *> &w,
- std::vector<FakePosting::SP> &p,
+ std::vector<FakeWord *> &words,
+ std::vector<FakePosting::SP> &postings,
uint32_t stride,
bool validate,
bool verbose)
{
- for (unsigned int i = 0; i < w.size(); ++i) {
- FakePosting::SP np(postingFactory->make(*w[i]));
+ for (const auto& word : words) {
+ auto posting = postingFactory->make(*word);
if (validate) {
TermFieldMatchData md;
TermFieldMatchDataArray tfmda;
tfmda.add(&md);
- std::unique_ptr<SearchIterator> sb(np->createIterator(tfmda));
- if (np->hasWordPositions()) {
- if (stride != 0)
- w[i]->validate(sb.get(), tfmda, stride, verbose);
- else
- w[i]->validate(sb.get(), tfmda, verbose);
- } else
- w[i]->validate(sb.get(), verbose);
+ std::unique_ptr<SearchIterator> iterator(posting->createIterator(tfmda));
+ if (posting->hasWordPositions()) {
+ if (stride != 0) {
+ word->validate(iterator.get(), tfmda, stride, verbose);
+ } else {
+ word->validate(iterator.get(), tfmda, verbose);
+ }
+ } else {
+ word->validate(iterator.get(), verbose);
+ }
}
- p.push_back(np);
+ postings.push_back(posting);
}
}
@@ -236,7 +234,7 @@ AndStressMaster::makePostingsHelper(FPFactory *postingFactory,
tv.SetNow();
before = tv.Secs();
postingFactory->setup(_wordSet);
- for (unsigned int i = 0; i < _wordSet._words.size(); ++i)
+ for (size_t i = 0; i < _wordSet._words.size(); ++i)
makeSomePostings(postingFactory,
_wordSet._words[i], _postings[i],
_stride,
@@ -250,7 +248,6 @@ AndStressMaster::makePostingsHelper(FPFactory *postingFactory,
postingFormat.c_str());
}
-
void
AndStressMaster::setupTasks(unsigned int numTasks)
{
@@ -276,19 +273,19 @@ AndStressMaster::setupTasks(unsigned int numTasks)
}
}
-
AndStressMaster::Task *
AndStressMaster::getTask()
{
- Task *result = NULL;
+ Task *result = nullptr;
std::lock_guard<std::mutex> taskGuard(_taskLock);
if (_taskIdx < _tasks.size()) {
result = &_tasks[_taskIdx];
++_taskIdx;
} else {
_workersDone++;
- if (_workersDone == _workers.size())
+ if (_workersDone == _workers.size()) {
_taskCond.notify_all();
+ }
}
return result;
}
@@ -298,26 +295,22 @@ AndStressMaster::run()
{
LOG(info, "AndStressMaster::run");
- std::vector<std::string>::const_iterator pti;
- std::vector<std::string>::const_iterator ptie = _postingTypes.end() ;
-
- for (pti = _postingTypes.begin(); pti != ptie; ++pti) {
- std::unique_ptr<FPFactory> ff(getFPFactory(*pti, _wordSet.getSchema()));
- makePostingsHelper(ff.get(), *pti, true, false);
+ for (const auto& type : _postingTypes) {
+ std::unique_ptr<FPFactory> factory(getFPFactory(type, _wordSet.getSchema()));
+ makePostingsHelper(factory.get(), type, true, false);
setupTasks(_numTasks);
double totalTime = 0;
for (unsigned int loop = 0; loop < _loops; ++loop) {
- totalTime += runWorkers(*pti);
+ totalTime += runWorkers(type);
resetTasks();
}
LOG(info, "AndStressMaster::average run elapsed %10.6f s for workers %s format",
- totalTime / _loops, pti->c_str());
+ totalTime / _loops, type.c_str());
dropPostings();
}
FastOS_Thread::Sleep(250);
}
-
double
AndStressMaster::runWorkers(const std::string &postingFormat)
{
@@ -328,15 +321,19 @@ AndStressMaster::runWorkers(const std::string &postingFormat)
tv.SetNow();
before = tv.Secs();
unsigned int numWorkers = 8;
- for (unsigned int i = 0; i < numWorkers; ++i)
+ for (unsigned int i = 0; i < numWorkers; ++i) {
_workers.push_back(new AndStressWorker(*this, i));
+ }
+
+ for (auto& worker : _workers) {
+ _threadPool->NewThread(worker);
+ }
- for (unsigned int i = 0; i < _workers.size(); ++i)
- _threadPool->NewThread(_workers[i]);
{
std::unique_lock<std::mutex> taskGuard(_taskLock);
- while (_workersDone < _workers.size())
+ while (_workersDone < _workers.size()) {
_taskCond.wait(taskGuard);
+ }
}
tv.SetNow();
after = tv.Secs();
@@ -349,7 +346,6 @@ AndStressMaster::runWorkers(const std::string &postingFormat)
return after - before;
}
-
AndStressWorker::AndStressWorker(AndStressMaster &master, unsigned int id)
: _master(master),
_id(id)
@@ -362,100 +358,14 @@ AndStressWorker::~AndStressWorker()
LOG(debug, "AndStressWorker::~AndStressWorker, id=%u", _id);
}
-
-static int
-highLevelAndPairPostingScan(SearchIterator &sb1,
- SearchIterator &sb2,
- uint32_t numDocs, uint64_t *cycles)
-{
- uint32_t hits = 0;
- uint64_t before = fastos::ClockSystem::now();
- sb1.initFullRange();
- sb2.initFullRange();
- uint32_t docId = sb1.getDocId();
- while (docId < numDocs) {
- if (sb1.seek(docId)) {
- if (sb2.seek(docId)) {
- ++hits;
- ++docId;
- } else if (docId < sb2.getDocId())
- docId = sb2.getDocId();
- else
- ++docId;
- } else if (docId < sb1.getDocId())
- docId= sb1.getDocId();
- else
- ++docId;
- }
- uint64_t after = fastos::ClockSystem::now();
- *cycles = after - before;
- return hits;
-}
-
-
-static int
-highLevelAndPairPostingScanUnpack(SearchIterator &sb1,
- SearchIterator &sb2,
- uint32_t numDocs,
- uint64_t *cycles)
-{
- uint32_t hits = 0;
- uint64_t before = fastos::ClockSystem::now();
- sb1.initFullRange();
- sb2.initFullRange();
- uint32_t docId = sb1.getDocId();
- while (docId < numDocs) {
- if (sb1.seek(docId)) {
- if (sb2.seek(docId)) {
- ++hits;
- sb1.unpack(docId);
- sb2.unpack(docId);
- ++docId;
- } else if (docId < sb2.getDocId())
- docId = sb2.getDocId();
- else
- ++docId;
- } else if (docId < sb1.getDocId())
- docId= sb1.getDocId();
- else
- ++docId;
- }
- uint64_t after = fastos::ClockSystem::now();
- *cycles = after - before;
- return hits;
-}
-
void
-testFakePair(FakePosting &f1, FakePosting &f2, unsigned int numDocs,
- bool unpack)
+testFakePair(const FakePosting &f1, const FakePosting &f2, uint32_t doc_id_limit, bool unpack)
{
- TermFieldMatchData md1;
- TermFieldMatchDataArray tfmda1;
- tfmda1.add(&md1);
- std::unique_ptr<SearchIterator> sb1(f1.createIterator(tfmda1));
-
- TermFieldMatchData md2;
- TermFieldMatchDataArray tfmda2;
- tfmda1.add(&md2);
- std::unique_ptr<SearchIterator> sb2(f2.createIterator(tfmda2));
-
- int hits = 0;
- uint64_t scanUnpackTime = 0;
- if (unpack)
- hits = highLevelAndPairPostingScanUnpack(*sb1.get(), *sb2.get(),
- numDocs, &scanUnpackTime);
- else
- hits = highLevelAndPairPostingScan(*sb1.get(), *sb2.get(),
- numDocs, &scanUnpackTime);
-#if 0
- printf("Fakepair %s AND %s => %d hits, %" PRIu64 " cycles\n",
- f1.getName().c_str(),
- f2.getName().c_str(),
- hits,
- scanUnpackTime);
-#else
- (void)hits;
-#endif
+ uint64_t scan_unpack_time = 0;
+ int hits = unpack ?
+ FakeMatchLoop::and_pair_posting_scan_with_unpack(f1, f2, doc_id_limit, scan_unpack_time) :
+ FakeMatchLoop::and_pair_posting_scan(f1, f2, doc_id_limit, scan_unpack_time);
+ (void) hits;
}
void
@@ -468,20 +378,19 @@ AndStressWorker::Run(FastOS_ThreadInterface *thisThread, void *arg)
bool unpack = _master.getUnpack();
for (;;) {
AndStressMaster::Task *task = _master.getTask();
- if (task == NULL)
+ if (task == nullptr) {
break;
+ }
testFakePair(*task->first, *task->second, _master.getNumDocs(),
unpack);
}
}
-
AndStress::AndStress()
{
LOG(debug, "Andstress::AndStress");
}
-
AndStress::~AndStress()
{
LOG(debug, "Andstress::~AndStress");
diff --git a/searchlib/src/tests/postinglistbm/andstress.h b/searchlib/src/tests/postinglistbm/andstress.h
index 07e344f3549..fdc99fb42ba 100644
--- a/searchlib/src/tests/postinglistbm/andstress.h
+++ b/searchlib/src/tests/postinglistbm/andstress.h
@@ -1,8 +1,8 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
-#include <vector>
#include <string>
+#include <vector>
namespace search {
class Rand48;
@@ -13,24 +13,22 @@ namespace fakedata { class FakeWordSet; }
namespace postinglistbm {
-class AndStress
-{
+class AndStress {
public:
AndStress();
~AndStress();
- void
- run(search::Rand48 &rnd,
- search::fakedata::FakeWordSet &wordSet,
- unsigned int numDocs,
- unsigned int commonDocFreq,
- const std::vector<std::string> &postingTypes,
- unsigned int loops,
- unsigned int skipCommonPairsRate,
- uint32_t numTasks,
- uint32_t stride,
- bool unpack);
+ void run(search::Rand48 &rnd,
+ search::fakedata::FakeWordSet &wordSet,
+ unsigned int numDocs,
+ unsigned int commonDocFreq,
+ const std::vector<std::string> &postingTypes,
+ unsigned int loops,
+ unsigned int skipCommonPairsRate,
+ uint32_t numTasks,
+ uint32_t stride,
+ bool unpack);
};
}
diff --git a/searchlib/src/tests/postinglistbm/postinglistbm.cpp b/searchlib/src/tests/postinglistbm/postinglistbm.cpp
index 41d94bf7186..02fbe4878ba 100644
--- a/searchlib/src/tests/postinglistbm/postinglistbm.cpp
+++ b/searchlib/src/tests/postinglistbm/postinglistbm.cpp
@@ -1,15 +1,16 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "andstress.h"
+#include <vespa/fastos/app.h>
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/common/resultset.h>
-#include <vespa/searchlib/util/rand48.h>
-#include "andstress.h"
-#include <vespa/searchlib/test/fakedata/fakeword.h>
+#include <vespa/searchlib/index/docidandfeatures.h>
+#include <vespa/searchlib/test/fakedata/fake_match_loop.h>
#include <vespa/searchlib/test/fakedata/fakeposting.h>
+#include <vespa/searchlib/test/fakedata/fakeword.h>
#include <vespa/searchlib/test/fakedata/fakewordset.h>
#include <vespa/searchlib/test/fakedata/fpfactory.h>
-#include <vespa/searchlib/index/docidandfeatures.h>
-#include <vespa/fastos/app.h>
+#include <vespa/searchlib/util/rand48.h>
#include <vespa/log/log.h>
@@ -29,8 +30,7 @@ void FastS_block_usr2() {}
namespace postinglistbm {
-class PostingListBM : public FastOS_Application
-{
+class PostingListBM : public FastOS_Application {
private:
bool _verbose;
uint32_t _numDocs;
@@ -42,24 +42,24 @@ private:
FakeWordSet _wordSet;
uint32_t _stride;
bool _unpack;
+
public:
search::Rand48 _rnd;
private:
- void Usage();
+ void usage();
void badPostingType(const std::string &postingType);
void testFake(const std::string &postingType,
const Schema &schema,
- const FakeWord &fw);
+ const FakeWord &word);
public:
PostingListBM();
~PostingListBM();
int Main() override;
};
-
void
-PostingListBM::Usage()
+PostingListBM::usage()
{
printf("postinglistbm "
"[-C <skipCommonPairsRate>] "
@@ -74,29 +74,24 @@ PostingListBM::Usage()
"[-v]\n");
}
-
void
PostingListBM::badPostingType(const std::string &postingType)
{
printf("Bad posting list type: %s\n", postingType.c_str());
printf("Supported types: ");
- std::vector<std::string> postingTypes = getPostingTypes();
- std::vector<std::string>::const_iterator pti;
- std::vector<std::string>::const_iterator ptie = postingTypes.end();
bool first = true;
-
- for (pti = postingTypes.begin(); pti != ptie; ++pti) {
- if (first)
+ for (const auto& type : getPostingTypes()) {
+ if (first) {
first = false;
- else
+ } else {
printf(", ");
- printf("%s", pti->c_str());
+ }
+ printf("%s", type.c_str());
}
printf("\n");
}
-
PostingListBM::PostingListBM()
: _verbose(false),
_numDocs(10000000),
@@ -112,171 +107,55 @@ PostingListBM::PostingListBM()
{
}
+PostingListBM::~PostingListBM() = default;
-PostingListBM::~PostingListBM()
-{
-}
-
-
-static int
-highLevelSinglePostingScan(SearchIterator &sb, uint32_t numDocs, uint64_t *cycles)
-{
- uint32_t hits = 0;
- uint64_t before = fastos::ClockSystem::now();
- sb.initFullRange();
- uint32_t docId = sb.getDocId();
- while (docId < numDocs) {
- if (sb.seek(docId)) {
- ++hits;
- ++docId;
- } else if (docId < sb.getDocId())
- docId= sb.getDocId();
- else
- ++docId;
- }
- uint64_t after = fastos::ClockSystem::now();
- *cycles = after - before;
- return hits;
-}
-
-
-static int
-highLevelSinglePostingScanUnpack(SearchIterator &sb,
- uint32_t numDocs, uint64_t *cycles)
-{
- uint32_t hits = 0;
- uint64_t before = fastos::ClockSystem::now();
- sb.initFullRange();
- uint32_t docId = sb.getDocId();
- while (docId < numDocs) {
- if (sb.seek(docId)) {
- ++hits;
- sb.unpack(docId);
- ++docId;
- } else if (docId < sb.getDocId())
- docId= sb.getDocId();
- else
- ++docId;
- }
- uint64_t after = fastos::ClockSystem::now();
- *cycles = after - before;
- return hits;
-}
-
-
-static int
-highLevelAndPairPostingScan(SearchIterator &sb1,
- SearchIterator &sb2,
- uint32_t numDocs, uint64_t *cycles)
+void
+validate_posting_for_word(const FakePosting& posting, const FakeWord& word, bool verbose)
{
- uint32_t hits = 0;
- uint64_t before = fastos::ClockSystem::now();
- sb1.initFullRange();
- sb2.initFullRange();
- uint32_t docId = sb1.getDocId();
- while (docId < numDocs) {
- if (sb1.seek(docId)) {
- if (sb2.seek(docId)) {
- ++hits;
- ++docId;
- } else if (docId < sb2.getDocId())
- docId = sb2.getDocId();
- else
- ++docId;
- } else if (docId < sb1.getDocId())
- docId= sb1.getDocId();
- else
- ++docId;
- }
- uint64_t after = fastos::ClockSystem::now();
- *cycles = after - before;
- return hits;
-}
-
+ TermFieldMatchData md;
+ TermFieldMatchDataArray tfmda;
+ tfmda.add(&md);
-static int
-highLevelAndPairPostingScanUnpack(SearchIterator &sb1,
- SearchIterator &sb2,
- uint32_t numDocs,
- uint64_t *cycles)
-{
- uint32_t hits = 0;
- uint64_t before = fastos::ClockSystem::now();
- sb1.initFullRange();
- sb1.initFullRange();
- uint32_t docId = sb1.getDocId();
- while (docId < numDocs) {
- if (sb1.seek(docId)) {
- if (sb2.seek(docId)) {
- ++hits;
- sb1.unpack(docId);
- sb2.unpack(docId);
- ++docId;
- } else if (docId < sb2.getDocId())
- docId = sb2.getDocId();
- else
- ++docId;
- } else if (docId < sb1.getDocId())
- docId= sb1.getDocId();
- else
- ++docId;
+ std::unique_ptr<SearchIterator> iterator(posting.createIterator(tfmda));
+ if (posting.hasWordPositions()) {
+ word.validate(iterator.get(), tfmda, verbose);
+ } else {
+ word.validate(iterator.get(), verbose);
}
- uint64_t after = fastos::ClockSystem::now();
- *cycles = after - before;
- return hits;
}
-
void
PostingListBM::testFake(const std::string &postingType,
const Schema &schema,
- const FakeWord &fw)
+ const FakeWord &word)
{
- std::unique_ptr<FPFactory> ff(getFPFactory(postingType, schema));
- std::vector<const FakeWord *> v;
- v.push_back(&fw);
- ff->setup(v);
- FakePosting::SP f(ff->make(fw));
+ auto posting_factory = getFPFactory(postingType, schema);
+ std::vector<const FakeWord *> words;
+ words.push_back(&word);
+ posting_factory->setup(words);
+ auto posting = posting_factory->make(word);
printf("%s.bitsize=%d+%d+%d+%d+%d\n",
- f->getName().c_str(),
- static_cast<int>(f->bitSize()),
- static_cast<int>(f->l1SkipBitSize()),
- static_cast<int>(f->l2SkipBitSize()),
- static_cast<int>(f->l3SkipBitSize()),
- static_cast<int>(f->l4SkipBitSize()));
- TermFieldMatchData md;
- TermFieldMatchDataArray tfmda;
- tfmda.add(&md);
+ posting->getName().c_str(),
+ static_cast<int>(posting->bitSize()),
+ static_cast<int>(posting->l1SkipBitSize()),
+ static_cast<int>(posting->l2SkipBitSize()),
+ static_cast<int>(posting->l3SkipBitSize()),
+ static_cast<int>(posting->l4SkipBitSize()));
+
+ validate_posting_for_word(*posting, word, _verbose);
- std::unique_ptr<SearchIterator> sb(f->createIterator(tfmda));
- if (f->hasWordPositions())
- fw.validate(sb.get(), tfmda, _verbose);
- else
- fw.validate(sb.get(), _verbose);
uint64_t scanTime = 0;
uint64_t scanUnpackTime = 0;
- TermFieldMatchData md2;
- TermFieldMatchDataArray tfmda2;
- tfmda2.add(&md2);
-
- std::unique_ptr<SearchIterator> sb2(f->createIterator(tfmda2));
- int hits1 = highLevelSinglePostingScan(*sb2.get(), fw.getDocIdLimit(),
- &scanTime);
- TermFieldMatchData md3;
- TermFieldMatchDataArray tfmda3;
- tfmda3.add(&md3);
-
- std::unique_ptr<SearchIterator> sb3(f->createIterator(tfmda3));
- int hits2 = highLevelSinglePostingScanUnpack(*sb3.get(), fw.getDocIdLimit(),
- &scanUnpackTime);
+ int hits1 = FakeMatchLoop::single_posting_scan(*posting, word.getDocIdLimit(), scanTime);
+ int hits2 = FakeMatchLoop::single_posting_scan_with_unpack(*posting, word.getDocIdLimit(), scanUnpackTime);
+
printf("testFake '%s' hits1=%d, hits2=%d, scanTime=%" PRIu64
", scanUnpackTime=%" PRIu64 "\n",
- f->getName().c_str(),
+ posting->getName().c_str(),
hits1, hits2, scanTime, scanUnpackTime);
}
-
void
testFakePair(const std::string &postingType,
const Schema &schema,
@@ -291,24 +170,10 @@ testFakePair(const std::string &postingType,
FakePosting::SP f1(ff->make(fw1));
FakePosting::SP f2(ff->make(fw2));
- TermFieldMatchData md1;
- TermFieldMatchDataArray tfmda1;
- tfmda1.add(&md1);
- std::unique_ptr<SearchIterator> sb1(f1->createIterator(tfmda1));
-
- TermFieldMatchData md2;
- TermFieldMatchDataArray tfmda2;
- tfmda1.add(&md2);
- std::unique_ptr<SearchIterator> sb2(f2->createIterator(tfmda2));
-
- int hits = 0;
uint64_t scanUnpackTime = 0;
- if (unpack)
- hits = highLevelAndPairPostingScanUnpack(*sb1.get(), *sb2.get(),
- fw1.getDocIdLimit(), &scanUnpackTime);
- else
- hits = highLevelAndPairPostingScan(*sb1.get(), *sb2.get(),
- fw1.getDocIdLimit(), &scanUnpackTime);
+ int hits = unpack ?
+ FakeMatchLoop::and_pair_posting_scan_with_unpack(*f1, *f2, fw1.getDocIdLimit(), scanUnpackTime) :
+ FakeMatchLoop::and_pair_posting_scan(*f1, *f2, fw1.getDocIdLimit(), scanUnpackTime);
printf("Fakepair %s AND %s => %d hits, %" PRIu64 " cycles\n",
f1->getName().c_str(),
f2->getName().c_str(),
@@ -316,7 +181,6 @@ testFakePair(const std::string &postingType,
scanUnpackTime);
}
-
int
PostingListBM::Main()
{
@@ -374,7 +238,7 @@ PostingListBM::Main()
CollectionType::SINGLE);
schema.addIndexField(indexField);
std::unique_ptr<FPFactory> ff(getFPFactory(optArg, schema));
- if (ff.get() == NULL) {
+ if (ff.get() == nullptr) {
badPostingType(optArg);
return 1;
}
@@ -397,13 +261,13 @@ PostingListBM::Main()
_numWordsPerClass = 5;
break;
default:
- Usage();
+ usage();
return 1;
}
}
if (_commonDocFreq > _numDocs) {
- Usage();
+ usage();
return 1;
}
@@ -437,24 +301,23 @@ PostingListBM::Main()
"word5", word4, w4w5od, _rnd,
_wordSet.getFieldsParams(), _wordSet.getPackedIndex());
- if (_postingTypes.empty())
+ if (_postingTypes.empty()) {
_postingTypes = getPostingTypes();
- std::vector<std::string>::const_iterator pti;
- std::vector<std::string>::const_iterator ptie = _postingTypes.end() ;
+ }
- for (pti = _postingTypes.begin(); pti != ptie; ++pti) {
- testFake(*pti, _wordSet.getSchema(), word1);
- testFake(*pti, _wordSet.getSchema(), word2);
- testFake(*pti, _wordSet.getSchema(), word3);
+ for (const auto& type : _postingTypes) {
+ testFake(type, _wordSet.getSchema(), word1);
+ testFake(type, _wordSet.getSchema(), word2);
+ testFake(type, _wordSet.getSchema(), word3);
}
- for (pti = _postingTypes.begin(); pti != ptie; ++pti) {
- testFakePair(*pti, _wordSet.getSchema(), false, word1, word3);
- testFakePair(*pti, _wordSet.getSchema(), false, word2, word3);
+ for (const auto& type : _postingTypes) {
+ testFakePair(type, _wordSet.getSchema(), false, word1, word3);
+ testFakePair(type, _wordSet.getSchema(), false, word2, word3);
}
- for (pti = _postingTypes.begin(); pti != ptie; ++pti) {
- testFakePair(*pti, _wordSet.getSchema(), false, word4, word5);
+ for (const auto& type : _postingTypes) {
+ testFakePair(type, _wordSet.getSchema(), false, word4, word5);
}
if (doandstress) {
@@ -472,16 +335,14 @@ PostingListBM::Main()
return 0;
}
-} // namespace postinglistbm
+}
int
main(int argc, char **argv)
{
postinglistbm::PostingListBM app;
- setvbuf(stdout, NULL, _IOLBF, 32768);
+ setvbuf(stdout, nullptr, _IOLBF, 32768);
app._rnd.srand48(32);
return app.Entry(argc, argv);
-
- return 0;
}
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/fakedata/CMakeLists.txt
index 7a86003e9f6..275fa505e9d 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/test/fakedata/CMakeLists.txt
@@ -1,6 +1,7 @@
# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
vespa_add_library(searchlib_test_fakedata OBJECT
SOURCES
+ fake_match_loop.cpp
fakeword.cpp
fakewordset.cpp
fakeposting.cpp
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.cpp
new file mode 100644
index 00000000000..282a3e07a0e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.cpp
@@ -0,0 +1,137 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "fake_match_loop.h"
+#include "fakeposting.h"
+#include <vespa/fastos/timestamp.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataArray;
+using search::queryeval::SearchIterator;
+
+namespace search::fakedata {
+
+namespace {
+
+/**
+ * Owns a search iterator created from a FakePosting together with the match
+ * data objects the iterator was created with, so all of them share one lifetime.
+ *
+ * The constructor hands the address of _md to _tfmda, so an instance must not
+ * be copied; copy construction and copy assignment are deleted to make that
+ * explicit (which also suppresses the implicit move operations).
+ */
+class IteratorState {
+private:
+    TermFieldMatchData _md;
+    TermFieldMatchDataArray _tfmda;
+    std::unique_ptr<SearchIterator> _itr;
+
+public:
+    // explicit: a FakePosting must never convert silently into an IteratorState.
+    explicit IteratorState(const FakePosting& posting)
+        : _md(),
+          _tfmda(),
+          _itr()
+    {
+        // Register the match data first; the iterator is created from _tfmda.
+        _tfmda.add(&_md);
+        _itr.reset(posting.createIterator(_tfmda));
+    }
+    IteratorState(const IteratorState&) = delete;
+    IteratorState& operator=(const IteratorState&) = delete;
+    ~IteratorState() = default;
+
+    /** Returns the iterator created in the constructor. */
+    SearchIterator& itr() { return *_itr; }
+};
+
+/**
+ * Scans a single iterator from its initial position up to (but not including)
+ * doc_id_limit and counts the documents for which seek() reports a hit.
+ *
+ * @tparam do_unpack        when true, unpack() is called on the iterator for every hit.
+ * @param itr               iterator to scan; initFullRange() is called on it here.
+ * @param doc_id_limit      scanning stops once the candidate doc id reaches this value.
+ * @param elapsed_time_ns   overwritten with the difference between two
+ *                          fastos::ClockSystem::now() samples taken around the scan
+ *                          (initFullRange() included).
+ * @return number of hits.
+ */
+template <bool do_unpack>
+int
+do_single_posting_scan(SearchIterator& itr, uint32_t doc_id_limit, uint64_t& elapsed_time_ns)
+{
+    uint32_t hits = 0;
+    const uint64_t time_before = fastos::ClockSystem::now();
+    itr.initFullRange();
+    uint32_t doc_id = itr.getDocId();
+    while (doc_id < doc_id_limit) {
+        if (itr.seek(doc_id)) {
+            ++hits;
+            // Resolved at compile time: the non-unpack instantiation has no unpack call at all.
+            if constexpr (do_unpack) {
+                itr.unpack(doc_id);
+            }
+            ++doc_id;
+        } else if (doc_id < itr.getDocId()) {
+            // Miss: jump ahead to the doc id the iterator reports after the failed seek.
+            doc_id = itr.getDocId();
+        } else {
+            ++doc_id;
+        }
+    }
+    const uint64_t time_after = fastos::ClockSystem::now();
+    elapsed_time_ns = time_after - time_before;
+    // The public interface returns int; make the uint32_t -> int conversion explicit.
+    return static_cast<int>(hits);
+}
+
+}
+
+// Creates a fresh iterator for 'posting' and scans it without unpacking hits.
+// Returns the hit count; 'elapsed_time_ns' receives the measured scan time.
+int
+FakeMatchLoop::single_posting_scan(const FakePosting& posting, uint32_t doc_id_limit, uint64_t& elapsed_time_ns)
+{
+    constexpr bool with_unpack = false;
+    IteratorState iterator_state(posting);
+    SearchIterator& iterator = iterator_state.itr();
+    return do_single_posting_scan<with_unpack>(iterator, doc_id_limit, elapsed_time_ns);
+}
+
+// Creates a fresh iterator for 'posting' and scans it, unpacking every hit.
+// Returns the hit count; 'elapsed_time_ns' receives the measured scan time.
+int
+FakeMatchLoop::single_posting_scan_with_unpack(const FakePosting& posting, uint32_t doc_id_limit, uint64_t& elapsed_time_ns)
+{
+    constexpr bool with_unpack = true;
+    IteratorState iterator_state(posting);
+    SearchIterator& iterator = iterator_state.itr();
+    return do_single_posting_scan<with_unpack>(iterator, doc_id_limit, elapsed_time_ns);
+}
+
+namespace {
+
+/**
+ * Scans two iterators in lock-step up to (but not including) doc_id_limit and
+ * counts the documents for which seek() reports a hit on both of them.
+ *
+ * @tparam do_unpack        when true, unpack() is called on both iterators for every hit.
+ * @param itr1              first iterator; drives the scan and is always seeked first.
+ * @param itr2              second iterator; only seeked when itr1 reports a hit.
+ * @param doc_id_limit      scanning stops once the candidate doc id reaches this value.
+ * @param elapsed_time_ns   overwritten with the difference between two
+ *                          fastos::ClockSystem::now() samples taken around the scan
+ *                          (both initFullRange() calls included).
+ * @return number of documents matched by both iterators.
+ */
+template <bool do_unpack>
+int
+do_and_pair_posting_scan(SearchIterator& itr1, SearchIterator& itr2,
+                         uint32_t doc_id_limit, uint64_t& elapsed_time_ns)
+{
+    uint32_t hits = 0;
+    const uint64_t time_before = fastos::ClockSystem::now();
+    itr1.initFullRange();
+    itr2.initFullRange();
+    uint32_t doc_id = itr1.getDocId();
+    while (doc_id < doc_id_limit) {
+        if (itr1.seek(doc_id)) {
+            if (itr2.seek(doc_id)) {
+                ++hits;
+                // Resolved at compile time: the non-unpack instantiation has no unpack calls at all.
+                if constexpr (do_unpack) {
+                    itr1.unpack(doc_id);
+                    itr2.unpack(doc_id);
+                }
+                ++doc_id;
+            } else if (doc_id < itr2.getDocId()) {
+                // itr2 missed: continue from the doc id itr2 reports after the failed seek.
+                doc_id = itr2.getDocId();
+            } else {
+                ++doc_id;
+            }
+        } else if (doc_id < itr1.getDocId()) {
+            // itr1 missed: continue from the doc id itr1 reports after the failed seek.
+            doc_id = itr1.getDocId();
+        } else {
+            ++doc_id;
+        }
+    }
+    const uint64_t time_after = fastos::ClockSystem::now();
+    elapsed_time_ns = time_after - time_before;
+    // The public interface returns int; make the uint32_t -> int conversion explicit.
+    return static_cast<int>(hits);
+}
+
+}
+
+// Creates one fresh iterator per posting and runs the AND scan over the pair
+// without unpacking hits. Returns the hit count; 'elapsed_time_ns' receives
+// the measured scan time.
+int
+FakeMatchLoop::and_pair_posting_scan(const FakePosting& posting_1, const FakePosting& posting_2,
+                                     uint32_t doc_id_limit, uint64_t& elapsed_time_ns)
+{
+    constexpr bool with_unpack = false;
+    IteratorState first(posting_1);
+    IteratorState second(posting_2);
+    SearchIterator& first_itr = first.itr();
+    SearchIterator& second_itr = second.itr();
+    return do_and_pair_posting_scan<with_unpack>(first_itr, second_itr, doc_id_limit, elapsed_time_ns);
+}
+
+// Creates one fresh iterator per posting and runs the AND scan over the pair,
+// unpacking both iterators on every hit. Returns the hit count;
+// 'elapsed_time_ns' receives the measured scan time.
+int
+FakeMatchLoop::and_pair_posting_scan_with_unpack(const FakePosting& posting_1, const FakePosting& posting_2,
+                                                 uint32_t doc_id_limit, uint64_t& elapsed_time_ns)
+{
+    constexpr bool with_unpack = true;
+    IteratorState first(posting_1);
+    IteratorState second(posting_2);
+    SearchIterator& first_itr = first.itr();
+    SearchIterator& second_itr = second.itr();
+    return do_and_pair_posting_scan<with_unpack>(first_itr, second_itr, doc_id_limit, elapsed_time_ns);
+}
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.h b/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.h
new file mode 100644
index 00000000000..7bd98f1b4ca
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.h
@@ -0,0 +1,25 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <cstdint>
+
+namespace search::fakedata {
+
+class FakePosting;
+
+/**
+ * Implementations of fake match loops used for testing and benchmarking.
+ *
+ * Every function creates its own iterator(s) from the given posting(s), scans
+ * doc ids below doc_id_limit, returns the number of hits, and overwrites
+ * elapsed_time_ns with the time measured around the scan. Iterator creation
+ * happens before the measurement starts.
+ */
+class FakeMatchLoop {
+public:
+    // Counts the documents matched by a single posting; hits are not unpacked.
+    static int single_posting_scan(const FakePosting& posting, uint32_t doc_id_limit, uint64_t& elapsed_time_ns);
+    // Same scan as single_posting_scan(), but unpack() is called for every hit.
+    static int single_posting_scan_with_unpack(const FakePosting& posting, uint32_t doc_id_limit, uint64_t& elapsed_time_ns);
+
+    // Counts the documents matched by both postings (AND); hits are not unpacked.
+    static int and_pair_posting_scan(const FakePosting& posting_1, const FakePosting& posting_2,
+                                     uint32_t doc_id_limit, uint64_t& elapsed_time_ns);
+    // Same scan as and_pair_posting_scan(), but unpack() is called on both iterators for every hit.
+    static int and_pair_posting_scan_with_unpack(const FakePosting& posting_1, const FakePosting& posting_2,
+                                                 uint32_t doc_id_limit, uint64_t& elapsed_time_ns);
+};
+
+}