summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- searchlib/src/tests/postinglistbm/CMakeLists.txt | 2
-rw-r--r-- searchlib/src/tests/postinglistbm/andstress.h | 34
-rw-r--r-- searchlib/src/tests/postinglistbm/posting_list_test.cpp | 37
-rw-r--r-- searchlib/src/tests/postinglistbm/postinglistbm.cpp | 75
-rw-r--r-- searchlib/src/tests/postinglistbm/stress_runner.cpp (renamed from searchlib/src/tests/postinglistbm/andstress.cpp) | 311
-rw-r--r-- searchlib/src/tests/postinglistbm/stress_runner.h | 35
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.cpp | 75
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.h | 13
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fakewordset.cpp | 31
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h | 16
10 files changed, 332 insertions, 297 deletions
diff --git a/searchlib/src/tests/postinglistbm/CMakeLists.txt b/searchlib/src/tests/postinglistbm/CMakeLists.txt
index 479f54e8531..6e90f44726a 100644
--- a/searchlib/src/tests/postinglistbm/CMakeLists.txt
+++ b/searchlib/src/tests/postinglistbm/CMakeLists.txt
@@ -12,7 +12,7 @@ vespa_add_test(NAME searchlib_posting_list_test_app NO_VALGRIND COMMAND searchli
vespa_add_executable(searchlib_postinglistbm_app TEST
SOURCES
postinglistbm.cpp
- andstress.cpp
+ stress_runner.cpp
DEPENDS
searchlib_test
searchlib
diff --git a/searchlib/src/tests/postinglistbm/andstress.h b/searchlib/src/tests/postinglistbm/andstress.h
deleted file mode 100644
index fdc99fb42ba..00000000000
--- a/searchlib/src/tests/postinglistbm/andstress.h
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#pragma once
-
-#include <string>
-#include <vector>
-
-namespace search {
-class Rand48;
-
-namespace fakedata { class FakeWordSet; }
-
-}
-
-namespace postinglistbm {
-
-class AndStress {
-public:
- AndStress();
-
- ~AndStress();
-
- void run(search::Rand48 &rnd,
- search::fakedata::FakeWordSet &wordSet,
- unsigned int numDocs,
- unsigned int commonDocFreq,
- const std::vector<std::string> &postingTypes,
- unsigned int loops,
- unsigned int skipCommonPairsRate,
- uint32_t numTasks,
- uint32_t stride,
- bool unpack);
-};
-
-}
diff --git a/searchlib/src/tests/postinglistbm/posting_list_test.cpp b/searchlib/src/tests/postinglistbm/posting_list_test.cpp
index ad3410b8f92..dad21ada469 100644
--- a/searchlib/src/tests/postinglistbm/posting_list_test.cpp
+++ b/searchlib/src/tests/postinglistbm/posting_list_test.cpp
@@ -53,32 +53,6 @@ test_fake(const std::string& posting_type,
static_cast<int>(posting->l4SkipBitSize()));
validate_posting_list_for_word(*posting, word);
-
- uint64_t scan_time = 0;
- uint64_t scan_unpack_time = 0;
- int hits1 = FakeMatchLoop::single_posting_scan(*posting, word.getDocIdLimit(), scan_time);
- int hits2 = FakeMatchLoop::single_posting_scan_with_unpack(*posting, word.getDocIdLimit(), scan_unpack_time);
-
- printf("test_fake: '%s': hits1=%d, hits2=%d, scan_time=%" PRIu64 "(ns), scan_unpack_time=%" PRIu64 "(ns)\n",
- posting->getName().c_str(), hits1, hits2, scan_time, scan_unpack_time);
-}
-
-void
-test_fake_pair(const std::string& posting_type, const Schema& schema,
- const FakeWord& word1, const FakeWord& word2)
-{
- std::unique_ptr<FPFactory> factory(getFPFactory(posting_type, schema));
- std::vector<const FakeWord *> words;
- words.push_back(&word1);
- words.push_back(&word2);
- factory->setup(words);
- auto posting1 = factory->make(word1);
- auto posting2 = factory->make(word2);
-
- uint64_t scan_time = 0;
- int hits = FakeMatchLoop::and_pair_posting_scan(*posting1, *posting2, word1.getDocIdLimit(), scan_time);
- printf("test_fake_pair: '%s' AND '%s' => %d hits, scan_time=%" PRIu64 " (ns)\n",
- posting1->getName().c_str(), posting2->getName().c_str(), hits, scan_time);
}
struct PostingListTest : public ::testing::Test {
@@ -138,15 +112,8 @@ struct PostingListTest : public ::testing::Test {
test_fake(type, word_set.getSchema(), *word1);
test_fake(type, word_set.getSchema(), *word2);
test_fake(type, word_set.getSchema(), *word3);
- }
-
- for (const auto& type : posting_types) {
- test_fake_pair(type, word_set.getSchema(), *word1, *word3);
- test_fake_pair(type, word_set.getSchema(), *word2, *word3);
- }
-
- for (const auto& type : posting_types) {
- test_fake_pair(type, word_set.getSchema(), *word4, *word5);
+ test_fake(type, word_set.getSchema(), *word4);
+ test_fake(type, word_set.getSchema(), *word5);
}
}
diff --git a/searchlib/src/tests/postinglistbm/postinglistbm.cpp b/searchlib/src/tests/postinglistbm/postinglistbm.cpp
index 0a6f99ede11..16b8e9cd7f5 100644
--- a/searchlib/src/tests/postinglistbm/postinglistbm.cpp
+++ b/searchlib/src/tests/postinglistbm/postinglistbm.cpp
@@ -1,6 +1,6 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "andstress.h"
+#include "stress_runner.h"
#include <vespa/fastos/app.h>
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/common/resultset.h>
@@ -30,8 +30,11 @@ class PostingListBM : public FastOS_Application {
private:
uint32_t _numDocs;
uint32_t _commonDocFreq;
+ uint32_t _mediumDocFreq;
+ uint32_t _rareDocFreq;
uint32_t _numWordsPerClass;
std::vector<std::string> _postingTypes;
+ StressRunner::OperatorType _operatorType;
uint32_t _loops;
uint32_t _skipCommonPairsRate;
FakeWordSet _wordSet;
@@ -54,19 +57,21 @@ usage()
"[-C <skipCommonPairsRate>] "
"[-T {string, array, weightedSet}] "
"[-c <commonDocFreq>] "
+ "[-m <mediumDocFreq>] "
+ "[-r <rareDocFreq>] "
"[-d <numDocs>] "
"[-l <numLoops>] "
"[-s <stride>] "
"[-t <postingType>] "
+ "[-o {direct, and, or}] "
"[-u] "
- "[-w <numWordsPerClass>] "
- "[-q]\n");
+ "[-w <numWordsPerClass>]\n");
}
void
badPostingType(const std::string &postingType)
{
- printf("Bad posting list type: %s\n", postingType.c_str());
+ printf("Bad posting list type: '%s'\n", postingType.c_str());
printf("Supported types: ");
bool first = true;
@@ -84,8 +89,11 @@ badPostingType(const std::string &postingType)
PostingListBM::PostingListBM()
: _numDocs(10000000),
_commonDocFreq(50000),
+ _mediumDocFreq(1000),
+ _rareDocFreq(10),
_numWordsPerClass(100),
_postingTypes(),
+ _operatorType(StressRunner::OperatorType::And),
_loops(1),
_skipCommonPairsRate(1),
_wordSet(),
@@ -107,9 +115,8 @@ PostingListBM::Main()
argi = 1;
bool hasElements = false;
bool hasElementWeights = false;
- bool quick = false;
- while ((c = GetOpt("C:c:d:l:s:t:uw:T:q", optArg, argi)) != -1) {
+ while ((c = GetOpt("C:c:m:r:d:l:s:t:o:uw:T:q", optArg, argi)) != -1) {
switch(c) {
case 'C':
_skipCommonPairsRate = atoi(optArg);
@@ -125,13 +132,20 @@ PostingListBM::Main()
hasElements = true;
hasElementWeights = true;
} else {
- printf("Bad collection type: %s\n", optArg);
+ printf("Bad collection type: '%s'\n", optArg);
+ printf("Supported types: single, array, weightedSet\n");
return 1;
}
break;
case 'c':
_commonDocFreq = atoi(optArg);
break;
+ case 'm':
+ _mediumDocFreq = atoi(optArg);
+ break;
+ case 'r':
+ _rareDocFreq = atoi(optArg);
+ break;
case 'd':
_numDocs = atoi(optArg);
break;
@@ -156,18 +170,28 @@ PostingListBM::Main()
} while (0);
_postingTypes.push_back(optArg);
break;
+ case 'o':
+ {
+ vespalib::string operatorType(optArg);
+ if (operatorType == "direct") {
+ _operatorType = StressRunner::OperatorType::Direct;
+ } else if (operatorType == "and") {
+ _operatorType = StressRunner::OperatorType::And;
+ } else if (operatorType == "or") {
+ _operatorType = StressRunner::OperatorType::Or;
+ } else {
+ printf("Bad operator type: '%s'\n", operatorType.c_str());
+ printf("Supported types: direct, and, or\n");
+ return 1;
+ }
+ break;
+ }
case 'u':
_unpack = true;
break;
case 'w':
_numWordsPerClass = atoi(optArg);
break;
- case 'q':
- quick = true;
- _numDocs = 36000;
- _commonDocFreq = 10000;
- _numWordsPerClass = 5;
- break;
default:
usage();
return 1;
@@ -182,23 +206,22 @@ PostingListBM::Main()
_wordSet.setupParams(hasElements, hasElementWeights);
uint32_t numTasks = 40000;
- if (quick) {
- numTasks = 40;
- }
-
+
if (_postingTypes.empty()) {
_postingTypes = getPostingTypes();
}
- _wordSet.setupWords(_rnd, _numDocs, _commonDocFreq, _numWordsPerClass);
-
- AndStress andstress;
- andstress.run(_rnd, _wordSet,
- _numDocs, _commonDocFreq, _postingTypes, _loops,
- _skipCommonPairsRate,
- numTasks,
- _stride,
- _unpack);
+ _wordSet.setupWords(_rnd, _numDocs, _commonDocFreq, _mediumDocFreq, _rareDocFreq, _numWordsPerClass);
+
+ StressRunner::run(_rnd,
+ _wordSet,
+ _postingTypes,
+ _operatorType,
+ _loops,
+ _skipCommonPairsRate,
+ numTasks,
+ _stride,
+ _unpack);
return 0;
}
diff --git a/searchlib/src/tests/postinglistbm/andstress.cpp b/searchlib/src/tests/postinglistbm/stress_runner.cpp
index adca7892464..98ace5e00a1 100644
--- a/searchlib/src/tests/postinglistbm/andstress.cpp
+++ b/searchlib/src/tests/postinglistbm/stress_runner.cpp
@@ -1,6 +1,6 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "andstress.h"
+#include "stress_runner.h"
#include <vespa/fastos/thread.h>
#include <vespa/fastos/time.h>
@@ -19,7 +19,7 @@
#include <vector>
#include <vespa/log/log.h>
-LOG_SETUP(".andstress");
+LOG_SETUP(".stress_runner");
using search::fef::TermFieldMatchData;
using search::fef::TermFieldMatchDataArray;
@@ -28,27 +28,27 @@ using namespace search::fakedata;
namespace postinglistbm {
-class AndStressWorker;
+class StressWorker;
+using StressWorkerUP = std::unique_ptr<StressWorker>;
-class AndStressMaster {
+class StressMaster {
private:
- AndStressMaster(const AndStressMaster &);
+ StressMaster(const StressMaster &);
- AndStressMaster &
- operator=(const AndStressMaster &);
+ StressMaster &operator=(const StressMaster &);
search::Rand48 &_rnd;
- unsigned int _numDocs;
- unsigned int _commonDocFreq;
+ uint32_t _numDocs;
std::vector<std::string> _postingTypes;
- unsigned int _loops;
- unsigned int _skipCommonPairsRate;
+ StressRunner::OperatorType _operatorType;
+ uint32_t _loops;
+ uint32_t _skipCommonPairsRate;
uint32_t _stride;
bool _unpack;
FastOS_ThreadPool *_threadPool;
- std::vector<AndStressWorker *> _workers;
- unsigned int _workersDone;
+ std::vector<StressWorkerUP> _workers;
+ uint32_t _workersDone;
FakeWordSet &_wordSet;
@@ -56,7 +56,7 @@ private:
std::mutex _taskLock;
std::condition_variable _taskCond;
- unsigned int _taskIdx;
+ uint32_t _taskIdx;
uint32_t _numTasks;
public:
@@ -66,18 +66,17 @@ private:
std::vector<Task> _tasks;
public:
- AndStressMaster(search::Rand48 &rnd,
- FakeWordSet &wordSet,
- unsigned int numDocs,
- unsigned int commonDocFreq,
- const std::vector<std::string> &postingType,
- unsigned int loops,
- unsigned int skipCommonPairsRate,
- uint32_t numTasks,
- uint32_t stride,
- bool unpack);
-
- ~AndStressMaster();
+ StressMaster(search::Rand48 &rnd,
+ FakeWordSet &wordSet,
+ const std::vector<std::string> &postingType,
+ StressRunner::OperatorType operatorType,
+ uint32_t loops,
+ uint32_t skipCommonPairsRate,
+ uint32_t numTasks,
+ uint32_t stride,
+ bool unpack);
+
+ ~StressMaster();
void run();
void makePostingsHelper(FPFactory *postingFactory,
const std::string &postingFormat,
@@ -85,50 +84,68 @@ public:
void dropPostings();
void dropTasks();
void resetTasks(); // Prepare for rerun
- void setupTasks(unsigned int numTasks);
+ void setupTasks(uint32_t numTasks);
Task *getTask();
- unsigned int getNumDocs() const { return _numDocs; }
+ uint32_t getNumDocs() const { return _numDocs; }
bool getUnpack() const { return _unpack; }
double runWorkers(const std::string &postingFormat);
};
+class StressWorker : public FastOS_Runnable {
+protected:
+ StressMaster& _master;
+ uint32_t _id;
-class AndStressWorker : public FastOS_Runnable {
+ virtual void run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack) = 0;
+
+public:
+ StressWorker(const StressWorker&) = delete;
+ StressWorker& operator=(const StressWorker&) = delete;
+
+ StressWorker(StressMaster& master, uint32_t id);
+ virtual ~StressWorker();
+
+ virtual void Run(FastOS_ThreadInterface* thisThread, void* arg) override;
+};
+
+class DirectStressWorker : public StressWorker {
private:
- AndStressWorker(const AndStressWorker &);
+ void run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack) override;
+
+public:
+ DirectStressWorker(StressMaster& master, uint32_t id);
+};
- AndStressWorker &
- operator=(const AndStressWorker &);
+class AndStressWorker : public StressWorker {
+private:
+ void run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack) override;
- AndStressMaster &_master;
- unsigned int _id;
public:
- AndStressWorker(AndStressMaster &master, unsigned int id);
- ~AndStressWorker();
- virtual void Run(FastOS_ThreadInterface *thisThread, void *arg) override;
+ AndStressWorker(StressMaster& master, uint32_t id);
};
-template <class P>
-FakePosting *
-makePosting(FakeWord &fw)
-{
- return new P(fw);
-}
+class OrStressWorker : public StressWorker {
+private:
+ void run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack) override;
-AndStressMaster::AndStressMaster(search::Rand48 &rnd,
- FakeWordSet &wordSet,
- unsigned int numDocs,
- unsigned int commonDocFreq,
- const std::vector<std::string> &postingTypes,
- unsigned int loops,
- unsigned int skipCommonPairsRate,
- uint32_t numTasks,
- uint32_t stride,
- bool unpack)
+public:
+ OrStressWorker(StressMaster& master, uint32_t id);
+};
+
+
+StressMaster::StressMaster(search::Rand48 &rnd,
+ FakeWordSet &wordSet,
+ const std::vector<std::string> &postingTypes,
+ StressRunner::OperatorType operatorType,
+ uint32_t loops,
+ uint32_t skipCommonPairsRate,
+ uint32_t numTasks,
+ uint32_t stride,
+ bool unpack)
: _rnd(rnd),
- _numDocs(numDocs),
- _commonDocFreq(commonDocFreq),
+ _numDocs(wordSet.numDocs()),
_postingTypes(postingTypes),
+ _operatorType(operatorType),
_loops(loops),
_skipCommonPairsRate(skipCommonPairsRate),
_stride(stride),
@@ -144,34 +161,24 @@ AndStressMaster::AndStressMaster(search::Rand48 &rnd,
_numTasks(numTasks),
_tasks()
{
- LOG(info, "AndStressMaster::AndStressMaster");
+ LOG(info, "StressMaster::StressMaster()");
_threadPool = new FastOS_ThreadPool(128 * 1024, 400);
}
-template <class C>
-static void
-clearPtrVector(std::vector<C> &vector)
+StressMaster::~StressMaster()
{
- for (auto& elem : vector) {
- delete elem;
- }
- vector.clear();
-}
-
-AndStressMaster::~AndStressMaster()
-{
- LOG(info, "AndStressMaster::~AndStressMaster");
+ LOG(info, "StressMaster::~StressMaster()");
_threadPool->Close();
delete _threadPool;
_threadPool = nullptr;
- clearPtrVector(_workers);
+ _workers.clear();
dropPostings();
}
void
-AndStressMaster::dropPostings()
+StressMaster::dropPostings()
{
for (auto& posting : _postings) {
posting.clear();
@@ -180,14 +187,14 @@ AndStressMaster::dropPostings()
}
void
-AndStressMaster::dropTasks()
+StressMaster::dropTasks()
{
_tasks.clear();
_taskIdx = 0;
}
void
-AndStressMaster::resetTasks()
+StressMaster::resetTasks()
{
_taskIdx = 0;
}
@@ -223,9 +230,9 @@ makeSomePostings(FPFactory *postingFactory,
}
void
-AndStressMaster::makePostingsHelper(FPFactory *postingFactory,
- const std::string &postingFormat,
- bool validate, bool verbose)
+StressMaster::makePostingsHelper(FPFactory *postingFactory,
+ const std::string &postingFormat,
+ bool validate, bool verbose)
{
FastOS_Time tv;
double before;
@@ -243,20 +250,20 @@ AndStressMaster::makePostingsHelper(FPFactory *postingFactory,
tv.SetNow();
after = tv.Secs();
LOG(info,
- "AndStressMaster::makePostingsHelper elapsed %10.6f s for %s format",
+ "StressMaster::makePostingsHelper() elapsed %10.6f s for %s format",
after - before,
postingFormat.c_str());
}
void
-AndStressMaster::setupTasks(unsigned int numTasks)
+StressMaster::setupTasks(uint32_t numTasks)
{
- unsigned int wordclass1;
- unsigned int wordclass2;
- unsigned int word1idx;
- unsigned int word2idx;
+ uint32_t wordclass1;
+ uint32_t wordclass2;
+ uint32_t word1idx;
+ uint32_t word2idx;
- for (unsigned int i = 0; i < numTasks; ++i) {
+ for (uint32_t i = 0; i < numTasks; ++i) {
wordclass1 = _rnd.lrand48() % _postings.size();
wordclass2 = _rnd.lrand48() % _postings.size();
while (wordclass1 == FakeWordSet::COMMON_WORD &&
@@ -273,8 +280,8 @@ AndStressMaster::setupTasks(unsigned int numTasks)
}
}
-AndStressMaster::Task *
-AndStressMaster::getTask()
+StressMaster::Task *
+StressMaster::getTask()
{
Task *result = nullptr;
std::lock_guard<std::mutex> taskGuard(_taskLock);
@@ -291,20 +298,20 @@ AndStressMaster::getTask()
}
void
-AndStressMaster::run()
+StressMaster::run()
{
- LOG(info, "AndStressMaster::run");
+ LOG(info, "StressMaster::run()");
for (const auto& type : _postingTypes) {
std::unique_ptr<FPFactory> factory(getFPFactory(type, _wordSet.getSchema()));
makePostingsHelper(factory.get(), type, true, false);
setupTasks(_numTasks);
double totalTime = 0;
- for (unsigned int loop = 0; loop < _loops; ++loop) {
+ for (uint32_t loop = 0; loop < _loops; ++loop) {
totalTime += runWorkers(type);
resetTasks();
}
- LOG(info, "AndStressMaster::average run elapsed %10.6f s for workers %s format",
+ LOG(info, "StressMaster::average run elapsed %10.6f s for workers %s format",
totalTime / _loops, type.c_str());
dropPostings();
}
@@ -312,7 +319,7 @@ AndStressMaster::run()
}
double
-AndStressMaster::runWorkers(const std::string &postingFormat)
+StressMaster::runWorkers(const std::string &postingFormat)
{
FastOS_Time tv;
double before;
@@ -320,13 +327,19 @@ AndStressMaster::runWorkers(const std::string &postingFormat)
tv.SetNow();
before = tv.Secs();
- unsigned int numWorkers = 8;
- for (unsigned int i = 0; i < numWorkers; ++i) {
- _workers.push_back(new AndStressWorker(*this, i));
+ uint32_t numWorkers = 8;
+ for (uint32_t i = 0; i < numWorkers; ++i) {
+ if (_operatorType == StressRunner::OperatorType::Direct) {
+ _workers.push_back(std::make_unique<DirectStressWorker>(*this, i));
+ } else if (_operatorType == StressRunner::OperatorType::And) {
+ _workers.push_back(std::make_unique<AndStressWorker>(*this, i));
+ } else if (_operatorType == StressRunner::OperatorType::Or) {
+ _workers.push_back(std::make_unique<OrStressWorker>(*this, i));
+ }
}
for (auto& worker : _workers) {
- _threadPool->NewThread(worker);
+ _threadPool->NewThread(worker.get());
}
{
@@ -338,83 +351,107 @@ AndStressMaster::runWorkers(const std::string &postingFormat)
tv.SetNow();
after = tv.Secs();
LOG(info,
- "AndStressMaster::run elapsed %10.6f s for workers %s format",
+ "StressMaster::run() elapsed %10.6f s for workers %s format",
after - before,
postingFormat.c_str());
- clearPtrVector(_workers);
+ _workers.clear();
_workersDone = 0;
return after - before;
}
-AndStressWorker::AndStressWorker(AndStressMaster &master, unsigned int id)
+StressWorker::StressWorker(StressMaster& master, uint32_t id)
: _master(master),
_id(id)
{
- LOG(debug, "AndStressWorker::AndStressWorker, id=%u", id);
}
-AndStressWorker::~AndStressWorker()
-{
- LOG(debug, "AndStressWorker::~AndStressWorker, id=%u", _id);
-}
+StressWorker::~StressWorker() = default;
void
-testFakePair(const FakePosting &f1, const FakePosting &f2, uint32_t doc_id_limit, bool unpack)
-{
- uint64_t scan_unpack_time = 0;
- int hits = unpack ?
- FakeMatchLoop::and_pair_posting_scan_with_unpack(f1, f2, doc_id_limit, scan_unpack_time) :
- FakeMatchLoop::and_pair_posting_scan(f1, f2, doc_id_limit, scan_unpack_time);
- (void) hits;
-}
-
-void
-AndStressWorker::Run(FastOS_ThreadInterface *thisThread, void *arg)
+StressWorker::Run(FastOS_ThreadInterface* thisThread, void* arg)
{
(void) thisThread;
(void) arg;
- LOG(debug, "AndStressWorker::Run, id=%u", _id);
+ LOG(debug, "StressWorker::Run(), id=%u", _id);
bool unpack = _master.getUnpack();
for (;;) {
- AndStressMaster::Task *task = _master.getTask();
+ StressMaster::Task *task = _master.getTask();
if (task == nullptr) {
break;
}
- testFakePair(*task->first, *task->second, _master.getNumDocs(),
- unpack);
+ run_task(*task->first, *task->second, _master.getNumDocs(), unpack);
}
}
-AndStress::AndStress()
+DirectStressWorker::DirectStressWorker(StressMaster& master, uint32_t id)
+ : StressWorker(master, id)
{
- LOG(debug, "Andstress::AndStress");
}
-AndStress::~AndStress()
+void
+DirectStressWorker::run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack)
{
- LOG(debug, "Andstress::~AndStress");
+ if (unpack) {
+ FakeMatchLoop::direct_posting_scan_with_unpack(f1, doc_id_limit);
+ FakeMatchLoop::direct_posting_scan_with_unpack(f2, doc_id_limit);
+ } else {
+ FakeMatchLoop::direct_posting_scan(f1, doc_id_limit);
+ FakeMatchLoop::direct_posting_scan(f2, doc_id_limit);
+ }
+}
+
+AndStressWorker::AndStressWorker(StressMaster& master, uint32_t id)
+ : StressWorker(master, id)
+{
+}
+
+void
+AndStressWorker::run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack)
+{
+ if (unpack) {
+ FakeMatchLoop::and_pair_posting_scan_with_unpack(f1, f2, doc_id_limit);
+ } else {
+ FakeMatchLoop::and_pair_posting_scan(f1, f2, doc_id_limit);
+ }
+}
+
+OrStressWorker::OrStressWorker(StressMaster& master, uint32_t id)
+ : StressWorker(master, id)
+{
+}
+
+void
+OrStressWorker::run_task(const FakePosting& f1, const FakePosting& f2, uint32_t doc_id_limit, bool unpack)
+{
+ if (unpack) {
+ FakeMatchLoop::or_pair_posting_scan_with_unpack(f1, f2, doc_id_limit);
+ } else {
+ FakeMatchLoop::or_pair_posting_scan(f1, f2, doc_id_limit);
+ }
}
void
-AndStress::run(search::Rand48 &rnd,
- FakeWordSet &wordSet,
- unsigned int numDocs,
- unsigned int commonDocFreq,
- const std::vector<std::string> &postingTypes,
- unsigned int loops,
- unsigned int skipCommonPairsRate,
- uint32_t numTasks,
- uint32_t stride,
- bool unpack)
+StressRunner::run(search::Rand48 &rnd,
+ FakeWordSet &wordSet,
+ const std::vector<std::string> &postingTypes,
+ OperatorType operatorType,
+ uint32_t loops,
+ uint32_t skipCommonPairsRate,
+ uint32_t numTasks,
+ uint32_t stride,
+ bool unpack)
{
- LOG(debug, "Andstress::run");
- AndStressMaster master(rnd, wordSet,
- numDocs, commonDocFreq, postingTypes, loops,
- skipCommonPairsRate,
- numTasks,
- stride,
- unpack);
+ LOG(debug, "StressRunner::run()");
+ StressMaster master(rnd,
+ wordSet,
+ postingTypes,
+ operatorType,
+ loops,
+ skipCommonPairsRate,
+ numTasks,
+ stride,
+ unpack);
master.run();
}
diff --git a/searchlib/src/tests/postinglistbm/stress_runner.h b/searchlib/src/tests/postinglistbm/stress_runner.h
new file mode 100644
index 00000000000..d4974bc969e
--- /dev/null
+++ b/searchlib/src/tests/postinglistbm/stress_runner.h
@@ -0,0 +1,35 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <string>
+#include <vector>
+
+namespace search {
+class Rand48;
+
+namespace fakedata { class FakeWordSet; }
+
+}
+
+namespace postinglistbm {
+
+class StressRunner {
+public:
+ enum class OperatorType {
+ Direct,
+ And,
+ Or
+ };
+
+ static void run(search::Rand48 &rnd,
+ search::fakedata::FakeWordSet &wordSet,
+ const std::vector<std::string> &postingTypes,
+ OperatorType operatorType,
+ uint32_t loops,
+ uint32_t skipCommonPairsRate,
+ uint32_t numTasks,
+ uint32_t stride,
+ bool unpack);
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.cpp
index 282a3e07a0e..ef4edf86c59 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.cpp
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.cpp
@@ -4,10 +4,14 @@
#include "fakeposting.h"
#include <vespa/fastos/timestamp.h>
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/queryeval/andsearch.h>
+#include <vespa/searchlib/queryeval/orsearch.h>
#include <vespa/searchlib/queryeval/searchiterator.h>
using search::fef::TermFieldMatchData;
using search::fef::TermFieldMatchDataArray;
+using search::queryeval::AndSearch;
+using search::queryeval::OrSearch;
using search::queryeval::SearchIterator;
namespace search::fakedata {
@@ -32,14 +36,14 @@ public:
~IteratorState() {}
SearchIterator& itr() { return *_itr; }
+ SearchIterator* release() { return _itr.release(); }
};
template <bool do_unpack>
int
-do_single_posting_scan(SearchIterator& itr, uint32_t doc_id_limit, uint64_t& elapsed_time_ns)
+do_match_loop(SearchIterator& itr, uint32_t doc_id_limit)
{
uint32_t hits = 0;
- uint64_t time_before = fastos::ClockSystem::now();
itr.initFullRange();
uint32_t doc_id = itr.getDocId();
while (doc_id < doc_id_limit) {
@@ -55,82 +59,59 @@ do_single_posting_scan(SearchIterator& itr, uint32_t doc_id_limit, uint64_t& ela
++doc_id;
}
}
- uint64_t time_after = fastos::ClockSystem::now();
- elapsed_time_ns = time_after - time_before;
return hits;
}
}
int
-FakeMatchLoop::single_posting_scan(const FakePosting& posting, uint32_t doc_id_limit, uint64_t& elapsed_time_ns)
+FakeMatchLoop::direct_posting_scan(const FakePosting& posting, uint32_t doc_id_limit)
{
IteratorState state(posting);
- return do_single_posting_scan<false>(state.itr(), doc_id_limit, elapsed_time_ns);
+ return do_match_loop<false>(state.itr(), doc_id_limit);
}
int
-FakeMatchLoop::single_posting_scan_with_unpack(const FakePosting& posting, uint32_t doc_id_limit, uint64_t& elapsed_time_ns)
+FakeMatchLoop::direct_posting_scan_with_unpack(const FakePosting& posting, uint32_t doc_id_limit)
{
IteratorState state(posting);
- return do_single_posting_scan<true>(state.itr(), doc_id_limit, elapsed_time_ns);
+ return do_match_loop<true>(state.itr(), doc_id_limit);
}
-namespace {
-
-template <bool do_unpack>
int
-do_and_pair_posting_scan(SearchIterator& itr1, SearchIterator& itr2,
- uint32_t doc_id_limit, uint64_t& elapsed_time_ns)
+FakeMatchLoop::and_pair_posting_scan(const FakePosting& posting_1, const FakePosting& posting_2, uint32_t doc_id_limit)
{
- uint32_t hits = 0;
- uint64_t time_before = fastos::ClockSystem::now();
- itr1.initFullRange();
- itr2.initFullRange();
- uint32_t doc_id = itr1.getDocId();
- while (doc_id < doc_id_limit) {
- if (itr1.seek(doc_id)) {
- if (itr2.seek(doc_id)) {
- ++hits;
- if (do_unpack) {
- itr1.unpack(doc_id);
- itr2.unpack(doc_id);
- }
- ++doc_id;
- } else if (doc_id < itr2.getDocId()) {
- doc_id = itr2.getDocId();
- } else {
- ++doc_id;
- }
- } else if (doc_id < itr1.getDocId()) {
- doc_id = itr1.getDocId();
- } else {
- ++doc_id;
- }
- }
- uint64_t time_after = fastos::ClockSystem::now();
- elapsed_time_ns = time_after - time_before;
- return hits;
+ IteratorState state_1(posting_1);
+ IteratorState state_2(posting_2);
+ std::unique_ptr<SearchIterator> iterator(AndSearch::create({state_1.release(), state_2.release()}, true));
+ return do_match_loop<false>(*iterator, doc_id_limit);
}
+int
+FakeMatchLoop::and_pair_posting_scan_with_unpack(const FakePosting& posting_1, const FakePosting& posting_2, uint32_t doc_id_limit)
+{
+ IteratorState state_1(posting_1);
+ IteratorState state_2(posting_2);
+ std::unique_ptr<SearchIterator> iterator(AndSearch::create({state_1.release(), state_2.release()}, true));
+ return do_match_loop<true>(*iterator, doc_id_limit);
}
int
-FakeMatchLoop::and_pair_posting_scan(const FakePosting& posting_1, const FakePosting& posting_2,
- uint32_t doc_id_limit, uint64_t& elapsed_time_ns)
+FakeMatchLoop::or_pair_posting_scan(const FakePosting& posting_1, const FakePosting& posting_2, uint32_t doc_id_limit)
{
IteratorState state_1(posting_1);
IteratorState state_2(posting_2);
- return do_and_pair_posting_scan<false>(state_1.itr(), state_2.itr(), doc_id_limit, elapsed_time_ns);
+ std::unique_ptr<SearchIterator> iterator(OrSearch::create({state_1.release(), state_2.release()}, true));
+ return do_match_loop<false>(*iterator, doc_id_limit);
}
int
-FakeMatchLoop::and_pair_posting_scan_with_unpack(const FakePosting& posting_1, const FakePosting& posting_2,
- uint32_t doc_id_limit, uint64_t& elapsed_time_ns)
+FakeMatchLoop::or_pair_posting_scan_with_unpack(const FakePosting& posting_1, const FakePosting& posting_2, uint32_t doc_id_limit)
{
IteratorState state_1(posting_1);
IteratorState state_2(posting_2);
- return do_and_pair_posting_scan<true>(state_1.itr(), state_2.itr(), doc_id_limit, elapsed_time_ns);
+ std::unique_ptr<SearchIterator> iterator(OrSearch::create({state_1.release(), state_2.release()}, true));
+ return do_match_loop<true>(*iterator, doc_id_limit);
}
}
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.h b/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.h
index 7bd98f1b4ca..f24e4019846 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.h
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fake_match_loop.h
@@ -13,13 +13,14 @@ class FakePosting;
*/
class FakeMatchLoop {
public:
- static int single_posting_scan(const FakePosting& posting, uint32_t doc_id_limit, uint64_t& elapsed_time_ns);
- static int single_posting_scan_with_unpack(const FakePosting& posting, uint32_t doc_id_limit, uint64_t& elapsed_time_ns);
+ static int direct_posting_scan(const FakePosting& posting, uint32_t doc_id_limit);
+ static int direct_posting_scan_with_unpack(const FakePosting& posting, uint32_t doc_id_limit);
- static int and_pair_posting_scan(const FakePosting& posting_1, const FakePosting& posting_2,
- uint32_t doc_id_limit, uint64_t& elapsed_time_ns);
- static int and_pair_posting_scan_with_unpack(const FakePosting& posting_1, const FakePosting& posting_2,
- uint32_t doc_id_limit, uint64_t& elapsed_time_ns);
+ static int and_pair_posting_scan(const FakePosting& posting_1, const FakePosting& posting_2, uint32_t doc_id_limit);
+ static int and_pair_posting_scan_with_unpack(const FakePosting& posting_1, const FakePosting& posting_2, uint32_t doc_id_limit);
+
+ static int or_pair_posting_scan(const FakePosting& posting_1, const FakePosting& posting_2, uint32_t doc_id_limit);
+ static int or_pair_posting_scan_with_unpack(const FakePosting& posting_1, const FakePosting& posting_2, uint32_t doc_id_limit);
};
}
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.cpp
index 5c87bf88e9c..09c8e86d979 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.cpp
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.cpp
@@ -31,7 +31,8 @@ applyDocIdBiasToVector(FakeWordVector& words, uint32_t docIdBias)
FakeWordSet::FakeWordSet()
: _words(NUM_WORDCLASSES),
_schema(),
- _fieldsParams()
+ _fieldsParams(),
+ _numDocs(0)
{
setupParams(false, false);
}
@@ -40,7 +41,8 @@ FakeWordSet::FakeWordSet(bool hasElements,
bool hasElementWeights)
: _words(NUM_WORDCLASSES),
_schema(),
- _fieldsParams()
+ _fieldsParams(),
+ _numDocs(0)
{
setupParams(hasElements, hasElementWeights);
}
@@ -75,9 +77,20 @@ FakeWordSet::setupParams(bool hasElements,
void
FakeWordSet::setupWords(search::Rand48 &rnd,
- unsigned int numDocs,
- unsigned int commonDocFreq,
- unsigned int numWordsPerWordClass)
+ uint32_t numDocs,
+ uint32_t commonDocFreq,
+ uint32_t numWordsPerWordClass)
+{
+ setupWords(rnd, numDocs, commonDocFreq, 1000, 10, numWordsPerWordClass);
+}
+
+void
+FakeWordSet::setupWords(search::Rand48 &rnd,
+ uint32_t numDocs,
+ uint32_t commonDocFreq,
+ uint32_t mediumDocFreq,
+ uint32_t rareDocFreq,
+ uint32_t numWordsPerWordClass)
{
std::string common = "common";
std::string medium = "medium";
@@ -86,11 +99,13 @@ FakeWordSet::setupWords(search::Rand48 &rnd,
double before;
double after;
+ _numDocs = numDocs;
+
LOG(info, "enter setupWords");
tv.SetNow();
before = tv.Secs();
uint32_t packedIndex = _fieldsParams.size() - 1;
- for (unsigned int i = 0; i < numWordsPerWordClass; ++i) {
+ for (uint32_t i = 0; i < numWordsPerWordClass; ++i) {
std::ostringstream vi;
vi << (i + 1);
@@ -99,12 +114,12 @@ FakeWordSet::setupWords(search::Rand48 &rnd,
_fieldsParams[packedIndex],
packedIndex));
- _words[MEDIUM_WORD].push_back(std::make_unique<FakeWord>(numDocs, 1000, 500,
+ _words[MEDIUM_WORD].push_back(std::make_unique<FakeWord>(numDocs, mediumDocFreq, mediumDocFreq / 2,
medium + vi.str(), rnd,
_fieldsParams[packedIndex],
packedIndex));
- _words[RARE_WORD].push_back(std::make_unique<FakeWord>(numDocs, 10, 5,
+ _words[RARE_WORD].push_back(std::make_unique<FakeWord>(numDocs, rareDocFreq, rareDocFreq / 2,
rare + vi.str(), rnd,
_fieldsParams[packedIndex],
packedIndex));
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h
index 0b7ee4db6fe..d404c664a34 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h
@@ -32,6 +32,7 @@ private:
std::vector<FakeWordVector> _words;
Schema _schema;
std::vector<PosOccFieldsParams> _fieldsParams;
+ uint32_t _numDocs;
public:
FakeWordSet();
@@ -45,9 +46,16 @@ public:
bool hasElementWeights);
void setupWords(search::Rand48 &rnd,
- unsigned int numDocs,
- unsigned int commonDocFreq,
- unsigned int numWordsPerWordClass);
+ uint32_t numDocs,
+ uint32_t commonDocFreq,
+ uint32_t numWordsPerWordClass);
+
+ void setupWords(search::Rand48 &rnd,
+ uint32_t numDocs,
+ uint32_t commonDocFreq,
+ uint32_t mediumDocFreq,
+ uint32_t rareDocFreq,
+ uint32_t numWordsPerWordClass);
const std::vector<FakeWordVector>& words() const { return _words; }
@@ -69,6 +77,8 @@ public:
return _schema;
}
+ uint32_t numDocs() const { return _numDocs; }
+
void addDocIdBias(uint32_t docIdBias);
};