diff options
Diffstat (limited to 'searchlib/src/tests/diskindex/pagedict4')
-rw-r--r-- | searchlib/src/tests/diskindex/pagedict4/.gitignore | 5 | ||||
-rw-r--r-- | searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt | 9 | ||||
-rw-r--r-- | searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp | 876 |
3 files changed, 890 insertions, 0 deletions
diff --git a/searchlib/src/tests/diskindex/pagedict4/.gitignore b/searchlib/src/tests/diskindex/pagedict4/.gitignore new file mode 100644 index 00000000000..2381ed57229 --- /dev/null +++ b/searchlib/src/tests/diskindex/pagedict4/.gitignore @@ -0,0 +1,5 @@ +.depend +Makefile +pagedict4_test +fakedict.* +searchlib_pagedict4_test_app diff --git a/searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt b/searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt new file mode 100644 index 00000000000..f8aef573c9a --- /dev/null +++ b/searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_pagedict4_test_app + SOURCES + pagedict4test.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_pagedict4_test_app COMMAND searchlib_pagedict4_test_app) diff --git a/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp b/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp new file mode 100644 index 00000000000..03d73e84b42 --- /dev/null +++ b/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp @@ -0,0 +1,876 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("pagedict4test"); +#include <vespa/searchlib/bitcompression/compression.h> +#include <vector> +#include <vespa/searchlib/util/rand48.h> +#include <vespa/searchlib/index/schemautil.h> +#include <vespa/searchlib/bitcompression/countcompression.h> +#include <vespa/searchlib/bitcompression/pagedict4.h> +#include <vespa/searchlib/test/diskindex/threelevelcountbuffers.h> +#include <vespa/searchlib/index/postinglistcounts.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/diskindex/pagedict4file.h> +#include <vespa/searchlib/diskindex/pagedict4randread.h> +#include <vespa/searchlib/common/tunefileinfo.h> + +using search::bitcompression::PostingListCountFileEncodeContext; +using search::bitcompression::PostingListCountFileDecodeContext; +using search::index::PostingListCounts; +using search::index::PostingListOffsetAndCounts; +using search::index::PostingListParams; +using search::bitcompression::PageDict4SSWriter; +using search::bitcompression::PageDict4SPWriter; +using search::bitcompression::PageDict4PWriter; +using search::bitcompression::PageDict4Reader; +using search::bitcompression::PageDict4SSReader; +using search::bitcompression::PageDict4SSLookupRes; +using search::bitcompression::PageDict4SPLookupRes; +using search::bitcompression::PageDict4PLookupRes; +using search::index::Schema; +using search::index::DictionaryFileSeqRead; +using search::index::DictionaryFileSeqWrite; +using search::index::DictionaryFileRandRead; +using search::diskindex::PageDict4FileSeqRead; +using search::diskindex::PageDict4FileSeqWrite; +using search::diskindex::PageDict4RandRead; +using search::index::DummyFileHeaderContext; + +typedef search::bitcompression::PageDict4StartOffset StartOffset; + +namespace +{ + + +class Writer : public search::diskindex::ThreeLevelCountWriteBuffers +{ +public: + PageDict4SSWriter *_ssw; + PageDict4SPWriter *_spw; + PageDict4PWriter *_pw; + + Writer(EC &sse, + EC &spe, + EC &pe) + : ThreeLevelCountWriteBuffers(sse, spe, pe), + _ssw(NULL), + _spw(NULL), + _pw(NULL) + { + } + + ~Writer(void) + { + delete _ssw; + delete _spw; + delete _pw; + } + + void + allocWriters() + { + _ssw = new PageDict4SSWriter(_sse); + _spw = new PageDict4SPWriter(*_ssw, _spe); + _pw = new PageDict4PWriter(*_spw, _pe); + _spw->setup(); + _pw->setup(); + } + + void + flush(void) + { + _pw->flush(); + ThreeLevelCountWriteBuffers::flush(); + } + + void + addCounts(const std::string &word, + const PostingListCounts &counts) + { + _pw->addCounts(word, counts); + } +}; + + +class SeqReader : public search::diskindex::ThreeLevelCountReadBuffers +{ +public: + PageDict4SSReader _ssr; + PageDict4Reader _pr; + + SeqReader(DC &ssd, + DC &spd, + DC &pd, + search::diskindex::ThreeLevelCountWriteBuffers &wb) + : ThreeLevelCountReadBuffers(ssd, spd, pd, wb), + _ssr(_rcssd, + wb._ssHeaderLen, wb._ssFileBitSize, + wb._spHeaderLen, wb._spFileBitSize, + wb._pHeaderLen, wb._pFileBitSize), + _pr(_ssr, spd, pd) + { + _ssr.setup(ssd); + _pr.setup(); + } + + void + readCounts(vespalib::string &word, + uint64_t &wordNum, + PostingListCounts &counts) + { + _pr.readCounts(word, wordNum, counts); + } +}; + +class RandReader : public search::diskindex::ThreeLevelCountReadBuffers +{ +public: + PageDict4SSReader _ssr; + const char *_spData; + const char *_pData; + size_t _pageSize; + + RandReader(DC &ssd, + DC &spd, + DC &pd, + search::diskindex::ThreeLevelCountWriteBuffers &wb) + : ThreeLevelCountReadBuffers(ssd, spd, pd, wb), + _ssr(_rcssd, + wb._ssHeaderLen, wb._ssFileBitSize, + wb._spHeaderLen, wb._spFileBitSize, + wb._pHeaderLen, wb._pFileBitSize), + _spData(static_cast<const char *>(_rcspd._comprBuf)), + _pData(static_cast<const char *>(_rcpd._comprBuf)), + _pageSize(search::bitcompression::PageDict4PageParams::getPageByteSize()) + { + _ssr.setup(ssd); + } + + bool + lookup(const std::string &key, + uint64_t &wordNum, + PostingListCounts &counts, + StartOffset &offsets) + { + PageDict4SSLookupRes sslr; + + sslr = _ssr.lookup(key); + if (!sslr._res) { + counts.clear(); + offsets = sslr._l6StartOffset; + wordNum = sslr._l6WordNum; + return false; + } + + if (sslr._overflow) { + wordNum = sslr._l6WordNum; + counts = sslr._counts; + offsets = sslr._startOffset; + return true; + } + PageDict4SPLookupRes splr; + splr.lookup(_ssr, + _spData + + _pageSize * sslr._sparsePageNum, + key, + sslr._l6Word, + sslr._lastWord, + sslr._l6StartOffset, + sslr._l6WordNum, + sslr._pageNum); + + PageDict4PLookupRes plr; + plr.lookup(_ssr, + _pData + _pageSize * splr._pageNum, + key, + splr._l3Word, + splr._lastWord, + splr._l3StartOffset, + splr._l3WordNum); + wordNum = plr._wordNum; + offsets = plr._startOffset; + if (plr._res) { + counts = plr._counts; + return true; + } + counts.clear(); + return false; + } +}; + +} + +class PageDict4TestApp : public FastOS_Application +{ +public: + search::Rand48 _rnd; + bool _stress; + bool _emptyWord; + bool _firstWordForcedCommon; + bool _lastWordForcedCommon; + + void + usage(void); + + int + Main(void); + + void + testWords(void); + + PageDict4TestApp(void) + : _rnd(), + _stress(false), + _emptyWord(false), + _firstWordForcedCommon(false), + _lastWordForcedCommon(false) + { + } +}; + + +void +PageDict4TestApp::usage(void) +{ + printf("Usage: wordnumbers\n"); + fflush(stdout); +} + + +int +PageDict4TestApp::Main(void) +{ + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + _rnd.srand48(32); + for (int32_t i = 1; i < _argc; ++i) { + if (strcmp(_argv[i], "stress") == 0) + _stress = true; + if (strcmp(_argv[i], "emptyword") == 0) + _emptyWord = true; + if (strcmp(_argv[i], "firstwordforcedcommon") == 0) + _firstWordForcedCommon = true; + if (strcmp(_argv[i], "lastwordforcedcommon") == 0) + _lastWordForcedCommon = true; + } + testWords(); + + LOG(info, + "_stress is %s", + _stress ? "true" : "false"); + LOG(info, + "_emptyWord is %s", + _emptyWord ? "true" : "false"); + LOG(info, + "_firstWordForcedCommon is %s", + _firstWordForcedCommon ? "true" : "false"); + LOG(info, + "_lastWordForcedCommon is %s", + _lastWordForcedCommon ? "true" : "false"); + + LOG(info, "SUCCESS"); + return 0; +} + + +class WordIndexCounts +{ +public: + uint32_t _numDocs; + uint64_t _fileOffset; + uint64_t _bitLength; + uint64_t _accNumDocs; + + WordIndexCounts(uint64_t bitLength, + uint32_t numDocs) + : _numDocs(numDocs), + _fileOffset(0), + _bitLength(bitLength), + _accNumDocs(0) + { + } + + WordIndexCounts() + : _numDocs(0), + _fileOffset(0), + _bitLength(0), + _accNumDocs(0) + { + } +}; + +class WordCounts +{ +public: + std::string _word; + WordIndexCounts _counts; + + bool + operator!=(const WordCounts &rhs) const + { + return _word != rhs._word; + } + + WordCounts(const std::string &word) + : _word(word), + _counts() + { + } + + bool + operator<(const WordCounts &rhs) const + { + return _word < rhs._word; + } +}; + + +void +deDup(std::vector<WordCounts> &v) +{ + std::vector<WordCounts> v2; + std::sort(v.begin(), v.end()); + for (std::vector<WordCounts>::const_iterator + i = v.begin(), + ie = v.end(); + i != ie; + ++i) { + if (v2.empty() || v2.back() != *i) + v2.push_back(*i); + } + std::swap(v, v2); +} + + +void +deDup(std::vector<uint32_t> &v) +{ + std::vector<uint32_t> v2; + std::sort(v.begin(), v.end()); + for (std::vector<uint32_t>::const_iterator + i = v.begin(), + ie = v.end(); + i != ie; + ++i) { + if (v2.empty() || v2.back() != *i) + v2.push_back(*i); + } + std::swap(v, v2); +} + + +static WordIndexCounts +makeIndex(search::Rand48 &rnd, bool forceCommon) +{ + uint64_t bitLength = 10; + uint32_t numDocs = 1; + if ((rnd.lrand48() % 150) == 0 || forceCommon) { + bitLength = 1000000000; + numDocs = 500000; + } + return WordIndexCounts(bitLength, numDocs); +} + + +void +makeIndexes(search::Rand48 &rnd, + WordIndexCounts &counts, + bool forceCommon) +{ + counts = makeIndex(rnd, forceCommon); +} + + +static void +makeWords(std::vector<WordCounts> &v, + search::Rand48 &rnd, + uint32_t numWordIds, + uint32_t tupleCount, + bool emptyWord, + bool firstWordForcedCommon, + bool lastWordForcedCommon) +{ + v.clear(); + for (unsigned int i = 0; i < tupleCount; ++i) { + uint64_t word = rnd.lrand48() % numWordIds; + uint64_t wordCount = (rnd.lrand48() % 10) + 1; + for (unsigned int j = 0; j < wordCount; ++j) { + uint64_t nextWord = rnd.lrand48() % numWordIds; + uint64_t nextWordCount = 0; + bool incomplete = true; + nextWordCount = rnd.lrand48() % 10; + incomplete = (rnd.lrand48() % 3) == 0 || nextWordCount == 0; + for (unsigned int k = 0; k < nextWordCount; ++k) { + uint64_t nextNextWord = rnd.lrand48() % numWordIds; + std::ostringstream w; + w << word; + w << "-"; + w << nextWord; + w << "-"; + w << nextNextWord; + v.push_back(WordCounts(w.str())); + } + if (incomplete) { + std::ostringstream w; + w << word; + w << "-"; + w << nextWord; + w << "-"; + w << "9999999999999999"; + v.push_back(WordCounts(w.str())); + } + } + } + deDup(v); + if (!v.empty() && emptyWord) + v.front()._word = ""; + for (std::vector<WordCounts>::iterator + i = v.begin(), ib = v.begin(), ie = v.end(); + i != ie; ++i) { + std::vector<WordIndexCounts> indexes; + makeIndexes(rnd, i->_counts, + (i == ib && firstWordForcedCommon) || + (i + 1 == ie && lastWordForcedCommon)); + } + uint64_t fileOffset = 0; + uint64_t accNumDocs = 0; + for (std::vector<WordCounts>::iterator + i = v.begin(), + ie = v.end(); + i != ie; + ++i) { + WordIndexCounts *f = &i->_counts; + assert(f->_numDocs > 0); + assert(f->_bitLength > 0); + f->_fileOffset = fileOffset; + f->_accNumDocs = accNumDocs; + fileOffset += f->_bitLength; + accNumDocs += f->_numDocs; + } +} + + +void +makeCounts(PostingListCounts &counts, + const WordCounts &i, + uint32_t chunkSize) +{ + PostingListCounts c; + const WordIndexCounts *j = &i._counts; + c._bitLength = j->_bitLength; + c._numDocs = j->_numDocs; + c._segments.clear(); + assert(j->_numDocs > 0); + uint32_t numChunks = (j->_numDocs + chunkSize - 1) / chunkSize; + if (numChunks > 1) { + uint32_t chunkBits = j->_bitLength / numChunks; + for (uint32_t chunkNo = 0; chunkNo < numChunks; ++chunkNo) { + PostingListCounts::Segment seg; + seg._bitLength = chunkBits; + seg._numDocs = chunkSize; + seg._lastDoc = (chunkNo + 1) * chunkSize - 1; + if (chunkNo + 1 == numChunks) { + seg._bitLength = c._bitLength - + (numChunks - 1) * chunkBits; + seg._lastDoc = c._numDocs - 1; + seg._numDocs = c._numDocs - (numChunks - 1) * chunkSize; + } + c._segments.push_back(seg); + } + } + counts = c; +} + + +void +checkCounts(const std::string &word, + const PostingListCounts &counts, + const StartOffset &fileOffset, + const WordCounts &i, + uint32_t chunkSize) +{ + PostingListCounts answer; + + makeCounts(answer, i, chunkSize); + assert(word == i._word); + (void) word; + (void) fileOffset; + const WordIndexCounts *j = &i._counts; + assert(counts._bitLength == j->_bitLength); + assert(counts._numDocs == j->_numDocs); + assert(fileOffset._fileOffset == j->_fileOffset); + assert(fileOffset._accNumDocs == j->_accNumDocs); + assert(counts._segments == answer._segments); + assert(counts == answer); + (void) counts; +} + + +void +testWords(const std::string &logname, + search::Rand48 &rnd, + uint64_t numWordIds, + uint32_t tupleCount, + uint32_t chunkSize, + uint32_t ssPad, + uint32_t spPad, + uint32_t pPad, + bool emptyWord, + bool firstWordForcedCommon, + bool lastWordForcedCommon) +{ + typedef search::bitcompression::PostingListCountFileEncodeContext EC; + typedef search::bitcompression::PostingListCountFileDecodeContext DC; + + LOG(info, "%s: word test start", logname.c_str()); + std::vector<WordCounts> myrand; + makeWords(myrand, rnd, numWordIds, tupleCount, + emptyWord, firstWordForcedCommon, lastWordForcedCommon); + + PostingListCounts xcounts; + for (std::vector<WordCounts>::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i) { + makeCounts(xcounts, *i, chunkSize); + } + LOG(info, "%s: word counts generated", logname.c_str()); + + EC pe; + EC spe; + EC sse; + + sse._minChunkDocs = chunkSize; + sse._numWordIds = numWordIds; + spe.copyParams(sse); + pe.copyParams(sse); + Writer w(sse, spe, pe); + w.startPad(ssPad, spPad, pPad); + w.allocWriters(); + + PostingListCounts counts; + for (std::vector<WordCounts>::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i) { + makeCounts(counts, *i, chunkSize); + w.addCounts(i->_word, counts); + } + w.flush(); + + LOG(info, + "%s: Used %" PRIu64 "+%" PRIu64 "+%" PRIu64 + " bits for %d words", + logname.c_str(), + w._pFileBitSize, + w._spFileBitSize, + w._ssFileBitSize, + (int) myrand.size()); + + StartOffset checkOffset; + + { + DC ssd; + ssd._minChunkDocs = chunkSize; + ssd._numWordIds = numWordIds; + DC spd; + spd.copyParams(ssd); + DC pd; + pd.copyParams(ssd); + + SeqReader r(ssd, spd, pd, w); + + uint64_t wordNum = 1; + uint64_t checkWordNum = 0; + for (std::vector<WordCounts>::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i, ++wordNum) { + vespalib::string word; + counts.clear(); + r.readCounts(word, checkWordNum, counts); + checkCounts(word, counts, checkOffset, *i, chunkSize); + assert(checkWordNum == wordNum); + checkOffset._fileOffset += counts._bitLength; + checkOffset._accNumDocs += counts._numDocs; + } + assert(pd.getReadOffset() == w._pFileBitSize); + LOG(info, "%s: words seqRead test OK", logname.c_str()); + } + + { + DC ssd; + ssd._minChunkDocs = chunkSize; + ssd._numWordIds = numWordIds; + DC spd; + spd.copyParams(ssd); + DC pd; + pd.copyParams(ssd); + + RandReader rr(ssd, spd, pd, w); + + uint64_t wordNum = 1; + uint64_t checkWordNum = 0; + for (std::vector<WordCounts>::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i, ++wordNum) { + checkWordNum = 0; + bool res = rr.lookup(i->_word, + checkWordNum, + counts, + checkOffset); + assert(res); + (void) res; + checkCounts(i->_word, counts, checkOffset, + *i, chunkSize); + assert(checkWordNum == wordNum); + } + LOG(info, "%s: word randRead test OK", logname.c_str()); + } + + Schema schema; + std::vector<uint32_t> indexes; + { + std::ostringstream fn; + fn << "f0"; + schema.addIndexField(Schema:: + IndexField(fn.str(), + Schema::STRING, + Schema::SINGLE)); + indexes.push_back(0); + } + { + std::unique_ptr<DictionaryFileSeqWrite> + dw(new PageDict4FileSeqWrite); + std::vector<uint32_t> wIndexes; + std::vector<PostingListCounts> wCounts; + search::TuneFileSeqWrite tuneFileWrite; + DummyFileHeaderContext fileHeaderContext; + PostingListParams params; + params.set("numWordIds", numWordIds); + params.set("minChunkDocs", chunkSize); + dw->setParams(params); + bool openres = dw->open("fakedict", + tuneFileWrite, + fileHeaderContext); + assert(openres); + + for (std::vector<WordCounts>::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i) { + makeCounts(counts, *i, chunkSize); + dw->writeWord(i->_word, counts); + } + bool closeres = dw->close(); + assert(closeres); + (void) closeres; + + LOG(info, "%s: pagedict4 written", logname.c_str()); + } + { + std::unique_ptr<DictionaryFileSeqRead> dr(new PageDict4FileSeqRead); + search::TuneFileSeqRead tuneFileRead; + + bool openres = dr->open("fakedict", + tuneFileRead); + assert(openres); + (void) openres; + std::string lastWord; + vespalib::string checkWord; + PostingListCounts wCounts; + PostingListCounts rCounts; + uint64_t wordNum = 1; + uint64_t checkWordNum = 5; + for (std::vector<WordCounts>::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i, ++wordNum) { + makeCounts(counts, *i, chunkSize); + wCounts = counts; + checkWord.clear(); + checkWordNum = 0; + dr->readWord(checkWord, checkWordNum, rCounts); + assert(rCounts == wCounts); + assert(wordNum == checkWordNum); + assert(checkWord == i->_word); + } + + checkWord = "bad"; + checkWordNum = 5; + dr->readWord(checkWord, checkWordNum, rCounts); + assert(checkWord.empty()); + assert(checkWordNum == DictionaryFileSeqRead::noWordNumHigh()); + bool closeres = dr->close(); + assert(closeres); + (void) closeres; + + LOG(info, "%s: pagedict4 seqverify OK", logname.c_str()); + } + { + std::unique_ptr<DictionaryFileRandRead> drr(new PageDict4RandRead); + search::TuneFileRandRead tuneFileRead; + bool openres = drr->open("fakedict", + tuneFileRead); + assert(openres); + (void) openres; + std::string lastWord; + vespalib::string checkWord; + PostingListCounts wCounts; + PostingListCounts rCounts; + uint64_t wOffset; + uint64_t rOffset; + PostingListOffsetAndCounts rOffsetAndCounts; + uint64_t wordNum = 1; + uint64_t checkWordNum = 5; + std::string missWord; + wOffset = 0; + for (std::vector<WordCounts>::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i, ++wordNum) { + makeCounts(counts, *i, chunkSize); + wCounts = counts; + + checkWordNum = 0; + rCounts.clear(); + rOffset = 0; + bool lres = drr->lookup(i->_word, checkWordNum, + rOffsetAndCounts); + assert(lres); + (void) lres; + assert((rOffsetAndCounts._counts._bitLength == 0) == + (rOffsetAndCounts._counts._numDocs == 0)); + rOffset = rOffsetAndCounts._offset; + rCounts = rOffsetAndCounts._counts; + assert(rCounts == wCounts); + assert(wordNum == checkWordNum); + assert(rOffset == wOffset); + + wOffset += wCounts._bitLength; + lastWord = i->_word; + + missWord = i->_word; + missWord.append(1, '\1'); + checkWordNum = 0; + lres = drr->lookup(missWord, checkWordNum, + rOffsetAndCounts); + assert(!lres); + assert(checkWordNum == wordNum + 1); + } + + checkWordNum = 0; + std::string notfoundword = "Thiswordhasbetternotbeindictionary"; + bool lres = drr->lookup(notfoundword, checkWordNum, + rOffsetAndCounts); + assert(!lres); + checkWordNum = 0; + notfoundword = lastWord + "somethingmore"; + lres = drr->lookup(notfoundword, checkWordNum, + rOffsetAndCounts); + assert(!lres); + (void) lres; + LOG(info, "Lookup beyond dict EOF gave wordnum %d", (int) checkWordNum); + + if (firstWordForcedCommon) { + if (!emptyWord) { + checkWordNum = 0; + notfoundword = ""; + lres = drr->lookup(notfoundword, checkWordNum, + rOffsetAndCounts); + assert(!lres); + assert(checkWordNum == 1); + } + if (!myrand.empty()) { + checkWordNum = 0; + notfoundword = myrand.front()._word; + notfoundword.append(1, '\1'); + lres = drr->lookup(notfoundword, checkWordNum, + rOffsetAndCounts); + assert(!lres); + assert(checkWordNum == 2); + } + } + if (lastWordForcedCommon && !myrand.empty()) { + if (myrand.size() > 1) { + checkWordNum = 0; + notfoundword = myrand[myrand.size() - 2]._word; + notfoundword.append(1, '\1'); + lres = drr->lookup(notfoundword, checkWordNum, + rOffsetAndCounts); + assert(!lres); + assert(checkWordNum == myrand.size()); + } + checkWordNum = 0; + notfoundword = myrand[myrand.size() - 1]._word; + notfoundword.append(1, '\1'); + lres = drr->lookup(notfoundword, checkWordNum, + rOffsetAndCounts); + assert(!lres); + assert(checkWordNum == myrand.size() + 1); + } + bool closeres = drr->close(); + assert(closeres); + (void) closeres; + LOG(info, "%s: pagedict4 randverify OK", logname.c_str()); + } +} + + +void +PageDict4TestApp::testWords(void) +{ + ::testWords("smallchunkwordsempty", _rnd, + 1000000, 0, + 64, 80, 72, 64, + false, false, false); + ::testWords("smallchunkwordsempty2", _rnd, + 0, 0, + 64, 80, 72, 64, + false, false, false); + ::testWords("smallchunkwords", _rnd, + 1000000, 100, + 64, 80, 72, 64, + false, false, false); + ::testWords("smallchunkwordswithemptyword", _rnd, + 1000000, 100, + 64, 80, 72, 64, + true, false, false); + ::testWords("smallchunkwordswithcommonfirstword", _rnd, + 1000000, 100, + 64, 80, 72, 64, + false, true, false); + ::testWords("smallchunkwordswithcommonemptyfirstword", _rnd, + 1000000, 100, + 64, 80, 72, 64, + true, true, false); + ::testWords("smallchunkwordswithcommonlastword", _rnd, + 1000000, 100, + 64, 80, 72, 64, + false, false, true); +#if 1 + ::testWords("smallchunkwords2", _rnd, + 1000000, _stress ? 10000 : 100, + 64, 80, 72, 64, + _emptyWord, _firstWordForcedCommon, _lastWordForcedCommon); +#endif +#if 1 + ::testWords("stdwords", _rnd, + 1000000, _stress ? 10000 : 100, + 262144, 80, 72, 64, + _emptyWord, _firstWordForcedCommon, _lastWordForcedCommon); +#endif +} + +FASTOS_MAIN(PageDict4TestApp); |