From f68f6bc837d622fa58b135311167c26f75765837 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Thu, 12 Apr 2018 23:04:20 +0200 Subject: Factor out classes for read/write access to memory based pagedict4 structures used by unit tests. --- .../tests/diskindex/pagedict4/pagedict4test.cpp | 164 +-------------------- .../vespa/searchlib/test/diskindex/CMakeLists.txt | 3 + .../test/diskindex/pagedict4_mem_rand_reader.cpp | 74 ++++++++++ .../test/diskindex/pagedict4_mem_rand_reader.h | 34 +++++ .../test/diskindex/pagedict4_mem_seq_reader.cpp | 32 ++++ .../test/diskindex/pagedict4_mem_seq_reader.h | 32 ++++ .../test/diskindex/pagedict4_mem_writer.cpp | 49 ++++++ .../test/diskindex/pagedict4_mem_writer.h | 41 ++++++ 8 files changed, 271 insertions(+), 158 deletions(-) create mode 100644 searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_rand_reader.cpp create mode 100644 searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_rand_reader.h create mode 100644 searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_seq_reader.cpp create mode 100644 searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_seq_reader.h create mode 100644 searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_writer.cpp create mode 100644 searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_writer.h diff --git a/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp b/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp index cf90356c949..f75c04dee59 100644 --- a/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp +++ b/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp @@ -6,6 +6,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -44,164 +47,9 @@ using search::index::schema::DataType; using namespace search::index; using StartOffset = search::bitcompression::PageDict4StartOffset; - -namespace -{ - - -class Writer : public search::diskindex::ThreeLevelCountWriteBuffers -{ -public: - PageDict4SSWriter *_ssw; - PageDict4SPWriter *_spw; - PageDict4PWriter *_pw; - - Writer(EC &sse, - EC &spe, - EC &pe) - : ThreeLevelCountWriteBuffers(sse, spe, pe), - _ssw(NULL), - _spw(NULL), - _pw(NULL) - { - } - - ~Writer() - { - delete _ssw; - delete _spw; - delete _pw; - } - - void allocWriters() - { - _ssw = new PageDict4SSWriter(_sse); - _spw = new PageDict4SPWriter(*_ssw, _spe); - _pw = new PageDict4PWriter(*_spw, _pe); - _spw->setup(); - _pw->setup(); - } - - void flush() - { - _pw->flush(); - ThreeLevelCountWriteBuffers::flush(); - } - - void addCounts(const std::string &word, - const PostingListCounts &counts) - { - _pw->addCounts(word, counts); - } -}; - - -class SeqReader : public search::diskindex::ThreeLevelCountReadBuffers -{ -public: - PageDict4SSReader _ssr; - PageDict4Reader _pr; - - SeqReader(DC &ssd, - DC &spd, - DC &pd, - search::diskindex::ThreeLevelCountWriteBuffers &wb) - : ThreeLevelCountReadBuffers(ssd, spd, pd, wb), - _ssr(_rcssd, - wb._ssHeaderLen, wb._ssFileBitSize, - wb._spHeaderLen, wb._spFileBitSize, - wb._pHeaderLen, wb._pFileBitSize), - _pr(_ssr, spd, pd) - { - _ssr.setup(ssd); - _pr.setup(); - } - - void readCounts(vespalib::string &word, - uint64_t &wordNum, - PostingListCounts &counts) - { - _pr.readCounts(word, wordNum, counts); - } -}; - -class RandReader : public search::diskindex::ThreeLevelCountReadBuffers -{ -public: - PageDict4SSReader _ssr; - const char *_spData; - const char *_pData; - size_t _pageSize; - - RandReader(DC &ssd, - DC &spd, - DC &pd, - search::diskindex::ThreeLevelCountWriteBuffers &wb) - : ThreeLevelCountReadBuffers(ssd, spd, pd, wb), - _ssr(_rcssd, - wb._ssHeaderLen, wb._ssFileBitSize, - wb._spHeaderLen, wb._spFileBitSize, - wb._pHeaderLen, wb._pFileBitSize), - _spData(static_cast(_rcspd._comprBuf)), - _pData(static_cast(_rcpd._comprBuf)), - _pageSize(search::bitcompression::PageDict4PageParams::getPageByteSize()) - { - _ssr.setup(ssd); - } - - bool - lookup(const std::string &key, - uint64_t &wordNum, - PostingListCounts &counts, - StartOffset &offsets) - { - PageDict4SSLookupRes sslr; - - sslr = _ssr.lookup(key); - if (!sslr._res) { - counts.clear(); - offsets = sslr._l6StartOffset; - wordNum = sslr._l6WordNum; - return false; - } - - if (sslr._overflow) { - wordNum = sslr._l6WordNum; - counts = sslr._counts; - offsets = sslr._startOffset; - return true; - } - PageDict4SPLookupRes splr; - splr.lookup(_ssr, - _spData + - _pageSize * sslr._sparsePageNum, - key, - sslr._l6Word, - sslr._lastWord, - sslr._l6StartOffset, - sslr._l6WordNum, - sslr._pageNum); - - PageDict4PLookupRes plr; - plr.lookup(_ssr, - _pData + _pageSize * splr._pageNum, - key, - splr._l3Word, - splr._lastWord, - splr._l3StartOffset, - splr._l3WordNum); - wordNum = plr._wordNum; - offsets = plr._startOffset; - if (plr._res) { - counts = plr._counts; - return true; - } - counts.clear(); - return false; - } -}; - -} +using Writer = search::diskindex::test::PageDict4MemWriter; +using SeqReader = search::diskindex::test::PageDict4MemSeqReader; +using RandReader = search::diskindex::test::PageDict4MemRandReader; class PageDict4TestApp : public FastOS_Application { diff --git a/searchlib/src/vespa/searchlib/test/diskindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/diskindex/CMakeLists.txt index 5b698234b90..3c80b981ae6 100644 --- a/searchlib/src/vespa/searchlib/test/diskindex/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/test/diskindex/CMakeLists.txt @@ -1,6 +1,9 @@ # Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(searchlib_searchlib_test_diskindex OBJECT SOURCES + pagedict4_mem_seq_reader.cpp + pagedict4_mem_rand_reader.cpp + pagedict4_mem_writer.cpp threelevelcountbuffers.cpp testdiskindex.cpp DEPENDS diff --git a/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_rand_reader.cpp b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_rand_reader.cpp new file mode 100644 index 00000000000..abea52bb2ed --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_rand_reader.cpp @@ -0,0 +1,74 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "pagedict4_mem_rand_reader.h" + +namespace search::diskindex::test { + +PageDict4MemRandReader::PageDict4MemRandReader(DC &ssd, + DC &spd, + DC &pd, + ThreeLevelCountWriteBuffers &wb) + : ThreeLevelCountReadBuffers(ssd, spd, pd, wb), + _ssr(_rcssd, + wb._ssHeaderLen, wb._ssFileBitSize, + wb._spHeaderLen, wb._spFileBitSize, + wb._pHeaderLen, wb._pFileBitSize), + _spData(static_cast(_rcspd._comprBuf)), + _pData(static_cast(_rcpd._comprBuf)), + _pageSize(search::bitcompression::PageDict4PageParams::getPageByteSize()) +{ + _ssr.setup(ssd); +} + +PageDict4MemRandReader::~PageDict4MemRandReader() = default; + +bool +PageDict4MemRandReader::lookup(const std::string &key, uint64_t &wordNum, + PostingListCounts &counts, StartOffset &offsets) +{ + PageDict4SSLookupRes sslr; + + sslr = _ssr.lookup(key); + if (!sslr._res) { + counts.clear(); + offsets = sslr._l6StartOffset; + wordNum = sslr._l6WordNum; + return false; + } + + if (sslr._overflow) { + wordNum = sslr._l6WordNum; + counts = sslr._counts; + offsets = sslr._startOffset; + return true; + } + PageDict4SPLookupRes splr; + splr.lookup(_ssr, + _spData + + _pageSize * sslr._sparsePageNum, + key, + sslr._l6Word, + sslr._lastWord, + sslr._l6StartOffset, + sslr._l6WordNum, + sslr._pageNum); + + PageDict4PLookupRes plr; + plr.lookup(_ssr, + _pData + _pageSize * splr._pageNum, + key, + splr._l3Word, + splr._lastWord, + splr._l3StartOffset, + splr._l3WordNum); + wordNum = plr._wordNum; + offsets = plr._startOffset; + if (plr._res) { + counts = plr._counts; + return true; + } + counts.clear(); + return false; +} + +} diff --git a/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_rand_reader.h b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_rand_reader.h new file mode 100644 index 00000000000..05a6f386225 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_rand_reader.h @@ -0,0 +1,34 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "threelevelcountbuffers.h" +#include + +namespace search::diskindex::test { + +/* + * Class for performing random lookups in memory based pagedict4 structure + */ +class PageDict4MemRandReader : public ThreeLevelCountReadBuffers +{ +public: + using PageDict4SSReader = search::bitcompression::PageDict4SSReader; + using PageDict4SSLookupRes = search::bitcompression::PageDict4SSLookupRes; + using PageDict4SPLookupRes = search::bitcompression::PageDict4SPLookupRes; + using PageDict4PLookupRes = search::bitcompression::PageDict4PLookupRes; + using StartOffset = search::bitcompression::PageDict4StartOffset; + using PostingListCounts = search::index::PostingListCounts; + PageDict4SSReader _ssr; + const char *_spData; + const char *_pData; + size_t _pageSize; + + PageDict4MemRandReader(DC &ssd, DC &spd, DC &pd, + ThreeLevelCountWriteBuffers &wb); + ~PageDict4MemRandReader(); + bool lookup(const std::string &key, uint64_t &wordNum, + PostingListCounts &counts, StartOffset &offsets); +}; + +} diff --git a/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_seq_reader.cpp b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_seq_reader.cpp new file mode 100644 index 00000000000..848088ee6fa --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_seq_reader.cpp @@ -0,0 +1,32 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "pagedict4_mem_seq_reader.h" + +namespace search::diskindex::test { + +PageDict4MemSeqReader::PageDict4MemSeqReader(DC &ssd, + DC &spd, + DC &pd, + ThreeLevelCountWriteBuffers &wb) + : ThreeLevelCountReadBuffers(ssd, spd, pd, wb), + _ssr(_rcssd, + wb._ssHeaderLen, wb._ssFileBitSize, + wb._spHeaderLen, wb._spFileBitSize, + wb._pHeaderLen, wb._pFileBitSize), + _pr(_ssr, spd, pd) +{ + _ssr.setup(ssd); + _pr.setup(); +} + +PageDict4MemSeqReader::~PageDict4MemSeqReader() = default; + +void +PageDict4MemSeqReader::readCounts(vespalib::string &word, + uint64_t &wordNum, + PostingListCounts &counts) +{ + _pr.readCounts(word, wordNum, counts); +} + +} diff --git a/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_seq_reader.h b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_seq_reader.h new file mode 100644 index 00000000000..f1573c6f3f8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_seq_reader.h @@ -0,0 +1,32 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "threelevelcountbuffers.h" +#include + +namespace search::diskindex::test { + +/* + * Class for performing sequential reads in memory based pagedict4 structure + */ +class PageDict4MemSeqReader : public ThreeLevelCountReadBuffers +{ +public: + using PageDict4SSReader = search::bitcompression::PageDict4SSReader; + using PageDict4Reader = search::bitcompression::PageDict4Reader; + using PostingListCounts = search::index::PostingListCounts; + PageDict4SSReader _ssr; + PageDict4Reader _pr; + + PageDict4MemSeqReader(DC &ssd, + DC &spd, + DC &pd, + ThreeLevelCountWriteBuffers &wb); + ~PageDict4MemSeqReader(); + void readCounts(vespalib::string &word, + uint64_t &wordNum, + PostingListCounts &counts); +}; + +} diff --git a/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_writer.cpp b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_writer.cpp new file mode 100644 index 00000000000..0c49b7f7935 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_writer.cpp @@ -0,0 +1,49 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "pagedict4_mem_writer.h" +#include + +namespace search::diskindex::test { + +PageDict4MemWriter::PageDict4MemWriter(EC &sse, + EC &spe, + EC &pe) + : ThreeLevelCountWriteBuffers(sse, spe, pe), + _ssw(NULL), + _spw(NULL), + _pw(NULL) +{ +} + +PageDict4MemWriter::~PageDict4MemWriter() +{ + delete _ssw; + delete _spw; + delete _pw; +} + +void +PageDict4MemWriter::allocWriters() +{ + _ssw = new PageDict4SSWriter(_sse); + _spw = new PageDict4SPWriter(*_ssw, _spe); + _pw = new PageDict4PWriter(*_spw, _pe); + _spw->setup(); + _pw->setup(); +} + +void +PageDict4MemWriter::flush() +{ + _pw->flush(); + ThreeLevelCountWriteBuffers::flush(); +} + +void +PageDict4MemWriter::addCounts(const std::string &word, + const PostingListCounts &counts) +{ + _pw->addCounts(word, counts); +} + +} diff --git a/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_writer.h b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_writer.h new file mode 100644 index 00000000000..96f11c0896c --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_writer.h @@ -0,0 +1,41 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "threelevelcountbuffers.h" + +namespace search::bitcompression { + +class PageDict4SSWriter; +class PageDict4SPWriter; +class PageDict4PWriter; + +} + +namespace search::index { class PostingListCounts; } + +namespace search::diskindex::test { + +/* + * Class for writing to memory based pagedict4 structure + */ +class PageDict4MemWriter : public ThreeLevelCountWriteBuffers +{ +public: + using PageDict4SSWriter = search::bitcompression::PageDict4SSWriter; + using PageDict4SPWriter = search::bitcompression::PageDict4SPWriter; + using PageDict4PWriter = search::bitcompression::PageDict4PWriter; + using PostingListCounts = search::index::PostingListCounts; + + PageDict4SSWriter *_ssw; + PageDict4SPWriter *_spw; + PageDict4PWriter *_pw; + + PageDict4MemWriter(EC &sse, EC &spe, EC &pe); + ~PageDict4MemWriter(); + void allocWriters(); + void flush(); + void addCounts(const std::string &word, const PostingListCounts &counts); +}; + +} -- cgit v1.2.3