diff options
author | Tor Egge <Tor.Egge@yahooinc.com> | 2022-01-31 14:32:35 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-31 14:32:35 +0100 |
commit | d1d19bbb3ae7f1d39a56624c45347b437c1fcb19 (patch) | |
tree | 22af4da0d8389cbc8b217a764356492c6779aeba | |
parent | 9c39af2f740e4c0e8e6d0de5d6ee712dd321e022 (diff) | |
parent | 67837c8bbd9583a0b0cdd388436328a4b8649362 (diff) |
Merge pull request #20966 from vespa-engine/balder/reduce-code-duplication-for-write
Refactor to reduce code duplication for write path.
33 files changed, 273 insertions, 445 deletions
diff --git a/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp b/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp index c5ea12a7568..570f1a6ea03 100644 --- a/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp +++ b/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp @@ -1,13 +1,16 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/log/log.h> -LOG_SETUP("bitvector_test"); + #include <vespa/vespalib/testkit/testapp.h> #include <vespa/searchlib/index/field_length_info.h> #include <vespa/searchlib/diskindex/bitvectordictionary.h> #include <vespa/searchlib/diskindex/fieldwriter.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchcommon/common/schema.h> #include <vespa/vespalib/io/fileutil.h> +#include <vespa/log/log.h> +LOG_SETUP("bitvector_test"); + using namespace search::index; using search::index::schema::DataType; diff --git a/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp b/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp index 9a326af4c3b..34f9f7d27a9 100644 --- a/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp +++ b/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include <vespa/searchlib/memoryindex/feature_store.h> +#include <vespa/searchcommon/common/schema.h> #include <vespa/vespalib/gtest/gtest.h> #include <vespa/log/log.h> diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp index 0d5835b4fa9..5ac506e4fc2 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp +++ b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp @@ -3,6 +3,7 @@ #include "compression.h" #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/index/postinglistparams.h> #include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/data/databuffer.h> #include <vespa/vespalib/util/arrayref.h> diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.h b/searchlib/src/vespa/searchlib/bitcompression/compression.h index 973d622461a..45005d499fb 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/compression.h +++ b/searchlib/src/vespa/searchlib/bitcompression/compression.h @@ -3,7 +3,6 @@ #pragma once #include <vespa/searchlib/util/comprfile.h> -#include <vespa/searchlib/index/postinglistparams.h> #include <vespa/vespalib/stllike/string.h> #include <cassert> @@ -14,7 +13,10 @@ template <typename T> class ConstArrayRef; } -namespace search::index { class DocIdAndFeatures; } +namespace search::index { + class DocIdAndFeatures; + class PostingListParams; +} namespace search::fef { class TermFieldMatchDataArray; } diff --git a/searchlib/src/vespa/searchlib/bitcompression/countcompression.h b/searchlib/src/vespa/searchlib/bitcompression/countcompression.h index 664a1245c2f..6eb37e1d1ad 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/countcompression.h +++ b/searchlib/src/vespa/searchlib/bitcompression/countcompression.h @@ -19,15 +19,13 @@ public: uint32_t _minChunkDocs; // Minimum number of documents for chunking uint32_t _docIdLimit; 
// Limit for document ids (docId < docIdLimit) uint64_t _numWordIds; // Number of words in dictionary - uint64_t _minWordNum; // Minimum word number PostingListCountFileDecodeContext() : ParentClass(), _avgBitsPerDoc(10), _minChunkDocs(262144), _docIdLimit(10000000), - _numWordIds(0), - _minWordNum(0u) + _numWordIds(0) { } @@ -50,15 +48,13 @@ public: uint32_t _minChunkDocs; // Minimum number of documents for chunking uint32_t _docIdLimit; // Limit for document ids (docId < docIdLimit) uint64_t _numWordIds; // Number of words in dictionary - uint64_t _minWordNum; // Mininum word number PostingListCountFileEncodeContext() : ParentClass(), _avgBitsPerDoc(10), _minChunkDocs(262144), _docIdLimit(10000000), - _numWordIds(0), - _minWordNum(0u) + _numWordIds(0) { } diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp index 76a65a7244a..fd6c723e901 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp +++ b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp @@ -1,10 +1,10 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#include "compression.h" #include "posocccompression.h" #include "posocc_fields_params.h" #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/index/postinglistparams.h> #include <vespa/vespalib/stllike/asciistream.h> #include <vespa/vespalib/data/fileheader.h> @@ -36,8 +36,7 @@ EG2PosOccDecodeContext<bigEndian>:: readHeader(const vespalib::GenericHeader &header, const vespalib::string &prefix) { - const_cast<PosOccFieldsParams *>(_fieldsParams)->readHeader(header, - prefix); + const_cast<PosOccFieldsParams *>(_fieldsParams)->readHeader(header, prefix); } diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h index 184e2414638..aadd58f9152 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h +++ b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h @@ -1,8 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#pragma once +#include "compression.h" #include <vespa/searchlib/index/docidandfeatures.h> -#include <vespa/searchcommon/common/schema.h> #define K_VALUE_POSOCC_FIRST_WORDPOS 8 @@ -16,35 +16,6 @@ #define K_VALUE_POSOCC_ELEMENTID 0 #define K_VALUE_POSOCC_ELEMENTWEIGHT 9 -namespace search::index { - -class DocIdAndPosOccFeatures : public DocIdAndFeatures -{ -public: - - void - addNextOcc(uint32_t elementId, - uint32_t wordPos, - int32_t elementWeight, - uint32_t elementLen) - { - assert(wordPos < elementLen); - if (_elements.empty() || elementId > _elements.back().getElementId()) { - _elements.emplace_back(elementId, elementWeight, elementLen); - } else { - assert(elementId == _elements.back().getElementId()); - assert(elementWeight == _elements.back().getWeight()); - assert(elementLen == _elements.back().getElementLen()); - } - assert(_elements.back().getNumOccs() == 0 || - wordPos > _word_positions.back().getWordPos()); - _elements.back().incNumOccs(); - _word_positions.emplace_back(wordPos); - } -}; - -} - namespace search::bitcompression { class PosOccFieldsParams; diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.h b/searchlib/src/vespa/searchlib/diskindex/diskindex.h index 05492a59ee3..12be8979cc3 100644 --- a/searchlib/src/vespa/searchlib/diskindex/diskindex.h +++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.h @@ -7,6 +7,7 @@ #include <vespa/searchlib/index/dictionaryfile.h> #include <vespa/searchlib/index/field_length_info.h> #include <vespa/searchlib/queryeval/searchable.h> +#include <vespa/searchcommon/common/schema.h> #include <vespa/vespalib/stllike/string.h> #include <vespa/vespalib/stllike/cache.h> diff --git a/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp b/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp index a4e9e4d06f7..dcf897df955 100644 --- a/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp @@ -5,8 +5,8 @@ #include "fileheader.h" #include 
<vespa/searchlib/index/postinglistcounts.h> #include <vespa/searchlib/index/docidandfeatures.h> -#include <vespa/searchlib/index/postinglistcounts.h> #include <vespa/searchlib/index/postinglistcountfile.h> +#include <vespa/searchlib/index/postinglistparams.h> #include <vespa/log/log.h> LOG_SETUP(".diskindex.extposocc"); diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h index f1b5c487e40..bf62965719d 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h +++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h @@ -4,9 +4,10 @@ #include "bitvectorfile.h" #include <vespa/searchlib/index/dictionaryfile.h> #include <vespa/searchlib/index/postinglistfile.h> -#include <vespa/searchlib/bitcompression/compression.h> -#include <vespa/searchlib/bitcompression/countcompression.h> #include <vespa/searchlib/bitcompression/posocccompression.h> +#include <vespa/searchlib/bitcompression/countcompression.h> + +namespace search::index { class Schema; } namespace search::diskindex { diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp index 32d0105b7c1..4462c90f4c5 100644 --- a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp @@ -6,6 +6,7 @@ #include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/io/fileutil.h> #include <vespa/vespalib/util/size_literals.h> +#include <vespa/fastos/file.h> #include <vespa/log/log.h> LOG_SETUP(".diskindex.pagedict4file"); @@ -123,7 +124,6 @@ PageDict4FileSeqRead::PageDict4FileSeqRead() _wordNum(0u) { } - PageDict4FileSeqRead::~PageDict4FileSeqRead() = default; void @@ -192,7 +192,6 @@ PageDict4FileSeqRead::open(const vespalib::string &name, return true; } - bool PageDict4FileSeqRead::close() { @@ -204,7 +203,6 @@ PageDict4FileSeqRead::close() return true; } - void 
PageDict4FileSeqRead::getParams(PostingListParams &params) { @@ -219,32 +217,83 @@ PageDict4FileSeqRead::getParams(PostingListParams &params) } } +struct PageDict4FileSeqWrite::DictFileContext { + DictFileContext(bool extended, vespalib::stringref id, vespalib::stringref desc, + const vespalib::string &name, const TuneFileSeqWrite &tune); + ~DictFileContext(); + void makeHeader(const FileHeaderContext &fileHeaderContext); + bool updateHeader(uint64_t fileBitSize, uint64_t wordNum); + void writeExtendedHeader(vespalib::GenericHeader &header); + bool close(); + const vespalib::string _id; + const vespalib::string _desc; + const bool _extended; + uint32_t _headerLen; + bool _valid; + EC _ec; + ComprFileWriteContext _writeContext; + FastOS_File _file; +}; -PageDict4FileSeqWrite::PageDict4FileSeqWrite() - : _pWriter(), - _spWriter(), - _ssWriter(), - _pe(), - _pWriteContext(_pe), - _pfile(), - _spe(), - _spWriteContext(_spe), - _spfile(), - _sse(), - _ssWriteContext(_sse), - _ssfile(), - _pHeaderLen(0), - _spHeaderLen(0), - _ssHeaderLen(0) +PageDict4FileSeqWrite::DictFileContext::DictFileContext(bool extended, vespalib::stringref id, vespalib::stringref desc, + const vespalib::string & name, const TuneFileSeqWrite &tune) + : _id(id), + _desc(desc), + _extended(extended), + _headerLen(0u), + _valid(false), + _ec(), + _writeContext(_ec), + _file() { - _pe.setWriteContext(&_pWriteContext); - _spe.setWriteContext(&_spWriteContext); - _sse.setWriteContext(&_ssWriteContext); + _ec.setWriteContext(&_writeContext); + if (tune.getWantSyncWrites()) { + _file.EnableSyncWrites(); + } + if (tune.getWantDirectIO()) { + _file.EnableDirectIO(); + } + bool ok = _file.OpenWriteOnly(name.c_str()); + assertOpenWriteOnly(ok, name); + _writeContext.setFile(&_file); + _writeContext.allocComprBuf(64_Ki, 32_Ki); + uint64_t fileSize = _file.GetSize(); + uint64_t bufferStartFilePos = _writeContext.getBufferStartFilePos(); + assert(fileSize >= bufferStartFilePos); + _file.SetSize(bufferStartFilePos); + 
assert(bufferStartFilePos == static_cast<uint64_t>(_file.GetPosition())); + + _ec.setupWrite(_writeContext); + assert(_ec.getWriteOffset() == 0); + _valid = true; } +bool +PageDict4FileSeqWrite::DictFileContext::DictFileContext::close() { + //uint64_t usedPBits = _ec.getWriteOffset(); + _ec.flush(); + _writeContext.writeComprBuffer(true); + + _writeContext.dropComprBuf(); + bool success = _file.Sync(); + success &= _file.Close(); + _writeContext.setFile(nullptr); + return success; +} -PageDict4FileSeqWrite::~PageDict4FileSeqWrite() = default; +PageDict4FileSeqWrite::DictFileContext::~DictFileContext() = default; +PageDict4FileSeqWrite::PageDict4FileSeqWrite() + : _params(), + _pWriter(), + _spWriter(), + _ssWriter(), + _ss(), + _sp(), + _p() +{ } + +PageDict4FileSeqWrite::~PageDict4FileSeqWrite() = default; void PageDict4FileSeqWrite::writeWord(vespalib::stringref word, const PostingListCounts &counts) @@ -252,122 +301,48 @@ PageDict4FileSeqWrite::writeWord(vespalib::stringref word, const PostingListCoun _pWriter->addCounts(word, counts); } - bool PageDict4FileSeqWrite::open(const vespalib::string &name, - const TuneFileSeqWrite &tuneFileWrite, + const TuneFileSeqWrite &tune, const FileHeaderContext &fileHeaderContext) { assert( ! _pWriter); assert( ! _spWriter); assert( ! 
_ssWriter); - - vespalib::string pname = name + ".pdat"; - vespalib::string spname = name + ".spdat"; - vespalib::string ssname = name + ".ssdat"; - - if (tuneFileWrite.getWantSyncWrites()) { - _pfile.EnableSyncWrites(); - _spfile.EnableSyncWrites(); - _ssfile.EnableSyncWrites(); - } - if (tuneFileWrite.getWantDirectIO()) { - _pfile.EnableDirectIO(); - _spfile.EnableDirectIO(); - _ssfile.EnableDirectIO(); - } - bool ok = _pfile.OpenWriteOnly(pname.c_str()); - assertOpenWriteOnly(ok, pname); - _pWriteContext.setFile(&_pfile); - - ok = _spfile.OpenWriteOnly(spname.c_str()); - assertOpenWriteOnly(ok, spname); - _spWriteContext.setFile(&_spfile); - - ok = _ssfile.OpenWriteOnly(ssname.c_str()); - assertOpenWriteOnly(ok, ssname); - _ssWriteContext.setFile(&_ssfile); - - _pWriteContext.allocComprBuf(64_Ki, 32_Ki); - _spWriteContext.allocComprBuf(64_Ki, 32_Ki); - _ssWriteContext.allocComprBuf(64_Ki, 32_Ki); - - uint64_t pFileSize = _pfile.GetSize(); - uint64_t spFileSize = _spfile.GetSize(); - uint64_t ssFileSize = _ssfile.GetSize(); - uint64_t pBufferStartFilePos = _pWriteContext.getBufferStartFilePos(); - uint64_t spBufferStartFilePos = _spWriteContext.getBufferStartFilePos(); - uint64_t ssBufferStartFilePos = _ssWriteContext.getBufferStartFilePos(); - assert(pFileSize >= pBufferStartFilePos); - assert(spFileSize >= spBufferStartFilePos); - assert(ssFileSize >= ssBufferStartFilePos); - (void) pFileSize; - (void) spFileSize; - (void) ssFileSize; - _pfile.SetSize(pBufferStartFilePos); - _spfile.SetSize(spBufferStartFilePos); - _ssfile.SetSize(ssBufferStartFilePos); - assert(pBufferStartFilePos == static_cast<uint64_t>(_pfile.GetPosition())); - assert(spBufferStartFilePos == - static_cast<uint64_t>(_spfile.GetPosition())); - assert(ssBufferStartFilePos == - static_cast<uint64_t>(_ssfile.GetPosition())); - - _pe.setupWrite(_pWriteContext); - _spe.setupWrite(_spWriteContext); - _sse.setupWrite(_ssWriteContext); - assert(_pe.getWriteOffset() == 0); - 
assert(_spe.getWriteOffset() == 0); - assert(_sse.getWriteOffset() == 0); - _spe.copyParams(_sse); - _pe.copyParams(_sse); + _ss = std::make_unique<DictFileContext>(true, mySSId, "Dictionary sparse sparse file", name + ".ssdat", tune); + _sp = std::make_unique<DictFileContext>(false, mySPId, "Dictionary sparse page file", name + ".spdat", tune); + _p = std::make_unique<DictFileContext>(false, myPId, "Dictionary page file", name + ".pdat", tune); + activateParams(_params); // Write initial file headers - makePHeader(fileHeaderContext); - makeSPHeader(fileHeaderContext); - makeSSHeader(fileHeaderContext); + _p->makeHeader(fileHeaderContext); + _sp->makeHeader(fileHeaderContext); + _ss->makeHeader(fileHeaderContext); - _ssWriter = std::make_unique<SSWriter>(_sse); - _spWriter = std::make_unique<SPWriter>(*_ssWriter, _spe); - _pWriter = std::make_unique<PWriter>(*_spWriter, _pe); + _ssWriter = std::make_unique<SSWriter>(_ss->_ec); + _spWriter = std::make_unique<SPWriter>(*_ssWriter, _sp->_ec); + _pWriter = std::make_unique<PWriter>(*_spWriter, _p->_ec); _spWriter->setup(); _pWriter->setup(); - return true; } - bool PageDict4FileSeqWrite::close() { bool success = true; _pWriter->flush(); - uint64_t usedPBits = _pe.getWriteOffset(); - uint64_t usedSPBits = _spe.getWriteOffset(); - uint64_t usedSSBits = _sse.getWriteOffset(); - _pe.flush(); - _pWriteContext.writeComprBuffer(true); - _spe.flush(); - _spWriteContext.writeComprBuffer(true); - _sse.flush(); - _ssWriteContext.writeComprBuffer(true); - - _pWriteContext.dropComprBuf(); - success &= _pfile.Sync(); - success &= _pfile.Close(); - _pWriteContext.setFile(nullptr); - _spWriteContext.dropComprBuf(); - success &= _spfile.Sync(); - success &= _spfile.Close(); - _spWriteContext.setFile(nullptr); - _ssWriteContext.dropComprBuf(); - success &= _ssfile.Sync(); - success &= _ssfile.Close(); - _ssWriteContext.setFile(nullptr); + uint64_t usedPBits = _p->_ec.getWriteOffset(); + uint64_t usedSPBits = _sp->_ec.getWriteOffset(); + 
uint64_t usedSSBits = _ss->_ec.getWriteOffset(); + success &= _p->close(); + success &= _sp->close(); + success &= _ss->close(); + uint64_t wordNum = _pWriter->getWordNum(); // Update file headers - success &= updatePHeader(usedPBits); - success &= updateSPHeader(usedSPBits); - success &= updateSSHeader(usedSSBits); + success &= _p->updateHeader(usedPBits, wordNum); + success &= _sp->updateHeader(usedSPBits, wordNum); + success &= _ss->updateHeader(usedSSBits, wordNum); _pWriter.reset(); _spWriter.reset(); @@ -376,192 +351,99 @@ PageDict4FileSeqWrite::close() return success; } - void -PageDict4FileSeqWrite::writeSSSubHeader(vespalib::GenericHeader &header) +PageDict4FileSeqWrite::DictFileContext::writeExtendedHeader(vespalib::GenericHeader &header) { - SSEC &e = _sse; typedef vespalib::GenericHeader::Tag Tag; - header.putTag(Tag("numWordIds", e._numWordIds)); - header.putTag(Tag("avgBitsPerDoc", e._avgBitsPerDoc)); - header.putTag(Tag("minChunkDocs", e._minChunkDocs)); - header.putTag(Tag("docIdLimit", e._docIdLimit)); + header.putTag(Tag("numWordIds", _ec._numWordIds)); + header.putTag(Tag("avgBitsPerDoc", _ec._avgBitsPerDoc)); + header.putTag(Tag("minChunkDocs", _ec._minChunkDocs)); + header.putTag(Tag("docIdLimit", _ec._docIdLimit)); } - void -PageDict4FileSeqWrite::makePHeader(const FileHeaderContext &fileHeaderContext) +PageDict4FileSeqWrite::DictFileContext::makeHeader(const FileHeaderContext &fileHeaderContext) { - PEC &e = _pe; - ComprFileWriteContext &wc = _pWriteContext; - - // subheader only written to SS file. 
- typedef vespalib::GenericHeader::Tag Tag; vespalib::FileHeader header(FileSettings::DIRECTIO_ALIGNMENT); - fileHeaderContext.addTags(header, _pfile.GetFileName()); + fileHeaderContext.addTags(header, _file.GetFileName()); header.putTag(Tag("frozen", 0)); header.putTag(Tag("fileBitSize", 0)); - header.putTag(Tag("format.0", myPId)); + header.putTag(Tag("format.0", _id)); header.putTag(Tag("endian", "big")); - header.putTag(Tag("desc", "Dictionary page file")); - e.setupWrite(wc); - e.writeHeader(header); - e.smallAlign(64); - e.flush(); - uint32_t headerLen = header.getSize(); - headerLen += (-headerLen & 7); - assert(e.getWriteOffset() == headerLen * 8); - assert((e.getWriteOffset() & 63) == 0); // Header must be word aligned - if (_pHeaderLen != 0) { - assert(_pHeaderLen == headerLen); + header.putTag(Tag("desc", _desc)); + if (_extended) { + writeExtendedHeader(header); } - _pHeaderLen = headerLen; -} - - -void -PageDict4FileSeqWrite::makeSPHeader(const FileHeaderContext &fileHeaderContext) -{ - SPEC &e = _spe; - ComprFileWriteContext &wc = _spWriteContext; - - // subheader only written to SS file. 
- - typedef vespalib::GenericHeader::Tag Tag; - vespalib::FileHeader header(FileSettings::DIRECTIO_ALIGNMENT); - - fileHeaderContext.addTags(header, _spfile.GetFileName()); - header.putTag(Tag("frozen", 0)); - header.putTag(Tag("fileBitSize", 0)); - header.putTag(Tag("format.0", mySPId)); - header.putTag(Tag("endian", "big")); - header.putTag(Tag("desc", "Dictionary sparse page file")); - e.setupWrite(wc); - e.writeHeader(header); - e.smallAlign(64); - e.flush(); - uint32_t headerLen = header.getSize(); - headerLen += (-headerLen & 7); - assert(e.getWriteOffset() == headerLen * 8); - assert((e.getWriteOffset() & 63) == 0); // Header must be word aligned - if (_spHeaderLen != 0) { - assert(_spHeaderLen == headerLen); - } - _spHeaderLen = headerLen; -} - - -void -PageDict4FileSeqWrite::makeSSHeader(const FileHeaderContext &fileHeaderContext) -{ - SSEC &e = _sse; - ComprFileWriteContext &wc = _ssWriteContext; - - typedef vespalib::GenericHeader::Tag Tag; - vespalib::FileHeader header(FileSettings::DIRECTIO_ALIGNMENT); - - fileHeaderContext.addTags(header, _ssfile.GetFileName()); - header.putTag(Tag("frozen", 0)); - header.putTag(Tag("fileBitSize", 0)); - header.putTag(Tag("format.0", mySSId)); - header.putTag(Tag("endian", "big")); - header.putTag(Tag("desc", "Dictionary sparse sparse file")); - writeSSSubHeader(header); - - e.setupWrite(wc); - e.writeHeader(header); - e.smallAlign(64); - e.flush(); + _ec.setupWrite(_writeContext); + _ec.writeHeader(header); + _ec.smallAlign(64); + _ec.flush(); uint32_t headerLen = header.getSize(); headerLen += (-headerLen & 7); - assert(e.getWriteOffset() == headerLen * 8); - assert((e.getWriteOffset() & 63) == 0); // Header must be word aligned - if (_ssHeaderLen != 0) { - assert(_ssHeaderLen == headerLen); + assert(_ec.getWriteOffset() == headerLen * 8); + assert((_ec.getWriteOffset() & 63) == 0); // Header must be word aligned + if (_headerLen != 0) { + assert(_headerLen == headerLen); } - _ssHeaderLen = headerLen; -} - - -bool 
-PageDict4FileSeqWrite::updatePHeader(uint64_t fileBitSize) -{ - vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT); - FastOS_File f; - f.OpenReadWrite(_pfile.GetFileName()); - h.readFile(f); - FileHeaderContext::setFreezeTime(h); - typedef vespalib::GenericHeader::Tag Tag; - h.putTag(Tag("frozen", 1)); - h.putTag(Tag("fileBitSize", fileBitSize)); - h.rewriteFile(f); - bool success = f.Sync(); - success &= f.Close(); - return success; + _headerLen = headerLen; } - bool -PageDict4FileSeqWrite::updateSPHeader(uint64_t fileBitSize) +PageDict4FileSeqWrite::DictFileContext::updateHeader(uint64_t fileBitSize, uint64_t wordNum) { vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT); FastOS_File f; - f.OpenReadWrite(_spfile.GetFileName()); + f.OpenReadWrite(_file.GetFileName()); h.readFile(f); FileHeaderContext::setFreezeTime(h); typedef vespalib::GenericHeader::Tag Tag; h.putTag(Tag("frozen", 1)); h.putTag(Tag("fileBitSize", fileBitSize)); + if (_extended) { + assert(wordNum <= _ec._numWordIds); + h.putTag(Tag("numWordIds", wordNum)); + } h.rewriteFile(f); bool success = f.Sync(); success &= f.Close(); return success; } - -bool -PageDict4FileSeqWrite::updateSSHeader(uint64_t fileBitSize) -{ - vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT); - FastOS_File f; - f.OpenReadWrite(_ssfile.GetFileName()); - h.readFile(f); - FileHeaderContext::setFreezeTime(h); - typedef vespalib::GenericHeader::Tag Tag; - h.putTag(Tag("frozen", 1)); - h.putTag(Tag("fileBitSize", fileBitSize)); - uint64_t wordNum = _pWriter->getWordNum(); - assert(wordNum <= _sse._numWordIds); - h.putTag(Tag("numWordIds", wordNum)); - h.rewriteFile(f); - bool success = f.Sync(); - success &= f.Close(); - return success; +void +PageDict4FileSeqWrite::setParams(const PostingListParams &params) { + _params.add(params); + if (_ss) { + activateParams(_params); + } } - void -PageDict4FileSeqWrite::setParams(const PostingListParams &params) -{ - params.get("avgBitsPerDoc", _sse._avgBitsPerDoc); - 
params.get("minChunkDocs", _sse._minChunkDocs); - params.get("docIdLimit", _sse._docIdLimit); - params.get("numWordIds", _sse._numWordIds); - _spe.copyParams(_sse); - _pe.copyParams(_sse); +PageDict4FileSeqWrite::activateParams(const PostingListParams &params) { + assert(_ss); + EC & ec = _ss->_ec; + params.get("avgBitsPerDoc", ec._avgBitsPerDoc); + params.get("minChunkDocs", ec._minChunkDocs); + params.get("docIdLimit", ec._docIdLimit); + params.get("numWordIds", ec._numWordIds); + _sp->_ec.copyParams(_ss->_ec); + _p->_ec.copyParams(_ss->_ec); } - void PageDict4FileSeqWrite::getParams(PostingListParams &params) { params.clear(); - params.set("avgBitsPerDoc", _sse._avgBitsPerDoc); - params.set("minChunkDocs", _sse._minChunkDocs); - params.set("docIdLimit", _sse._docIdLimit); - params.set("numWordIds", _sse._numWordIds); + if (_ss) { + EC &ec = _ss->_ec; + params.set("avgBitsPerDoc", ec._avgBitsPerDoc); + params.set("minChunkDocs", ec._minChunkDocs); + params.set("docIdLimit", ec._docIdLimit); + params.set("numWordIds", ec._numWordIds); + } else { + params = _params; + } } } diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h index 6e2ab6f9ffa..1c43c20a219 100644 --- a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h +++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h @@ -2,10 +2,8 @@ #pragma once #include <vespa/searchlib/index/dictionaryfile.h> -#include <vespa/searchlib/bitcompression/compression.h> -#include <vespa/searchlib/bitcompression/countcompression.h> +#include <vespa/searchlib/index/postinglistparams.h> #include <vespa/searchlib/bitcompression/pagedict4.h> -#include <vespa/fastos/file.h> namespace vespalib { class GenericHeader; } @@ -47,44 +45,23 @@ public: */ class PageDict4FileSeqWrite : public index::DictionaryFileSeqWrite { - typedef bitcompression::PostingListCountFileEncodeContext EC; - typedef EC SPEC; - typedef EC PEC; - typedef EC SSEC; - typedef 
bitcompression::PageDict4SSWriter SSWriter; - typedef bitcompression::PageDict4SPWriter SPWriter; - typedef bitcompression::PageDict4PWriter PWriter; - - typedef index::PostingListCounts PostingListCounts; + using EC = bitcompression::PostingListCountFileEncodeContext; + using SSWriter = bitcompression::PageDict4SSWriter; + using SPWriter = bitcompression::PageDict4SPWriter; + using PWriter = bitcompression::PageDict4PWriter; + using PostingListCounts = index::PostingListCounts; using FileHeaderContext = common::FileHeaderContext; + struct DictFileContext; + index::PostingListParams _params; std::unique_ptr<PWriter> _pWriter; std::unique_ptr<SPWriter> _spWriter; std::unique_ptr<SSWriter> _ssWriter; + std::unique_ptr<DictFileContext> _ss; + std::unique_ptr<DictFileContext> _sp; + std::unique_ptr<DictFileContext> _p; - EC _pe; - ComprFileWriteContext _pWriteContext; - FastOS_File _pfile; - - EC _spe; - ComprFileWriteContext _spWriteContext; - FastOS_File _spfile; - - EC _sse; - ComprFileWriteContext _ssWriteContext; - FastOS_File _ssfile; - - uint32_t _pHeaderLen; // Length of header for page file (bytes) - uint32_t _spHeaderLen; // Length of header for sparse page file (bytes) - uint32_t _ssHeaderLen; // Length of header for sparse sparse file (bytes) - - void writeSSSubHeader(vespalib::GenericHeader &header); - void makePHeader(const FileHeaderContext &fileHeaderContext); - void makeSPHeader(const FileHeaderContext &fileHeaderContext); - void makeSSHeader(const FileHeaderContext &fileHeaderContext); - bool updatePHeader(uint64_t fileBitSize); - bool updateSPHeader(uint64_t fileBitSize); - bool updateSSHeader(uint64_t fileBitSize); + void activateParams(const index::PostingListParams &params); public: PageDict4FileSeqWrite(); ~PageDict4FileSeqWrite(); @@ -95,7 +72,7 @@ public: * Open dictionary file for sequential write. The index with most * words should be first for optimal compression. 
*/ - bool open(const vespalib::string &name, const TuneFileSeqWrite &tuneFileWrite, + bool open(const vespalib::string &name, const TuneFileSeqWrite &tune, const FileHeaderContext &fileHeaderContext) override; bool close() override; diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer_base.cpp b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer_base.cpp index a14c880a214..3f44b56706a 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer_base.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer_base.cpp @@ -2,6 +2,7 @@ #include "zc4_posting_writer_base.h" #include <vespa/searchlib/index/postinglistcounts.h> +#include <vespa/searchlib/index/postinglistparams.h> using search::index::PostingListCounts; using search::index::PostingListParams; @@ -225,9 +226,7 @@ Zc4PostingWriterBase::Zc4PostingWriterBase(PostingListCounts &counts) _l4Skip.maybeExpand(); } -Zc4PostingWriterBase::~Zc4PostingWriterBase() -{ -} +Zc4PostingWriterBase::~Zc4PostingWriterBase() = default; #define L1SKIPSTRIDE 16 #define L2SKIPSTRIDE 8 diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp index 593d5567266..d0b7fb42692 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp @@ -3,9 +3,7 @@ #include "zcposocc.h" #include <vespa/searchlib/index/postinglistcounts.h> #include <vespa/searchlib/index/postinglistcountfile.h> -#include <vespa/searchlib/index/postinglistfile.h> -#include <vespa/searchlib/index/docidandfeatures.h> - +#include <vespa/searchlib/index/postinglistparams.h> namespace search::diskindex { diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp index 544e8d9f262..1f399971406 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp @@ -5,6 +5,7 @@ 
#include <vespa/searchlib/index/postinglistcountfile.h> #include <vespa/searchlib/index/postinglistfile.h> #include <vespa/searchlib/index/docidandfeatures.h> +#include <vespa/searchlib/index/postinglistparams.h> #include <vespa/searchlib/common/fileheadercontext.h> #include <vespa/vespalib/data/fileheader.h> @@ -46,9 +47,7 @@ Zc4PostingSeqRead::Zc4PostingSeqRead(PostingListCountFileSeqRead *countFile, boo } -Zc4PostingSeqRead::~Zc4PostingSeqRead() -{ -} +Zc4PostingSeqRead::~Zc4PostingSeqRead() = default; void Zc4PostingSeqRead::readDocIdAndFeatures(DocIdAndFeatures &features) @@ -201,9 +200,7 @@ Zc4PostingSeqWrite(PostingListCountFileSeqWrite *countFile) } -Zc4PostingSeqWrite::~Zc4PostingSeqWrite() -{ -} +Zc4PostingSeqWrite::~Zc4PostingSeqWrite() = default; void diff --git a/searchlib/src/vespa/searchlib/index/dictionaryfile.h b/searchlib/src/vespa/searchlib/index/dictionaryfile.h index 5063143d323..6c8535f8563 100644 --- a/searchlib/src/vespa/searchlib/index/dictionaryfile.h +++ b/searchlib/src/vespa/searchlib/index/dictionaryfile.h @@ -1,7 +1,6 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once -#include "postinglistcounts.h" #include "postinglisthandle.h" #include "postinglistcountfile.h" #include <vespa/searchlib/common/tunefileinfo.h> diff --git a/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp b/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp index f62a4bc7997..4341bcb9a46 100644 --- a/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp +++ b/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp @@ -1,8 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "docidandfeatures.h" -#include <vespa/log/log.h> -LOG_SETUP(".index.docidandfeatures"); +#include <cassert> namespace search::index { @@ -23,4 +22,21 @@ DocIdAndFeatures::DocIdAndFeatures(const DocIdAndFeatures &) = default; DocIdAndFeatures & DocIdAndFeatures::operator = (const DocIdAndFeatures &) = default; DocIdAndFeatures::~DocIdAndFeatures() = default; +void +DocIdAndPosOccFeatures::addNextOcc(uint32_t elementId, uint32_t wordPos, int32_t elementWeight, uint32_t elementLen) +{ + assert(wordPos < elementLen); + if (_elements.empty() || elementId > _elements.back().getElementId()) { + _elements.emplace_back(elementId, elementWeight, elementLen); + } else { + assert(elementId == _elements.back().getElementId()); + assert(elementWeight == _elements.back().getWeight()); + assert(elementLen == _elements.back().getElementLen()); + } + assert(_elements.back().getNumOccs() == 0 || + wordPos > _word_positions.back().getWordPos()); + _elements.back().incNumOccs(); + _word_positions.emplace_back(wordPos); +} + } diff --git a/searchlib/src/vespa/searchlib/index/docidandfeatures.h b/searchlib/src/vespa/searchlib/index/docidandfeatures.h index 6b1659771fa..e595ec833ef 100644 --- a/searchlib/src/vespa/searchlib/index/docidandfeatures.h +++ b/searchlib/src/vespa/searchlib/index/docidandfeatures.h @@ -163,4 +163,10 @@ public: void set_has_raw_data(bool val) { _has_raw_data = val; } }; +class DocIdAndPosOccFeatures : public DocIdAndFeatures +{ +public: + void addNextOcc(uint32_t elementId, uint32_t wordPos, int32_t elementWeight, uint32_t elementLen); +}; + } diff --git a/searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp b/searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp index f4c38636d01..edf4f8c43b2 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp +++ b/searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp @@ -1,16 +1,12 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. #include "postinglistcountfile.h" +#include <vespa/searchlib/index/postinglistparams.h> namespace search::index { -PostingListCountFileSeqRead::PostingListCountFileSeqRead() -{ -} - -PostingListCountFileSeqRead::~PostingListCountFileSeqRead() -{ -} +PostingListCountFileSeqRead::PostingListCountFileSeqRead() = default; +PostingListCountFileSeqRead::~PostingListCountFileSeqRead() = default; void PostingListCountFileSeqRead:: @@ -19,13 +15,8 @@ getParams(PostingListParams &params) params.clear(); } -PostingListCountFileSeqWrite::PostingListCountFileSeqWrite() -{ -} - -PostingListCountFileSeqWrite::~PostingListCountFileSeqWrite() -{ -} +PostingListCountFileSeqWrite::PostingListCountFileSeqWrite() = default; +PostingListCountFileSeqWrite::~PostingListCountFileSeqWrite() = default; void PostingListCountFileSeqWrite:: diff --git a/searchlib/src/vespa/searchlib/index/postinglistcountfile.h b/searchlib/src/vespa/searchlib/index/postinglistcountfile.h index 47ec202dad1..7e17fc5bb9e 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistcountfile.h +++ b/searchlib/src/vespa/searchlib/index/postinglistcountfile.h @@ -1,9 +1,9 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. 
#pragma once -#include "postinglistparams.h" #include "postinglistcounts.h" #include <vespa/searchlib/common/tunefileinfo.h> +#include <vespa/vespalib/stllike/string.h> namespace search::common { class FileHeaderContext; } @@ -11,6 +11,7 @@ namespace search::index { class PostingListCounts; class PostingListHandle; +class PostingListParams; /** * Interface for count files describing where in a posting list file diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp index 15412fcd5f1..4d53790bd73 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp +++ b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp @@ -1,14 +1,12 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "postinglistfile.h" +#include <vespa/searchlib/index/postinglistparams.h> #include <vespa/fastos/file.h> namespace search::index { -PostingListFileSeqRead::PostingListFileSeqRead() -{ -} - +PostingListFileSeqRead::PostingListFileSeqRead() = default; PostingListFileSeqRead::~PostingListFileSeqRead() = default; void @@ -37,9 +35,7 @@ PostingListFileSeqWrite::PostingListFileSeqWrite() { } -PostingListFileSeqWrite::~PostingListFileSeqWrite() -{ -} +PostingListFileSeqWrite::~PostingListFileSeqWrite() = default; void PostingListFileSeqWrite:: @@ -75,9 +71,7 @@ PostingListFileRandRead() { } -PostingListFileRandRead::~PostingListFileRandRead() -{ -} +PostingListFileRandRead::~PostingListFileRandRead() = default; void PostingListFileRandRead::afterOpen(FastOS_FileInterface &file) @@ -117,8 +111,7 @@ readPostingList(const PostingListCounts &counts, uint32_t numSegments, PostingListHandle &handle) { - _lower->readPostingList(counts, firstSegment, numSegments, - handle); + _lower->readPostingList(counts, firstSegment, numSegments,handle); } bool diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.h 
b/searchlib/src/vespa/searchlib/index/postinglistfile.h index d731b3f0f67..a33319e1d4f 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistfile.h +++ b/searchlib/src/vespa/searchlib/index/postinglistfile.h @@ -3,8 +3,8 @@ #include "postinglistcounts.h" #include "postinglisthandle.h" -#include "postinglistparams.h" #include <vespa/searchlib/common/tunefileinfo.h> +#include <vespa/vespalib/stllike/string.h> class FastOS_FileInterface; @@ -14,6 +14,7 @@ namespace search::index { class DocIdAndFeatures; class FieldLengthInfo; +class PostingListParams; /** * Interface for posting list files containing document ids and features diff --git a/searchlib/src/vespa/searchlib/index/postinglistparams.cpp b/searchlib/src/vespa/searchlib/index/postinglistparams.cpp index 74e8f731f6f..6275399c498 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistparams.cpp +++ b/searchlib/src/vespa/searchlib/index/postinglistparams.cpp @@ -49,6 +49,12 @@ PostingListParams::clear() } void +PostingListParams::add(const PostingListParams & toAdd) +{ + _map.insert(toAdd._map.begin(), toAdd._map.end()); +} + +void PostingListParams::erase(const vespalib::string &key) { _map.erase(key); @@ -62,8 +68,7 @@ PostingListParams::operator!=(const PostingListParams &rhs) const template <typename TYPE> void -PostingListParams::set(const vespalib::string &key, - const TYPE &val) +PostingListParams::set(const vespalib::string &key, const TYPE &val) { std::ostringstream os; @@ -73,8 +78,7 @@ PostingListParams::set(const vespalib::string &key, template <typename TYPE> void -PostingListParams::get(const vespalib::string &key, - TYPE &val) const +PostingListParams::get(const vespalib::string &key, TYPE &val) const { std::istringstream is; Map::const_iterator it; @@ -87,35 +91,27 @@ PostingListParams::get(const vespalib::string &key, } template void -PostingListParams::set<bool>(const vespalib::string &key, - const bool &val); +PostingListParams::set<bool>(const vespalib::string &key, const bool 
&val); template void -PostingListParams::get<bool>(const vespalib::string &key, - bool &val) const; +PostingListParams::get<bool>(const vespalib::string &key, bool &val) const; template void -PostingListParams::set<int32_t>(const vespalib::string &key, - const int32_t &val); +PostingListParams::set<int32_t>(const vespalib::string &key, const int32_t &val); template void -PostingListParams::get<int32_t>(const vespalib::string &key, - int32_t &val) const; +PostingListParams::get<int32_t>(const vespalib::string &key, int32_t &val) const; template void -PostingListParams::set<uint32_t>(const vespalib::string &key, - const uint32_t &val); +PostingListParams::set<uint32_t>(const vespalib::string &key, const uint32_t &val); template void -PostingListParams::get<uint32_t>(const vespalib::string &key, - uint32_t &val) const; +PostingListParams::get<uint32_t>(const vespalib::string &key, uint32_t &val) const; template void -PostingListParams::set<uint64_t>(const vespalib::string &key, - const uint64_t &val); +PostingListParams::set<uint64_t>(const vespalib::string &key, const uint64_t &val); template void -PostingListParams::get<uint64_t>(const vespalib::string &key, - uint64_t &val) const; +PostingListParams::get<uint64_t>(const vespalib::string &key, uint64_t &val) const; } diff --git a/searchlib/src/vespa/searchlib/index/postinglistparams.h b/searchlib/src/vespa/searchlib/index/postinglistparams.h index 9797eef5278..42da5855c23 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistparams.h +++ b/searchlib/src/vespa/searchlib/index/postinglistparams.h @@ -7,34 +7,22 @@ namespace search::index { class PostingListParams { - typedef std::map<vespalib::string, vespalib::string> Map; + using Map = std::map<vespalib::string, vespalib::string>; Map _map; public: template <typename TYPE> - void - set(const vespalib::string &key, const TYPE &val); + void set(const vespalib::string &key, const TYPE &val); template <typename TYPE> - void - get(const vespalib::string &key, TYPE 
&val) const; - - bool - isSet(const vespalib::string &key) const; - - void - setStr(const vespalib::string &key, const vespalib::string &val); - - const vespalib::string & - getStr(const vespalib::string &key) const; - - void - clear(); - - void - erase(const vespalib::string &key); - - bool - operator!=(const PostingListParams &rhs) const; + void get(const vespalib::string &key, TYPE &val) const; + + bool isSet(const vespalib::string &key) const; + void setStr(const vespalib::string &key, const vespalib::string &val); + const vespalib::string & getStr(const vespalib::string &key) const; + void clear(); + void erase(const vespalib::string &key); + bool operator!=(const PostingListParams &rhs) const; + void add(const PostingListParams & toAdd); }; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp index fdb2de8fb59..c55de3890cd 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp @@ -11,6 +11,7 @@ #include <vespa/searchlib/common/schedule_sequenced_task_callback.h> #include <vespa/vespalib/util/isequencedtaskexecutor.h> #include <vespa/vespalib/util/retain_guard.h> +#include <cassert> namespace search::memoryindex { @@ -28,8 +29,7 @@ DocumentInverter::DocumentInverter(DocumentInverterContext& context) { auto& schema = context.get_schema(); auto& field_indexes = context.get_field_indexes(); - for (uint32_t fieldId = 0; fieldId < schema.getNumIndexFields(); - ++fieldId) { + for (uint32_t fieldId = 0; fieldId < schema.getNumIndexFields(); ++fieldId) { auto &remover(field_indexes.get_remover(fieldId)); auto &inserter(field_indexes.get_inserter(fieldId)); auto &calculator(field_indexes.get_calculator(fieldId)); diff --git a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h index 2ad1fd78f07..9f17d369208 100644 --- 
a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h +++ b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h @@ -3,7 +3,6 @@ #pragma once #include <vespa/searchlib/index/docidandfeatures.h> -#include <vespa/searchlib/bitcompression/compression.h> #include <vespa/searchlib/bitcompression/posocccompression.h> #include <vespa/searchlib/bitcompression/posocc_fields_params.h> #include <vespa/vespalib/datastore/datastore.h> diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp index 5f4d02d23db..c606b9b6340 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp @@ -5,6 +5,7 @@ #include "ordered_field_index_inserter.h" #include <vespa/searchlib/bitcompression/posocccompression.h> #include <vespa/searchlib/index/i_field_length_inspector.h> +#include <vespa/searchcommon/common/schema.h> #include <vespa/vespalib/btree/btree.hpp> #include <vespa/vespalib/btree/btreeiterator.hpp> #include <vespa/vespalib/btree/btreenode.hpp> diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp index d96b8491027..a443e994559 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp @@ -13,6 +13,7 @@ #include <vespa/document/fieldvalue/weightedsetfieldvalue.h> #include <vespa/searchlib/bitcompression/compression.h> #include <vespa/searchlib/bitcompression/posocccompression.h> +#include <vespa/searchcommon/common/schema.h> #include <vespa/searchlib/common/sort.h> #include <vespa/searchlib/util/url.h> #include <vespa/vespalib/text/utf8.h> @@ -443,6 +444,17 @@ FieldInverter::invertField(uint32_t docId, const FieldValue::UP &val) } void +FieldInverter::startDoc(uint32_t docId) { + assert(_docId == 0); + assert(docId != 0); 
+ abortPendingDoc(docId); + _removeDocs.push_back(docId); + _docId = docId; + _elem = 0; + _wpos = 0; +} + +void FieldInverter::invertNormalDocTextField(const FieldValue &val) { const vespalib::Identifiable::RuntimeClass & cInfo(val.getClass()); diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h index 56cb1677f67..36dd6339b54 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h @@ -6,13 +6,14 @@ #include <vespa/document/annotation/span.h> #include <vespa/document/datatype/datatypes.h> #include <vespa/document/fieldvalue/document.h> -#include <vespa/searchlib/bitcompression/compression.h> -#include <vespa/searchlib/bitcompression/posocccompression.h> #include <vespa/searchlib/index/docidandfeatures.h> #include <vespa/vespalib/stllike/allocator.h> #include <limits> -namespace search::index { class FieldLengthCalculator; } +namespace search::index { + class FieldLengthCalculator; + class Schema; +} namespace search::memoryindex { @@ -310,15 +311,7 @@ public: _removeDocs.push_back(docId); } - void startDoc(uint32_t docId) { - assert(_docId == 0); - assert(docId != 0); - abortPendingDoc(docId); - _removeDocs.push_back(docId); - _docId = docId; - _elem = 0; - _wpos = 0; - } + void startDoc(uint32_t docId); void endDoc(); diff --git a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp index 10918a83c50..326b7b0967a 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp @@ -389,7 +389,7 @@ UrlFieldInverter::pushDocuments() _hostname->pushDocuments(); } -UrlFieldInverter::UrlFieldInverter(index::Schema::CollectionType collectionType, +UrlFieldInverter::UrlFieldInverter(index::schema::CollectionType collectionType, FieldInverter *all, 
FieldInverter *scheme, FieldInverter *host, diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h index 088019749af..c6646f2e61f 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h @@ -5,6 +5,8 @@ #include <vespa/searchlib/bitcompression/compression.h> #include <vespa/searchlib/bitcompression/posocccompression.h> #include <vespa/searchlib/bitcompression/posocc_fields_params.h> +#include <vespa/searchcommon/common/schema.h> + namespace vespalib { class Rand48; } diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp index 87efc8132ee..8d5f6d6db4e 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp @@ -8,6 +8,7 @@ #include <vespa/searchlib/diskindex/zc4_posting_params.h> #include <vespa/searchlib/diskindex/zc4_posting_reader.h> #include <vespa/searchlib/diskindex/zc4_posting_writer.h> +#include <vespa/searchlib/index/postinglistparams.h> using search::fef::TermFieldMatchData; using search::fef::TermFieldMatchDataArray; diff --git a/staging_vespalib/src/vespa/vespalib/stllike/cache.h b/staging_vespalib/src/vespa/vespalib/stllike/cache.h index 0f4349eb15a..181bb2ac63a 100644 --- a/staging_vespalib/src/vespa/vespalib/stllike/cache.h +++ b/staging_vespalib/src/vespa/vespalib/stllike/cache.h @@ -3,6 +3,7 @@ #include <vespa/vespalib/stllike/lrucache_map.h> #include <atomic> +#include <mutex> namespace vespalib { |