summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2022-01-27 16:27:31 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2022-01-27 20:54:24 +0000
commit 67837c8bbd9583a0b0cdd388436328a4b8649362 (patch)
tree efe7cfe9b17c1726abe20c0cd5d383407ed7cf79
parent 1452e6339dfcbd9168c780966d4cf33ce6a71cf1 (diff)
Refactor to reduce code duplication for write path.
-rw-r--r-- searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp | 7
-rw-r--r-- searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp | 1
-rw-r--r-- searchlib/src/vespa/searchlib/bitcompression/compression.cpp | 1
-rw-r--r-- searchlib/src/vespa/searchlib/bitcompression/compression.h | 6
-rw-r--r-- searchlib/src/vespa/searchlib/bitcompression/countcompression.h | 8
-rw-r--r-- searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp | 5
-rw-r--r-- searchlib/src/vespa/searchlib/bitcompression/posocccompression.h | 31
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/diskindex.h | 1
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/extposocc.cpp | 2
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/fieldwriter.h | 5
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp | 404
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/pagedict4file.h | 49
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer_base.cpp | 5
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp | 4
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/zcposting.cpp | 9
-rw-r--r-- searchlib/src/vespa/searchlib/index/dictionaryfile.h | 1
-rw-r--r-- searchlib/src/vespa/searchlib/index/docidandfeatures.cpp | 20
-rw-r--r-- searchlib/src/vespa/searchlib/index/docidandfeatures.h | 6
-rw-r--r-- searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp | 19
-rw-r--r-- searchlib/src/vespa/searchlib/index/postinglistcountfile.h | 3
-rw-r--r-- searchlib/src/vespa/searchlib/index/postinglistfile.cpp | 17
-rw-r--r-- searchlib/src/vespa/searchlib/index/postinglistfile.h | 3
-rw-r--r-- searchlib/src/vespa/searchlib/index/postinglistparams.cpp | 36
-rw-r--r-- searchlib/src/vespa/searchlib/index/postinglistparams.h | 34
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp | 4
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/feature_store.h | 1
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp | 1
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp | 12
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_inverter.h | 17
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp | 2
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h | 2
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp | 1
-rw-r--r-- staging_vespalib/src/vespa/vespalib/stllike/cache.h | 1
33 files changed, 273 insertions, 445 deletions
diff --git a/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp b/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp
index c5ea12a7568..570f1a6ea03 100644
--- a/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp
+++ b/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp
@@ -1,13 +1,16 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/log/log.h>
-LOG_SETUP("bitvector_test");
+
#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/searchlib/index/field_length_info.h>
#include <vespa/searchlib/diskindex/bitvectordictionary.h>
#include <vespa/searchlib/diskindex/fieldwriter.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchcommon/common/schema.h>
#include <vespa/vespalib/io/fileutil.h>
+#include <vespa/log/log.h>
+LOG_SETUP("bitvector_test");
+
using namespace search::index;
using search::index::schema::DataType;
diff --git a/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp b/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp
index 9a326af4c3b..34f9f7d27a9 100644
--- a/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp
+++ b/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/searchlib/memoryindex/feature_store.h>
+#include <vespa/searchcommon/common/schema.h>
#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/log/log.h>
diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp
index 0d5835b4fa9..5ac506e4fc2 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp
+++ b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp
@@ -3,6 +3,7 @@
#include "compression.h"
#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/index/postinglistparams.h>
#include <vespa/vespalib/data/fileheader.h>
#include <vespa/vespalib/data/databuffer.h>
#include <vespa/vespalib/util/arrayref.h>
diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.h b/searchlib/src/vespa/searchlib/bitcompression/compression.h
index 973d622461a..45005d499fb 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/compression.h
+++ b/searchlib/src/vespa/searchlib/bitcompression/compression.h
@@ -3,7 +3,6 @@
#pragma once
#include <vespa/searchlib/util/comprfile.h>
-#include <vespa/searchlib/index/postinglistparams.h>
#include <vespa/vespalib/stllike/string.h>
#include <cassert>
@@ -14,7 +13,10 @@ template <typename T> class ConstArrayRef;
}
-namespace search::index { class DocIdAndFeatures; }
+namespace search::index {
+ class DocIdAndFeatures;
+ class PostingListParams;
+}
namespace search::fef { class TermFieldMatchDataArray; }
diff --git a/searchlib/src/vespa/searchlib/bitcompression/countcompression.h b/searchlib/src/vespa/searchlib/bitcompression/countcompression.h
index 664a1245c2f..6eb37e1d1ad 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/countcompression.h
+++ b/searchlib/src/vespa/searchlib/bitcompression/countcompression.h
@@ -19,15 +19,13 @@ public:
uint32_t _minChunkDocs; // Minimum number of documents for chunking
uint32_t _docIdLimit; // Limit for document ids (docId < docIdLimit)
uint64_t _numWordIds; // Number of words in dictionary
- uint64_t _minWordNum; // Minimum word number
PostingListCountFileDecodeContext()
: ParentClass(),
_avgBitsPerDoc(10),
_minChunkDocs(262144),
_docIdLimit(10000000),
- _numWordIds(0),
- _minWordNum(0u)
+ _numWordIds(0)
{
}
@@ -50,15 +48,13 @@ public:
uint32_t _minChunkDocs; // Minimum number of documents for chunking
uint32_t _docIdLimit; // Limit for document ids (docId < docIdLimit)
uint64_t _numWordIds; // Number of words in dictionary
- uint64_t _minWordNum; // Mininum word number
PostingListCountFileEncodeContext()
: ParentClass(),
_avgBitsPerDoc(10),
_minChunkDocs(262144),
_docIdLimit(10000000),
- _numWordIds(0),
- _minWordNum(0u)
+ _numWordIds(0)
{
}
diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp
index 76a65a7244a..fd6c723e901 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp
+++ b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp
@@ -1,10 +1,10 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "compression.h"
#include "posocccompression.h"
#include "posocc_fields_params.h"
#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/index/postinglistparams.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vespalib/data/fileheader.h>
@@ -36,8 +36,7 @@ EG2PosOccDecodeContext<bigEndian>::
readHeader(const vespalib::GenericHeader &header,
const vespalib::string &prefix)
{
- const_cast<PosOccFieldsParams *>(_fieldsParams)->readHeader(header,
- prefix);
+ const_cast<PosOccFieldsParams *>(_fieldsParams)->readHeader(header, prefix);
}
diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h
index 184e2414638..aadd58f9152 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h
+++ b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h
@@ -1,8 +1,8 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
+#include "compression.h"
#include <vespa/searchlib/index/docidandfeatures.h>
-#include <vespa/searchcommon/common/schema.h>
#define K_VALUE_POSOCC_FIRST_WORDPOS 8
@@ -16,35 +16,6 @@
#define K_VALUE_POSOCC_ELEMENTID 0
#define K_VALUE_POSOCC_ELEMENTWEIGHT 9
-namespace search::index {
-
-class DocIdAndPosOccFeatures : public DocIdAndFeatures
-{
-public:
-
- void
- addNextOcc(uint32_t elementId,
- uint32_t wordPos,
- int32_t elementWeight,
- uint32_t elementLen)
- {
- assert(wordPos < elementLen);
- if (_elements.empty() || elementId > _elements.back().getElementId()) {
- _elements.emplace_back(elementId, elementWeight, elementLen);
- } else {
- assert(elementId == _elements.back().getElementId());
- assert(elementWeight == _elements.back().getWeight());
- assert(elementLen == _elements.back().getElementLen());
- }
- assert(_elements.back().getNumOccs() == 0 ||
- wordPos > _word_positions.back().getWordPos());
- _elements.back().incNumOccs();
- _word_positions.emplace_back(wordPos);
- }
-};
-
-}
-
namespace search::bitcompression {
class PosOccFieldsParams;
diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.h b/searchlib/src/vespa/searchlib/diskindex/diskindex.h
index 05492a59ee3..12be8979cc3 100644
--- a/searchlib/src/vespa/searchlib/diskindex/diskindex.h
+++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.h
@@ -7,6 +7,7 @@
#include <vespa/searchlib/index/dictionaryfile.h>
#include <vespa/searchlib/index/field_length_info.h>
#include <vespa/searchlib/queryeval/searchable.h>
+#include <vespa/searchcommon/common/schema.h>
#include <vespa/vespalib/stllike/string.h>
#include <vespa/vespalib/stllike/cache.h>
diff --git a/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp b/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp
index a4e9e4d06f7..dcf897df955 100644
--- a/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp
@@ -5,8 +5,8 @@
#include "fileheader.h"
#include <vespa/searchlib/index/postinglistcounts.h>
#include <vespa/searchlib/index/docidandfeatures.h>
-#include <vespa/searchlib/index/postinglistcounts.h>
#include <vespa/searchlib/index/postinglistcountfile.h>
+#include <vespa/searchlib/index/postinglistparams.h>
#include <vespa/log/log.h>
LOG_SETUP(".diskindex.extposocc");
diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h
index f1b5c487e40..bf62965719d 100644
--- a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h
+++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h
@@ -4,9 +4,10 @@
#include "bitvectorfile.h"
#include <vespa/searchlib/index/dictionaryfile.h>
#include <vespa/searchlib/index/postinglistfile.h>
-#include <vespa/searchlib/bitcompression/compression.h>
-#include <vespa/searchlib/bitcompression/countcompression.h>
#include <vespa/searchlib/bitcompression/posocccompression.h>
+#include <vespa/searchlib/bitcompression/countcompression.h>
+
+namespace search::index { class Schema; }
namespace search::diskindex {
diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp
index 32d0105b7c1..4462c90f4c5 100644
--- a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp
@@ -6,6 +6,7 @@
#include <vespa/vespalib/data/fileheader.h>
#include <vespa/vespalib/io/fileutil.h>
#include <vespa/vespalib/util/size_literals.h>
+#include <vespa/fastos/file.h>
#include <vespa/log/log.h>
LOG_SETUP(".diskindex.pagedict4file");
@@ -123,7 +124,6 @@ PageDict4FileSeqRead::PageDict4FileSeqRead()
_wordNum(0u)
{ }
-
PageDict4FileSeqRead::~PageDict4FileSeqRead() = default;
void
@@ -192,7 +192,6 @@ PageDict4FileSeqRead::open(const vespalib::string &name,
return true;
}
-
bool
PageDict4FileSeqRead::close()
{
@@ -204,7 +203,6 @@ PageDict4FileSeqRead::close()
return true;
}
-
void
PageDict4FileSeqRead::getParams(PostingListParams &params)
{
@@ -219,32 +217,83 @@ PageDict4FileSeqRead::getParams(PostingListParams &params)
}
}
+struct PageDict4FileSeqWrite::DictFileContext {
+ DictFileContext(bool extended, vespalib::stringref id, vespalib::stringref desc,
+ const vespalib::string &name, const TuneFileSeqWrite &tune);
+ ~DictFileContext();
+ void makeHeader(const FileHeaderContext &fileHeaderContext);
+ bool updateHeader(uint64_t fileBitSize, uint64_t wordNum);
+ void writeExtendedHeader(vespalib::GenericHeader &header);
+ bool close();
+ const vespalib::string _id;
+ const vespalib::string _desc;
+ const bool _extended;
+ uint32_t _headerLen;
+ bool _valid;
+ EC _ec;
+ ComprFileWriteContext _writeContext;
+ FastOS_File _file;
+};
-PageDict4FileSeqWrite::PageDict4FileSeqWrite()
- : _pWriter(),
- _spWriter(),
- _ssWriter(),
- _pe(),
- _pWriteContext(_pe),
- _pfile(),
- _spe(),
- _spWriteContext(_spe),
- _spfile(),
- _sse(),
- _ssWriteContext(_sse),
- _ssfile(),
- _pHeaderLen(0),
- _spHeaderLen(0),
- _ssHeaderLen(0)
+PageDict4FileSeqWrite::DictFileContext::DictFileContext(bool extended, vespalib::stringref id, vespalib::stringref desc,
+ const vespalib::string & name, const TuneFileSeqWrite &tune)
+ : _id(id),
+ _desc(desc),
+ _extended(extended),
+ _headerLen(0u),
+ _valid(false),
+ _ec(),
+ _writeContext(_ec),
+ _file()
{
- _pe.setWriteContext(&_pWriteContext);
- _spe.setWriteContext(&_spWriteContext);
- _sse.setWriteContext(&_ssWriteContext);
+ _ec.setWriteContext(&_writeContext);
+ if (tune.getWantSyncWrites()) {
+ _file.EnableSyncWrites();
+ }
+ if (tune.getWantDirectIO()) {
+ _file.EnableDirectIO();
+ }
+ bool ok = _file.OpenWriteOnly(name.c_str());
+ assertOpenWriteOnly(ok, name);
+ _writeContext.setFile(&_file);
+ _writeContext.allocComprBuf(64_Ki, 32_Ki);
+ uint64_t fileSize = _file.GetSize();
+ uint64_t bufferStartFilePos = _writeContext.getBufferStartFilePos();
+ assert(fileSize >= bufferStartFilePos);
+ _file.SetSize(bufferStartFilePos);
+ assert(bufferStartFilePos == static_cast<uint64_t>(_file.GetPosition()));
+
+ _ec.setupWrite(_writeContext);
+ assert(_ec.getWriteOffset() == 0);
+ _valid = true;
}
+bool
+PageDict4FileSeqWrite::DictFileContext::DictFileContext::close() {
+ //uint64_t usedPBits = _ec.getWriteOffset();
+ _ec.flush();
+ _writeContext.writeComprBuffer(true);
+
+ _writeContext.dropComprBuf();
+ bool success = _file.Sync();
+ success &= _file.Close();
+ _writeContext.setFile(nullptr);
+ return success;
+}
-PageDict4FileSeqWrite::~PageDict4FileSeqWrite() = default;
+PageDict4FileSeqWrite::DictFileContext::~DictFileContext() = default;
+PageDict4FileSeqWrite::PageDict4FileSeqWrite()
+ : _params(),
+ _pWriter(),
+ _spWriter(),
+ _ssWriter(),
+ _ss(),
+ _sp(),
+ _p()
+{ }
+
+PageDict4FileSeqWrite::~PageDict4FileSeqWrite() = default;
void
PageDict4FileSeqWrite::writeWord(vespalib::stringref word, const PostingListCounts &counts)
@@ -252,122 +301,48 @@ PageDict4FileSeqWrite::writeWord(vespalib::stringref word, const PostingListCoun
_pWriter->addCounts(word, counts);
}
-
bool
PageDict4FileSeqWrite::open(const vespalib::string &name,
- const TuneFileSeqWrite &tuneFileWrite,
+ const TuneFileSeqWrite &tune,
const FileHeaderContext &fileHeaderContext)
{
assert( ! _pWriter);
assert( ! _spWriter);
assert( ! _ssWriter);
-
- vespalib::string pname = name + ".pdat";
- vespalib::string spname = name + ".spdat";
- vespalib::string ssname = name + ".ssdat";
-
- if (tuneFileWrite.getWantSyncWrites()) {
- _pfile.EnableSyncWrites();
- _spfile.EnableSyncWrites();
- _ssfile.EnableSyncWrites();
- }
- if (tuneFileWrite.getWantDirectIO()) {
- _pfile.EnableDirectIO();
- _spfile.EnableDirectIO();
- _ssfile.EnableDirectIO();
- }
- bool ok = _pfile.OpenWriteOnly(pname.c_str());
- assertOpenWriteOnly(ok, pname);
- _pWriteContext.setFile(&_pfile);
-
- ok = _spfile.OpenWriteOnly(spname.c_str());
- assertOpenWriteOnly(ok, spname);
- _spWriteContext.setFile(&_spfile);
-
- ok = _ssfile.OpenWriteOnly(ssname.c_str());
- assertOpenWriteOnly(ok, ssname);
- _ssWriteContext.setFile(&_ssfile);
-
- _pWriteContext.allocComprBuf(64_Ki, 32_Ki);
- _spWriteContext.allocComprBuf(64_Ki, 32_Ki);
- _ssWriteContext.allocComprBuf(64_Ki, 32_Ki);
-
- uint64_t pFileSize = _pfile.GetSize();
- uint64_t spFileSize = _spfile.GetSize();
- uint64_t ssFileSize = _ssfile.GetSize();
- uint64_t pBufferStartFilePos = _pWriteContext.getBufferStartFilePos();
- uint64_t spBufferStartFilePos = _spWriteContext.getBufferStartFilePos();
- uint64_t ssBufferStartFilePos = _ssWriteContext.getBufferStartFilePos();
- assert(pFileSize >= pBufferStartFilePos);
- assert(spFileSize >= spBufferStartFilePos);
- assert(ssFileSize >= ssBufferStartFilePos);
- (void) pFileSize;
- (void) spFileSize;
- (void) ssFileSize;
- _pfile.SetSize(pBufferStartFilePos);
- _spfile.SetSize(spBufferStartFilePos);
- _ssfile.SetSize(ssBufferStartFilePos);
- assert(pBufferStartFilePos == static_cast<uint64_t>(_pfile.GetPosition()));
- assert(spBufferStartFilePos ==
- static_cast<uint64_t>(_spfile.GetPosition()));
- assert(ssBufferStartFilePos ==
- static_cast<uint64_t>(_ssfile.GetPosition()));
-
- _pe.setupWrite(_pWriteContext);
- _spe.setupWrite(_spWriteContext);
- _sse.setupWrite(_ssWriteContext);
- assert(_pe.getWriteOffset() == 0);
- assert(_spe.getWriteOffset() == 0);
- assert(_sse.getWriteOffset() == 0);
- _spe.copyParams(_sse);
- _pe.copyParams(_sse);
+ _ss = std::make_unique<DictFileContext>(true, mySSId, "Dictionary sparse sparse file", name + ".ssdat", tune);
+ _sp = std::make_unique<DictFileContext>(false, mySPId, "Dictionary sparse page file", name + ".spdat", tune);
+ _p = std::make_unique<DictFileContext>(false, myPId, "Dictionary page file", name + ".pdat", tune);
+ activateParams(_params);
// Write initial file headers
- makePHeader(fileHeaderContext);
- makeSPHeader(fileHeaderContext);
- makeSSHeader(fileHeaderContext);
+ _p->makeHeader(fileHeaderContext);
+ _sp->makeHeader(fileHeaderContext);
+ _ss->makeHeader(fileHeaderContext);
- _ssWriter = std::make_unique<SSWriter>(_sse);
- _spWriter = std::make_unique<SPWriter>(*_ssWriter, _spe);
- _pWriter = std::make_unique<PWriter>(*_spWriter, _pe);
+ _ssWriter = std::make_unique<SSWriter>(_ss->_ec);
+ _spWriter = std::make_unique<SPWriter>(*_ssWriter, _sp->_ec);
+ _pWriter = std::make_unique<PWriter>(*_spWriter, _p->_ec);
_spWriter->setup();
_pWriter->setup();
-
return true;
}
-
bool
PageDict4FileSeqWrite::close()
{
bool success = true;
_pWriter->flush();
- uint64_t usedPBits = _pe.getWriteOffset();
- uint64_t usedSPBits = _spe.getWriteOffset();
- uint64_t usedSSBits = _sse.getWriteOffset();
- _pe.flush();
- _pWriteContext.writeComprBuffer(true);
- _spe.flush();
- _spWriteContext.writeComprBuffer(true);
- _sse.flush();
- _ssWriteContext.writeComprBuffer(true);
-
- _pWriteContext.dropComprBuf();
- success &= _pfile.Sync();
- success &= _pfile.Close();
- _pWriteContext.setFile(nullptr);
- _spWriteContext.dropComprBuf();
- success &= _spfile.Sync();
- success &= _spfile.Close();
- _spWriteContext.setFile(nullptr);
- _ssWriteContext.dropComprBuf();
- success &= _ssfile.Sync();
- success &= _ssfile.Close();
- _ssWriteContext.setFile(nullptr);
+ uint64_t usedPBits = _p->_ec.getWriteOffset();
+ uint64_t usedSPBits = _sp->_ec.getWriteOffset();
+ uint64_t usedSSBits = _ss->_ec.getWriteOffset();
+ success &= _p->close();
+ success &= _sp->close();
+ success &= _ss->close();
+ uint64_t wordNum = _pWriter->getWordNum();
// Update file headers
- success &= updatePHeader(usedPBits);
- success &= updateSPHeader(usedSPBits);
- success &= updateSSHeader(usedSSBits);
+ success &= _p->updateHeader(usedPBits, wordNum);
+ success &= _sp->updateHeader(usedSPBits, wordNum);
+ success &= _ss->updateHeader(usedSSBits, wordNum);
_pWriter.reset();
_spWriter.reset();
@@ -376,192 +351,99 @@ PageDict4FileSeqWrite::close()
return success;
}
-
void
-PageDict4FileSeqWrite::writeSSSubHeader(vespalib::GenericHeader &header)
+PageDict4FileSeqWrite::DictFileContext::writeExtendedHeader(vespalib::GenericHeader &header)
{
- SSEC &e = _sse;
typedef vespalib::GenericHeader::Tag Tag;
- header.putTag(Tag("numWordIds", e._numWordIds));
- header.putTag(Tag("avgBitsPerDoc", e._avgBitsPerDoc));
- header.putTag(Tag("minChunkDocs", e._minChunkDocs));
- header.putTag(Tag("docIdLimit", e._docIdLimit));
+ header.putTag(Tag("numWordIds", _ec._numWordIds));
+ header.putTag(Tag("avgBitsPerDoc", _ec._avgBitsPerDoc));
+ header.putTag(Tag("minChunkDocs", _ec._minChunkDocs));
+ header.putTag(Tag("docIdLimit", _ec._docIdLimit));
}
-
void
-PageDict4FileSeqWrite::makePHeader(const FileHeaderContext &fileHeaderContext)
+PageDict4FileSeqWrite::DictFileContext::makeHeader(const FileHeaderContext &fileHeaderContext)
{
- PEC &e = _pe;
- ComprFileWriteContext &wc = _pWriteContext;
-
- // subheader only written to SS file.
-
typedef vespalib::GenericHeader::Tag Tag;
vespalib::FileHeader header(FileSettings::DIRECTIO_ALIGNMENT);
- fileHeaderContext.addTags(header, _pfile.GetFileName());
+ fileHeaderContext.addTags(header, _file.GetFileName());
header.putTag(Tag("frozen", 0));
header.putTag(Tag("fileBitSize", 0));
- header.putTag(Tag("format.0", myPId));
+ header.putTag(Tag("format.0", _id));
header.putTag(Tag("endian", "big"));
- header.putTag(Tag("desc", "Dictionary page file"));
- e.setupWrite(wc);
- e.writeHeader(header);
- e.smallAlign(64);
- e.flush();
- uint32_t headerLen = header.getSize();
- headerLen += (-headerLen & 7);
- assert(e.getWriteOffset() == headerLen * 8);
- assert((e.getWriteOffset() & 63) == 0); // Header must be word aligned
- if (_pHeaderLen != 0) {
- assert(_pHeaderLen == headerLen);
+ header.putTag(Tag("desc", _desc));
+ if (_extended) {
+ writeExtendedHeader(header);
}
- _pHeaderLen = headerLen;
-}
-
-
-void
-PageDict4FileSeqWrite::makeSPHeader(const FileHeaderContext &fileHeaderContext)
-{
- SPEC &e = _spe;
- ComprFileWriteContext &wc = _spWriteContext;
-
- // subheader only written to SS file.
-
- typedef vespalib::GenericHeader::Tag Tag;
- vespalib::FileHeader header(FileSettings::DIRECTIO_ALIGNMENT);
-
- fileHeaderContext.addTags(header, _spfile.GetFileName());
- header.putTag(Tag("frozen", 0));
- header.putTag(Tag("fileBitSize", 0));
- header.putTag(Tag("format.0", mySPId));
- header.putTag(Tag("endian", "big"));
- header.putTag(Tag("desc", "Dictionary sparse page file"));
- e.setupWrite(wc);
- e.writeHeader(header);
- e.smallAlign(64);
- e.flush();
- uint32_t headerLen = header.getSize();
- headerLen += (-headerLen & 7);
- assert(e.getWriteOffset() == headerLen * 8);
- assert((e.getWriteOffset() & 63) == 0); // Header must be word aligned
- if (_spHeaderLen != 0) {
- assert(_spHeaderLen == headerLen);
- }
- _spHeaderLen = headerLen;
-}
-
-
-void
-PageDict4FileSeqWrite::makeSSHeader(const FileHeaderContext &fileHeaderContext)
-{
- SSEC &e = _sse;
- ComprFileWriteContext &wc = _ssWriteContext;
-
- typedef vespalib::GenericHeader::Tag Tag;
- vespalib::FileHeader header(FileSettings::DIRECTIO_ALIGNMENT);
-
- fileHeaderContext.addTags(header, _ssfile.GetFileName());
- header.putTag(Tag("frozen", 0));
- header.putTag(Tag("fileBitSize", 0));
- header.putTag(Tag("format.0", mySSId));
- header.putTag(Tag("endian", "big"));
- header.putTag(Tag("desc", "Dictionary sparse sparse file"));
- writeSSSubHeader(header);
-
- e.setupWrite(wc);
- e.writeHeader(header);
- e.smallAlign(64);
- e.flush();
+ _ec.setupWrite(_writeContext);
+ _ec.writeHeader(header);
+ _ec.smallAlign(64);
+ _ec.flush();
uint32_t headerLen = header.getSize();
headerLen += (-headerLen & 7);
- assert(e.getWriteOffset() == headerLen * 8);
- assert((e.getWriteOffset() & 63) == 0); // Header must be word aligned
- if (_ssHeaderLen != 0) {
- assert(_ssHeaderLen == headerLen);
+ assert(_ec.getWriteOffset() == headerLen * 8);
+ assert((_ec.getWriteOffset() & 63) == 0); // Header must be word aligned
+ if (_headerLen != 0) {
+ assert(_headerLen == headerLen);
}
- _ssHeaderLen = headerLen;
-}
-
-
-bool
-PageDict4FileSeqWrite::updatePHeader(uint64_t fileBitSize)
-{
- vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT);
- FastOS_File f;
- f.OpenReadWrite(_pfile.GetFileName());
- h.readFile(f);
- FileHeaderContext::setFreezeTime(h);
- typedef vespalib::GenericHeader::Tag Tag;
- h.putTag(Tag("frozen", 1));
- h.putTag(Tag("fileBitSize", fileBitSize));
- h.rewriteFile(f);
- bool success = f.Sync();
- success &= f.Close();
- return success;
+ _headerLen = headerLen;
}
-
bool
-PageDict4FileSeqWrite::updateSPHeader(uint64_t fileBitSize)
+PageDict4FileSeqWrite::DictFileContext::updateHeader(uint64_t fileBitSize, uint64_t wordNum)
{
vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT);
FastOS_File f;
- f.OpenReadWrite(_spfile.GetFileName());
+ f.OpenReadWrite(_file.GetFileName());
h.readFile(f);
FileHeaderContext::setFreezeTime(h);
typedef vespalib::GenericHeader::Tag Tag;
h.putTag(Tag("frozen", 1));
h.putTag(Tag("fileBitSize", fileBitSize));
+ if (_extended) {
+ assert(wordNum <= _ec._numWordIds);
+ h.putTag(Tag("numWordIds", wordNum));
+ }
h.rewriteFile(f);
bool success = f.Sync();
success &= f.Close();
return success;
}
-
-bool
-PageDict4FileSeqWrite::updateSSHeader(uint64_t fileBitSize)
-{
- vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT);
- FastOS_File f;
- f.OpenReadWrite(_ssfile.GetFileName());
- h.readFile(f);
- FileHeaderContext::setFreezeTime(h);
- typedef vespalib::GenericHeader::Tag Tag;
- h.putTag(Tag("frozen", 1));
- h.putTag(Tag("fileBitSize", fileBitSize));
- uint64_t wordNum = _pWriter->getWordNum();
- assert(wordNum <= _sse._numWordIds);
- h.putTag(Tag("numWordIds", wordNum));
- h.rewriteFile(f);
- bool success = f.Sync();
- success &= f.Close();
- return success;
+void
+PageDict4FileSeqWrite::setParams(const PostingListParams &params) {
+ _params.add(params);
+ if (_ss) {
+ activateParams(_params);
+ }
}
-
void
-PageDict4FileSeqWrite::setParams(const PostingListParams &params)
-{
- params.get("avgBitsPerDoc", _sse._avgBitsPerDoc);
- params.get("minChunkDocs", _sse._minChunkDocs);
- params.get("docIdLimit", _sse._docIdLimit);
- params.get("numWordIds", _sse._numWordIds);
- _spe.copyParams(_sse);
- _pe.copyParams(_sse);
+PageDict4FileSeqWrite::activateParams(const PostingListParams &params) {
+ assert(_ss);
+ EC & ec = _ss->_ec;
+ params.get("avgBitsPerDoc", ec._avgBitsPerDoc);
+ params.get("minChunkDocs", ec._minChunkDocs);
+ params.get("docIdLimit", ec._docIdLimit);
+ params.get("numWordIds", ec._numWordIds);
+ _sp->_ec.copyParams(_ss->_ec);
+ _p->_ec.copyParams(_ss->_ec);
}
-
void
PageDict4FileSeqWrite::getParams(PostingListParams &params)
{
params.clear();
- params.set("avgBitsPerDoc", _sse._avgBitsPerDoc);
- params.set("minChunkDocs", _sse._minChunkDocs);
- params.set("docIdLimit", _sse._docIdLimit);
- params.set("numWordIds", _sse._numWordIds);
+ if (_ss) {
+ EC &ec = _ss->_ec;
+ params.set("avgBitsPerDoc", ec._avgBitsPerDoc);
+ params.set("minChunkDocs", ec._minChunkDocs);
+ params.set("docIdLimit", ec._docIdLimit);
+ params.set("numWordIds", ec._numWordIds);
+ } else {
+ params = _params;
+ }
}
}
diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h
index 6e2ab6f9ffa..1c43c20a219 100644
--- a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h
+++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h
@@ -2,10 +2,8 @@
#pragma once
#include <vespa/searchlib/index/dictionaryfile.h>
-#include <vespa/searchlib/bitcompression/compression.h>
-#include <vespa/searchlib/bitcompression/countcompression.h>
+#include <vespa/searchlib/index/postinglistparams.h>
#include <vespa/searchlib/bitcompression/pagedict4.h>
-#include <vespa/fastos/file.h>
namespace vespalib { class GenericHeader; }
@@ -47,44 +45,23 @@ public:
*/
class PageDict4FileSeqWrite : public index::DictionaryFileSeqWrite
{
- typedef bitcompression::PostingListCountFileEncodeContext EC;
- typedef EC SPEC;
- typedef EC PEC;
- typedef EC SSEC;
- typedef bitcompression::PageDict4SSWriter SSWriter;
- typedef bitcompression::PageDict4SPWriter SPWriter;
- typedef bitcompression::PageDict4PWriter PWriter;
-
- typedef index::PostingListCounts PostingListCounts;
+ using EC = bitcompression::PostingListCountFileEncodeContext;
+ using SSWriter = bitcompression::PageDict4SSWriter;
+ using SPWriter = bitcompression::PageDict4SPWriter;
+ using PWriter = bitcompression::PageDict4PWriter;
+ using PostingListCounts = index::PostingListCounts;
using FileHeaderContext = common::FileHeaderContext;
+ struct DictFileContext;
+ index::PostingListParams _params;
std::unique_ptr<PWriter> _pWriter;
std::unique_ptr<SPWriter> _spWriter;
std::unique_ptr<SSWriter> _ssWriter;
+ std::unique_ptr<DictFileContext> _ss;
+ std::unique_ptr<DictFileContext> _sp;
+ std::unique_ptr<DictFileContext> _p;
- EC _pe;
- ComprFileWriteContext _pWriteContext;
- FastOS_File _pfile;
-
- EC _spe;
- ComprFileWriteContext _spWriteContext;
- FastOS_File _spfile;
-
- EC _sse;
- ComprFileWriteContext _ssWriteContext;
- FastOS_File _ssfile;
-
- uint32_t _pHeaderLen; // Length of header for page file (bytes)
- uint32_t _spHeaderLen; // Length of header for sparse page file (bytes)
- uint32_t _ssHeaderLen; // Length of header for sparse sparse file (bytes)
-
- void writeSSSubHeader(vespalib::GenericHeader &header);
- void makePHeader(const FileHeaderContext &fileHeaderContext);
- void makeSPHeader(const FileHeaderContext &fileHeaderContext);
- void makeSSHeader(const FileHeaderContext &fileHeaderContext);
- bool updatePHeader(uint64_t fileBitSize);
- bool updateSPHeader(uint64_t fileBitSize);
- bool updateSSHeader(uint64_t fileBitSize);
+ void activateParams(const index::PostingListParams &params);
public:
PageDict4FileSeqWrite();
~PageDict4FileSeqWrite();
@@ -95,7 +72,7 @@ public:
* Open dictionary file for sequential write. The index with most
* words should be first for optimal compression.
*/
- bool open(const vespalib::string &name, const TuneFileSeqWrite &tuneFileWrite,
+ bool open(const vespalib::string &name, const TuneFileSeqWrite &tune,
const FileHeaderContext &fileHeaderContext) override;
bool close() override;
diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer_base.cpp b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer_base.cpp
index a14c880a214..3f44b56706a 100644
--- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer_base.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer_base.cpp
@@ -2,6 +2,7 @@
#include "zc4_posting_writer_base.h"
#include <vespa/searchlib/index/postinglistcounts.h>
+#include <vespa/searchlib/index/postinglistparams.h>
using search::index::PostingListCounts;
using search::index::PostingListParams;
@@ -225,9 +226,7 @@ Zc4PostingWriterBase::Zc4PostingWriterBase(PostingListCounts &counts)
_l4Skip.maybeExpand();
}
-Zc4PostingWriterBase::~Zc4PostingWriterBase()
-{
-}
+Zc4PostingWriterBase::~Zc4PostingWriterBase() = default;
#define L1SKIPSTRIDE 16
#define L2SKIPSTRIDE 8
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp
index 593d5567266..d0b7fb42692 100644
--- a/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp
@@ -3,9 +3,7 @@
#include "zcposocc.h"
#include <vespa/searchlib/index/postinglistcounts.h>
#include <vespa/searchlib/index/postinglistcountfile.h>
-#include <vespa/searchlib/index/postinglistfile.h>
-#include <vespa/searchlib/index/docidandfeatures.h>
-
+#include <vespa/searchlib/index/postinglistparams.h>
namespace search::diskindex {
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp
index 544e8d9f262..1f399971406 100644
--- a/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp
@@ -5,6 +5,7 @@
#include <vespa/searchlib/index/postinglistcountfile.h>
#include <vespa/searchlib/index/postinglistfile.h>
#include <vespa/searchlib/index/docidandfeatures.h>
+#include <vespa/searchlib/index/postinglistparams.h>
#include <vespa/searchlib/common/fileheadercontext.h>
#include <vespa/vespalib/data/fileheader.h>
@@ -46,9 +47,7 @@ Zc4PostingSeqRead::Zc4PostingSeqRead(PostingListCountFileSeqRead *countFile, boo
}
-Zc4PostingSeqRead::~Zc4PostingSeqRead()
-{
-}
+Zc4PostingSeqRead::~Zc4PostingSeqRead() = default;
void
Zc4PostingSeqRead::readDocIdAndFeatures(DocIdAndFeatures &features)
@@ -201,9 +200,7 @@ Zc4PostingSeqWrite(PostingListCountFileSeqWrite *countFile)
}
-Zc4PostingSeqWrite::~Zc4PostingSeqWrite()
-{
-}
+Zc4PostingSeqWrite::~Zc4PostingSeqWrite() = default;
void
diff --git a/searchlib/src/vespa/searchlib/index/dictionaryfile.h b/searchlib/src/vespa/searchlib/index/dictionaryfile.h
index 5063143d323..6c8535f8563 100644
--- a/searchlib/src/vespa/searchlib/index/dictionaryfile.h
+++ b/searchlib/src/vespa/searchlib/index/dictionaryfile.h
@@ -1,7 +1,6 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
-#include "postinglistcounts.h"
#include "postinglisthandle.h"
#include "postinglistcountfile.h"
#include <vespa/searchlib/common/tunefileinfo.h>
diff --git a/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp b/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp
index f62a4bc7997..4341bcb9a46 100644
--- a/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp
+++ b/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp
@@ -1,8 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "docidandfeatures.h"
-#include <vespa/log/log.h>
-LOG_SETUP(".index.docidandfeatures");
+#include <cassert>
namespace search::index {
@@ -23,4 +22,21 @@ DocIdAndFeatures::DocIdAndFeatures(const DocIdAndFeatures &) = default;
DocIdAndFeatures & DocIdAndFeatures::operator = (const DocIdAndFeatures &) = default;
DocIdAndFeatures::~DocIdAndFeatures() = default;
+void
+DocIdAndPosOccFeatures::addNextOcc(uint32_t elementId, uint32_t wordPos, int32_t elementWeight, uint32_t elementLen)
+{
+ assert(wordPos < elementLen);
+ if (_elements.empty() || elementId > _elements.back().getElementId()) {
+ _elements.emplace_back(elementId, elementWeight, elementLen);
+ } else {
+ assert(elementId == _elements.back().getElementId());
+ assert(elementWeight == _elements.back().getWeight());
+ assert(elementLen == _elements.back().getElementLen());
+ }
+ assert(_elements.back().getNumOccs() == 0 ||
+ wordPos > _word_positions.back().getWordPos());
+ _elements.back().incNumOccs();
+ _word_positions.emplace_back(wordPos);
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/index/docidandfeatures.h b/searchlib/src/vespa/searchlib/index/docidandfeatures.h
index 6b1659771fa..e595ec833ef 100644
--- a/searchlib/src/vespa/searchlib/index/docidandfeatures.h
+++ b/searchlib/src/vespa/searchlib/index/docidandfeatures.h
@@ -163,4 +163,10 @@ public:
void set_has_raw_data(bool val) { _has_raw_data = val; }
};
+class DocIdAndPosOccFeatures : public DocIdAndFeatures
+{
+public:
+ void addNextOcc(uint32_t elementId, uint32_t wordPos, int32_t elementWeight, uint32_t elementLen);
+};
+
}
diff --git a/searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp b/searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp
index f4c38636d01..edf4f8c43b2 100644
--- a/searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp
+++ b/searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp
@@ -1,16 +1,12 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "postinglistcountfile.h"
+#include <vespa/searchlib/index/postinglistparams.h>
namespace search::index {
-PostingListCountFileSeqRead::PostingListCountFileSeqRead()
-{
-}
-
-PostingListCountFileSeqRead::~PostingListCountFileSeqRead()
-{
-}
+PostingListCountFileSeqRead::PostingListCountFileSeqRead() = default;
+PostingListCountFileSeqRead::~PostingListCountFileSeqRead() = default;
void
PostingListCountFileSeqRead::
@@ -19,13 +15,8 @@ getParams(PostingListParams &params)
params.clear();
}
-PostingListCountFileSeqWrite::PostingListCountFileSeqWrite()
-{
-}
-
-PostingListCountFileSeqWrite::~PostingListCountFileSeqWrite()
-{
-}
+PostingListCountFileSeqWrite::PostingListCountFileSeqWrite() = default;
+PostingListCountFileSeqWrite::~PostingListCountFileSeqWrite() = default;
void
PostingListCountFileSeqWrite::
diff --git a/searchlib/src/vespa/searchlib/index/postinglistcountfile.h b/searchlib/src/vespa/searchlib/index/postinglistcountfile.h
index 47ec202dad1..7e17fc5bb9e 100644
--- a/searchlib/src/vespa/searchlib/index/postinglistcountfile.h
+++ b/searchlib/src/vespa/searchlib/index/postinglistcountfile.h
@@ -1,9 +1,9 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
-#include "postinglistparams.h"
#include "postinglistcounts.h"
#include <vespa/searchlib/common/tunefileinfo.h>
+#include <vespa/vespalib/stllike/string.h>
namespace search::common { class FileHeaderContext; }
@@ -11,6 +11,7 @@ namespace search::index {
class PostingListCounts;
class PostingListHandle;
+class PostingListParams;
/**
* Interface for count files describing where in a posting list file
diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp
index 15412fcd5f1..4d53790bd73 100644
--- a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp
+++ b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp
@@ -1,14 +1,12 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "postinglistfile.h"
+#include <vespa/searchlib/index/postinglistparams.h>
#include <vespa/fastos/file.h>
namespace search::index {
-PostingListFileSeqRead::PostingListFileSeqRead()
-{
-}
-
+PostingListFileSeqRead::PostingListFileSeqRead() = default;
PostingListFileSeqRead::~PostingListFileSeqRead() = default;
void
@@ -37,9 +35,7 @@ PostingListFileSeqWrite::PostingListFileSeqWrite()
{
}
-PostingListFileSeqWrite::~PostingListFileSeqWrite()
-{
-}
+PostingListFileSeqWrite::~PostingListFileSeqWrite() = default;
void
PostingListFileSeqWrite::
@@ -75,9 +71,7 @@ PostingListFileRandRead()
{
}
-PostingListFileRandRead::~PostingListFileRandRead()
-{
-}
+PostingListFileRandRead::~PostingListFileRandRead() = default;
void
PostingListFileRandRead::afterOpen(FastOS_FileInterface &file)
@@ -117,8 +111,7 @@ readPostingList(const PostingListCounts &counts,
uint32_t numSegments,
PostingListHandle &handle)
{
- _lower->readPostingList(counts, firstSegment, numSegments,
- handle);
+ _lower->readPostingList(counts, firstSegment, numSegments,handle);
}
bool
diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.h b/searchlib/src/vespa/searchlib/index/postinglistfile.h
index d731b3f0f67..a33319e1d4f 100644
--- a/searchlib/src/vespa/searchlib/index/postinglistfile.h
+++ b/searchlib/src/vespa/searchlib/index/postinglistfile.h
@@ -3,8 +3,8 @@
#include "postinglistcounts.h"
#include "postinglisthandle.h"
-#include "postinglistparams.h"
#include <vespa/searchlib/common/tunefileinfo.h>
+#include <vespa/vespalib/stllike/string.h>
class FastOS_FileInterface;
@@ -14,6 +14,7 @@ namespace search::index {
class DocIdAndFeatures;
class FieldLengthInfo;
+class PostingListParams;
/**
* Interface for posting list files containing document ids and features
diff --git a/searchlib/src/vespa/searchlib/index/postinglistparams.cpp b/searchlib/src/vespa/searchlib/index/postinglistparams.cpp
index 74e8f731f6f..6275399c498 100644
--- a/searchlib/src/vespa/searchlib/index/postinglistparams.cpp
+++ b/searchlib/src/vespa/searchlib/index/postinglistparams.cpp
@@ -49,6 +49,12 @@ PostingListParams::clear()
}
void
+PostingListParams::add(const PostingListParams & toAdd)
+{
+ _map.insert(toAdd._map.begin(), toAdd._map.end());
+}
+
+void
PostingListParams::erase(const vespalib::string &key)
{
_map.erase(key);
@@ -62,8 +68,7 @@ PostingListParams::operator!=(const PostingListParams &rhs) const
template <typename TYPE>
void
-PostingListParams::set(const vespalib::string &key,
- const TYPE &val)
+PostingListParams::set(const vespalib::string &key, const TYPE &val)
{
std::ostringstream os;
@@ -73,8 +78,7 @@ PostingListParams::set(const vespalib::string &key,
template <typename TYPE>
void
-PostingListParams::get(const vespalib::string &key,
- TYPE &val) const
+PostingListParams::get(const vespalib::string &key, TYPE &val) const
{
std::istringstream is;
Map::const_iterator it;
@@ -87,35 +91,27 @@ PostingListParams::get(const vespalib::string &key,
}
template void
-PostingListParams::set<bool>(const vespalib::string &key,
- const bool &val);
+PostingListParams::set<bool>(const vespalib::string &key, const bool &val);
template void
-PostingListParams::get<bool>(const vespalib::string &key,
- bool &val) const;
+PostingListParams::get<bool>(const vespalib::string &key, bool &val) const;
template void
-PostingListParams::set<int32_t>(const vespalib::string &key,
- const int32_t &val);
+PostingListParams::set<int32_t>(const vespalib::string &key, const int32_t &val);
template void
-PostingListParams::get<int32_t>(const vespalib::string &key,
- int32_t &val) const;
+PostingListParams::get<int32_t>(const vespalib::string &key, int32_t &val) const;
template void
-PostingListParams::set<uint32_t>(const vespalib::string &key,
- const uint32_t &val);
+PostingListParams::set<uint32_t>(const vespalib::string &key, const uint32_t &val);
template void
-PostingListParams::get<uint32_t>(const vespalib::string &key,
- uint32_t &val) const;
+PostingListParams::get<uint32_t>(const vespalib::string &key, uint32_t &val) const;
template void
-PostingListParams::set<uint64_t>(const vespalib::string &key,
- const uint64_t &val);
+PostingListParams::set<uint64_t>(const vespalib::string &key, const uint64_t &val);
template void
-PostingListParams::get<uint64_t>(const vespalib::string &key,
- uint64_t &val) const;
+PostingListParams::get<uint64_t>(const vespalib::string &key, uint64_t &val) const;
}
diff --git a/searchlib/src/vespa/searchlib/index/postinglistparams.h b/searchlib/src/vespa/searchlib/index/postinglistparams.h
index 9797eef5278..42da5855c23 100644
--- a/searchlib/src/vespa/searchlib/index/postinglistparams.h
+++ b/searchlib/src/vespa/searchlib/index/postinglistparams.h
@@ -7,34 +7,22 @@
namespace search::index {
class PostingListParams {
- typedef std::map<vespalib::string, vespalib::string> Map;
+ using Map = std::map<vespalib::string, vespalib::string>;
Map _map;
public:
template <typename TYPE>
- void
- set(const vespalib::string &key, const TYPE &val);
+ void set(const vespalib::string &key, const TYPE &val);
template <typename TYPE>
- void
- get(const vespalib::string &key, TYPE &val) const;
-
- bool
- isSet(const vespalib::string &key) const;
-
- void
- setStr(const vespalib::string &key, const vespalib::string &val);
-
- const vespalib::string &
- getStr(const vespalib::string &key) const;
-
- void
- clear();
-
- void
- erase(const vespalib::string &key);
-
- bool
- operator!=(const PostingListParams &rhs) const;
+ void get(const vespalib::string &key, TYPE &val) const;
+
+ bool isSet(const vespalib::string &key) const;
+ void setStr(const vespalib::string &key, const vespalib::string &val);
+ const vespalib::string & getStr(const vespalib::string &key) const;
+ void clear();
+ void erase(const vespalib::string &key);
+ bool operator!=(const PostingListParams &rhs) const;
+ void add(const PostingListParams & toAdd);
};
}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp
index fdb2de8fb59..c55de3890cd 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp
@@ -11,6 +11,7 @@
#include <vespa/searchlib/common/schedule_sequenced_task_callback.h>
#include <vespa/vespalib/util/isequencedtaskexecutor.h>
#include <vespa/vespalib/util/retain_guard.h>
+#include <cassert>
namespace search::memoryindex {
@@ -28,8 +29,7 @@ DocumentInverter::DocumentInverter(DocumentInverterContext& context)
{
auto& schema = context.get_schema();
auto& field_indexes = context.get_field_indexes();
- for (uint32_t fieldId = 0; fieldId < schema.getNumIndexFields();
- ++fieldId) {
+ for (uint32_t fieldId = 0; fieldId < schema.getNumIndexFields(); ++fieldId) {
auto &remover(field_indexes.get_remover(fieldId));
auto &inserter(field_indexes.get_inserter(fieldId));
auto &calculator(field_indexes.get_calculator(fieldId));
diff --git a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h
index 2ad1fd78f07..9f17d369208 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h
@@ -3,7 +3,6 @@
#pragma once
#include <vespa/searchlib/index/docidandfeatures.h>
-#include <vespa/searchlib/bitcompression/compression.h>
#include <vespa/searchlib/bitcompression/posocccompression.h>
#include <vespa/searchlib/bitcompression/posocc_fields_params.h>
#include <vespa/vespalib/datastore/datastore.h>
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp
index 5f4d02d23db..c606b9b6340 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp
@@ -5,6 +5,7 @@
#include "ordered_field_index_inserter.h"
#include <vespa/searchlib/bitcompression/posocccompression.h>
#include <vespa/searchlib/index/i_field_length_inspector.h>
+#include <vespa/searchcommon/common/schema.h>
#include <vespa/vespalib/btree/btree.hpp>
#include <vespa/vespalib/btree/btreeiterator.hpp>
#include <vespa/vespalib/btree/btreenode.hpp>
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp
index d96b8491027..a443e994559 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp
@@ -13,6 +13,7 @@
#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
#include <vespa/searchlib/bitcompression/compression.h>
#include <vespa/searchlib/bitcompression/posocccompression.h>
+#include <vespa/searchcommon/common/schema.h>
#include <vespa/searchlib/common/sort.h>
#include <vespa/searchlib/util/url.h>
#include <vespa/vespalib/text/utf8.h>
@@ -443,6 +444,17 @@ FieldInverter::invertField(uint32_t docId, const FieldValue::UP &val)
}
void
+FieldInverter::startDoc(uint32_t docId) {
+ assert(_docId == 0);
+ assert(docId != 0);
+ abortPendingDoc(docId);
+ _removeDocs.push_back(docId);
+ _docId = docId;
+ _elem = 0;
+ _wpos = 0;
+}
+
+void
FieldInverter::invertNormalDocTextField(const FieldValue &val)
{
const vespalib::Identifiable::RuntimeClass & cInfo(val.getClass());
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h
index 56cb1677f67..36dd6339b54 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h
@@ -6,13 +6,14 @@
#include <vespa/document/annotation/span.h>
#include <vespa/document/datatype/datatypes.h>
#include <vespa/document/fieldvalue/document.h>
-#include <vespa/searchlib/bitcompression/compression.h>
-#include <vespa/searchlib/bitcompression/posocccompression.h>
#include <vespa/searchlib/index/docidandfeatures.h>
#include <vespa/vespalib/stllike/allocator.h>
#include <limits>
-namespace search::index { class FieldLengthCalculator; }
+namespace search::index {
+ class FieldLengthCalculator;
+ class Schema;
+}
namespace search::memoryindex {
@@ -310,15 +311,7 @@ public:
_removeDocs.push_back(docId);
}
- void startDoc(uint32_t docId) {
- assert(_docId == 0);
- assert(docId != 0);
- abortPendingDoc(docId);
- _removeDocs.push_back(docId);
- _docId = docId;
- _elem = 0;
- _wpos = 0;
- }
+ void startDoc(uint32_t docId);
void endDoc();
diff --git a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp
index 10918a83c50..326b7b0967a 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp
@@ -389,7 +389,7 @@ UrlFieldInverter::pushDocuments()
_hostname->pushDocuments();
}
-UrlFieldInverter::UrlFieldInverter(index::Schema::CollectionType collectionType,
+UrlFieldInverter::UrlFieldInverter(index::schema::CollectionType collectionType,
FieldInverter *all,
FieldInverter *scheme,
FieldInverter *host,
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h
index 088019749af..c6646f2e61f 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h
@@ -5,6 +5,8 @@
#include <vespa/searchlib/bitcompression/compression.h>
#include <vespa/searchlib/bitcompression/posocccompression.h>
#include <vespa/searchlib/bitcompression/posocc_fields_params.h>
+#include <vespa/searchcommon/common/schema.h>
+
namespace vespalib { class Rand48; }
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp
index 87efc8132ee..8d5f6d6db4e 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp
@@ -8,6 +8,7 @@
#include <vespa/searchlib/diskindex/zc4_posting_params.h>
#include <vespa/searchlib/diskindex/zc4_posting_reader.h>
#include <vespa/searchlib/diskindex/zc4_posting_writer.h>
+#include <vespa/searchlib/index/postinglistparams.h>
using search::fef::TermFieldMatchData;
using search::fef::TermFieldMatchDataArray;
diff --git a/staging_vespalib/src/vespa/vespalib/stllike/cache.h b/staging_vespalib/src/vespa/vespalib/stllike/cache.h
index 0f4349eb15a..181bb2ac63a 100644
--- a/staging_vespalib/src/vespa/vespalib/stllike/cache.h
+++ b/staging_vespalib/src/vespa/vespalib/stllike/cache.h
@@ -3,6 +3,7 @@
#include <vespa/vespalib/stllike/lrucache_map.h>
#include <atomic>
+#include <mutex>
namespace vespalib {