diff options
author | Tor Egge <Tor.Egge@online.no> | 2024-04-16 12:31:46 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2024-04-16 12:31:46 +0200 |
commit | b56c435909bdbec532d4f29c41e9d54900bc78fd (patch) | |
tree | dd6a22417f29fbbb9ec77eedacaec0a80c1ddb0c | |
parent | fca990d5ed32c408df42bbe178b174711fa54a08 (diff) |
Pad disk index dictionary files at end.
4 files changed, 24 insertions, 18 deletions
diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp index 0f089c60e4b..f3fc31ac8b1 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp +++ b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp @@ -359,6 +359,24 @@ getParams(PostingListParams &params) const params.clear(); } +template <bool bigEndian> +void +FeatureEncodeContext<bigEndian>::pad_for_memory_map_and_flush() +{ + // Write some pad bits to avoid decompression readahead going past + // memory mapped file during search and into SIGSEGV territory. + + // First pad to 64 bits alignment. + this->smallAlign(64); + writeComprBufferIfNeeded(); + + // Then write 128 more bits. This allows for 64-bit decoding + // with a readbits that always leaves a nonzero preRead + padBits(128); + this->alignDirectIO(); + this->flush(); + writeComprBuffer(); // Also flushes slack +} template <bool bigEndian> void diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.h b/searchlib/src/vespa/searchlib/bitcompression/compression.h index 9d4ca38eed3..4124f1f659f 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/compression.h +++ b/searchlib/src/vespa/searchlib/bitcompression/compression.h @@ -1595,6 +1595,8 @@ public: writeComprBufferIfNeeded(); } + void pad_for_memory_map_and_flush(); + virtual void readHeader(const vespalib::GenericHeader &header, const vespalib::string &prefix); virtual void writeHeader(vespalib::GenericHeader &header, const vespalib::string &prefix) const; virtual const vespalib::string &getIdentifier() const; diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp index 387d95bce66..bceeb1e7bc1 100644 --- a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp @@ -269,11 +269,9 @@ 
PageDict4FileSeqWrite::DictFileContext::DictFileContext(bool extended, vespalib: } bool -PageDict4FileSeqWrite::DictFileContext::DictFileContext::close() { - //uint64_t usedPBits = _ec.getWriteOffset(); - _ec.flush(); - _writeContext.writeComprBuffer(true); - +PageDict4FileSeqWrite::DictFileContext::DictFileContext::close() +{ + _ec.pad_for_memory_map_and_flush(); _writeContext.dropComprBuf(); bool success = _file.Sync(); success &= _file.Close(); diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer.cpp b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer.cpp index c7480633e21..f2b7911ba55 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_writer.cpp @@ -247,19 +247,7 @@ template <bool bigEndian> void Zc4PostingWriter<bigEndian>::on_close() { - // Write some pad bits to avoid decompression readahead going past - // memory mapped file during search and into SIGSEGV territory. - - // First pad to 64 bits alignment. - _encode_context.smallAlign(64); - _encode_context.writeComprBufferIfNeeded(); - - // Then write 128 more bits. This allows for 64-bit decoding - // with a readbits that always leaves a nonzero preRead - _encode_context.padBits(128); - _encode_context.alignDirectIO(); - _encode_context.flush(); - _encode_context.writeComprBuffer(); // Also flushes slack + _encode_context.pad_for_memory_map_and_flush(); } template class Zc4PostingWriter<false>; |