diff options
10 files changed, 62 insertions, 39 deletions
diff --git a/searchcore/src/vespa/searchcore/config/proton.def b/searchcore/src/vespa/searchcore/config/proton.def index 95851af2826..3335af2f02d 100644 --- a/searchcore/src/vespa/searchcore/config/proton.def +++ b/searchcore/src/vespa/searchcore/config/proton.def @@ -195,6 +195,13 @@ summary.cache.compression.type enum {NONE, LZ4} default=LZ4 restart ## Control compression level of the summary while in cache. summary.cache.compression.level int default=9 restart +## Control compression type of the summary while in memory during compaction +## NB So far only strategy=LOG honours it. +summary.log.compact.compression.type enum {NONE, LZ4} default=LZ4 restart + +## Control compression level of the summary while in memory during compaction +summary.log.compact.compression.level int default=9 restart + ## Control compression type of the summary ## NB So far only stragey=LOG honours it. summary.log.chunk.compression.type enum {NONE, LZ4} default=LZ4 restart diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp index 01484112460..08abeeff51a 100644 --- a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp +++ b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp @@ -141,18 +141,26 @@ SummaryManager::SummaryManager(vespalib::ThreadStackExecutorBase & executor, search::DocumentStore::Config config(getStoreConfig(summary.cache)); const ProtonConfig::Summary::Log & log(summary.log); const ProtonConfig::Summary::Log::Chunk & chunk(log.chunk); - document::CompressionConfig compression; + document::CompressionConfig chunkCompression; if (chunk.compression.type == ProtonConfig::Summary::Log::Chunk::Compression::LZ4) { - compression.type = document::CompressionConfig::LZ4; + chunkCompression.type = document::CompressionConfig::LZ4; + } + chunkCompression.compressionLevel = chunk.compression.level; + + document::CompressionConfig compactCompression; + 
if (log.compact.compression.type == ProtonConfig::Summary::Log::Compact::Compression::LZ4) { /* compaction reads summary.log.compact.compression.*, not the chunk settings */ + compactCompression.type = document::CompressionConfig::LZ4; } - compression.compressionLevel = chunk.compression.level; - search::WriteableFileChunk::Config fileConfig(compression, chunk.maxbytes, chunk.maxentries); + compactCompression.compressionLevel = log.compact.compression.level; + + search::WriteableFileChunk::Config fileConfig(chunkCompression, chunk.maxbytes, chunk.maxentries); search::LogDataStore::Config logConfig(log.maxfilesize, log.maxdiskbloatfactor, log.maxbucketspread, log.minfilesizefactor, log.numthreads, log.compact2activefile, + compactCompression, fileConfig); logConfig.disableCrcOnRead(chunk.skipcrconread); _docStore.reset( diff --git a/searchlib/src/tests/datastore/logdatastore_test.cpp b/searchlib/src/tests/datastore/logdatastore_test.cpp index 819cd0bf2cd..09e7cc13207 100644 --- a/searchlib/src/tests/datastore/logdatastore_test.cpp +++ b/searchlib/src/tests/datastore/logdatastore_test.cpp @@ -34,6 +34,7 @@ public: using namespace search; using namespace search::docstore; using search::index::DummyFileHeaderContext; +using document::CompressionConfig; namespace { @@ -210,11 +211,9 @@ TEST("test that DirectIOPadding works accordng to spec") { TEST("testGrowing") { FastOS_File::EmptyAndRemoveDirectory("growing"); EXPECT_TRUE(FastOS_File::MakeDirectory("growing")); - LogDataStore::Config config(100000, 0.1, 3.0, 0.2, 8, true, + LogDataStore::Config config(100000, 0.1, 3.0, 0.2, 8, true, CompressionConfig::LZ4, WriteableFileChunk::Config( - document::CompressionConfig( - document::CompressionConfig:: - LZ4, 9, 60), + CompressionConfig(CompressionConfig::LZ4, 9, 60), 1000, 20)); vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024); @@ -362,7 +361,7 @@ TEST("test visit cache does not cache empty ones and is able to access some back VisitStore store; IDataStore & datastore = store.getStore(); - VisitCache visitCache(datastore, 100000, 
document::CompressionConfig::Type::LZ4); + VisitCache visitCache(datastore, 100000, CompressionConfig::Type::LZ4); EXPECT_EQUAL(0u, visitCache.read({1}).size()); EXPECT_TRUE(visitCache.read({1}).empty()); datastore.write(1,1, A7, 7); @@ -444,9 +443,9 @@ public: VisitCacheStore() : _myDir("visitcache"), _repo(makeDocTypeRepoConfig()), - _config(DocumentStore::Config(document::CompressionConfig::LZ4, 1000000, 0).allowVisitCaching(true), - LogDataStore::Config(50000, 0.2, 3.0, 0.2, 1, true, - WriteableFileChunk::Config(document::CompressionConfig(), 16384, 64))), + _config(DocumentStore::Config(CompressionConfig::LZ4, 1000000, 0).allowVisitCaching(true), + LogDataStore::Config(50000, 0.2, 3.0, 0.2, 1, true,CompressionConfig::LZ4, + WriteableFileChunk::Config(CompressionConfig(), 16384, 64))), _fileHeaderContext(), _executor(_config.getLogConfig().getNumThreads(), 128*1024), _tlSyncer(), @@ -724,7 +723,7 @@ TEST("requireThatChunkCanProduceUniqueList") { void testChunkFormat(ChunkFormat & cf, size_t expectedLen, const vespalib::string & expectedContent) { - document::CompressionConfig cfg; + CompressionConfig cfg; uint64_t MAGIC_CONTENT(0xabcdef9876543210); cf.getBuffer() << MAGIC_CONTENT; vespalib::DataBuffer buffer; @@ -831,7 +830,7 @@ private: TEST("test that StoreByBucket gives bucket by bucket and ordered within") { vespalib::MemoryDataStore backing; - StoreByBucket sbb(backing);; + StoreByBucket sbb(backing, CompressionConfig::LZ4); for (size_t i(1); i <=500; i++) { add(sbb, i); } diff --git a/searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp b/searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp index 1898fa35a29..729edd90179 100644 --- a/searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp +++ b/searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp @@ -21,6 +21,7 @@ using document::Document; using document::DocumentId; using document::DocumentType; using 
document::DocumentTypeRepo; +using document::CompressionConfig; using vespalib::asciistream; using index::DummyFileHeaderContext; @@ -272,13 +273,10 @@ Fixture::Fixture() : _baseDir("visitor"), _repo(makeDocTypeRepoConfig()), _storeConfig(DocumentStore:: - Config(document::CompressionConfig::NONE, 0, 0), + Config(CompressionConfig::NONE, 0, 0), LogDataStore:: - Config(50000, 0.2, 3.0, 0.2, 1, true, - WriteableFileChunk::Config( - document::CompressionConfig(), - 16384, - 64))), + Config(50000, 0.2, 3.0, 0.2, 1, true, CompressionConfig::LZ4, + WriteableFileChunk::Config(CompressionConfig(), 16384, 64))), _executor(_storeConfig.getLogConfig().getNumThreads(), 128 * 1024), _fileHeaderContext(), _tlSyncer(), diff --git a/searchlib/src/vespa/searchlib/docstore/compacter.cpp b/searchlib/src/vespa/searchlib/docstore/compacter.cpp index 3156fb970a1..1544393d620 100644 --- a/searchlib/src/vespa/searchlib/docstore/compacter.cpp +++ b/searchlib/src/vespa/searchlib/docstore/compacter.cpp @@ -9,6 +9,8 @@ LOG_SETUP(".searchlib.docstore.compacter"); namespace search { namespace docstore { +using document::CompressionConfig; + void Compacter::write(LockGuard guard, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) { (void) chunkId; @@ -16,7 +18,7 @@ Compacter::write(LockGuard guard, uint32_t chunkId, uint32_t lid, const void *bu _ds.write(guard, fileId, lid, buffer, sz); } -BucketCompacter::BucketCompacter(LogDataStore & ds, const IBucketizer & bucketizer, FileId source, FileId destination) : +BucketCompacter::BucketCompacter(const CompressionConfig & compression, LogDataStore & ds, const IBucketizer & bucketizer, FileId source, FileId destination) : _sourceFileId(source), _destinationFileId(destination), _ds(ds), @@ -30,7 +32,7 @@ BucketCompacter::BucketCompacter(LogDataStore & ds, const IBucketizer & bucketiz { _tmpStore.reserve(256); for (size_t i(0); i < 256; i++) { - _tmpStore.emplace_back(_backingMemory); + _tmpStore.emplace_back(_backingMemory, compression); } 
} diff --git a/searchlib/src/vespa/searchlib/docstore/compacter.h b/searchlib/src/vespa/searchlib/docstore/compacter.h index 53d968e3dbc..951eecb43fb 100644 --- a/searchlib/src/vespa/searchlib/docstore/compacter.h +++ b/searchlib/src/vespa/searchlib/docstore/compacter.h @@ -35,7 +35,8 @@ class BucketCompacter : public IWriteData, public StoreByBucket::IWrite { public: using FileId = FileChunk::FileId; - BucketCompacter(LogDataStore & ds, const IBucketizer & bucketizer, FileId source, FileId destination); + BucketCompacter(const document::CompressionConfig & compression, LogDataStore & ds, + const IBucketizer & bucketizer, FileId source, FileId destination); void write(LockGuard guard, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) override ; void write(BucketId bucketId, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) override; void close() override; diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp index 19e59276258..ad8920582a9 100644 --- a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp +++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp @@ -445,7 +445,7 @@ void LogDataStore::compactFile(FileId fileId) setNewFileChunk(guard, createWritableFile(destinationFileId, fc->getLastPersistedSerialNum(), fc->getNameId().next())); } - compacter.reset(new docstore::BucketCompacter(*this, *_bucketizer, fc->getFileId(), destinationFileId)); + compacter.reset(new docstore::BucketCompacter(_config.compactCompression(), *this, *_bucketizer, fc->getFileId(), destinationFileId)); } else { compacter.reset(new docstore::Compacter(*this)); } diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.h b/searchlib/src/vespa/searchlib/docstore/logdatastore.h index 1d9aa051b7c..2702d4060c2 100644 --- a/searchlib/src/vespa/searchlib/docstore/logdatastore.h +++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.h @@ -36,7 +36,8 @@ private: using NameId = 
FileChunk::NameId; using FileId = FileChunk::FileId; public: - typedef vespalib::LockGuard LockGuard; + using LockGuard = vespalib::LockGuard; + using CompressionConfig = document::CompressionConfig; class Config { public: Config() @@ -46,7 +47,8 @@ public: _minFileSizeFactor(0.2), _numThreads(8), _skipCrcOnRead(false), - _compactToActiveFile(true) + _compactToActiveFile(true), + _compactCompression(CompressionConfig::LZ4) { } Config(size_t maxFileSize, @@ -55,6 +57,7 @@ public: double minFileSizeFactor, size_t numThreads, bool compactToActiveFile, + const CompressionConfig & compactCompression, const WriteableFileChunk::Config & fileConfig) : _maxFileSize(maxFileSize), _maxDiskBloatFactor(maxDiskBloatFactor), @@ -63,6 +66,7 @@ public: _numThreads(numThreads), _skipCrcOnRead(false), _compactToActiveFile(compactToActiveFile), + _compactCompression(compactCompression), _fileConfig(fileConfig) { } @@ -75,17 +79,19 @@ public: bool crcOnReadDisabled() const { return _skipCrcOnRead; } void disableCrcOnRead(bool v) { _skipCrcOnRead = v; } bool compact2ActiveFile() const { return _compactToActiveFile; } + const CompressionConfig & compactCompression() const { return _compactCompression; } const WriteableFileChunk::Config & getFileConfig() const { return _fileConfig; } private: - size_t _maxFileSize; - double _maxDiskBloatFactor; - double _maxBucketSpread; - double _minFileSizeFactor; - size_t _numThreads; - bool _skipCrcOnRead; - bool _compactToActiveFile; - WriteableFileChunk::Config _fileConfig; + size_t _maxFileSize; + double _maxDiskBloatFactor; + double _maxBucketSpread; + double _minFileSizeFactor; + size_t _numThreads; + bool _skipCrcOnRead; + bool _compactToActiveFile; + CompressionConfig _compactCompression; + WriteableFileChunk::Config _fileConfig; }; public: /** diff --git a/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp b/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp index 42edb028f52..86337e393e4 100644 --- 
a/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp +++ b/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp @@ -6,12 +6,14 @@ namespace search { namespace docstore { using document::BucketId; +using document::CompressionConfig; -StoreByBucket::StoreByBucket(vespalib::MemoryDataStore & backingMemory) : +StoreByBucket::StoreByBucket(vespalib::MemoryDataStore & backingMemory, const CompressionConfig & compression) : _chunks(), _current(), _where(), - _backingMemory(backingMemory) + _backingMemory(backingMemory), + _compression(compression) { createCurrent(); } @@ -37,8 +39,7 @@ void StoreByBucket::closeCurrent() { vespalib::DataBuffer buffer; - document::CompressionConfig lz4(document::CompressionConfig::LZ4); - _current->pack(1, buffer, lz4); + _current->pack(1, buffer, _compression); buffer.shrink(buffer.getDataLen()); _chunks.emplace_back(_backingMemory.push_back(buffer.getData(), buffer.getDataLen()).data(), buffer.getDataLen()); _current.reset(); diff --git a/searchlib/src/vespa/searchlib/docstore/storebybucket.h b/searchlib/src/vespa/searchlib/docstore/storebybucket.h index c80b7593fd7..6107c6e7c2b 100644 --- a/searchlib/src/vespa/searchlib/docstore/storebybucket.h +++ b/searchlib/src/vespa/searchlib/docstore/storebybucket.h @@ -18,7 +18,7 @@ namespace docstore { class StoreByBucket { public: - StoreByBucket(vespalib::MemoryDataStore & backingMemory); + StoreByBucket(vespalib::MemoryDataStore & backingMemory, const document::CompressionConfig & compression); class IWrite { public: using BucketId=document::BucketId; @@ -56,6 +56,7 @@ private: Chunk::UP _current; std::map<uint64_t, std::vector<Index>> _where; vespalib::MemoryDataStore & _backingMemory; + document::CompressionConfig _compression; }; } |