summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2016-08-30 19:40:15 +0200
committer GitHub <noreply@github.com> 2016-08-30 19:40:15 +0200
commit bca73f4f4e56d0a016b99b3917a075823c193c57 (patch)
tree e3c6b78862d7b556aa942a1ce0158c3d00456440
parent 60955723730fa69134a74d652d669f0f16fb8dc7 (diff)
parent 4b391f17fe5522836173888162ba5d7ea29831d9 (diff)
Merge pull request #509 from yahoo/balder/control-in-memory-temporary-compression
Balder/control in memory temporary compression
-rw-r--r-- searchcore/src/vespa/searchcore/config/proton.def 7
-rw-r--r-- searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp 16
-rw-r--r-- searchlib/src/tests/datastore/logdatastore_test.cpp 19
-rw-r--r-- searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp 10
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/compacter.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/compacter.h 3
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/logdatastore.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/logdatastore.h 26
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/storebybucket.cpp 9
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/storebybucket.h 3
10 files changed, 62 insertions, 39 deletions
diff --git a/searchcore/src/vespa/searchcore/config/proton.def b/searchcore/src/vespa/searchcore/config/proton.def
index 95851af2826..3335af2f02d 100644
--- a/searchcore/src/vespa/searchcore/config/proton.def
+++ b/searchcore/src/vespa/searchcore/config/proton.def
@@ -195,6 +195,13 @@ summary.cache.compression.type enum {NONE, LZ4} default=LZ4 restart
## Control compression level of the summary while in cache.
summary.cache.compression.level int default=9 restart
+## Control compression type of the summary while in memory during compaction
+## NB So far only strategy=LOG honours it.
+summary.log.compact.compression.type enum {NONE, LZ4} default=LZ4 restart
+
+## Control compression level of the summary while in memory during compaction
+summary.log.compact.compression.level int default=9 restart
+
## Control compression type of the summary
## NB So far only stragey=LOG honours it.
summary.log.chunk.compression.type enum {NONE, LZ4} default=LZ4 restart
diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp
index 01484112460..08abeeff51a 100644
--- a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp
+++ b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp
@@ -141,18 +141,26 @@ SummaryManager::SummaryManager(vespalib::ThreadStackExecutorBase & executor,
search::DocumentStore::Config config(getStoreConfig(summary.cache));
const ProtonConfig::Summary::Log & log(summary.log);
const ProtonConfig::Summary::Log::Chunk & chunk(log.chunk);
- document::CompressionConfig compression;
+ document::CompressionConfig chunkCompression;
if (chunk.compression.type == ProtonConfig::Summary::Log::Chunk::Compression::LZ4) {
- compression.type = document::CompressionConfig::LZ4;
+ chunkCompression.type = document::CompressionConfig::LZ4;
+ }
+ chunkCompression.compressionLevel = chunk.compression.level;
+
+ document::CompressionConfig compactCompression; // in-memory compression during compaction; read from summary.log.compact.compression.*, not the chunk settings
+ if (log.compact.compression.type == ProtonConfig::Summary::Log::Compact::Compression::LZ4) {
+ compactCompression.type = document::CompressionConfig::LZ4;
}
- compression.compressionLevel = chunk.compression.level;
- search::WriteableFileChunk::Config fileConfig(compression, chunk.maxbytes, chunk.maxentries);
+ compactCompression.compressionLevel = log.compact.compression.level;
+
+ search::WriteableFileChunk::Config fileConfig(chunkCompression, chunk.maxbytes, chunk.maxentries);
search::LogDataStore::Config logConfig(log.maxfilesize,
log.maxdiskbloatfactor,
log.maxbucketspread,
log.minfilesizefactor,
log.numthreads,
log.compact2activefile,
+ compactCompression,
fileConfig);
logConfig.disableCrcOnRead(chunk.skipcrconread);
_docStore.reset(
diff --git a/searchlib/src/tests/datastore/logdatastore_test.cpp b/searchlib/src/tests/datastore/logdatastore_test.cpp
index 819cd0bf2cd..09e7cc13207 100644
--- a/searchlib/src/tests/datastore/logdatastore_test.cpp
+++ b/searchlib/src/tests/datastore/logdatastore_test.cpp
@@ -34,6 +34,7 @@ public:
using namespace search;
using namespace search::docstore;
using search::index::DummyFileHeaderContext;
+using document::CompressionConfig;
namespace {
@@ -210,11 +211,9 @@ TEST("test that DirectIOPadding works accordng to spec") {
TEST("testGrowing") {
FastOS_File::EmptyAndRemoveDirectory("growing");
EXPECT_TRUE(FastOS_File::MakeDirectory("growing"));
- LogDataStore::Config config(100000, 0.1, 3.0, 0.2, 8, true,
+ LogDataStore::Config config(100000, 0.1, 3.0, 0.2, 8, true, CompressionConfig::LZ4,
WriteableFileChunk::Config(
- document::CompressionConfig(
- document::CompressionConfig::
- LZ4, 9, 60),
+ CompressionConfig(CompressionConfig::LZ4, 9, 60),
1000,
20));
vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024);
@@ -362,7 +361,7 @@ TEST("test visit cache does not cache empty ones and is able to access some back
VisitStore store;
IDataStore & datastore = store.getStore();
- VisitCache visitCache(datastore, 100000, document::CompressionConfig::Type::LZ4);
+ VisitCache visitCache(datastore, 100000, CompressionConfig::Type::LZ4);
EXPECT_EQUAL(0u, visitCache.read({1}).size());
EXPECT_TRUE(visitCache.read({1}).empty());
datastore.write(1,1, A7, 7);
@@ -444,9 +443,9 @@ public:
VisitCacheStore() :
_myDir("visitcache"),
_repo(makeDocTypeRepoConfig()),
- _config(DocumentStore::Config(document::CompressionConfig::LZ4, 1000000, 0).allowVisitCaching(true),
- LogDataStore::Config(50000, 0.2, 3.0, 0.2, 1, true,
- WriteableFileChunk::Config(document::CompressionConfig(), 16384, 64))),
+ _config(DocumentStore::Config(CompressionConfig::LZ4, 1000000, 0).allowVisitCaching(true),
+ LogDataStore::Config(50000, 0.2, 3.0, 0.2, 1, true,CompressionConfig::LZ4,
+ WriteableFileChunk::Config(CompressionConfig(), 16384, 64))),
_fileHeaderContext(),
_executor(_config.getLogConfig().getNumThreads(), 128*1024),
_tlSyncer(),
@@ -724,7 +723,7 @@ TEST("requireThatChunkCanProduceUniqueList") {
void testChunkFormat(ChunkFormat & cf, size_t expectedLen, const vespalib::string & expectedContent)
{
- document::CompressionConfig cfg;
+ CompressionConfig cfg;
uint64_t MAGIC_CONTENT(0xabcdef9876543210);
cf.getBuffer() << MAGIC_CONTENT;
vespalib::DataBuffer buffer;
@@ -831,7 +830,7 @@ private:
TEST("test that StoreByBucket gives bucket by bucket and ordered within") {
vespalib::MemoryDataStore backing;
- StoreByBucket sbb(backing);;
+ StoreByBucket sbb(backing, CompressionConfig::LZ4);
for (size_t i(1); i <=500; i++) {
add(sbb, i);
}
diff --git a/searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp b/searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp
index 1898fa35a29..729edd90179 100644
--- a/searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp
+++ b/searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp
@@ -21,6 +21,7 @@ using document::Document;
using document::DocumentId;
using document::DocumentType;
using document::DocumentTypeRepo;
+using document::CompressionConfig;
using vespalib::asciistream;
using index::DummyFileHeaderContext;
@@ -272,13 +273,10 @@ Fixture::Fixture()
: _baseDir("visitor"),
_repo(makeDocTypeRepoConfig()),
_storeConfig(DocumentStore::
- Config(document::CompressionConfig::NONE, 0, 0),
+ Config(CompressionConfig::NONE, 0, 0),
LogDataStore::
- Config(50000, 0.2, 3.0, 0.2, 1, true,
- WriteableFileChunk::Config(
- document::CompressionConfig(),
- 16384,
- 64))),
+ Config(50000, 0.2, 3.0, 0.2, 1, true, CompressionConfig::LZ4,
+ WriteableFileChunk::Config(CompressionConfig(), 16384, 64))),
_executor(_storeConfig.getLogConfig().getNumThreads(), 128 * 1024),
_fileHeaderContext(),
_tlSyncer(),
diff --git a/searchlib/src/vespa/searchlib/docstore/compacter.cpp b/searchlib/src/vespa/searchlib/docstore/compacter.cpp
index 3156fb970a1..1544393d620 100644
--- a/searchlib/src/vespa/searchlib/docstore/compacter.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/compacter.cpp
@@ -9,6 +9,8 @@ LOG_SETUP(".searchlib.docstore.compacter");
namespace search {
namespace docstore {
+using document::CompressionConfig;
+
void
Compacter::write(LockGuard guard, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) {
(void) chunkId;
@@ -16,7 +18,7 @@ Compacter::write(LockGuard guard, uint32_t chunkId, uint32_t lid, const void *bu
_ds.write(guard, fileId, lid, buffer, sz);
}
-BucketCompacter::BucketCompacter(LogDataStore & ds, const IBucketizer & bucketizer, FileId source, FileId destination) :
+BucketCompacter::BucketCompacter(const CompressionConfig & compression, LogDataStore & ds, const IBucketizer & bucketizer, FileId source, FileId destination) :
_sourceFileId(source),
_destinationFileId(destination),
_ds(ds),
@@ -30,7 +32,7 @@ BucketCompacter::BucketCompacter(LogDataStore & ds, const IBucketizer & bucketiz
{
_tmpStore.reserve(256);
for (size_t i(0); i < 256; i++) {
- _tmpStore.emplace_back(_backingMemory);
+ _tmpStore.emplace_back(_backingMemory, compression);
}
}
diff --git a/searchlib/src/vespa/searchlib/docstore/compacter.h b/searchlib/src/vespa/searchlib/docstore/compacter.h
index 53d968e3dbc..951eecb43fb 100644
--- a/searchlib/src/vespa/searchlib/docstore/compacter.h
+++ b/searchlib/src/vespa/searchlib/docstore/compacter.h
@@ -35,7 +35,8 @@ class BucketCompacter : public IWriteData, public StoreByBucket::IWrite
{
public:
using FileId = FileChunk::FileId;
- BucketCompacter(LogDataStore & ds, const IBucketizer & bucketizer, FileId source, FileId destination);
+ BucketCompacter(const document::CompressionConfig & compression, LogDataStore & ds,
+ const IBucketizer & bucketizer, FileId source, FileId destination);
void write(LockGuard guard, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) override ;
void write(BucketId bucketId, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) override;
void close() override;
diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
index 19e59276258..ad8920582a9 100644
--- a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
@@ -445,7 +445,7 @@ void LogDataStore::compactFile(FileId fileId)
setNewFileChunk(guard, createWritableFile(destinationFileId, fc->getLastPersistedSerialNum(), fc->getNameId().next()));
}
- compacter.reset(new docstore::BucketCompacter(*this, *_bucketizer, fc->getFileId(), destinationFileId));
+ compacter.reset(new docstore::BucketCompacter(_config.compactCompression(), *this, *_bucketizer, fc->getFileId(), destinationFileId));
} else {
compacter.reset(new docstore::Compacter(*this));
}
diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.h b/searchlib/src/vespa/searchlib/docstore/logdatastore.h
index 1d9aa051b7c..2702d4060c2 100644
--- a/searchlib/src/vespa/searchlib/docstore/logdatastore.h
+++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.h
@@ -36,7 +36,8 @@ private:
using NameId = FileChunk::NameId;
using FileId = FileChunk::FileId;
public:
- typedef vespalib::LockGuard LockGuard;
+ using LockGuard = vespalib::LockGuard;
+ using CompressionConfig = document::CompressionConfig;
class Config {
public:
Config()
@@ -46,7 +47,8 @@ public:
_minFileSizeFactor(0.2),
_numThreads(8),
_skipCrcOnRead(false),
- _compactToActiveFile(true)
+ _compactToActiveFile(true),
+ _compactCompression(CompressionConfig::LZ4)
{ }
Config(size_t maxFileSize,
@@ -55,6 +57,7 @@ public:
double minFileSizeFactor,
size_t numThreads,
bool compactToActiveFile,
+ const CompressionConfig & compactCompression,
const WriteableFileChunk::Config & fileConfig)
: _maxFileSize(maxFileSize),
_maxDiskBloatFactor(maxDiskBloatFactor),
@@ -63,6 +66,7 @@ public:
_numThreads(numThreads),
_skipCrcOnRead(false),
_compactToActiveFile(compactToActiveFile),
+ _compactCompression(compactCompression),
_fileConfig(fileConfig)
{ }
@@ -75,17 +79,19 @@ public:
bool crcOnReadDisabled() const { return _skipCrcOnRead; }
void disableCrcOnRead(bool v) { _skipCrcOnRead = v; }
bool compact2ActiveFile() const { return _compactToActiveFile; }
+ const CompressionConfig & compactCompression() const { return _compactCompression; }
const WriteableFileChunk::Config & getFileConfig() const { return _fileConfig; }
private:
- size_t _maxFileSize;
- double _maxDiskBloatFactor;
- double _maxBucketSpread;
- double _minFileSizeFactor;
- size_t _numThreads;
- bool _skipCrcOnRead;
- bool _compactToActiveFile;
- WriteableFileChunk::Config _fileConfig;
+ size_t _maxFileSize;
+ double _maxDiskBloatFactor;
+ double _maxBucketSpread;
+ double _minFileSizeFactor;
+ size_t _numThreads;
+ bool _skipCrcOnRead;
+ bool _compactToActiveFile;
+ CompressionConfig _compactCompression;
+ WriteableFileChunk::Config _fileConfig;
};
public:
/**
diff --git a/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp b/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp
index 42edb028f52..86337e393e4 100644
--- a/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp
@@ -6,12 +6,14 @@ namespace search {
namespace docstore {
using document::BucketId;
+using document::CompressionConfig;
-StoreByBucket::StoreByBucket(vespalib::MemoryDataStore & backingMemory) :
+StoreByBucket::StoreByBucket(vespalib::MemoryDataStore & backingMemory, const CompressionConfig & compression) :
_chunks(),
_current(),
_where(),
- _backingMemory(backingMemory)
+ _backingMemory(backingMemory),
+ _compression(compression)
{
createCurrent();
}
@@ -37,8 +39,7 @@ void
StoreByBucket::closeCurrent()
{
vespalib::DataBuffer buffer;
- document::CompressionConfig lz4(document::CompressionConfig::LZ4);
- _current->pack(1, buffer, lz4);
+ _current->pack(1, buffer, _compression);
buffer.shrink(buffer.getDataLen());
_chunks.emplace_back(_backingMemory.push_back(buffer.getData(), buffer.getDataLen()).data(), buffer.getDataLen());
_current.reset();
diff --git a/searchlib/src/vespa/searchlib/docstore/storebybucket.h b/searchlib/src/vespa/searchlib/docstore/storebybucket.h
index c80b7593fd7..6107c6e7c2b 100644
--- a/searchlib/src/vespa/searchlib/docstore/storebybucket.h
+++ b/searchlib/src/vespa/searchlib/docstore/storebybucket.h
@@ -18,7 +18,7 @@ namespace docstore {
class StoreByBucket
{
public:
- StoreByBucket(vespalib::MemoryDataStore & backingMemory);
+ StoreByBucket(vespalib::MemoryDataStore & backingMemory, const document::CompressionConfig & compression);
class IWrite {
public:
using BucketId=document::BucketId;
@@ -56,6 +56,7 @@ private:
Chunk::UP _current;
std::map<uint64_t, std::vector<Index>> _where;
vespalib::MemoryDataStore & _backingMemory;
+ document::CompressionConfig _compression;
};
}