diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2020-05-13 15:06:40 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-05-13 15:06:40 +0200 |
commit | 3b9a4d250a93644e595472ea13851f454f3dc897 (patch) | |
tree | 3342f672ca9e37c6a5e5b4e61e5d191a4f615f15 /searchlib/src | |
parent | 40253cdee99ad3030b4dae02b38ff4a006d868be (diff) | |
parent | 53238ea2d254f35b5b0032c95c1eb3476d061318 (diff) |
Merge pull request #13216 from vespa-engine/balder/limit-number-of-lids-per-file
Account and limit number of lids per file to reduce spkies during com…
Diffstat (limited to 'searchlib/src')
12 files changed, 88 insertions, 65 deletions
diff --git a/searchlib/src/tests/docstore/file_chunk/file_chunk_test.cpp b/searchlib/src/tests/docstore/file_chunk/file_chunk_test.cpp index 31c21723cd0..70c23dc4191 100644 --- a/searchlib/src/tests/docstore/file_chunk/file_chunk_test.cpp +++ b/searchlib/src/tests/docstore/file_chunk/file_chunk_test.cpp @@ -147,6 +147,7 @@ struct WriteFixture : public FixtureBase { void updateLidMap(uint32_t docIdLimit) { vespalib::LockGuard guard(updateLock); chunk.updateLidMap(guard, lidObserver, serialNum, docIdLimit); + serialNum = chunk.getSerialNum(); } }; @@ -180,6 +181,30 @@ TEST("require that docIdLimit is written to and read from idx file header") } } +TEST("require that numlids are updated") { + { + WriteFixture f("tmp", 1000, false); + f.updateLidMap(1000); + EXPECT_EQUAL(0u, f.chunk.getNumLids()); + f.append(1); + EXPECT_EQUAL(1u, f.chunk.getNumLids()); + f.append(2); + f.append(3); + EXPECT_EQUAL(3u, f.chunk.getNumLids()); + f.append(3); + EXPECT_EQUAL(4u, f.chunk.getNumLids()); + f.flush(); + } + { + WriteFixture f("tmp", 1000, true); + EXPECT_EQUAL(0u, f.chunk.getNumLids()); + f.updateLidMap(1000); + EXPECT_EQUAL(4u, f.chunk.getNumLids()); + f.append(7); + EXPECT_EQUAL(5u, f.chunk.getNumLids()); + } +} + template <typename FixtureType> void assertUpdateLidMap(FixtureType &f) diff --git a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp index c383358db9e..9a19a795076 100644 --- a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp +++ b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp @@ -22,6 +22,7 @@ using namespace search::docstore; using namespace search; using namespace vespalib::alloc; using search::index::DummyFileHeaderContext; +using search::test::DirectoryHandler; class MyTlSyncer : public transactionlog::SyncProxy { SerialNum _syncedTo; @@ -202,15 +203,9 @@ TEST("test that DirectIOPadding works accordng to spec") { } #endif -TEST("testGrowing") { - FastOS_File::EmptyAndRemoveDirectory("growing"); - EXPECT_TRUE(FastOS_File::MakeDirectory("growing")); - LogDataStore::Config config; //(100000, 0.1, 3.0, 0.2, 8, true, CompressionConfig::LZ4, - // WriteableFileChunk::Config(CompressionConfig(CompressionConfig::LZ4, 9, 60), 1000)); - config.setMaxFileSize(100000).setMaxDiskBloatFactor(0.1).setMaxBucketSpread(3.0).setMinFileSizeFactor(0.2) - .compactCompression({CompressionConfig::LZ4}) - .setFileConfig({{CompressionConfig::LZ4, 9, 60}, 1000}); - vespalib::ThreadStackExecutor executor(8, 128*1024); +void verifyGrowing(const LogDataStore::Config & config, uint32_t minFiles, uint32_t maxFiles) { + DirectoryHandler tmpDir("growing"); + vespalib::ThreadStackExecutor executor(4, 128*1024); DummyFileHeaderContext fileHeaderContext; MyTlSyncer tlSyncer; { @@ -243,30 +238,32 @@ TEST("testGrowing") { datastore.compact(30000); datastore.remove(31000, 0); checkStats(datastore, 31000, 30000); + EXPECT_LESS_EQUAL(minFiles, datastore.getAllActiveFiles().size()); + EXPECT_GREATER_EQUAL(maxFiles, datastore.getAllActiveFiles().size()); } { LogDataStore datastore(executor, "growing", config, GrowStrategy(), TuneFileSummary(), fileHeaderContext, tlSyncer, nullptr); checkStats(datastore, 30000, 30000); + EXPECT_LESS_EQUAL(minFiles, datastore.getAllActiveFiles().size()); + EXPECT_GREATER_EQUAL(maxFiles, datastore.getAllActiveFiles().size()); } - - FastOS_File::EmptyAndRemoveDirectory("growing"); +} +TEST("testGrowingChunkedBySize") { + LogDataStore::Config config; + config.setMaxFileSize(100000).setMaxDiskBloatFactor(0.1).setMaxBucketSpread(3.0).setMinFileSizeFactor(0.2) + .compactCompression({CompressionConfig::LZ4}) + .setFileConfig({{CompressionConfig::LZ4, 9, 60}, 1000}); + verifyGrowing(config, 40, 120); } -class TmpDirectory { -public: - TmpDirectory(const vespalib::string & dir) : _dir(dir) - { - FastOS_File::EmptyAndRemoveDirectory(_dir.c_str()); - ASSERT_TRUE(FastOS_File::MakeDirectory(_dir.c_str())); - } - ~TmpDirectory() { - FastOS_File::EmptyAndRemoveDirectory(_dir.c_str()); - } - const vespalib::string & getDir() const { return _dir; } -private: - vespalib::string _dir; -}; +TEST("testGrowingChunkedByNumLids") { + LogDataStore::Config config; + config.setMaxNumLids(1000).setMaxDiskBloatFactor(0.1).setMaxBucketSpread(3.0).setMinFileSizeFactor(0.2) + .compactCompression({CompressionConfig::LZ4}) + .setFileConfig({{CompressionConfig::LZ4, 9, 60}, 1000}); + verifyGrowing(config,10, 10); +} void fetchAndTest(IDataStore & datastore, uint32_t lid, const void *a, size_t sz) { @@ -349,7 +346,7 @@ public: ~VisitStore(); IDataStore & getStore() { return _datastore; } private: - TmpDirectory _myDir; + DirectoryHandler _myDir; LogDataStore::Config _config; DummyFileHeaderContext _fileHeaderContext; vespalib::ThreadStackExecutor _executor; @@ -357,8 +354,7 @@ private: LogDataStore _datastore; }; -VisitStore::~VisitStore() { -} +VisitStore::~VisitStore() =default; TEST("test visit cache does not cache empty ones and is able to access some backing store.") { const char * A7 = "aAaAaAa"; @@ -500,7 +496,7 @@ private: vespalib::hash_set<uint32_t> _actual; bool _allowVisitCaching; }; - TmpDirectory _myDir; + DirectoryHandler _myDir; document::DocumentTypeRepo _repo; LogDocumentStore::Config _config; DummyFileHeaderContext _fileHeaderContext; @@ -756,7 +752,7 @@ TEST("requireThatSyncTokenIsUpdatedAfterFlush") { } TEST("requireThatFlushTimeIsAvailableAfterFlush") { - TmpDirectory testDir("flushtime"); + DirectoryHandler testDir("flushtime"); vespalib::system_time before(vespalib::system_clock::now()); DummyFileHeaderContext fileHeaderContext; LogDataStore::Config config; diff --git a/searchlib/src/vespa/searchlib/docstore/chunk.cpp b/searchlib/src/vespa/searchlib/docstore/chunk.cpp index 847cd3623c3..f60bf69d908 100644 --- a/searchlib/src/vespa/searchlib/docstore/chunk.cpp +++ b/searchlib/src/vespa/searchlib/docstore/chunk.cpp @@ -57,7 +57,7 @@ Chunk::pack(uint64_t lastSerial, vespalib::DataBuffer & compressed, const Compre Chunk::Chunk(uint32_t id, const Config & config) : _id(id), _lastSerial(static_cast<uint64_t>(-1l)), - _format(new ChunkFormatV2(config.getMaxBytes())) + _format(std::make_unique<ChunkFormatV2>(config.getMaxBytes())) { _lids.reserve(4096/sizeof(Entry)); } diff --git a/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp b/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp index a5bed4c33ce..fbbdcff3c5d 100644 --- a/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp +++ b/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp @@ -81,33 +81,26 @@ ChunkFormat::deserialize(const void * buffer, size_t len, bool skipcrc) uint32_t crc32(0); raw >> crc32; raw.rp(currPos); - ChunkFormat::UP format; if (version == ChunkFormatV1::VERSION) { if (skipcrc) { - format.reset(new ChunkFormatV1(raw)); + return std::make_unique<ChunkFormatV1>(raw); } else { - format.reset(new ChunkFormatV1(raw, crc32)); + return std::make_unique<ChunkFormatV1>(raw, crc32); } } else if (version == ChunkFormatV2::VERSION) { if (skipcrc) { - format.reset(new ChunkFormatV2(raw)); + return std::make_unique<ChunkFormatV2>(raw); } else { - format.reset(new ChunkFormatV2(raw, crc32)); + return std::make_unique<ChunkFormatV2>(raw, crc32); } } else { throw ChunkException(make_string("Unknown version %d", version), VESPA_STRLOC); } - return format; } -ChunkFormat::ChunkFormat() : - _dataBuf() -{ -} +ChunkFormat::ChunkFormat() = default; -ChunkFormat::~ChunkFormat() -{ -} +ChunkFormat::~ChunkFormat() = default; ChunkFormat::ChunkFormat(size_t maxSize) : _dataBuf(maxSize) diff --git a/searchlib/src/vespa/searchlib/docstore/filechunk.cpp b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp index 84d64954e40..b4ef45187ee 100644 --- a/searchlib/src/vespa/searchlib/docstore/filechunk.cpp +++ b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp @@ -78,9 +78,10 @@ FileChunk::FileChunk(FileId fileId, NameId nameId, const vespalib::string & base _dataFileName(createDatFileName(_name)), _idxFileName(createIdxFileName(_name)), _chunkInfo(), + _lastPersistedSerialNum(0), _dataHeaderLen(0u), _idxHeaderLen(0u), - _lastPersistedSerialNum(0), + _numLids(0), _docIdLimit(std::numeric_limits<uint32_t>::max()), _modificationTime() { @@ -228,6 +229,7 @@ FileChunk::updateLidMap(const LockGuard &guard, ISetLid &ds, uint64_t serialNum, globalBucketMap.recordLid(bucketId); } ds.setLid(guard, lidMeta.getLid(), LidInfo(getFileId().getId(), _chunkInfo.size(), lidMeta.size())); + _numLids++; } else { remove(lidMeta.getLid(), lidMeta.size()); } diff --git a/searchlib/src/vespa/searchlib/docstore/filechunk.h b/searchlib/src/vespa/searchlib/docstore/filechunk.h index 0f139f507c6..b68db801d60 100644 --- a/searchlib/src/vespa/searchlib/docstore/filechunk.h +++ b/searchlib/src/vespa/searchlib/docstore/filechunk.h @@ -154,6 +154,7 @@ public: FileId getFileId() const { return _fileId; } NameId getNameId() const { return _nameId; } + uint32_t getNumLids() const { return _numLids; } size_t getBloatCount() const { return _erasedCount; } size_t getAddedBytes() const { return _addedBytes; } size_t getErasedBytes() const { return _erasedBytes; } @@ -245,9 +246,10 @@ protected: vespalib::string _dataFileName; vespalib::string _idxFileName; ChunkInfoVector _chunkInfo; + uint64_t _lastPersistedSerialNum; uint32_t _dataHeaderLen; uint32_t _idxHeaderLen; - uint64_t _lastPersistedSerialNum; + uint32_t _numLids; uint32_t _docIdLimit; // Limit when the file was created. Stored in idx file header. vespalib::system_time _modificationTime; }; diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp index 91de6ba4276..ae0696d5824 100644 --- a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp +++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp @@ -16,6 +16,11 @@ LOG_SETUP(".searchlib.docstore.logdatastore"); namespace search { +namespace { + constexpr size_t DEFAULT_MAX_FILESIZE = 1000000000ul; + constexpr uint32_t DEFAULT_MAX_LIDS_PER_FILE = 32 * 1024 * 1024; +} + using vespalib::LockGuard; using vespalib::getLastErrorString; using vespalib::getErrorString; @@ -30,10 +35,11 @@ using docstore::BucketCompacter; using namespace std::literals; LogDataStore::Config::Config() - : _maxFileSize(1000000000ul), + : _maxFileSize(DEFAULT_MAX_FILESIZE), _maxDiskBloatFactor(0.2), _maxBucketSpread(2.5), _minFileSizeFactor(0.2), + _maxNumLids(DEFAULT_MAX_LIDS_PER_FILE), _skipCrcOnRead(false), _compactCompression(CompressionConfig::LZ4), _fileConfig() @@ -202,9 +208,9 @@ LogDataStore::requireSpace(LockGuard guard, WriteableFileChunk & active) { assert(active.getFileId() == getActiveFileId(guard)); size_t oldSz(active.getDiskFootprint()); - LOG(spam, "Checking file %s size %ld < %ld", - active.getName().c_str(), oldSz, _config.getMaxFileSize()); - if (oldSz > _config.getMaxFileSize()) { + LOG(spam, "Checking file %s size %ld < %ld AND #lids %u < %u", + active.getName().c_str(), oldSz, _config.getMaxFileSize(), active.getNumLids(), _config.getMaxNumLids()); + if ((oldSz > _config.getMaxFileSize()) || (active.getNumLids() >= _config.getMaxNumLids())) { FileId fileId = allocateFileId(guard); setNewFileChunk(guard, createWritableFile(fileId, active.getSerialNum())); setActive(guard, fileId); @@ -220,9 +226,9 @@ LogDataStore::requireSpace(LockGuard guard, WriteableFileChunk & active) active.flushPendingChunks(active.getSerialNum()); active.freeze(); // TODO: Delay create of new file - LOG(debug, "Closed file %s of size %ld due to maxsize of %ld reached. Bloat is %ld", - active.getName().c_str(), active.getDiskFootprint(), - _config.getMaxFileSize(), active.getDiskBloat()); + LOG(debug, "Closed file %s of size %ld and %u lids due to maxsize of %ld or maxlids %u reached. Bloat is %ld", + active.getName().c_str(), active.getDiskFootprint(), active.getNumLids(), + _config.getMaxFileSize(), _config.getMaxNumLids(), active.getDiskBloat()); } } diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.h b/searchlib/src/vespa/searchlib/docstore/logdatastore.h index 39b2c2b7100..7bd55599611 100644 --- a/searchlib/src/vespa/searchlib/docstore/logdatastore.h +++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.h @@ -40,6 +40,7 @@ public: Config(); Config & setMaxFileSize(size_t v) { _maxFileSize = v; return *this; } + Config & setMaxNumLids(size_t v) { _maxNumLids = v; return *this; } Config & setMaxDiskBloatFactor(double v) { _maxDiskBloatFactor = v; return *this; } Config & setMaxBucketSpread(double v) { _maxBucketSpread = v; return *this; } Config & setMinFileSizeFactor(double v) { _minFileSizeFactor = v; return *this; } @@ -51,6 +52,7 @@ public: double getMaxDiskBloatFactor() const { return _maxDiskBloatFactor; } double getMaxBucketSpread() const { return _maxBucketSpread; } double getMinFileSizeFactor() const { return _minFileSizeFactor; } + uint32_t getMaxNumLids() const { return _maxNumLids; } bool crcOnReadDisabled() const { return _skipCrcOnRead; } const CompressionConfig & compactCompression() const { return _compactCompression; } @@ -64,6 +66,7 @@ public: double _maxDiskBloatFactor; double _maxBucketSpread; double _minFileSizeFactor; + uint32_t _maxNumLids; bool _skipCrcOnRead; CompressionConfig _compactCompression; WriteableFileChunk::Config _fileConfig; diff --git a/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp b/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp index 13de5687ee0..5e595d0bb14 100644 --- a/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp +++ b/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp @@ -92,7 +92,7 @@ StoreByBucket::drain(IWrite & drainer) chunks.resize(_chunks.size()); for (const auto & it : _chunks) { ConstBufferRef buf(it.second); - chunks[it.first].reset(new Chunk(it.first, buf.data(), buf.size())); + chunks[it.first] = std::make_unique<Chunk>(it.first, buf.data(), buf.size()); } _chunks.clear(); for (auto & it : _where) { diff --git a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp index d12a3a54a93..3517595d00a 100644 --- a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp +++ b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp @@ -708,6 +708,7 @@ WriteableFileChunk::append(uint64_t serialNum, uint32_t lid, const void * buffer assert(serialNum >= _serialNum); _serialNum = serialNum; _addedBytes += adjustSize(len); + _numLids++; size_t oldSz(_active->size()); LidMeta lm = _active->append(lid, buffer, len); setDiskFootprint(FileChunk::getDiskFootprint() - oldSz + _active->size()); diff --git a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h index 6d870cae019..65dd822ac1f 100644 --- a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h +++ b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h @@ -47,7 +47,7 @@ public: uint32_t docIdLimit, const Config & config, const TuneFileSummary &tune, const common::FileHeaderContext &fileHeaderContext, const IBucketizer * bucketizer, bool crcOnReadDisabled); - ~WriteableFileChunk(); + ~WriteableFileChunk() override; ssize_t read(uint32_t lid, SubChunkId chunk, vespalib::DataBuffer & buffer) const override; void read(LidInfoWithLidV::const_iterator begin, size_t count, IBufferVisitor & visitor) const override; @@ -128,8 +128,8 @@ private: bool _writeTaskIsRunning; vespalib::Monitor _writeMonitor; ProcessedChunkQ _writeQ; - vespalib::Executor & _executor; - ProcessedChunkMap _orderedChunks; + vespalib::Executor & _executor; + ProcessedChunkMap _orderedChunks; BucketDensityComputer _bucketMap; }; diff --git a/searchlib/src/vespa/searchlib/test/directory_handler.h b/searchlib/src/vespa/searchlib/test/directory_handler.h index 72f9cf83d85..66e5a710870 100644 --- a/searchlib/src/vespa/searchlib/test/directory_handler.h +++ b/searchlib/src/vespa/searchlib/test/directory_handler.h @@ -5,8 +5,7 @@ #include <vespa/vespalib/io/fileutil.h> #include <vespa/vespalib/stllike/string.h> -namespace search { -namespace test { +namespace search::test { class DirectoryHandler { @@ -17,11 +16,8 @@ private: public: DirectoryHandler(const vespalib::string &mkdir) - : _mkdir(mkdir), - _rmdir(mkdir), - _cleanup(true) + : DirectoryHandler(mkdir, mkdir) { - vespalib::mkdir(_mkdir); } DirectoryHandler(const vespalib::string &mkdir, const vespalib::string &rmdir) @@ -37,8 +33,7 @@ public: } } void cleanup(bool v) { _cleanup = v; } + const vespalib::string & getDir() const { return _mkdir; } }; } -} - |