summaryrefslogtreecommitdiffstats
path: root/searchlib/src
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2020-05-13 15:06:40 +0200
committerGitHub <noreply@github.com>2020-05-13 15:06:40 +0200
commit3b9a4d250a93644e595472ea13851f454f3dc897 (patch)
tree3342f672ca9e37c6a5e5b4e61e5d191a4f615f15 /searchlib/src
parent40253cdee99ad3030b4dae02b38ff4a006d868be (diff)
parent53238ea2d254f35b5b0032c95c1eb3476d061318 (diff)
Merge pull request #13216 from vespa-engine/balder/limit-number-of-lids-per-file
Account and limit number of lids per file to reduce spkies during com…
Diffstat (limited to 'searchlib/src')
-rw-r--r--searchlib/src/tests/docstore/file_chunk/file_chunk_test.cpp25
-rw-r--r--searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp56
-rw-r--r--searchlib/src/vespa/searchlib/docstore/chunk.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/docstore/chunkformat.cpp19
-rw-r--r--searchlib/src/vespa/searchlib/docstore/filechunk.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/docstore/filechunk.h4
-rw-r--r--searchlib/src/vespa/searchlib/docstore/logdatastore.cpp20
-rw-r--r--searchlib/src/vespa/searchlib/docstore/logdatastore.h3
-rw-r--r--searchlib/src/vespa/searchlib/docstore/storebybucket.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp1
-rw-r--r--searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h6
-rw-r--r--searchlib/src/vespa/searchlib/test/directory_handler.h11
12 files changed, 88 insertions, 65 deletions
diff --git a/searchlib/src/tests/docstore/file_chunk/file_chunk_test.cpp b/searchlib/src/tests/docstore/file_chunk/file_chunk_test.cpp
index 31c21723cd0..70c23dc4191 100644
--- a/searchlib/src/tests/docstore/file_chunk/file_chunk_test.cpp
+++ b/searchlib/src/tests/docstore/file_chunk/file_chunk_test.cpp
@@ -147,6 +147,7 @@ struct WriteFixture : public FixtureBase {
void updateLidMap(uint32_t docIdLimit) {
vespalib::LockGuard guard(updateLock);
chunk.updateLidMap(guard, lidObserver, serialNum, docIdLimit);
+ serialNum = chunk.getSerialNum();
}
};
@@ -180,6 +181,30 @@ TEST("require that docIdLimit is written to and read from idx file header")
}
}
+TEST("require that numlids are updated") {
+ {
+ WriteFixture f("tmp", 1000, false);
+ f.updateLidMap(1000);
+ EXPECT_EQUAL(0u, f.chunk.getNumLids());
+ f.append(1);
+ EXPECT_EQUAL(1u, f.chunk.getNumLids());
+ f.append(2);
+ f.append(3);
+ EXPECT_EQUAL(3u, f.chunk.getNumLids());
+ f.append(3);
+ EXPECT_EQUAL(4u, f.chunk.getNumLids());
+ f.flush();
+ }
+ {
+ WriteFixture f("tmp", 1000, true);
+ EXPECT_EQUAL(0u, f.chunk.getNumLids());
+ f.updateLidMap(1000);
+ EXPECT_EQUAL(4u, f.chunk.getNumLids());
+ f.append(7);
+ EXPECT_EQUAL(5u, f.chunk.getNumLids());
+ }
+}
+
template <typename FixtureType>
void
assertUpdateLidMap(FixtureType &f)
diff --git a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp
index c383358db9e..9a19a795076 100644
--- a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp
+++ b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp
@@ -22,6 +22,7 @@ using namespace search::docstore;
using namespace search;
using namespace vespalib::alloc;
using search::index::DummyFileHeaderContext;
+using search::test::DirectoryHandler;
class MyTlSyncer : public transactionlog::SyncProxy {
SerialNum _syncedTo;
@@ -202,15 +203,9 @@ TEST("test that DirectIOPadding works accordng to spec") {
}
#endif
-TEST("testGrowing") {
- FastOS_File::EmptyAndRemoveDirectory("growing");
- EXPECT_TRUE(FastOS_File::MakeDirectory("growing"));
- LogDataStore::Config config; //(100000, 0.1, 3.0, 0.2, 8, true, CompressionConfig::LZ4,
- // WriteableFileChunk::Config(CompressionConfig(CompressionConfig::LZ4, 9, 60), 1000));
- config.setMaxFileSize(100000).setMaxDiskBloatFactor(0.1).setMaxBucketSpread(3.0).setMinFileSizeFactor(0.2)
- .compactCompression({CompressionConfig::LZ4})
- .setFileConfig({{CompressionConfig::LZ4, 9, 60}, 1000});
- vespalib::ThreadStackExecutor executor(8, 128*1024);
+void verifyGrowing(const LogDataStore::Config & config, uint32_t minFiles, uint32_t maxFiles) {
+ DirectoryHandler tmpDir("growing");
+ vespalib::ThreadStackExecutor executor(4, 128*1024);
DummyFileHeaderContext fileHeaderContext;
MyTlSyncer tlSyncer;
{
@@ -243,30 +238,32 @@ TEST("testGrowing") {
datastore.compact(30000);
datastore.remove(31000, 0);
checkStats(datastore, 31000, 30000);
+ EXPECT_LESS_EQUAL(minFiles, datastore.getAllActiveFiles().size());
+ EXPECT_GREATER_EQUAL(maxFiles, datastore.getAllActiveFiles().size());
}
{
LogDataStore datastore(executor, "growing", config, GrowStrategy(),
TuneFileSummary(), fileHeaderContext, tlSyncer, nullptr);
checkStats(datastore, 30000, 30000);
+ EXPECT_LESS_EQUAL(minFiles, datastore.getAllActiveFiles().size());
+ EXPECT_GREATER_EQUAL(maxFiles, datastore.getAllActiveFiles().size());
}
-
- FastOS_File::EmptyAndRemoveDirectory("growing");
+}
+TEST("testGrowingChunkedBySize") {
+ LogDataStore::Config config;
+ config.setMaxFileSize(100000).setMaxDiskBloatFactor(0.1).setMaxBucketSpread(3.0).setMinFileSizeFactor(0.2)
+ .compactCompression({CompressionConfig::LZ4})
+ .setFileConfig({{CompressionConfig::LZ4, 9, 60}, 1000});
+ verifyGrowing(config, 40, 120);
}
-class TmpDirectory {
-public:
- TmpDirectory(const vespalib::string & dir) : _dir(dir)
- {
- FastOS_File::EmptyAndRemoveDirectory(_dir.c_str());
- ASSERT_TRUE(FastOS_File::MakeDirectory(_dir.c_str()));
- }
- ~TmpDirectory() {
- FastOS_File::EmptyAndRemoveDirectory(_dir.c_str());
- }
- const vespalib::string & getDir() const { return _dir; }
-private:
- vespalib::string _dir;
-};
+TEST("testGrowingChunkedByNumLids") {
+ LogDataStore::Config config;
+ config.setMaxNumLids(1000).setMaxDiskBloatFactor(0.1).setMaxBucketSpread(3.0).setMinFileSizeFactor(0.2)
+ .compactCompression({CompressionConfig::LZ4})
+ .setFileConfig({{CompressionConfig::LZ4, 9, 60}, 1000});
+ verifyGrowing(config,10, 10);
+}
void fetchAndTest(IDataStore & datastore, uint32_t lid, const void *a, size_t sz)
{
@@ -349,7 +346,7 @@ public:
~VisitStore();
IDataStore & getStore() { return _datastore; }
private:
- TmpDirectory _myDir;
+ DirectoryHandler _myDir;
LogDataStore::Config _config;
DummyFileHeaderContext _fileHeaderContext;
vespalib::ThreadStackExecutor _executor;
@@ -357,8 +354,7 @@ private:
LogDataStore _datastore;
};
-VisitStore::~VisitStore() {
-}
+VisitStore::~VisitStore() =default;
TEST("test visit cache does not cache empty ones and is able to access some backing store.") {
const char * A7 = "aAaAaAa";
@@ -500,7 +496,7 @@ private:
vespalib::hash_set<uint32_t> _actual;
bool _allowVisitCaching;
};
- TmpDirectory _myDir;
+ DirectoryHandler _myDir;
document::DocumentTypeRepo _repo;
LogDocumentStore::Config _config;
DummyFileHeaderContext _fileHeaderContext;
@@ -756,7 +752,7 @@ TEST("requireThatSyncTokenIsUpdatedAfterFlush") {
}
TEST("requireThatFlushTimeIsAvailableAfterFlush") {
- TmpDirectory testDir("flushtime");
+ DirectoryHandler testDir("flushtime");
vespalib::system_time before(vespalib::system_clock::now());
DummyFileHeaderContext fileHeaderContext;
LogDataStore::Config config;
diff --git a/searchlib/src/vespa/searchlib/docstore/chunk.cpp b/searchlib/src/vespa/searchlib/docstore/chunk.cpp
index 847cd3623c3..f60bf69d908 100644
--- a/searchlib/src/vespa/searchlib/docstore/chunk.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/chunk.cpp
@@ -57,7 +57,7 @@ Chunk::pack(uint64_t lastSerial, vespalib::DataBuffer & compressed, const Compre
Chunk::Chunk(uint32_t id, const Config & config) :
_id(id),
_lastSerial(static_cast<uint64_t>(-1l)),
- _format(new ChunkFormatV2(config.getMaxBytes()))
+ _format(std::make_unique<ChunkFormatV2>(config.getMaxBytes()))
{
_lids.reserve(4096/sizeof(Entry));
}
diff --git a/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp b/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp
index a5bed4c33ce..fbbdcff3c5d 100644
--- a/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp
@@ -81,33 +81,26 @@ ChunkFormat::deserialize(const void * buffer, size_t len, bool skipcrc)
uint32_t crc32(0);
raw >> crc32;
raw.rp(currPos);
- ChunkFormat::UP format;
if (version == ChunkFormatV1::VERSION) {
if (skipcrc) {
- format.reset(new ChunkFormatV1(raw));
+ return std::make_unique<ChunkFormatV1>(raw);
} else {
- format.reset(new ChunkFormatV1(raw, crc32));
+ return std::make_unique<ChunkFormatV1>(raw, crc32);
}
} else if (version == ChunkFormatV2::VERSION) {
if (skipcrc) {
- format.reset(new ChunkFormatV2(raw));
+ return std::make_unique<ChunkFormatV2>(raw);
} else {
- format.reset(new ChunkFormatV2(raw, crc32));
+ return std::make_unique<ChunkFormatV2>(raw, crc32);
}
} else {
throw ChunkException(make_string("Unknown version %d", version), VESPA_STRLOC);
}
- return format;
}
-ChunkFormat::ChunkFormat() :
- _dataBuf()
-{
-}
+ChunkFormat::ChunkFormat() = default;
-ChunkFormat::~ChunkFormat()
-{
-}
+ChunkFormat::~ChunkFormat() = default;
ChunkFormat::ChunkFormat(size_t maxSize) :
_dataBuf(maxSize)
diff --git a/searchlib/src/vespa/searchlib/docstore/filechunk.cpp b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp
index 84d64954e40..b4ef45187ee 100644
--- a/searchlib/src/vespa/searchlib/docstore/filechunk.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp
@@ -78,9 +78,10 @@ FileChunk::FileChunk(FileId fileId, NameId nameId, const vespalib::string & base
_dataFileName(createDatFileName(_name)),
_idxFileName(createIdxFileName(_name)),
_chunkInfo(),
+ _lastPersistedSerialNum(0),
_dataHeaderLen(0u),
_idxHeaderLen(0u),
- _lastPersistedSerialNum(0),
+ _numLids(0),
_docIdLimit(std::numeric_limits<uint32_t>::max()),
_modificationTime()
{
@@ -228,6 +229,7 @@ FileChunk::updateLidMap(const LockGuard &guard, ISetLid &ds, uint64_t serialNum,
globalBucketMap.recordLid(bucketId);
}
ds.setLid(guard, lidMeta.getLid(), LidInfo(getFileId().getId(), _chunkInfo.size(), lidMeta.size()));
+ _numLids++;
} else {
remove(lidMeta.getLid(), lidMeta.size());
}
diff --git a/searchlib/src/vespa/searchlib/docstore/filechunk.h b/searchlib/src/vespa/searchlib/docstore/filechunk.h
index 0f139f507c6..b68db801d60 100644
--- a/searchlib/src/vespa/searchlib/docstore/filechunk.h
+++ b/searchlib/src/vespa/searchlib/docstore/filechunk.h
@@ -154,6 +154,7 @@ public:
FileId getFileId() const { return _fileId; }
NameId getNameId() const { return _nameId; }
+ uint32_t getNumLids() const { return _numLids; }
size_t getBloatCount() const { return _erasedCount; }
size_t getAddedBytes() const { return _addedBytes; }
size_t getErasedBytes() const { return _erasedBytes; }
@@ -245,9 +246,10 @@ protected:
vespalib::string _dataFileName;
vespalib::string _idxFileName;
ChunkInfoVector _chunkInfo;
+ uint64_t _lastPersistedSerialNum;
uint32_t _dataHeaderLen;
uint32_t _idxHeaderLen;
- uint64_t _lastPersistedSerialNum;
+ uint32_t _numLids;
uint32_t _docIdLimit; // Limit when the file was created. Stored in idx file header.
vespalib::system_time _modificationTime;
};
diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
index 91de6ba4276..ae0696d5824 100644
--- a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
@@ -16,6 +16,11 @@ LOG_SETUP(".searchlib.docstore.logdatastore");
namespace search {
+namespace {
+ constexpr size_t DEFAULT_MAX_FILESIZE = 1000000000ul;
+ constexpr uint32_t DEFAULT_MAX_LIDS_PER_FILE = 32 * 1024 * 1024;
+}
+
using vespalib::LockGuard;
using vespalib::getLastErrorString;
using vespalib::getErrorString;
@@ -30,10 +35,11 @@ using docstore::BucketCompacter;
using namespace std::literals;
LogDataStore::Config::Config()
- : _maxFileSize(1000000000ul),
+ : _maxFileSize(DEFAULT_MAX_FILESIZE),
_maxDiskBloatFactor(0.2),
_maxBucketSpread(2.5),
_minFileSizeFactor(0.2),
+ _maxNumLids(DEFAULT_MAX_LIDS_PER_FILE),
_skipCrcOnRead(false),
_compactCompression(CompressionConfig::LZ4),
_fileConfig()
@@ -202,9 +208,9 @@ LogDataStore::requireSpace(LockGuard guard, WriteableFileChunk & active)
{
assert(active.getFileId() == getActiveFileId(guard));
size_t oldSz(active.getDiskFootprint());
- LOG(spam, "Checking file %s size %ld < %ld",
- active.getName().c_str(), oldSz, _config.getMaxFileSize());
- if (oldSz > _config.getMaxFileSize()) {
+ LOG(spam, "Checking file %s size %ld < %ld AND #lids %u < %u",
+ active.getName().c_str(), oldSz, _config.getMaxFileSize(), active.getNumLids(), _config.getMaxNumLids());
+ if ((oldSz > _config.getMaxFileSize()) || (active.getNumLids() >= _config.getMaxNumLids())) {
FileId fileId = allocateFileId(guard);
setNewFileChunk(guard, createWritableFile(fileId, active.getSerialNum()));
setActive(guard, fileId);
@@ -220,9 +226,9 @@ LogDataStore::requireSpace(LockGuard guard, WriteableFileChunk & active)
active.flushPendingChunks(active.getSerialNum());
active.freeze();
// TODO: Delay create of new file
- LOG(debug, "Closed file %s of size %ld due to maxsize of %ld reached. Bloat is %ld",
- active.getName().c_str(), active.getDiskFootprint(),
- _config.getMaxFileSize(), active.getDiskBloat());
+ LOG(debug, "Closed file %s of size %ld and %u lids due to maxsize of %ld or maxlids %u reached. Bloat is %ld",
+ active.getName().c_str(), active.getDiskFootprint(), active.getNumLids(),
+ _config.getMaxFileSize(), _config.getMaxNumLids(), active.getDiskBloat());
}
}
diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.h b/searchlib/src/vespa/searchlib/docstore/logdatastore.h
index 39b2c2b7100..7bd55599611 100644
--- a/searchlib/src/vespa/searchlib/docstore/logdatastore.h
+++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.h
@@ -40,6 +40,7 @@ public:
Config();
Config & setMaxFileSize(size_t v) { _maxFileSize = v; return *this; }
+ Config & setMaxNumLids(size_t v) { _maxNumLids = v; return *this; }
Config & setMaxDiskBloatFactor(double v) { _maxDiskBloatFactor = v; return *this; }
Config & setMaxBucketSpread(double v) { _maxBucketSpread = v; return *this; }
Config & setMinFileSizeFactor(double v) { _minFileSizeFactor = v; return *this; }
@@ -51,6 +52,7 @@ public:
double getMaxDiskBloatFactor() const { return _maxDiskBloatFactor; }
double getMaxBucketSpread() const { return _maxBucketSpread; }
double getMinFileSizeFactor() const { return _minFileSizeFactor; }
+ uint32_t getMaxNumLids() const { return _maxNumLids; }
bool crcOnReadDisabled() const { return _skipCrcOnRead; }
const CompressionConfig & compactCompression() const { return _compactCompression; }
@@ -64,6 +66,7 @@ public:
double _maxDiskBloatFactor;
double _maxBucketSpread;
double _minFileSizeFactor;
+ uint32_t _maxNumLids;
bool _skipCrcOnRead;
CompressionConfig _compactCompression;
WriteableFileChunk::Config _fileConfig;
diff --git a/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp b/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp
index 13de5687ee0..5e595d0bb14 100644
--- a/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp
@@ -92,7 +92,7 @@ StoreByBucket::drain(IWrite & drainer)
chunks.resize(_chunks.size());
for (const auto & it : _chunks) {
ConstBufferRef buf(it.second);
- chunks[it.first].reset(new Chunk(it.first, buf.data(), buf.size()));
+ chunks[it.first] = std::make_unique<Chunk>(it.first, buf.data(), buf.size());
}
_chunks.clear();
for (auto & it : _where) {
diff --git a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp
index d12a3a54a93..3517595d00a 100644
--- a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp
@@ -708,6 +708,7 @@ WriteableFileChunk::append(uint64_t serialNum, uint32_t lid, const void * buffer
assert(serialNum >= _serialNum);
_serialNum = serialNum;
_addedBytes += adjustSize(len);
+ _numLids++;
size_t oldSz(_active->size());
LidMeta lm = _active->append(lid, buffer, len);
setDiskFootprint(FileChunk::getDiskFootprint() - oldSz + _active->size());
diff --git a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h
index 6d870cae019..65dd822ac1f 100644
--- a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h
+++ b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h
@@ -47,7 +47,7 @@ public:
uint32_t docIdLimit, const Config & config,
const TuneFileSummary &tune, const common::FileHeaderContext &fileHeaderContext,
const IBucketizer * bucketizer, bool crcOnReadDisabled);
- ~WriteableFileChunk();
+ ~WriteableFileChunk() override;
ssize_t read(uint32_t lid, SubChunkId chunk, vespalib::DataBuffer & buffer) const override;
void read(LidInfoWithLidV::const_iterator begin, size_t count, IBufferVisitor & visitor) const override;
@@ -128,8 +128,8 @@ private:
bool _writeTaskIsRunning;
vespalib::Monitor _writeMonitor;
ProcessedChunkQ _writeQ;
- vespalib::Executor & _executor;
- ProcessedChunkMap _orderedChunks;
+ vespalib::Executor & _executor;
+ ProcessedChunkMap _orderedChunks;
BucketDensityComputer _bucketMap;
};
diff --git a/searchlib/src/vespa/searchlib/test/directory_handler.h b/searchlib/src/vespa/searchlib/test/directory_handler.h
index 72f9cf83d85..66e5a710870 100644
--- a/searchlib/src/vespa/searchlib/test/directory_handler.h
+++ b/searchlib/src/vespa/searchlib/test/directory_handler.h
@@ -5,8 +5,7 @@
#include <vespa/vespalib/io/fileutil.h>
#include <vespa/vespalib/stllike/string.h>
-namespace search {
-namespace test {
+namespace search::test {
class DirectoryHandler
{
@@ -17,11 +16,8 @@ private:
public:
DirectoryHandler(const vespalib::string &mkdir)
- : _mkdir(mkdir),
- _rmdir(mkdir),
- _cleanup(true)
+ : DirectoryHandler(mkdir, mkdir)
{
- vespalib::mkdir(_mkdir);
}
DirectoryHandler(const vespalib::string &mkdir,
const vespalib::string &rmdir)
@@ -37,8 +33,7 @@ public:
}
}
void cleanup(bool v) { _cleanup = v; }
+ const vespalib::string & getDir() const { return _mkdir; }
};
}
-}
-