diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2017-08-04 16:35:30 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-08-04 16:35:30 +0200 |
commit | 89b3dac6831e9c48d031029b34d1c83831c96239 (patch) | |
tree | 13d3ba6ae16e28a77cb454d40b7ae0083d1190fe | |
parent | e0736f4fa8f56b7d7158bebface912c3783c4c38 (diff) | |
parent | 65c64037c9c233d28af557f9943a9bce99487361 (diff) |
Merge pull request #3049 from yahoo/balder/cleanup-partially-compacted-files
Clean up incompletely compacted files at startup.
6 files changed, 131 insertions, 27 deletions
diff --git a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp index 57763b931f5..407bf3fad98 100644 --- a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp +++ b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp @@ -323,6 +323,23 @@ TEST("testThatEmptyIdxFilesAndDanglingDatFilesAreRemoved") { EXPECT_EQUAL(datastore.getDiskHeaderFootprint() + 94016u, datastore.getDiskFootprint()); } +TEST("testThatIncompleteCompactedFilesAreRemoved") { + LogDataStore::Config config; + DummyFileHeaderContext fileHeaderContext; + vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024); + MyTlSyncer tlSyncer; + LogDataStore datastore(executor, "incompletecompact-test", config, + GrowStrategy(), TuneFileSummary(), + fileHeaderContext, tlSyncer, NULL); + EXPECT_EQUAL(354ul, datastore.lastSyncToken()); + EXPECT_EQUAL(3*(4096u + 480u), datastore.getDiskHeaderFootprint()); + LogDataStore::NameIdSet files = datastore.getAllActiveFiles(); + EXPECT_EQUAL(3u, files.size()); + EXPECT_TRUE(files.find(FileChunk::NameId(1422358701368384000)) != files.end()); + EXPECT_TRUE(files.find(FileChunk::NameId(2000000000000000000)) != files.end()); + EXPECT_TRUE(files.find(FileChunk::NameId(2422358701368384000)) != files.end()); +} + class VisitStore { public: VisitStore() : @@ -953,6 +970,30 @@ TEST_F("require that lid space can be shrunk only after read guards are deleted" EXPECT_EQUAL(8u, f.store.getEstimatedShrinkLidSpaceGain()); } +LogDataStore::NameIdSet create(std::vector<size_t> list) { + LogDataStore::NameIdSet l; + for (size_t id : list) { + l.emplace(id); + } + return l; +} + +TEST("require that findIncompleteCompactedFiles does expected filtering") { + EXPECT_TRUE(LogDataStore::findIncompleteCompactedFiles(create({1,3,100,200,202,204})).empty()); + LogDataStore::NameIdSet toRemove = LogDataStore::findIncompleteCompactedFiles(create({1,3,100,200,201,204})); + EXPECT_EQUAL(1u, toRemove.size()); + EXPECT_TRUE(toRemove.find(FileChunk::NameId(201)) != toRemove.end()); + toRemove = LogDataStore::findIncompleteCompactedFiles(create({1,2,4,100,200,201,204,205})); + EXPECT_EQUAL(3u, toRemove.size()); + EXPECT_TRUE(toRemove.find(FileChunk::NameId(2)) != toRemove.end()); + EXPECT_TRUE(toRemove.find(FileChunk::NameId(201)) != toRemove.end()); + EXPECT_TRUE(toRemove.find(FileChunk::NameId(205)) != toRemove.end()); + + EXPECT_EXCEPTION(LogDataStore::findIncompleteCompactedFiles(create({1,3,100,200,201,202,204})).empty(), + vespalib::IllegalStateException, "3 consecutive files {200, 201, 202}. Impossible"); + +} + TEST_MAIN() { DummyFileHeaderContext::setCreator("logdatastore_test"); TEST_RUN_ALL(); diff --git a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.sh b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.sh index 7a42489331b..45b3c804014 100755 --- a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.sh +++ b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.sh @@ -12,8 +12,19 @@ cp -a $SOURCE_DIRECTORY/bug-7257706/*.dat dangling-test/ cp -a $SOURCE_DIRECTORY/bug-7257706/*.idx dangling-test/ cp -a $SOURCE_DIRECTORY/dangling/*.dat dangling-test/ cp -a $SOURCE_DIRECTORY/dangling/*.idx dangling-test/ + +mkdir -p incompletecompact-test +cp -a $SOURCE_DIRECTORY/bug-7257706/1422358701368384000.dat incompletecompact-test/ +cp -a $SOURCE_DIRECTORY/bug-7257706/1422358701368384000.idx incompletecompact-test/ +cp -a $SOURCE_DIRECTORY/bug-7257706/1422358701368384000.dat incompletecompact-test/2000000000000000000.dat +cp -a $SOURCE_DIRECTORY/bug-7257706/1422358701368384000.idx incompletecompact-test/2000000000000000000.idx +cp -a $SOURCE_DIRECTORY/bug-7257706/1422358701368384000.dat incompletecompact-test/2000000000000000001.dat +cp -a $SOURCE_DIRECTORY/bug-7257706/1422358701368384000.idx incompletecompact-test/2000000000000000001.idx +cp -a $SOURCE_DIRECTORY/bug-7257706/1422358701368384000.dat incompletecompact-test/2422358701368384000.dat +cp -a $SOURCE_DIRECTORY/bug-7257706/1422358701368384000.idx incompletecompact-test/2422358701368384000.idx + truncate --size 3830 bug-7257706-truncated/1422358701368384000.idx fail=0 VESPA_LOG_TARGET=file:vlog2.txt $VALGRIND ./searchlib_logdatastore_test_app || fail=1 -rm -rf bug-7257706-truncated dangling-test +rm -rf bug-7257706-truncated dangling-test incompletecompact-test exit $fail diff --git a/searchlib/src/vespa/searchlib/docstore/filechunk.cpp b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp index cd0a839475c..0f753b4914c 100644 --- a/searchlib/src/vespa/searchlib/docstore/filechunk.cpp +++ b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp @@ -526,7 +526,16 @@ FileChunk::isIdxFileEmpty(const vespalib::string & name) void FileChunk::eraseIdxFile(const vespalib::string & name) { - vespalib::string fileName(name + ".idx"); + vespalib::string fileName(createIdxFileName(name)); + if ( ! FastOS_File::Delete(fileName.c_str())) { + throw std::runtime_error(make_string("Failed to delete '%s'", fileName.c_str())); + } +} + +void +FileChunk::eraseDatFile(const vespalib::string & name) +{ + vespalib::string fileName(createDatFileName(name)); if ( ! FastOS_File::Delete(fileName.c_str())) { throw std::runtime_error(make_string("Failed to delete '%s'", fileName.c_str())); } diff --git a/searchlib/src/vespa/searchlib/docstore/filechunk.h b/searchlib/src/vespa/searchlib/docstore/filechunk.h index df2fb29ecb1..326a185f9de 100644 --- a/searchlib/src/vespa/searchlib/docstore/filechunk.h +++ b/searchlib/src/vespa/searchlib/docstore/filechunk.h @@ -192,6 +192,7 @@ public: static uint64_t readDataHeader(FileRandRead &idxFile); static bool isIdxFileEmpty(const vespalib::string & name); static void eraseIdxFile(const vespalib::string & name); + static void eraseDatFile(const vespalib::string & name); static vespalib::string createIdxFileName(const vespalib::string & name); static vespalib::string createDatFileName(const vespalib::string & name); private: diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp index d20055e37d5..024e64c0bdd 100644 --- a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp +++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp @@ -21,6 +21,7 @@ using vespalib::getLastErrorString; using vespalib::getErrorString; using vespalib::GenerationHandler; using vespalib::make_string; +using vespalib::IllegalStateException; using common::FileHeaderContext; using std::runtime_error; using document::BucketId; @@ -656,7 +657,8 @@ lsSingleFile(const vespalib::string & fileName) } -vespalib::string LogDataStore::ls(const NameIdSet & partList) +vespalib::string +LogDataStore::ls(const NameIdSet & partList) { vespalib::string s; for (auto it(++partList.begin()), mt(partList.end()); it != mt; ++it) { @@ -751,8 +753,9 @@ LogDataStore::preload() NameIdSet partList = scanDir(getBaseDir(), ".idx"); NameIdSet datPartList = scanDir(getBaseDir(), ".dat"); - partList = eraseEmptyIdxFiles(partList); + partList = eraseEmptyIdxFiles(std::move(partList)); eraseDanglingDatFiles(partList, datPartList); + partList = eraseIncompleteCompactedFiles(std::move(partList)); if (!partList.empty()) { verifyModificationTime(partList); @@ -785,7 +788,7 @@ LogDataStore::getLastFileChunkDocIdLimit() } LogDataStore::NameIdSet -LogDataStore::eraseEmptyIdxFiles(const NameIdSet &partList) +LogDataStore::eraseEmptyIdxFiles(NameIdSet partList) { NameIdSet nonEmptyIdxPartList; for (const auto & part : partList) { @@ -800,6 +803,52 @@ LogDataStore::eraseEmptyIdxFiles(const NameIdSet &partList) return nonEmptyIdxPartList; } +LogDataStore::NameIdSet +LogDataStore::findIncompleteCompactedFiles(const NameIdSet & partList) { + NameIdSet incomplete; + if ( !partList.empty()) { + NameIdSet::const_iterator it = partList.begin(); + for (FileChunk::NameId prev = *it++; it != partList.end(); it++) { + if (prev.next() == *it) { + if (!incomplete.empty() && (*incomplete.rbegin() == prev)) { + throw IllegalStateException(make_string("3 consecutive files {%ld, %ld, %ld}. Impossible", + prev.getId()-1, prev.getId(), it->getId())); + } + incomplete.insert(*it); + } + prev = *it; + } + } + return incomplete; +} + +LogDataStore::NameIdSet +LogDataStore::getAllActiveFiles() const { + NameIdSet files; + vespalib::LockGuard guard(_updateLock); + for (const auto & fc : _fileChunks) { + if (fc) { + files.insert(fc->getNameId()); + } + } + return files; +} + +LogDataStore::NameIdSet +LogDataStore::eraseIncompleteCompactedFiles(NameIdSet partList) +{ + NameIdSet toRemove = findIncompleteCompactedFiles(partList); + for (NameId toBeRemoved : toRemove) { + partList.erase(toBeRemoved); + vespalib::string name(createFileName(toBeRemoved)); + LOG(warning, "'%s' has been detected as an incompletely compacted file. Erasing it.", name.c_str()); + FileChunk::eraseIdxFile(name); + FileChunk::eraseDatFile(name); + } + + return std::move(partList); +} + void LogDataStore::eraseDanglingDatFiles(const NameIdSet &partList, const NameIdSet &datPartList) { @@ -828,13 +877,9 @@ LogDataStore::eraseDanglingDatFiles(const NameIdSet &partList, const NameIdSet & const char *s = name.c_str(); throw runtime_error(make_string( "Missing file '%s.dat', found '%s.idx'", s, s)); } else if (dbase < ibase) { - vespalib::string name(createDatFileName(dbase)); - const char *s = name.c_str(); - LOG(warning, "Removing dangling file '%s'", s); - if (!FastOS_File::Delete(s)) { - vespalib::string e = getErrorString(errno); - throw runtime_error(make_string("Error erasing dangling file '%s'. Error is '%s'", s, e.c_str())); - } + vespalib::string fileName = createFileName(dbase); + LOG(warning, "Removing dangling file '%s'", FileChunk::createDatFileName(fileName).c_str()); + FileChunk::eraseDatFile(fileName); ++di; } else { ++ii; diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.h b/searchlib/src/vespa/searchlib/docstore/logdatastore.h index a0f522e25d3..0ca658ba803 100644 --- a/searchlib/src/vespa/searchlib/docstore/logdatastore.h +++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.h @@ -31,6 +31,7 @@ private: using NameId = FileChunk::NameId; using FileId = FileChunk::FileId; public: + using NameIdSet = std::set<NameId>; using LockGuard = vespalib::LockGuard; using CompressionConfig = document::CompressionConfig; class Config { @@ -80,7 +81,6 @@ public: const WriteableFileChunk::Config & getFileConfig() const { return _fileConfig; } private: size_t _maxFileSize; - size_t _maxEntriesPerFile; double _maxDiskBloatFactor; double _maxBucketSpread; double _minFileSizeFactor; @@ -190,20 +190,17 @@ public: return _active; } - virtual DataStoreStorageStats getStorageStats() const override; + DataStoreStorageStats getStorageStats() const override; + MemoryUsage getMemoryUsage() const override; + std::vector<DataStoreFileChunkStats> getFileChunkStats() const override; - virtual MemoryUsage getMemoryUsage() const override; + void compactLidSpace(uint32_t wantedDocLidLimit) override; + bool canShrinkLidSpace() const override; + size_t getEstimatedShrinkLidSpaceGain() const override; + void shrinkLidSpace() override; + static NameIdSet findIncompleteCompactedFiles(const NameIdSet & partList); - virtual std::vector<DataStoreFileChunkStats> - getFileChunkStats() const override; - - /** - * Implements common::ICompactableLidSpace - */ - virtual void compactLidSpace(uint32_t wantedDocLidLimit) override; - virtual bool canShrinkLidSpace() const override; - virtual size_t getEstimatedShrinkLidSpaceGain() const override; - virtual void shrinkLidSpace() override; + NameIdSet getAllActiveFiles() const; private: class WrapVisitor; @@ -218,7 +215,6 @@ private: void compactWorst(); void compactFile(FileId chunkId); - typedef std::set<NameId> NameIdSet; typedef attribute::RcuVector<uint64_t> LidInfoVector; typedef std::vector<FileChunk::UP> FileChunkVector; @@ -228,7 +224,8 @@ private: void verifyModificationTime(const NameIdSet & partList); void eraseDanglingDatFiles(const NameIdSet &partList, const NameIdSet &datPartList); - NameIdSet eraseEmptyIdxFiles(const NameIdSet &partList); + NameIdSet eraseEmptyIdxFiles(NameIdSet partList); + NameIdSet eraseIncompleteCompactedFiles(NameIdSet partList); void internalFlushAll(); NameIdSet scanDir(const vespalib::string &dir, const vespalib::string &suffix); |