aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@broadpark.no>2017-08-04 16:35:30 +0200
committerGitHub <noreply@github.com>2017-08-04 16:35:30 +0200
commit89b3dac6831e9c48d031029b34d1c83831c96239 (patch)
tree13d3ba6ae16e28a77cb454d40b7ae0083d1190fe
parente0736f4fa8f56b7d7158bebface912c3783c4c38 (diff)
parent65c64037c9c233d28af557f9943a9bce99487361 (diff)
Merge pull request #3049 from yahoo/balder/cleanup-partially-compacted-files
Clean up incompletely compacted files at startup.
-rw-r--r--searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp41
-rwxr-xr-xsearchlib/src/tests/docstore/logdatastore/logdatastore_test.sh13
-rw-r--r--searchlib/src/vespa/searchlib/docstore/filechunk.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/docstore/filechunk.h1
-rw-r--r--searchlib/src/vespa/searchlib/docstore/logdatastore.cpp65
-rw-r--r--searchlib/src/vespa/searchlib/docstore/logdatastore.h27
6 files changed, 131 insertions, 27 deletions
diff --git a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp
index 57763b931f5..407bf3fad98 100644
--- a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp
+++ b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp
@@ -323,6 +323,23 @@ TEST("testThatEmptyIdxFilesAndDanglingDatFilesAreRemoved") {
EXPECT_EQUAL(datastore.getDiskHeaderFootprint() + 94016u, datastore.getDiskFootprint());
}
+TEST("testThatIncompleteCompactedFilesAreRemoved") {
+ LogDataStore::Config config;
+ DummyFileHeaderContext fileHeaderContext;
+ vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024);
+ MyTlSyncer tlSyncer;
+ LogDataStore datastore(executor, "incompletecompact-test", config,
+ GrowStrategy(), TuneFileSummary(),
+ fileHeaderContext, tlSyncer, NULL);
+ EXPECT_EQUAL(354ul, datastore.lastSyncToken());
+ EXPECT_EQUAL(3*(4096u + 480u), datastore.getDiskHeaderFootprint());
+ LogDataStore::NameIdSet files = datastore.getAllActiveFiles();
+ EXPECT_EQUAL(3u, files.size());
+ EXPECT_TRUE(files.find(FileChunk::NameId(1422358701368384000)) != files.end());
+ EXPECT_TRUE(files.find(FileChunk::NameId(2000000000000000000)) != files.end());
+ EXPECT_TRUE(files.find(FileChunk::NameId(2422358701368384000)) != files.end());
+}
+
class VisitStore {
public:
VisitStore() :
@@ -953,6 +970,30 @@ TEST_F("require that lid space can be shrunk only after read guards are deleted"
EXPECT_EQUAL(8u, f.store.getEstimatedShrinkLidSpaceGain());
}
+LogDataStore::NameIdSet create(std::vector<size_t> list) {
+ LogDataStore::NameIdSet l;
+ for (size_t id : list) {
+ l.emplace(id);
+ }
+ return l;
+}
+
+TEST("require that findIncompleteCompactedFiles does expected filtering") {
+ EXPECT_TRUE(LogDataStore::findIncompleteCompactedFiles(create({1,3,100,200,202,204})).empty());
+ LogDataStore::NameIdSet toRemove = LogDataStore::findIncompleteCompactedFiles(create({1,3,100,200,201,204}));
+ EXPECT_EQUAL(1u, toRemove.size());
+ EXPECT_TRUE(toRemove.find(FileChunk::NameId(201)) != toRemove.end());
+ toRemove = LogDataStore::findIncompleteCompactedFiles(create({1,2,4,100,200,201,204,205}));
+ EXPECT_EQUAL(3u, toRemove.size());
+ EXPECT_TRUE(toRemove.find(FileChunk::NameId(2)) != toRemove.end());
+ EXPECT_TRUE(toRemove.find(FileChunk::NameId(201)) != toRemove.end());
+ EXPECT_TRUE(toRemove.find(FileChunk::NameId(205)) != toRemove.end());
+
+ EXPECT_EXCEPTION(LogDataStore::findIncompleteCompactedFiles(create({1,3,100,200,201,202,204})).empty(),
+ vespalib::IllegalStateException, "3 consecutive files {200, 201, 202}. Impossible");
+
+}
+
TEST_MAIN() {
DummyFileHeaderContext::setCreator("logdatastore_test");
TEST_RUN_ALL();
diff --git a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.sh b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.sh
index 7a42489331b..45b3c804014 100755
--- a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.sh
+++ b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.sh
@@ -12,8 +12,19 @@ cp -a $SOURCE_DIRECTORY/bug-7257706/*.dat dangling-test/
cp -a $SOURCE_DIRECTORY/bug-7257706/*.idx dangling-test/
cp -a $SOURCE_DIRECTORY/dangling/*.dat dangling-test/
cp -a $SOURCE_DIRECTORY/dangling/*.idx dangling-test/
+
+mkdir -p incompletecompact-test
+cp -a $SOURCE_DIRECTORY/bug-7257706/1422358701368384000.dat incompletecompact-test/
+cp -a $SOURCE_DIRECTORY/bug-7257706/1422358701368384000.idx incompletecompact-test/
+cp -a $SOURCE_DIRECTORY/bug-7257706/1422358701368384000.dat incompletecompact-test/2000000000000000000.dat
+cp -a $SOURCE_DIRECTORY/bug-7257706/1422358701368384000.idx incompletecompact-test/2000000000000000000.idx
+cp -a $SOURCE_DIRECTORY/bug-7257706/1422358701368384000.dat incompletecompact-test/2000000000000000001.dat
+cp -a $SOURCE_DIRECTORY/bug-7257706/1422358701368384000.idx incompletecompact-test/2000000000000000001.idx
+cp -a $SOURCE_DIRECTORY/bug-7257706/1422358701368384000.dat incompletecompact-test/2422358701368384000.dat
+cp -a $SOURCE_DIRECTORY/bug-7257706/1422358701368384000.idx incompletecompact-test/2422358701368384000.idx
+
truncate --size 3830 bug-7257706-truncated/1422358701368384000.idx
fail=0
VESPA_LOG_TARGET=file:vlog2.txt $VALGRIND ./searchlib_logdatastore_test_app || fail=1
-rm -rf bug-7257706-truncated dangling-test
+rm -rf bug-7257706-truncated dangling-test incompletecompact-test
exit $fail
diff --git a/searchlib/src/vespa/searchlib/docstore/filechunk.cpp b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp
index cd0a839475c..0f753b4914c 100644
--- a/searchlib/src/vespa/searchlib/docstore/filechunk.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp
@@ -526,7 +526,16 @@ FileChunk::isIdxFileEmpty(const vespalib::string & name)
void
FileChunk::eraseIdxFile(const vespalib::string & name)
{
- vespalib::string fileName(name + ".idx");
+ vespalib::string fileName(createIdxFileName(name));
+ if ( ! FastOS_File::Delete(fileName.c_str())) {
+ throw std::runtime_error(make_string("Failed to delete '%s'", fileName.c_str()));
+ }
+}
+
+void
+FileChunk::eraseDatFile(const vespalib::string & name)
+{
+ vespalib::string fileName(createDatFileName(name));
if ( ! FastOS_File::Delete(fileName.c_str())) {
throw std::runtime_error(make_string("Failed to delete '%s'", fileName.c_str()));
}
diff --git a/searchlib/src/vespa/searchlib/docstore/filechunk.h b/searchlib/src/vespa/searchlib/docstore/filechunk.h
index df2fb29ecb1..326a185f9de 100644
--- a/searchlib/src/vespa/searchlib/docstore/filechunk.h
+++ b/searchlib/src/vespa/searchlib/docstore/filechunk.h
@@ -192,6 +192,7 @@ public:
static uint64_t readDataHeader(FileRandRead &idxFile);
static bool isIdxFileEmpty(const vespalib::string & name);
static void eraseIdxFile(const vespalib::string & name);
+ static void eraseDatFile(const vespalib::string & name);
static vespalib::string createIdxFileName(const vespalib::string & name);
static vespalib::string createDatFileName(const vespalib::string & name);
private:
diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
index d20055e37d5..024e64c0bdd 100644
--- a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
@@ -21,6 +21,7 @@ using vespalib::getLastErrorString;
using vespalib::getErrorString;
using vespalib::GenerationHandler;
using vespalib::make_string;
+using vespalib::IllegalStateException;
using common::FileHeaderContext;
using std::runtime_error;
using document::BucketId;
@@ -656,7 +657,8 @@ lsSingleFile(const vespalib::string & fileName)
}
-vespalib::string LogDataStore::ls(const NameIdSet & partList)
+vespalib::string
+LogDataStore::ls(const NameIdSet & partList)
{
vespalib::string s;
for (auto it(++partList.begin()), mt(partList.end()); it != mt; ++it) {
@@ -751,8 +753,9 @@ LogDataStore::preload()
NameIdSet partList = scanDir(getBaseDir(), ".idx");
NameIdSet datPartList = scanDir(getBaseDir(), ".dat");
- partList = eraseEmptyIdxFiles(partList);
+ partList = eraseEmptyIdxFiles(std::move(partList));
eraseDanglingDatFiles(partList, datPartList);
+ partList = eraseIncompleteCompactedFiles(std::move(partList));
if (!partList.empty()) {
verifyModificationTime(partList);
@@ -785,7 +788,7 @@ LogDataStore::getLastFileChunkDocIdLimit()
}
LogDataStore::NameIdSet
-LogDataStore::eraseEmptyIdxFiles(const NameIdSet &partList)
+LogDataStore::eraseEmptyIdxFiles(NameIdSet partList)
{
NameIdSet nonEmptyIdxPartList;
for (const auto & part : partList) {
@@ -800,6 +803,52 @@ LogDataStore::eraseEmptyIdxFiles(const NameIdSet &partList)
return nonEmptyIdxPartList;
}
+LogDataStore::NameIdSet
+LogDataStore::findIncompleteCompactedFiles(const NameIdSet & partList) {
+ NameIdSet incomplete;
+ if ( !partList.empty()) {
+ NameIdSet::const_iterator it = partList.begin();
+ for (FileChunk::NameId prev = *it++; it != partList.end(); it++) {
+ if (prev.next() == *it) {
+ if (!incomplete.empty() && (*incomplete.rbegin() == prev)) {
+ throw IllegalStateException(make_string("3 consecutive files {%ld, %ld, %ld}. Impossible",
+ prev.getId()-1, prev.getId(), it->getId()));
+ }
+ incomplete.insert(*it);
+ }
+ prev = *it;
+ }
+ }
+ return incomplete;
+}
+
+LogDataStore::NameIdSet
+LogDataStore::getAllActiveFiles() const {
+ NameIdSet files;
+ vespalib::LockGuard guard(_updateLock);
+ for (const auto & fc : _fileChunks) {
+ if (fc) {
+ files.insert(fc->getNameId());
+ }
+ }
+ return files;
+}
+
+LogDataStore::NameIdSet
+LogDataStore::eraseIncompleteCompactedFiles(NameIdSet partList)
+{
+ NameIdSet toRemove = findIncompleteCompactedFiles(partList);
+ for (NameId toBeRemoved : toRemove) {
+ partList.erase(toBeRemoved);
+ vespalib::string name(createFileName(toBeRemoved));
+ LOG(warning, "'%s' has been detected as an incompletely compacted file. Erasing it.", name.c_str());
+ FileChunk::eraseIdxFile(name);
+ FileChunk::eraseDatFile(name);
+ }
+
+ return std::move(partList);
+}
+
void
LogDataStore::eraseDanglingDatFiles(const NameIdSet &partList, const NameIdSet &datPartList)
{
@@ -828,13 +877,9 @@ LogDataStore::eraseDanglingDatFiles(const NameIdSet &partList, const NameIdSet &
const char *s = name.c_str();
throw runtime_error(make_string( "Missing file '%s.dat', found '%s.idx'", s, s));
} else if (dbase < ibase) {
- vespalib::string name(createDatFileName(dbase));
- const char *s = name.c_str();
- LOG(warning, "Removing dangling file '%s'", s);
- if (!FastOS_File::Delete(s)) {
- vespalib::string e = getErrorString(errno);
- throw runtime_error(make_string("Error erasing dangling file '%s'. Error is '%s'", s, e.c_str()));
- }
+ vespalib::string fileName = createFileName(dbase);
+ LOG(warning, "Removing dangling file '%s'", FileChunk::createDatFileName(fileName).c_str());
+ FileChunk::eraseDatFile(fileName);
++di;
} else {
++ii;
diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.h b/searchlib/src/vespa/searchlib/docstore/logdatastore.h
index a0f522e25d3..0ca658ba803 100644
--- a/searchlib/src/vespa/searchlib/docstore/logdatastore.h
+++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.h
@@ -31,6 +31,7 @@ private:
using NameId = FileChunk::NameId;
using FileId = FileChunk::FileId;
public:
+ using NameIdSet = std::set<NameId>;
using LockGuard = vespalib::LockGuard;
using CompressionConfig = document::CompressionConfig;
class Config {
@@ -80,7 +81,6 @@ public:
const WriteableFileChunk::Config & getFileConfig() const { return _fileConfig; }
private:
size_t _maxFileSize;
- size_t _maxEntriesPerFile;
double _maxDiskBloatFactor;
double _maxBucketSpread;
double _minFileSizeFactor;
@@ -190,20 +190,17 @@ public:
return _active;
}
- virtual DataStoreStorageStats getStorageStats() const override;
+ DataStoreStorageStats getStorageStats() const override;
+ MemoryUsage getMemoryUsage() const override;
+ std::vector<DataStoreFileChunkStats> getFileChunkStats() const override;
- virtual MemoryUsage getMemoryUsage() const override;
+ void compactLidSpace(uint32_t wantedDocLidLimit) override;
+ bool canShrinkLidSpace() const override;
+ size_t getEstimatedShrinkLidSpaceGain() const override;
+ void shrinkLidSpace() override;
+ static NameIdSet findIncompleteCompactedFiles(const NameIdSet & partList);
- virtual std::vector<DataStoreFileChunkStats>
- getFileChunkStats() const override;
-
- /**
- * Implements common::ICompactableLidSpace
- */
- virtual void compactLidSpace(uint32_t wantedDocLidLimit) override;
- virtual bool canShrinkLidSpace() const override;
- virtual size_t getEstimatedShrinkLidSpaceGain() const override;
- virtual void shrinkLidSpace() override;
+ NameIdSet getAllActiveFiles() const;
private:
class WrapVisitor;
@@ -218,7 +215,6 @@ private:
void compactWorst();
void compactFile(FileId chunkId);
- typedef std::set<NameId> NameIdSet;
typedef attribute::RcuVector<uint64_t> LidInfoVector;
typedef std::vector<FileChunk::UP> FileChunkVector;
@@ -228,7 +224,8 @@ private:
void verifyModificationTime(const NameIdSet & partList);
void eraseDanglingDatFiles(const NameIdSet &partList, const NameIdSet &datPartList);
- NameIdSet eraseEmptyIdxFiles(const NameIdSet &partList);
+ NameIdSet eraseEmptyIdxFiles(NameIdSet partList);
+ NameIdSet eraseIncompleteCompactedFiles(NameIdSet partList);
void internalFlushAll();
NameIdSet scanDir(const vespalib::string &dir, const vespalib::string &suffix);