1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "diskindexcleaner.h"
#include "disk_indexes.h"
#include "indexdisklayout.h"
#include "index_disk_dir.h"
#include <vespa/fastos/file.h>
#include <vespa/vespalib/io/fileutil.h>
#include <filesystem>
#include <sstream>
#include <vector>
#include <vespa/log/log.h>
LOG_SETUP(".searchcorespi.index.diskindexcleaner");
using std::istringstream;
using vespalib::string;
using std::vector;
namespace searchcorespi::index {
namespace {
/**
 * Scans base_dir (non-recursively) and returns the names, without the
 * base_dir prefix, of every sub-directory whose name begins with "index.".
 * Errors from the std::filesystem calls propagate as exceptions.
 */
vector<string> readIndexes(const string &base_dir) {
    vector<string> result;
    const std::filesystem::path base_path(base_dir);
    std::filesystem::directory_iterator scan(base_path);
    for (const auto &entry : scan) {
        if (!entry.is_directory()) {
            continue;
        }
        // Only the last path component is of interest.
        const std::string name = entry.path().filename().string();
        // rfind anchored at position 0 matches only when "index." is a prefix.
        if (name.rfind("index.", 0) == 0) {
            result.emplace_back(name);
        }
    }
    return result;
}
/**
 * An index directory counts as valid when the file "<index_dir>/serial.dat"
 * can be opened through FastOS_File::OpenReadOnlyExisting().
 */
bool isValidIndex(const string &index_dir) {
    const string serial_path = index_dir + "/serial.dat";
    FastOS_File serial_file(serial_path.c_str());
    return serial_file.OpenReadOnlyExisting();
}
/**
 * Deletes "<index_dir>/serial.dat" (the file isValidIndex() looks for) and
 * then calls vespalib::File::sync() on the directory itself.
 */
void invalidateIndex(const string &index_dir) {
    const std::filesystem::path serial_path(index_dir + "/serial.dat");
    std::filesystem::remove(serial_path);
    vespalib::File::sync(index_dir);
}
uint32_t findLastFusionId(const string &base_dir,
const vector<string> &indexes) {
uint32_t fusion_id = 0;
const string prefix = "index.fusion.";
for (size_t i = 0; i < indexes.size(); ++i) {
if (indexes[i].find(prefix) != 0) {
continue;
}
if (!isValidIndex(base_dir + "/" + indexes[i])) {
continue;
}
uint32_t new_id = 0;
istringstream ist(indexes[i].substr(prefix.size()));
ist >> new_id;
fusion_id = std::max(fusion_id, new_id);
}
return fusion_id;
}
/**
 * Invalidates the index directory first (see invalidateIndex) and then
 * deletes the directory and everything below it.
 */
void removeDir(const string &dir) {
    LOG(debug, "Removing index dir '%s'", dir.c_str());
    invalidateIndex(dir);
    const std::filesystem::path dir_path(dir);
    std::filesystem::remove_all(dir_path);
}
/**
 * Decides whether an index directory name refers to an index that is
 * superseded by the fusion index with id last_fusion_id.
 *
 * The id is parsed from the text after the last '.' in the name:
 *  - id <  last_fusion_id: old.
 *  - id == last_fusion_id: old only if the name contains "flush".
 *  - id >  last_fusion_id: not old.
 *
 * If no id can be parsed, the id is treated as equal to last_fusion_id.
 *
 * @param index           index directory name (e.g. "index.flush.3").
 * @param last_fusion_id  id of the newest valid fusion index.
 * @return true if the index is considered old.
 */
bool isOldIndex(const string &index, uint32_t last_fusion_id) {
    // If there is no '.', rfind yields npos and pos + 1 wraps to 0, so the
    // whole name is handed to the parser.
    string::size_type pos = index.rfind(".");
    istringstream ist(index.substr(pos + 1));
    uint32_t id = last_fusion_id;
    if (!(ist >> id)) {
        // Since C++11 a failed numeric extraction overwrites the target
        // (with 0 for non-numeric input), which would make an unparsable
        // name look older than every fusion. Restore the intended default.
        id = last_fusion_id;
    }
    if (id < last_fusion_id) {
        return true;
    } else if (id == last_fusion_id) {
        return index.find("flush") != string::npos;
    }
    return false;
}
void removeOld(const string &base_dir, const vector<string> &indexes,
DiskIndexes &disk_indexes, bool remove) {
uint32_t last_fusion_id = findLastFusionId(base_dir, indexes);
for (size_t i = 0; i < indexes.size(); ++i) {
const string index_dir = base_dir + "/" + indexes[i];
auto index_disk_dir = IndexDiskLayout::get_index_disk_dir(indexes[i]);
if (isOldIndex(indexes[i], last_fusion_id) &&
disk_indexes.remove(index_disk_dir)) {
if (remove) {
removeDir(index_dir);
} else {
invalidateIndex(index_dir);
}
}
}
}
void removeInvalid(const string &base_dir, const vector<string> &indexes) {
for (size_t i = 0; i < indexes.size(); ++i) {
const string index_dir = base_dir + "/" + indexes[i];
if (!isValidIndex(index_dir)) {
LOG(debug, "Found invalid index dir '%s'", index_dir.c_str());
removeDir(index_dir);
}
}
}
} // namespace
void DiskIndexCleaner::clean(const string &base_dir,
DiskIndexes &disk_indexes) {
vector<string> indexes = readIndexes(base_dir);
removeOld(base_dir, indexes, disk_indexes, false);
removeInvalid(base_dir, indexes);
}
/**
 * Lists the index directories under base_dir and deletes, via removeOld(),
 * those that are old and for which disk_indexes.remove() reports true.
 */
void DiskIndexCleaner::removeOldIndexes(
        const string &base_dir, DiskIndexes &disk_indexes) {
    removeOld(base_dir, readIndexes(base_dir), disk_indexes, true);
}
}
|