aboutsummaryrefslogtreecommitdiffstats
path: root/searchcore/src/vespa/searchcorespi/index/diskindexcleaner.cpp
blob: 8126774078ecac9c28e5392d5a38a88b3dc17e97 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include "diskindexcleaner.h"
#include "disk_indexes.h"
#include "indexdisklayout.h"
#include "index_disk_dir.h"
#include <vespa/fastos/file.h>
#include <vespa/vespalib/io/fileutil.h>
#include <filesystem>
#include <sstream>
#include <vector>

#include <vespa/log/log.h>
LOG_SETUP(".searchcorespi.index.diskindexcleaner");

using std::istringstream;
using vespalib::string;
using std::vector;

namespace searchcorespi::index {

namespace {
/**
 * Lists the index directories found directly under base_dir.
 *
 * An entry is included when it is a directory and its file name starts
 * with "index.". Only the file names are returned (not full paths), in
 * the order the directory iterator yields them.
 */
vector<string> readIndexes(const string &base_dir) {
    vector<string> result;
    for (const auto &dir_entry : std::filesystem::directory_iterator(std::filesystem::path(base_dir))) {
        if (!dir_entry.is_directory()) {
            continue;
        }
        const std::string name = dir_entry.path().filename().string();
        // rfind() anchored at position 0 matches only when the name starts with the prefix.
        if (name.rfind("index.", 0) == 0) {
            result.emplace_back(name);
        }
    }
    return result;
}

/**
 * Reports whether index_dir counts as a valid index directory.
 *
 * The verdict is whatever FastOS_File::OpenReadOnlyExisting() returns for
 * the file "serial.dat" inside index_dir.
 */
bool isValidIndex(const string &index_dir) {
    const string serial_path = index_dir + "/serial.dat";
    FastOS_File serial_file(serial_path.c_str());
    const bool opened = serial_file.OpenReadOnlyExisting();
    return opened;
}

/**
 * Marks index_dir as invalid by deleting its "serial.dat" file, the file
 * isValidIndex() probes for.
 *
 * The directory is passed to vespalib::File::sync() afterwards
 * (presumably to make the removal durable - confirm against vespalib).
 */
void invalidateIndex(const string &index_dir) {
    const std::filesystem::path serial_file(index_dir + "/serial.dat");
    std::filesystem::remove(serial_file);
    vespalib::File::sync(index_dir);
}

/**
 * Finds the highest fusion id among the valid fusion index directories.
 *
 * @param base_dir directory that contains the index directories.
 * @param indexes  directory names (relative to base_dir) to inspect.
 * @return the largest id parsed from names of the form "index.fusion.<id>"
 *         whose directory passes isValidIndex(), or 0 when there is none.
 */
uint32_t findLastFusionId(const string &base_dir,
                          const vector<string> &indexes) {
    uint32_t fusion_id = 0;
    const string prefix = "index.fusion.";
    for (const string &index : indexes) {
        if (index.find(prefix) != 0) {
            continue;
        }
        if (!isValidIndex(base_dir + "/" + index)) {
            continue;
        }

        uint32_t new_id = 0;
        istringstream ist(index.substr(prefix.size()));
        // Only trust ids that actually parsed. A failed extraction leaves 0
        // for non-numeric text, but stores the largest uint32_t when the
        // digits overflow, which would make every other index look old.
        if ((ist >> new_id) && new_id > fusion_id) {
            fusion_id = new_id;
        }
    }
    return fusion_id;
}

/**
 * Deletes the index directory dir and everything below it.
 *
 * The directory is invalidated first (its serial.dat is removed via
 * invalidateIndex()) and only then removed recursively.
 */
void removeDir(const string &dir) {
    LOG(debug, "Removing index dir '%s'", dir.c_str());
    invalidateIndex(dir);
    const std::filesystem::path dir_path(dir);
    std::filesystem::remove_all(dir_path);
}

/**
 * Decides whether an index directory name is superseded by the fusion
 * index with id last_fusion_id.
 *
 * The id is the number following the last '.' in the name.
 *  - id <  last_fusion_id: old.
 *  - id == last_fusion_id: old only if the name contains "flush".
 *  - id >  last_fusion_id: not old.
 * A name whose suffix cannot be parsed as an id is treated as if its id
 * were last_fusion_id.
 *
 * @param index          directory name, e.g. "index.flush.3".
 * @param last_fusion_id id of the most recent valid fusion index.
 * @return true if the index is considered old.
 */
bool isOldIndex(const string &index, uint32_t last_fusion_id) {
    // When there is no '.', rfind() yields npos and npos + 1 wraps to 0,
    // so the whole name is handed to the parser.
    const string::size_type pos = index.rfind(".");
    istringstream ist(index.substr(pos + 1));
    uint32_t id = 0;
    if (!(ist >> id)) {
        // Since C++11 a failed extraction overwrites the target (0 for
        // non-numeric text, the type's limit on overflow), so pre-setting
        // id is not enough; restore the fallback explicitly.
        id = last_fusion_id;
    }
    if (id < last_fusion_id) {
        return true;
    }
    if (id == last_fusion_id) {
        return index.find("flush") != string::npos;
    }
    return false;
}

/**
 * Handles every index that isOldIndex() classifies as old relative to the
 * last valid fusion index.
 *
 * An old index is only touched when disk_indexes.remove() returns true
 * for it (presumably meaning it is no longer in use - confirm against
 * DiskIndexes). It is then deleted from disk when `remove` is true, and
 * merely invalidated otherwise.
 */
void removeOld(const string &base_dir, const vector<string> &indexes,
               DiskIndexes &disk_indexes, bool remove) {
    const uint32_t newest_fusion_id = findLastFusionId(base_dir, indexes);
    for (const string &name : indexes) {
        const string path = base_dir + "/" + name;
        auto disk_dir = IndexDiskLayout::get_index_disk_dir(name);
        if (!isOldIndex(name, newest_fusion_id)) {
            continue;
        }
        // Only consulted for old indexes, matching the short-circuit order.
        if (!disk_indexes.remove(disk_dir)) {
            continue;
        }
        if (remove) {
            removeDir(path);
        } else {
            invalidateIndex(path);
        }
    }
}

/**
 * Deletes every listed index directory under base_dir that fails
 * isValidIndex().
 */
void removeInvalid(const string &base_dir, const vector<string> &indexes) {
    for (const string &name : indexes) {
        const string path = base_dir + "/" + name;
        if (isValidIndex(path)) {
            continue;
        }
        LOG(debug, "Found invalid index dir '%s'", path.c_str());
        removeDir(path);
    }
}
}  // namespace

/**
 * Cleans up the index directories under base_dir in two passes over the
 * same directory listing:
 *  1. old indexes are invalidated (not deleted) via removeOld();
 *  2. every directory that is not a valid index is deleted via
 *     removeInvalid().
 */
void DiskIndexCleaner::clean(const string &base_dir,
                             DiskIndexes &disk_indexes) {
    const vector<string> index_names = readIndexes(base_dir);
    removeOld(base_dir, index_names, disk_indexes, false);
    removeInvalid(base_dir, index_names);
}

/**
 * Deletes the old index directories under base_dir from disk; removeOld()
 * is invoked with its remove flag set.
 */
void DiskIndexCleaner::removeOldIndexes(
        const string &base_dir, DiskIndexes &disk_indexes) {
    const vector<string> index_names = readIndexes(base_dir);
    removeOld(base_dir, index_names, disk_indexes, true);
}

}