aboutsummaryrefslogtreecommitdiffstats
path: root/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp
blob: ec606af0690ce1406737bc973184f814a158c253 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include "global_bucket_space_distribution_converter.h"
#include <vespa/vdslib/distribution/distribution.h>
#include <vespa/config/print/asciiconfigwriter.h>
#include <vespa/config/print/asciiconfigreader.hpp>
#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/vdslib/distribution/distribution_config_util.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <cassert>
#include <map>

namespace storage {

using DistributionConfig = vespa::config::content::StorDistributionConfig;
using DistributionConfigBuilder = vespa::config::content::StorDistributionConfigBuilder;

namespace {

// Node in the in-memory group tree that is rebuilt from the flat config group array.
struct Group {
    // Number of config nodes in this group plus those of every group inserted beneath it.
    uint16_t nested_leaf_count = 0;
    // Immediate children, keyed by the child's own component of its group index path.
    std::map<uint16_t, std::unique_ptr<Group>> sub_groups;
};

// Fills in the builder fields that do not depend on the group topology.
// Only diskDistribution is carried over from `source`; every other field set
// here receives a fixed value.
void set_distribution_invariant_config_fields(DistributionConfigBuilder& builder, const DistributionConfig& source) {
    // Fixed values.
    builder.initialRedundancy = 0;
    // TODO consider how to best support n-of-m replication for global docs
    builder.ensurePrimaryPersisted = true;
    builder.activePerLeafGroup = true;
    builder.distributorAutoOwnershipTransferOnWholeGroupDown = true;

    // Inherited from the source config.
    builder.diskDistribution = source.diskDistribution;
}

// Resolves a group index string to its node in the parsed tree by descending
// from `root` one path component at a time. Every component of the path must
// already exist in the tree; this is asserted rather than reported.
const Group& find_non_root_group_by_index(const vespalib::string& index, const Group& root) {
    const Group* current = &root;
    for (const auto component : lib::DistributionConfigUtil::getGroupPath(index)) {
        const auto child = current->sub_groups.find(component);
        assert(child != current->sub_groups.end());
        current = child->second.get();
    }
    return *current;
}

// Builds the partition spec string for `parent`: one '*' wildcard per immediate
// sub group, separated by '|'. A group without sub groups yields a single "*".
vespalib::string sub_groups_to_partition_spec(const Group& parent) {
    const size_t child_count = parent.sub_groups.size();
    if (child_count == 0) {
        return "*";
    }
    // We simplify the generated partition spec by only emitting wildcard entries.
    // These will have replicas evenly divided amongst them.
    vespalib::asciistream spec;
    spec << '*';
    for (size_t emitted = 1; emitted < child_count; ++emitted) {
        spec << '|';
        spec << '*';
    }
    return spec.str();
}

// A config group counts as a leaf when it has at least one node assigned to it.
bool is_leaf_group(const DistributionConfigBuilder::Group& g) noexcept {
    const bool has_no_nodes = g.nodes.empty();
    return !has_no_nodes;
}

// Attaches `new_group` to the tree rooted at `root`, at the position given by
// the index path of `config_source_group`. All ancestors named by the path must
// already be present and the final path component must not be (both asserted).
// Each group visited on the way down, starting with the root, has its
// nested_leaf_count increased by the number of nodes in the inserted group.
void insert_new_group_into_tree(
        std::unique_ptr<Group> new_group,
        const DistributionConfigBuilder::Group& config_source_group,
        Group& root) {
    const auto path = lib::DistributionConfigUtil::getGroupPath(config_source_group.index);
    assert(!path.empty());

    Group* cursor = &root;
    for (size_t depth = 0; depth < path.size(); ++depth) {
        // Empty if added group is not a leaf.
        cursor->nested_leaf_count += config_source_group.nodes.size();

        const auto existing = cursor->sub_groups.find(path[depth]);
        if (existing == cursor->sub_groups.end()) {
            // Only valid case for last item in path.
            assert(depth == path.size() - 1);
            cursor->sub_groups.emplace(path.back(), std::move(new_group));
            continue;
        }
        // An already known group can only be an ancestor of the new group.
        assert(depth != path.size() - 1);
        cursor = existing->second.get();
    }
}

// Appends a copy of the source root group to the builder, with its partition
// spec replaced by one derived from the parsed root's immediate sub groups.
void build_transformed_root_group(DistributionConfigBuilder& builder,
                                  const DistributionConfigBuilder::Group& config_source_root,
                                  const Group& parsed_root) {
    auto root_partitions = sub_groups_to_partition_spec(parsed_root);
    DistributionConfigBuilder::Group transformed_root(config_source_root);
    transformed_root.partitions = std::move(root_partitions);
    builder.group.emplace_back(std::move(transformed_root));
}

// Appends a copy of a non-root source group to the builder. Inner (non-leaf)
// groups get their partition spec regenerated from the parsed tree; leaf groups
// are copied unchanged.
void build_transformed_non_root_group(DistributionConfigBuilder& builder,
                                      const DistributionConfigBuilder::Group& config_source_group,
                                      const Group& parsed_root) {
    DistributionConfigBuilder::Group transformed(config_source_group);
    const bool is_inner_group = !is_leaf_group(config_source_group);
    if (is_inner_group) {
        // Partition specs only apply to inner nodes
        const Group& parsed = find_non_root_group_by_index(config_source_group.index, parsed_root);
        transformed.partitions = sub_groups_to_partition_spec(parsed);
    }
    builder.group.emplace_back(std::move(transformed));
}

// Builds the hierarchical group tree from the flat group list in `source`.
// The first group in the list becomes the root; each later group is inserted
// below it according to its index path. Returns null if the list is empty.
std::unique_ptr<Group> create_group_tree_from_config(const DistributionConfig& source) {
    std::unique_ptr<Group> tree_root;
    for (const auto& config_group : source.group) {
        assert(config_group.nodes.size() < UINT16_MAX);
        auto parsed = std::make_unique<Group>();
        parsed->nested_leaf_count = static_cast<uint16_t>(config_group.nodes.size());
        if (!tree_root) {
            tree_root = std::move(parsed);
            continue;
        }
        insert_new_group_into_tree(std::move(parsed), config_group, *tree_root);
    }
    return tree_root;
}

/* Even though groups are inherently hierarchical, the config is a flat array with a
 * hierarchy bolted on through the use of (more or less) "multi-dimensional" index strings.
 * Index string of root group is always "invalid" (or possibly some other string that cannot
 * be interpreted as a dot-separated tree node path). Other groups have an index of the
 * form "X.Y.Z", where Z is the group's own index within its immediate parent, Y is that
 * parent's index within its own parent, and so on. Just stating Z itself is not sufficient
 * to uniquely identify the group, as group indices are not unique _across_ groups. For
 * indices "0.1" and "1.1", the trailing "1" refers to 2 distinct groups, as they have
 * different parents.
 *
 * It may be noted that the group index strings do _not_ include the root group, so we
 * have to always implicitly include it ourselves.
 *
 * Config groups are ordered so that when a group is encountered, all its parents (and
 * transitively, its parents again etc) have already been processed. This directly
 * implies that the root group is always the first group present in the config.
 */
// Emits the transformed groups into the builder in source order (root first),
// then sets both redundancy and readyCopies to the root's nested_leaf_count.
void build_global_groups(DistributionConfigBuilder& builder, const DistributionConfig& source) {
    assert(!source.group.empty()); // TODO gracefully handle empty config?
    const auto tree_root = create_group_tree_from_config(source);

    // The first config group is the root; all remaining ones are non-root groups.
    auto group_it = source.group.begin();
    build_transformed_root_group(builder, *group_it, *tree_root);
    while (++group_it != source.group.end()) {
        build_transformed_non_root_group(builder, *group_it, *tree_root);
    }

    builder.redundancy = tree_root->nested_leaf_count;
    builder.readyCopies = builder.redundancy;
}

} // anon ns

std::shared_ptr<DistributionConfig>
GlobalBucketSpaceDistributionConverter::convert_to_global(const DistributionConfig& source) {
    DistributionConfigBuilder builder;
    set_distribution_invariant_config_fields(builder, source);
    build_global_groups(builder, source);
    return std::make_shared<DistributionConfig>(builder);
}

// Convenience overload working on Distribution objects: serializes `distr`,
// parses the result back into a config, converts that config to its global
// form and wraps the outcome in a new Distribution.
std::shared_ptr<lib::Distribution>
GlobalBucketSpaceDistributionConverter::convert_to_global(const lib::Distribution& distr) {
    const auto serialized = distr.serialize();
    const auto parsed_config = string_to_config(serialized);
    const auto global_config = convert_to_global(*parsed_config);
    return std::make_shared<lib::Distribution>(*global_config);
}

// Parses an ASCII-serialized distribution config string into a config object.
std::unique_ptr<DistributionConfig>
GlobalBucketSpaceDistributionConverter::string_to_config(const vespalib::string& cfg) {
    vespalib::asciistream input(cfg);
    config::AsciiConfigReader<DistributionConfig> config_reader(input);
    return config_reader.read();
}

// Serializes `cfg` through the ASCII config writer and returns the resulting text.
vespalib::string GlobalBucketSpaceDistributionConverter::config_to_string(const DistributionConfig& cfg) {
    vespalib::asciistream output;
    config::AsciiConfigWriter ascii_writer(output);
    ascii_writer.write(cfg);
    return output.str();
}

}