aboutsummaryrefslogtreecommitdiffstats
path: root/vdslib/src/vespa/vdslib/distribution/redundancygroupdistribution.cpp
blob: d9ecc60862b634ef9fa187d341abf999a4016cb6 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include "redundancygroupdistribution.h"
#include <vespa/vespalib/util/exceptions.h>
#include <vespa/vespalib/text/stringtokenizer.h>
#include <boost/lexical_cast.hpp>
#include <algorithm>
#include <cassert>

namespace storage::lib {

namespace {
    void verifyLegal(vespalib::StringTokenizer& st,
                     vespalib::stringref serialized)
    {
        // First, verify sanity of the serialized string
        uint32_t firstAsterisk = st.size();
        for (uint32_t i=0; i<st.size(); ++i) {
            if (i > firstAsterisk) {
                if (st[i] != "*") {
                    throw vespalib::IllegalArgumentException(
                            "Illegal distribution spec \"" + serialized + "\". "
                            "Asterisk specifications must be tailing the "
                            "specification.", VESPA_STRLOC);
                }
                continue;
            }
            if (i < firstAsterisk && st[i] == "*") {
                firstAsterisk = i;
                continue;
            }
            uint32_t number = atoi(vespalib::string(st[i]).c_str());
            if (number <= 0 || number >= 256) {
                throw vespalib::IllegalArgumentException(
                    "Illegal distribution spec \"" + serialized + "\". "
                    "Copy counts must be in the range 1-255.", VESPA_STRLOC);
            }
            for (vespalib::StringTokenizer::Token::const_iterator it
                    = st[i].begin(); it != st[i].end(); ++it)
            {
                if (*it < '0' || *it > '9') {
                    throw vespalib::IllegalArgumentException(
                        "Illegal distribution spec \"" + serialized + "\". "
                        "Token isn't asterisk or number.", VESPA_STRLOC);
                }
            }
        }
    }

    std::vector<uint16_t> parse(vespalib::stringref serialized) {
        std::vector<uint16_t> result;
        if (serialized == "") return result;
        vespalib::StringTokenizer st(serialized, "|");
        verifyLegal(st, serialized);
        for (vespalib::StringTokenizer::Iterator it = st.begin();
             it != st.end(); ++it)
        {
            if (*it == "*") {
                result.push_back(0);
            } else {
                result.push_back(boost::lexical_cast<uint16_t>(*it));
            }
        }
        return result;
    }
}

RedundancyGroupDistribution::RedundancyGroupDistribution(
        vespalib::stringref serialized)
    : _values(parse(serialized))
{
}

RedundancyGroupDistribution::RedundancyGroupDistribution(
        const RedundancyGroupDistribution& spec,
        uint16_t redundancy)
{
    uint16_t firstAsterisk = spec.getFirstAsteriskIndex();
    // If redundancy is less than the group size, we only get one copy
    // in redundancy groups.
    if (redundancy <= spec.size()) {
        _values = std::vector<uint16_t>(redundancy, 1);
        return;
    }
    // If not we will have one copy at least for every wanted group.
    _values = std::vector<uint16_t>(spec.size(), 1);
    redundancy -= spec.size();
    // Distribute extra copies to non-asterisk entries first
    redundancy = divideSpecifiedCopies(0, firstAsterisk, redundancy, spec._values);
    // Distribute remaining copies to asterisk entries
    divideSpecifiedCopies(firstAsterisk, spec.size(), redundancy, spec._values);
    // Lastly sort, so the most copies will end up first in ideal state
    std::sort(_values.begin(), _values.end());
    std::reverse(_values.begin(), _values.end());
    assert(_values.front() >= _values.back());
}

RedundancyGroupDistribution::~RedundancyGroupDistribution() = default;

void
RedundancyGroupDistribution::print(std::ostream& out,
                                   bool, const std::string&) const
{
    for (uint32_t i=0; i<_values.size(); ++i) {
        if (i != 0) out << '|';
        if (_values[i] == 0) {
            out << '*';
        } else {
            out << _values[i];
        }
    }
}

uint16_t
RedundancyGroupDistribution::getFirstAsteriskIndex() const
{
    if (_values.empty() || _values.back() != 0) {
        throw vespalib::IllegalArgumentException(
                "Invalid spec given. No asterisk entries found.",
                VESPA_STRLOC);
    }
    uint16_t firstAsterisk = _values.size() - 1;
    while (firstAsterisk > 0 && _values[firstAsterisk - 1] == 0) {
        --firstAsterisk;
    }
    return firstAsterisk;
}

uint16_t
RedundancyGroupDistribution::divideSpecifiedCopies(
            uint16_t start, uint16_t end,
            uint16_t redundancy, const std::vector<uint16_t>& maxValues)
{
    uint16_t lastRedundancy = redundancy;
    while (redundancy > 0) {
        for (uint16_t i=start; i<end && redundancy > 0; ++i) {
            if (maxValues[i] == 0 || _values[i] < maxValues[i]) {
                ++_values[i];
                --redundancy;
            }
        }
        if (redundancy == lastRedundancy) break;
        lastRedundancy = redundancy;
    }
    return redundancy;
}

}