aboutsummaryrefslogtreecommitdiffstats
path: root/config-model-api/src/main/java/com/yahoo/config/application/api/Bcp.java
blob: c2c8af6b0a93387da90d3209f384883096524fc0 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.config.application.api;

import com.yahoo.config.provision.RegionName;

import java.time.Duration;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Defines the BCP structure for an instance in a deployment spec:
 * A list of region groups where each group contains a set of regions
 * which will handle the traffic of a member in the group when it becomes unreachable.
 *
 * This is used to make bcp-aware autoscaling decisions. If no explicit BCP spec
 * is provided, it is assumed that a region's traffic will be divided equally over all
 * the other regions when it becomes unreachable - i.e. a single BCP group is implicitly
 * defined having all defined production regions as members with fraction 1.0.
 *
 * It is assumed that the traffic of the unreachable region is distributed
 * evenly to the other members of the group.
 *
 * A region can be a fractional member of a group, in which case it is assumed that
 * region will only handle that fraction of its share of the unreachable region's traffic,
 * and symmetrically that the other members of the group will only handle that fraction
 * of the fractional region's traffic if it becomes unreachable.
 *
 * Each production region defined in the instance must have fractional memberships in groups that sum to one
 * (a small tolerance is allowed to absorb floating-point rounding when the fractions are added up).
 *
 * If a group has one member it will not set aside any capacity for BCP.
 * If a group has more than two members, the system will attempt to provision capacity
 * for BCP also when a region is unreachable. That is, if there are three member regions, A, B and C,
 * each handling 100 qps, then they each aim to handle 150 in case one goes down. If C goes down,
 * A and B will now handle 150 each, but will each aim to handle 300 each in case the other goes down.
 *
 * @author bratseth
 */
public class Bcp {

    /** The largest deviation from 1.0 accepted for the summed membership fractions of a region. */
    private static final double MEMBERSHIP_SUM_TOLERANCE = 1e-9;

    private static final Bcp empty = new Bcp(List.of(), Optional.empty());

    private final Optional<Duration> defaultDeadline;
    private final List<Group> groups;

    /**
     * Creates a BCP spec.
     *
     * @param groups the groups of this spec; an immutable copy is kept
     * @param defaultDeadline the default deadline of this spec, if any
     * @throws IllegalArgumentException if the membership fractions of any region do not sum to 1.0
     */
    public Bcp(List<Group> groups, Optional<Duration> defaultDeadline) {
        // Copy first, then validate the copy, such that what is validated is exactly what is kept
        this.groups = List.copyOf(groups);
        this.defaultDeadline = defaultDeadline;
        totalMembershipSumsToOne(this.groups);
    }

    /** Returns the default deadline given when this was created, if any. */
    public Optional<Duration> defaultDeadline() { return defaultDeadline; }

    /** Returns an immutable list of the groups of this. */
    public List<Group> groups() { return groups; }

    /**
     * Returns a BCP spec having the given groups and the default deadline of this.
     *
     * @throws IllegalArgumentException if the membership fractions of any region do not sum to 1.0
     */
    public Bcp withGroups(List<Group> groups) {
        return new Bcp(groups, defaultDeadline);
    }

    /** Returns the set of regions declared in the groups of this. */
    public Set<RegionName> regions() {
        return groups.stream()
                     .flatMap(group -> group.members().stream())
                     .map(RegionMember::region)
                     .collect(Collectors.toSet());
    }

    /** Returns true if this has neither groups nor a default deadline. */
    public boolean isEmpty() { return groups.isEmpty() && defaultDeadline.isEmpty(); }

    /** Returns this bcp spec, or if it is empty, the given bcp spec. */
    public Bcp orElse(Bcp other) {
        return this.isEmpty() ? other : this;
    }

    /**
     * Verifies that the membership fractions of each region, summed over all the given groups, is 1.0.
     *
     * @throws IllegalArgumentException if the sum for any region deviates from 1.0 by more than the tolerance
     */
    private static void totalMembershipSumsToOne(List<Group> groups) {
        Map<RegionName, Double> totalMembership = new HashMap<>();
        for (var group : groups) {
            for (var member : group.members())
                totalMembership.merge(member.region(), member.fraction(), Double::sum);
        }
        for (var entry : totalMembership.entrySet()) {
            // Sums of binary doubles are inexact (ten times 0.1 is not 1.0), so compare with a tolerance.
            // The condition is written as a negated "<=" such that a NaN sum is also rejected.
            double deviation = Math.abs(entry.getValue() - 1.0);
            if ( ! (deviation <= MEMBERSHIP_SUM_TOLERANCE))
                throw new IllegalArgumentException("Illegal BCP spec: All regions must have total membership fractions summing to 1.0, but " +
                                                   entry.getKey() + " sums to " + entry.getValue());
        }
    }

    /** Returns the shared empty BCP spec: No groups and no default deadline. */
    public static Bcp empty() { return empty; }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        Bcp bcp = (Bcp) o;
        return defaultDeadline.equals(bcp.defaultDeadline) && groups.equals(bcp.groups);
    }

    @Override
    public int hashCode() {
        return Objects.hash(defaultDeadline, groups);
    }

    @Override
    public String toString() {
        if (isEmpty()) return "empty BCP";
        return "BCP of " +
               ( groups.isEmpty() ? "no groups" : groups ) +
               defaultDeadline.map(deadline -> ", deadline: " + deadline).orElse("");
    }

    /** A group of region members, with the deadline which applies to the group. */
    public static class Group {

        private final List<RegionMember> members;
        private final Set<RegionName> memberRegions;
        private final Duration deadline;

        /**
         * Creates a group.
         *
         * @param members the members of this group; an immutable copy is kept
         * @param deadline the deadline of this group
         */
        public Group(List<RegionMember> members, Duration deadline) {
            this.members = List.copyOf(members);
            // Wrapped as unmodifiable such that callers cannot change the state of this through the accessor
            this.memberRegions = Collections.unmodifiableSet(this.members.stream()
                                                                         .map(RegionMember::region)
                                                                         .collect(Collectors.toSet()));
            this.deadline = deadline;
        }

        /** Returns an immutable list of the members of this group. */
        public List<RegionMember> members() { return members; }

        /** Returns an unmodifiable set of the regions of the members of this group. */
        public Set<RegionName> memberRegions() { return memberRegions; }

        /**
         * Returns the max time until the other regions must be able to handle the additional traffic
         * when a region becomes unreachable, which by default is Duration.ZERO.
         */
        public Duration deadline() { return deadline; }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            Group group = (Group) o;
            return members.equals(group.members) && memberRegions.equals(group.memberRegions) && deadline.equals(group.deadline);
        }

        @Override
        public int hashCode() {
            return Objects.hash(members, memberRegions, deadline);
        }

        @Override
        public String toString() {
            return "BCP group of " + members;
        }

    }

    /**
     * The membership of a region in a group.
     *
     * @param region the region which is a member
     * @param fraction the fraction of membership, a number between 0.0 and 1.0, both inclusive
     */
    public record RegionMember(RegionName region, double fraction) {

        /** @throws IllegalArgumentException if the fraction is NaN or outside [0.0, 1.0] */
        public RegionMember {
            // Written as a negated range check such that NaN, which fails every comparison, is rejected too
            if ( ! (fraction >= 0 && fraction <= 1))
                throw new IllegalArgumentException("Fraction must be a number between 0.0 and 1.0, but got " + fraction);
        }

    }

}