aboutsummaryrefslogtreecommitdiffstats
path: root/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java
blob: 2c03520ec0175e6fb39d0cd5491af0970683e796 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.clustercontroller.core;

import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler;

import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Tracks the master election between cluster controllers and answers who, if anyone, is master.
 *
 * <p>Election input is a map from controller index to the index that controller currently votes for.
 * New maps are handed over through {@link #handleFleetData(Map)} and processed by
 * {@link #watchMasterElection(DatabaseHandler, DatabaseHandler.Context)}. The first entry in iteration
 * order of that map is treated as the master candidate; the code therefore assumes the map iterates in
 * ascending index order (NOTE(review): the map implementation is chosen by the caller - confirm).
 *
 * <p>A candidate is only reported as master when more than half of the configured controllers vote
 * for it, and, unless every controller votes for it, only once the cool-down period since the
 * previous master disappeared has passed.
 */
public class MasterElectionHandler implements MasterInterface {

    private static final Logger log = Logger.getLogger(MasterElectionHandler.class.getName());

    /** Lock used when handing over {@link #nextMasterData} between threads. */
    private final Object monitor;
    private final Timer timer;
    /** Index of this cluster controller. */
    private final int index;
    /** Number of configured cluster controllers. */
    private int totalCount;
    private Integer masterCandidate; // The lowest indexed node in zookeeper
    private int nextInLineCount; // Our position in line of the nodes in zookeeper
    private int followers; // How many nodes are currently voting for the master candidate
    /** The election state most recently processed by watchMasterElection, or null. */
    private Map<Integer, Integer> masterData;
    /** Election state received but not yet processed, or null if there is nothing new. */
    private Map<Integer, Integer> nextMasterData;
    private long masterGoneFromZooKeeperTime; // Set to time master fleet controller disappears from zookeeper
    private long masterZooKeeperCooldownPeriod; // The period in ms that we won't take over unless master come back.
    private boolean usingZooKeeper = false; // Unit tests may not use ZooKeeper at all.

    /**
     * Creates an election handler for the controller with the given index.
     *
     * @param index      index of this cluster controller
     * @param totalCount number of configured cluster controllers
     * @param monitor    object to synchronize on when exchanging incoming election data
     * @param timer      time source used for the master cool-down logic
     */
    public MasterElectionHandler(int index, int totalCount, Object monitor, Timer timer) {
        this.monitor = monitor;
        this.timer = timer;
        this.index = index;
        this.totalCount = totalCount;
        this.nextInLineCount = Integer.MAX_VALUE;
        // Only a given set of nodes can ever become master
        if (!canEverBecomeMaster()) {
            log.log(Level.FINE, "Cluster controller " + index + ": We can never become master and will always stay a follower.");
        }
        // Tag current time as when we have not seen any other master. Make sure we're not taking over
        // at once for master that is on the way down
        masterGoneFromZooKeeperTime = timer.getCurrentTimeInMillis();
    }

    /**
     * Updates the number of configured controllers. When there is a single controller and ZooKeeper
     * is not in use, that controller (index 0) is made master candidate with one follower right away.
     */
    public void setFleetControllerCount(int count) {
        totalCount = count;
        if (count == 1 && !usingZooKeeper) {
            masterCandidate = 0;
            followers = 1;
            nextInLineCount = 0;
        }
    }

    /** Sets the cool-down period, in milliseconds, to wait after a master disappears. */
    public void setMasterZooKeeperCooldownPeriod(int period) {
        masterZooKeeperCooldownPeriod = period;
    }

    /** Sets whether ZooKeeper is in use; switching it on discards election progress made without it. */
    public void setUsingZooKeeper(boolean usingZK) {
        if (usingZK && !usingZooKeeper) {
            // Reset any shortcuts taken by non-ZK election logic.
            resetElectionProgress();
        }
        usingZooKeeper = usingZK;
    }

    /** Returns true if {@link #getMaster()} currently names this controller. */
    @Override
    public boolean isMaster() {
        Integer master = getMaster();
        return master != null && master.intValue() == index;
    }

    @Override
    public boolean inMasterMoratorium() {
        return false;
    }

    /**
     * Returns the index of the current master, or null if there is none.
     *
     * @return the master candidate if it has a majority and either all controllers follow it or the
     *         cool-down period has passed; otherwise null
     */
    @Override
    public Integer getMaster() {
        // If too few followers there can be no master
        if (!hasMajority()) {
            return null;
        }
        // If all are following master candidate, it is master if it exists.
        if (followers == totalCount) {
            return masterCandidate;
        }
        // If not all are following we only accept master candidate if old master
        // disappeared sufficient time ago
        if (inMasterCooldown()) {
            return null;
        }
        return masterCandidate;
    }

    /** Returns a human readable explanation of the answer {@link #getMaster()} currently gives. */
    public String getMasterReason() {
        if (masterCandidate == null) {
            return "There is currently no master candidate.";
        }
        // If too few followers there can be no master
        if (!hasMajority()) {
            return "More than half of the nodes must agree for there to be a master. Only " + followers + " of "
                    + totalCount + " nodes agree on current master candidate (" + masterCandidate + ").";
        }
        // If all are following master candidate, it is master if it exists.
        if (followers == totalCount) {
            return "All " + totalCount + " nodes agree that " + masterCandidate + " is current master.";
        }
        // If not all are following we only accept master candidate if old master
        // disappeared sufficient time ago
        if (inMasterCooldown()) {
            return followers + " of " + totalCount + " nodes agree " + masterCandidate + " should be master, "
                    + "but old master cooldown period of " + masterZooKeeperCooldownPeriod + " ms has not passed yet. "
                    + "To ensure it has got time to realize it is no longer master before we elect a new one, "
                    + "currently there is no master.";
        }
        return followers + " of " + totalCount + " nodes agree " + masterCandidate + " is master.";
    }

    /** Returns true if our position in line to become master is below the given count. */
    public boolean isAmongNthFirst(int first) {
        return nextInLineCount < first;
    }

    /**
     * Processes election data received since the previous call, if any.
     *
     * <p>Updates the master candidate and follower count, casts or changes this controller's vote in
     * the database when needed, and recomputes our position in line to become master.
     *
     * @param database  used to store this controller's vote
     * @param dbContext context passed through to the database when voting
     * @return true if new election data was processed, false if there was nothing to do
     * @throws IllegalStateException if the received data is empty, or lacks an entry for this
     *                               controller when the first entry is voting for itself
     * @throws InterruptedException  declared for the database vote calls
     */
    public boolean watchMasterElection(DatabaseHandler database,
                                       DatabaseHandler.Context dbContext) throws InterruptedException {
        if (totalCount == 1 && !usingZooKeeper) {
            return false; // Allow single configured node to become master implicitly if no ZK configured
        }
        // Move next data to temporary, such that we don't need to keep lock, and such that we don't retry
        // if we happen to fail processing the data. The field is only inspected under the monitor so that
        // the null check, the logging and the hand-over all see the same map.
        Map<Integer, Integer> state;
        synchronized (monitor) {
            state = nextMasterData;
            nextMasterData = null;
        }
        if (state == null) {
            if (masterCandidate == null) {
                log.log(Level.FINEST, "Cluster controller " + index + ": No current master candidate. Waiting for data to do master election.");
            }
            return false; // Nothing have happened since last time.
        }
        log.log(Level.INFO, "Cluster controller " + index + ": Handling new master election, as we have received " + state.size() + " entries");
        log.log(Level.INFO, "Cluster controller " + index + ": Got master election state " + toString(state) + ".");
        if (state.isEmpty()) {
            throw new IllegalStateException("Database has no master data. We should at least have data for ourselves.");
        }

        Map.Entry<Integer, Integer> first = state.entrySet().iterator().next();
        Integer currentMaster = getMaster();
        int firstIndex = first.getKey();
        if (currentMaster != null && firstIndex != currentMaster.intValue()) {
            log.log(Level.INFO, "Cluster controller " + index + ": Master gone from ZooKeeper. Tagging timestamp. Will wait " + this.masterZooKeeperCooldownPeriod + " ms.");
            masterGoneFromZooKeeperTime = timer.getCurrentTimeInMillis();
            masterCandidate = null;
        }

        if (first.getValue().intValue() != firstIndex) {
            // The first controller has not voted for itself yet, so there is no candidate to follow.
            log.log(Level.INFO, "Fleet controller " + index + ": First index is not currently trying to become master. Waiting for it to change state");
            masterCandidate = null;
            if (firstIndex == index) {
                log.log(Level.INFO, "Cluster controller " + index + ": We are next in line to become master. Altering our state to look for followers");
                database.setMasterVote(dbContext, index);
            }
        } else {
            masterCandidate = first.getValue();
            followers = countVotesFor(state, firstIndex);
            logElectionOutcome(currentMaster);
            Integer ourState = state.get(index);
            if (ourState == null) {
                throw new IllegalStateException("Database lacks data from ourselves. This should always be present.");
            }
            if (ourState.intValue() != firstIndex) {
                log.log(Level.INFO, "Cluster controller " + index + ": Altering our state to follow new fleet controller master candidate " + first.getKey());
                database.setMasterVote(dbContext, first.getKey());
            }
        }

        // Only a given set of nodes can ever become master
        if (canEverBecomeMaster()) {
            // Our position is the number of entries iterated before reaching our own index.
            int ourPosition = 0;
            for (Integer controller : state.keySet()) {
                if (controller.intValue() == index) {
                    break;
                }
                ++ourPosition;
            }
            if (nextInLineCount != ourPosition) {
                nextInLineCount = ourPosition;
                if (ourPosition > 0) {
                    log.log(Level.FINE, "Cluster controller " + index + ": We are now " + getPosition(nextInLineCount) + " in queue to take over being master.");
                }
            }
        }
        masterData = state;
        return true;
    }

    /** Returns how many entries in the given election state vote for the given controller index. */
    private static int countVotesFor(Map<Integer, Integer> state, int candidate) {
        int votes = 0;
        for (Integer vote : state.values()) {
            if (vote.intValue() == candidate) {
                ++votes;
            }
        }
        return votes;
    }

    /** Logs the result of an election round, given who was master before the round was processed. */
    private void logElectionOutcome(Integer previousMaster) {
        if (!hasMajority()) {
            log.log(Level.INFO, "MASTER_ELECTION: Cluster controller " + index + ": Currently too few followers for cluster controller candidate " + masterCandidate + ". No current master. (" + followers + "/" + totalCount + " followers)");
            return;
        }
        Integer newMaster = getMaster();
        if (newMaster == null) {
            log.log(Level.INFO, "MASTER_ELECTION: Cluster controller " + index + ": " + masterCandidate + " is new master candidate, but needs to wait before it can take over");
        } else if (previousMaster == null) {
            log.log(Level.INFO, "MASTER_ELECTION: Cluster controller " + index + ": " + newMaster + " is newly elected master");
        } else if (newMaster.intValue() == previousMaster.intValue()) {
            log.log(Level.INFO, "MASTER_ELECTION: Cluster controller " + index + ": " + previousMaster + " is still the master");
        } else {
            log.log(Level.INFO, "MASTER_ELECTION: Cluster controller " + index + ": " + newMaster + " took over for fleet controller " + previousMaster + " as master");
        }
    }

    /** Returns true if more than half of the configured controllers follow the master candidate. */
    private boolean hasMajority() {
        return 2 * followers > totalCount;
    }

    /** Returns true while the cool-down period since the master disappeared has not yet passed. */
    private boolean inMasterCooldown() {
        return masterGoneFromZooKeeperTime + masterZooKeeperCooldownPeriod > timer.getCurrentTimeInMillis();
    }

    /**
     * Returns true if this controller's index is low enough that it can ever become master.
     * A master needs votes from more than half of the controllers and the lowest present index is the
     * candidate, so only indexes up to (totalCount - 1) / 2 qualify.
     */
    private boolean canEverBecomeMaster() {
        return index <= (totalCount - 1) / 2;
    }

    /** Renders election state as "data(k -> v, k -> v, ...)" for logging. */
    private static String toString(Map<Integer, Integer> data) {
        StringBuilder sb = new StringBuilder("data(");
        String separator = "";
        for (Map.Entry<Integer, Integer> entry : data.entrySet()) {
            sb.append(separator).append(entry.getKey()).append(" -> ").append(entry.getValue());
            separator = ", ";
        }
        return sb.append(")").toString();
    }

    /**
     * Returns the English ordinal for a queue position: "first", "second", "third", then numeric
     * ordinals with the correct suffix ("4th", "11th", "21st", "22nd", ...). Values below 1 yield
     * "invalid(n)".
     */
    private static String getPosition(int val) {
        if (val < 1) {
            return "invalid(" + val + ")";
        }
        switch (val) {
            case 1: return "first";
            case 2: return "second";
            case 3: return "third";
            default: break;
        }
        int lastTwoDigits = val % 100;
        if (lastTwoDigits >= 11 && lastTwoDigits <= 13) {
            return val + "th"; // 11th, 12th, 13th, 111th, ...
        }
        switch (val % 10) {
            case 1: return val + "st";
            case 2: return val + "nd";
            case 3: return val + "rd";
            default: return val + "th";
        }
    }

    /**
     * Hands over new election data for processing by the next watchMasterElection call and wakes up
     * any thread waiting on the monitor.
     *
     * @param data map from controller index to the index that controller votes for
     */
    public void handleFleetData(Map<Integer, Integer> data) {
        log.log(Level.INFO, "Cluster controller " + index + ": Got new fleet data with " + data.size() + " entries: " + data);
        synchronized (monitor) {
            nextMasterData = data;
            monitor.notifyAll();
        }
    }

    /** Clears election progress after a lost database connection, unless running as a single node without ZooKeeper. */
    public void lostDatabaseConnection() {
        if (totalCount > 1 || usingZooKeeper) {
            log.log(Level.INFO, "Cluster controller " + index + ": Clearing master data as we lost connection on node " + index);
            resetElectionProgress();
        }
    }

    /** Forgets the processed and pending election data, the master candidate and its follower count. */
    private void resetElectionProgress() {
        masterData = null;
        masterCandidate = null;
        followers = 0;
        nextMasterData = null;
    }

    /**
     * Appends an HTML description of the election state to the given builder.
     *
     * @param sb               builder to append to
     * @param stateGatherCount positions in line below this count are reported as gathering node state
     */
    public void writeHtmlState(StringBuilder sb, int stateGatherCount) {
        sb.append("<h2>Master state</h2>\n");
        Integer master = getMaster();
        if (master != null) {
            sb.append("<p>Current cluster controller master is node ").append(master).append(".");
            if (master.intValue() == index) {
                sb.append(" (This node)");
            }
            sb.append("</p>");
        } else if (!hasMajority()) {
            sb.append("<p>There is currently no master. Less than half the fleet controllers (")
              .append(followers).append(") are following master candidate ").append(masterCandidate)
              .append(".</p>");
        } else if (inMasterCooldown()) {
            long time = timer.getCurrentTimeInMillis() - masterGoneFromZooKeeperTime;
            sb.append("<p>There is currently no master. Only ").append(time / 1000).append(" seconds have passed since")
              .append(" old master disappeared. At least ").append(masterZooKeeperCooldownPeriod / 1000).append(" must pass")
              .append(" before electing new master unless all possible master candidates are online.</p>");
        }
        if ((master == null || master.intValue() != index) && nextInLineCount < stateGatherCount) {
            sb.append("<p>As we are number ").append(nextInLineCount)
              .append(" in line for taking over as master, we're gathering state from nodes.</p>");
            sb.append("<p><font color=\"red\">As we are not the master, we don't know about nodes current system state"
                    + " or wanted states, so some statistics below may be stale. Look at status page on master "
                    + "for updated data.</font></p>");
        }
        // Same eligibility rule as used at construction time and when computing our position in line.
        if (!canEverBecomeMaster()) {
            sb.append("<p>As lowest index fleet controller is prioritized to become master, and more than half "
                    + "of the fleet controllers need to be available to select a master, we can never become master.</p>");
        }

        // Debug data
        sb.append("<p><font size=\"-1\" color=\"grey\">Master election handler internal state:")
          .append("<br>Index: ").append(index)
          .append("<br>Fleet controller count: ").append(totalCount)
          .append("<br>Master candidate: ").append(masterCandidate)
          .append("<br>Next in line count: ").append(nextInLineCount)
          .append("<br>Followers: ").append(followers)
          .append("<br>Master data:");
        appendElectionData(sb, masterData);
        sb.append("<br>Next master data:");
        appendElectionData(sb, nextMasterData);
        sb.append("<br>Master gone from zookeeper time: ").append(masterGoneFromZooKeeperTime)
          .append("<br>Master cooldown period: ").append(masterZooKeeperCooldownPeriod)
          .append("</font></p>");
    }

    /** Appends " k->v" for each entry of the given election data, or "null" if there is none. */
    private static void appendElectionData(StringBuilder sb, Map<Integer, Integer> data) {
        if (data == null) {
            sb.append("null");
            return;
        }
        for (Map.Entry<Integer, Integer> e : data.entrySet()) {
            sb.append(" ").append(e.getKey()).append("->").append(e.getValue());
        }
    }
}