author    Harald Musum <musum@yahooinc.com>  2023-03-30 08:55:23 +0200
committer Harald Musum <musum@yahooinc.com>  2023-03-30 08:55:23 +0200
commit    dba0eb02a4811c954b55a81a2126f3c84a59a643 (patch)
tree      bd20fd2a04b60e3d2467138a430d5dc75555ca24
parent    0272c98d3ebd07f488c3d5c7ee71c71f601a84c8 (diff)
Minor cleanup, no functional changes
-rw-r--r--  clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterInfo.java  11
-rw-r--r--  clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateVersionSpecificRequest.java  2
-rw-r--r--  clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java  17
-rw-r--r--  clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java  38
-rw-r--r--  clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java  171
-rw-r--r--  clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java  38
6 files changed, 133 insertions, 144 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterInfo.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterInfo.java
index 2cfaf64fe83..0f119d8de50 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterInfo.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterInfo.java
@@ -15,7 +15,7 @@ import java.util.Set;
import java.util.TreeMap;
/**
- * Detail information about the current state of all the distributor and storage nodes of the cluster.
+ * Detailed information about the current state of all the distributor and storage nodes of the cluster.
*
* @author hakonhall
* @author bratseth
@@ -127,11 +127,10 @@ public class ClusterInfo {
/** Returns the node info object for a given node identifier */
private NodeInfo getInfo(Node node) {
- switch (node.getType()) {
- case DISTRIBUTOR : return getDistributorNodeInfo(node.getIndex());
- case STORAGE : return getStorageNodeInfo(node.getIndex());
- default : throw new IllegalArgumentException("No node type " + node.getType().toString());
- }
+ return switch (node.getType()) {
+ case DISTRIBUTOR -> getDistributorNodeInfo(node.getIndex());
+ case STORAGE -> getStorageNodeInfo(node.getIndex());
+ };
}
}
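
The hunk above swaps a classic switch statement, whose default branch threw, for an exhaustive switch expression. A minimal, self-contained sketch of the same pattern, using a simplified stand-in enum rather than the real Vespa NodeType:

// Simplified stand-in for com.yahoo.vdslib.state.NodeType, illustration only.
enum NodeType { DISTRIBUTOR, STORAGE }

class SwitchExpressionSketch {
    // A switch expression over an enum is checked for exhaustiveness by the
    // compiler, so the old "default: throw ..." branch becomes unnecessary.
    static String describe(NodeType type) {
        return switch (type) {
            case DISTRIBUTOR -> "distributor node";
            case STORAGE -> "storage node";
        };
    }

    public static void main(String[] args) {
        System.out.println(describe(NodeType.STORAGE)); // prints "storage node"
    }
}
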
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateVersionSpecificRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateVersionSpecificRequest.java
index 70fbbb60e26..6855859c96f 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateVersionSpecificRequest.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateVersionSpecificRequest.java
@@ -2,7 +2,7 @@
package com.yahoo.vespa.clustercontroller.core;
/**
- * Base class for distributor/content node node RPC requests that are bound
+ * Base class for distributor/content node RPC requests that are bound
* to a particular cluster state version.
*/
public abstract class ClusterStateVersionSpecificRequest {
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java
index 3f1a7ab5d7b..d7aac1c26fa 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java
@@ -55,7 +55,10 @@ abstract public class NodeInfo implements Comparable<NodeInfo> {
private long nextAttemptTime;
/** Cached connection to this node. */
private Target connection;
- /** We cache last connection we did request info on, as we want to report appropriate error for node regardless of whether other commands have created new connection. */
+ /**
+ * We cache last connection we did request info on, as we want to report appropriate error for
+ * node regardless of whether other commands have created new connection.
+ */
public Target lastRequestInfoConnection;
/**
* Counts the number of attempts we have tried since last time we had
@@ -163,7 +166,7 @@ abstract public class NodeInfo implements Comparable<NodeInfo> {
}
if (prematureCrashCount != count) {
prematureCrashCount = count;
- log.log(Level.FINE, () -> "Premature crash count on " + toString() + " set to " + count);
+ log.log(Level.FINE, () -> "Premature crash count on " + this + " set to " + count);
}
}
public int getPrematureCrashCount() { return prematureCrashCount; }
@@ -311,13 +314,13 @@ abstract public class NodeInfo implements Comparable<NodeInfo> {
}
if (state.getState().equals(State.DOWN) && !reportedState.getState().oneOf("d")) {
downStableStateTime = time;
- log.log(Level.FINE, () -> "Down stable state on " + toString() + " altered to " + time);
+ log.log(Level.FINE, () -> "Down stable state on " + this + " altered to " + time);
if (reportedState.getState() == State.INITIALIZING) {
recentlyObservedUnstableDuringInit = true;
}
} else if (state.getState().equals(State.UP) && !reportedState.getState().oneOf("u")) {
upStableStateTime = time;
- log.log(Level.FINE, () -> "Up stable state on " + toString() + " altered to " + time);
+ log.log(Level.FINE, () -> "Up stable state on " + this + " altered to " + time);
}
if (!state.getState().validReportedNodeState(node.getType())) {
throw new IllegalStateException("Trying to set illegal reported node state: " + state);
@@ -340,14 +343,14 @@ abstract public class NodeInfo implements Comparable<NodeInfo> {
} else {
nextAttemptTime = time + 5000;
}
- log.log(Level.FINEST, () -> "Failed to get state from node " + toString() + ", scheduling next attempt in " + (nextAttemptTime - time) + " ms.");
+ log.log(Level.FINEST, () -> "Failed to get state from node " + this + ", scheduling next attempt in " + (nextAttemptTime - time) + " ms.");
} else {
connectionAttemptCount = 0;
timeOfFirstFailingConnectionAttempt = 0;
reportedState = state;
if (version == 0 || state.getState().equals(State.STOPPING)) {
nextAttemptTime = time + cluster.getPollingFrequency();
- log.log(Level.FINEST, () -> "Scheduling next attempt to get state from " + toString() + " in " + (nextAttemptTime - time) + " ms (polling freq).");
+ log.log(Level.FINEST, () -> "Scheduling next attempt to get state from " + this + " in " + (nextAttemptTime - time) + " ms (polling freq).");
} else {
nextAttemptTime = time;
}
@@ -368,7 +371,7 @@ abstract public class NodeInfo implements Comparable<NodeInfo> {
} catch (Exception e) {
StringWriter sw = new StringWriter();
e.printStackTrace(new PrintWriter(sw));
- log.warning("Attempted to set wanted state with more than just a main state. Extra data stripped. Original data '" + state.serialize(true) + ":\n" + sw.toString());
+ log.warning("Attempted to set wanted state with more than just a main state. Extra data stripped. Original data '" + state.serialize(true) + ":\n" + sw);
}
}
wantedState = newWanted;
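
The NodeInfo hunks drop explicit toString() calls inside string concatenation (String.valueOf handles that implicitly) and keep the supplier-based logging overload so messages are only built when the level is enabled. A small sketch of that idiom, with a made-up logger, field and message:

import java.util.logging.Level;
import java.util.logging.Logger;

// Illustrative only; not the real NodeInfo class.
class LazyLoggingSketch {
    private static final Logger log = Logger.getLogger(LazyLoggingSketch.class.getName());

    private final int prematureCrashCount = 3;

    void logCount() {
        // The Supplier overload builds the message only if FINE is enabled, and
        // string concatenation already calls String.valueOf(this), so the
        // explicit toString() removed in the diff was redundant.
        log.log(Level.FINE, () -> "Premature crash count on " + this + " set to " + prematureCrashCount);
    }

    @Override
    public String toString() { return "storage.0"; }

    public static void main(String[] args) {
        new LazyLoggingSketch().logCount();
    }
}
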
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java
index 68e46414c22..6f4d0749f3f 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java
@@ -4,7 +4,6 @@ package com.yahoo.vespa.clustercontroller.core;
import com.yahoo.jrt.ErrorCode;
import com.yahoo.jrt.Target;
import com.yahoo.vdslib.state.NodeState;
-import com.yahoo.vdslib.state.State;
import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
import com.yahoo.vespa.clustercontroller.core.listeners.NodeListener;
import java.util.LinkedList;
@@ -12,6 +11,9 @@ import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
+import static com.yahoo.vdslib.state.State.DOWN;
+import static com.yahoo.vdslib.state.State.STOPPING;
+
/**
* Collects the state of all nodes by making remote requests and handling the replies.
*/
@@ -65,20 +67,20 @@ public class NodeStateGatherer {
if (info.getRpcAddress() == null || info.isNotInSlobrok()) { // Cannot query state of node without RPC address or not in slobrok
log.log(Level.FINE, () -> "Not sending getNodeState request to node " + info.getNode() + ": Not in slobrok");
NodeState reportedState = info.getReportedState().clone();
- if (( ! reportedState.getState().equals(State.DOWN) && currentTime - info.lastSeenInSlobrok() > maxSlobrokDisconnectGracePeriod)
- || reportedState.getState().equals(State.STOPPING)) // Don't wait for grace period if we expect node to be stopping
+ if (( ! reportedState.getState().equals(DOWN) && currentTime - info.lastSeenInSlobrok() > maxSlobrokDisconnectGracePeriod)
+ || reportedState.getState().equals(STOPPING)) // Don't wait for grace period if we expect node to be stopping
{
log.log(Level.FINE, () -> "Setting reported state to DOWN "
- + (reportedState.getState().equals(State.STOPPING)
+ + (reportedState.getState().equals(STOPPING)
? "as node completed stopping."
- : "as node has been out of slobrok longer than " + maxSlobrokDisconnectGracePeriod + "."));
+ : "as node has been out of slobrok longer than " + maxSlobrokDisconnectGracePeriod + " ms."));
if (reportedState.getState().oneOf("iur") || ! reportedState.hasDescription()) {
- StringBuilder sb = new StringBuilder().append("Set node down as it has been out of slobrok for ")
- .append(currentTime - info.lastSeenInSlobrok()).append(" ms which is more than the max limit of ")
- .append(maxSlobrokDisconnectGracePeriod).append(" ms.");
- reportedState.setDescription(sb.toString());
+ reportedState.setDescription("Set node down as it has been out of slobrok for " +
+ (currentTime - info.lastSeenInSlobrok()) +
+ " ms which is more than the max limit of " +
+ maxSlobrokDisconnectGracePeriod + " ms.");
}
- reportedState.setState(State.DOWN);
+ reportedState.setState(DOWN);
listener.handleNewNodeState(info, reportedState.clone());
}
info.setReportedState(reportedState, currentTime); // Must reset it to null to get connection attempts counted
@@ -135,7 +137,7 @@ public class NodeStateGatherer {
info.setReportedState(state, currentTime);
} catch (Exception e) {
log.log(Level.WARNING, "Failed to process get node state response", e);
- info.setReportedState(new NodeState(info.getNode().getType(), State.DOWN), currentTime);
+ info.setReportedState(new NodeState(info.getNode().getType(), DOWN), currentTime);
}
// Important: The old host info should be accessible in info.getHostInfo(), see interface.
@@ -152,7 +154,7 @@ public class NodeStateGatherer {
private NodeState handleError(GetNodeStateRequest req, NodeInfo info, long currentTime) {
String prefix = "Failed get node state request: ";
- NodeState newState = new NodeState(info.getNode().getType(), State.DOWN);
+ NodeState newState = new NodeState(info.getNode().getType(), DOWN);
if (req.getReply().getReturnCode() == ErrorCode.TIMEOUT) {
String msg = "RPC timeout";
if (info.getReportedState().getState().oneOf("ui")) {
@@ -177,7 +179,7 @@ public class NodeStateGatherer {
log.log(Level.FINE, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode()
+ " " + req.getReply().getReturnMessage() + ": " + msg);
}
- newState.setState(State.DOWN);
+ newState.setState(DOWN);
} else if (msg.equals("jrt: Connection closed by peer") || msg.equals("Connection reset by peer")) {
msg = "Connection error: Closed at other end. (Node or switch likely shut down)";
if (info.isNotInSlobrok()) {
@@ -189,7 +191,7 @@ public class NodeStateGatherer {
if (log.isLoggable(Level.FINE))
log.log(Level.FINE, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg);
}
- newState.setState(State.DOWN).setDescription(msg);
+ newState.setState(DOWN).setDescription(msg);
} else if (msg.equals("Connection timed out")) {
if (info.getReportedState().getState().oneOf("ui")) {
msg = "Connection error: Timeout";
@@ -228,11 +230,11 @@ public class NodeStateGatherer {
} else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) {
log.log(Level.FINE, () -> "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg);
}
- newState.setState(State.DOWN).setDescription(msg + ": get node state");
+ newState.setState(DOWN).setDescription(msg + ": get node state");
} else if (req.getReply().getReturnCode() == 75004) {
String msg = "Node refused to answer RPC request and is likely stopping: " + req.getReply().getReturnMessage();
// The node is shutting down and is not accepting requests from anyone
- if (info.getReportedState().getState().equals(State.STOPPING)) {
+ if (info.getReportedState().getState().equals(STOPPING)) {
log.log(Level.FINE, () -> "Failed to get node state from " + info + " because it is still shutting down.");
} else {
if (info.getReportedState().getState().oneOf("ui")) {
@@ -241,7 +243,7 @@ public class NodeStateGatherer {
log.log(Level.FINE, () -> "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg);
}
}
- newState.setState(State.STOPPING).setDescription(msg);
+ newState.setState(STOPPING).setDescription(msg);
} else {
String msg = "Got unexpected error, assumed to be node issue " + req.getReply().getReturnCode() + ": " + req.getReply().getReturnMessage();
if (info.getReportedState().getState().oneOf("ui")) {
@@ -249,7 +251,7 @@ public class NodeStateGatherer {
} else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) {
log.log(Level.FINE, () -> "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg);
}
- newState.setState(State.DOWN).setDescription(msg);
+ newState.setState(DOWN).setDescription(msg);
}
return newState;
}
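
Most of the NodeStateGatherer changes replace qualified references such as State.DOWN and State.STOPPING with statically imported constants. A tiny sketch of the static-import idiom, here applied to java.util.logging.Level as the later StateChangeHandler hunks also do:

import java.util.logging.Logger;
import static java.util.logging.Level.FINE;

class StaticImportSketch {
    private static final Logger log = Logger.getLogger(StaticImportSketch.class.getName());

    public static void main(String[] args) {
        // With the static import, FINE is referenced without its class prefix,
        // just as the hunks above do for State.DOWN, State.STOPPING and Level.FINE.
        log.log(FINE, () -> "node state set to DOWN");
    }
}
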
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java
index 4ab80ec6d7a..a9325788cb6 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java
@@ -16,6 +16,12 @@ import java.util.TreeMap;
import java.util.logging.Level;
import java.util.logging.Logger;
+import static com.yahoo.vdslib.state.State.DOWN;
+import static com.yahoo.vdslib.state.State.INITIALIZING;
+import static com.yahoo.vdslib.state.State.STOPPING;
+import static java.util.logging.Level.FINE;
+import static java.util.logging.Level.FINEST;
+
/**
* This class gets node state updates and timer events and uses these to decide
* whether a new cluster state should be generated.
@@ -52,9 +58,9 @@ public class StateChangeHandler {
public void handleAllDistributorsInSync(final ClusterState currentState,
final Set<ConfiguredNode> nodes,
final DatabaseHandler database,
- final DatabaseHandler.DatabaseContext dbContext) throws InterruptedException {
+ final DatabaseHandler.DatabaseContext dbContext) {
int startTimestampsReset = 0;
- context.log(log, Level.FINE, "handleAllDistributorsInSync invoked for state version %d", currentState.getVersion());
+ context.log(log, FINE, "handleAllDistributorsInSync invoked for state version %d", currentState.getVersion());
for (NodeType nodeType : NodeType.getTypes()) {
for (ConfiguredNode configuredNode : nodes) {
final Node node = new Node(nodeType, configuredNode.index());
@@ -62,15 +68,15 @@ public class StateChangeHandler {
final NodeState nodeState = currentState.getNodeState(node);
if (nodeInfo != null && nodeState != null) {
if (nodeState.getStartTimestamp() > nodeInfo.getStartTimestamp()) {
- log.log(Level.FINE, () -> String.format("Storing away new start timestamp for node %s (%d)", node, nodeState.getStartTimestamp()));
+ log.log(FINE, () -> String.format("Storing away new start timestamp for node %s (%d)", node, nodeState.getStartTimestamp()));
nodeInfo.setStartTimestamp(nodeState.getStartTimestamp());
}
if (nodeState.getStartTimestamp() > 0) {
- log.log(Level.FINE, "Resetting timestamp in cluster state for node %s", node);
+ log.log(FINE, "Resetting timestamp in cluster state for node %s", node);
++startTimestampsReset;
}
- } else if (log.isLoggable(Level.FINE)) {
- log.log(Level.FINE, node + ": " +
+ } else if (log.isLoggable(FINE)) {
+ log.log(FINE, node + ": " +
(nodeInfo == null ? "null" : nodeInfo.getStartTimestamp()) + ", " +
(nodeState == null ? "null" : nodeState.getStartTimestamp()));
}
@@ -83,7 +89,7 @@ public class StateChangeHandler {
stateMayHaveChanged = true;
database.saveStartTimestamps(dbContext);
} else {
- log.log(Level.FINE, "Found no start timestamps to reset in cluster state.");
+ log.log(FINE, "Found no start timestamps to reset in cluster state.");
}
}
@@ -110,48 +116,45 @@ public class StateChangeHandler {
// TODO nodeListener is only used via updateNodeInfoFromReportedState -> handlePrematureCrash
// TODO this will recursively invoke proposeNewNodeState, which will presumably (i.e. hopefully) be a no-op...
- public void handleNewReportedNodeState(final ClusterState currentClusterState,
- final NodeInfo node,
- final NodeState reportedState,
- final NodeListener nodeListener)
- {
- final NodeState currentState = currentClusterState.getNodeState(node.getNode());
- final Level level = (currentState.equals(reportedState) && node.getVersion() == 0) ? Level.FINEST : Level.FINE;
- if (log.isLoggable(level)) {
- log.log(level, String.format("Got nodestate reply from %s: %s (Current state is %s)",
- node, node.getReportedState().getTextualDifference(reportedState), currentState.toString(true)));
- }
- final long currentTime = timer.getCurrentTimeInMillis();
-
- if (reportedState.getState().equals(State.DOWN)) {
+ public void handleNewReportedNodeState(ClusterState currentClusterState,
+ NodeInfo node,
+ NodeState reportedState,
+ NodeListener nodeListener) {
+ NodeState currentState = currentClusterState.getNodeState(node.getNode());
+ Level level = (currentState.equals(reportedState) && node.getVersion() == 0) ? FINEST : FINE;
+ log.log(level, () -> String.format("Got nodestate reply from %s: %s (Current state is %s)",
+ node, node.getReportedState().getTextualDifference(reportedState), currentState.toString(true)));
+ long currentTime = timer.getCurrentTimeInMillis();
+
+ if (reportedState.getState().equals(DOWN)) {
node.setTimeOfFirstFailingConnectionAttempt(currentTime);
}
// *** LOGGING ONLY
if ( ! reportedState.similarTo(node.getReportedState())) {
- if (reportedState.getState().equals(State.DOWN)) {
+ if (reportedState.getState().equals(DOWN)) {
eventLog.addNodeOnlyEvent(NodeEvent.forBaseline(node, "Failed to get node state: " + reportedState.toString(true), NodeEvent.Type.REPORTED, currentTime), Level.INFO);
} else {
- eventLog.addNodeOnlyEvent(NodeEvent.forBaseline(node, "Now reporting state " + reportedState.toString(true), NodeEvent.Type.REPORTED, currentTime), Level.FINE);
+ eventLog.addNodeOnlyEvent(NodeEvent.forBaseline(node, "Now reporting state " + reportedState.toString(true), NodeEvent.Type.REPORTED, currentTime), FINE);
}
}
- if (reportedState.equals(node.getReportedState()) && ! reportedState.getState().equals(State.INITIALIZING)) {
+ if (reportedState.equals(node.getReportedState()) && ! reportedState.getState().equals(INITIALIZING)) {
return;
}
updateNodeInfoFromReportedState(node, currentState, reportedState, nodeListener);
if (reportedState.getMinUsedBits() != currentState.getMinUsedBits()) {
- final int oldCount = currentState.getMinUsedBits();
- final int newCount = reportedState.getMinUsedBits();
- log.log(Level.FINE,
+ int oldCount = currentState.getMinUsedBits();
+ int newCount = reportedState.getMinUsedBits();
+ log.log(FINE,
() -> String.format("Altering node state to reflect that min distribution bit count has changed from %d to %d", oldCount, newCount));
eventLog.add(NodeEvent.forBaseline(node, String.format("Altered min distribution bit count from %d to %d", oldCount, newCount),
NodeEvent.Type.CURRENT, currentTime), isMaster);
} else {
- log.log(Level.FINE, () -> String.format("Not altering state of %s in cluster state because new state is too similar: %s",
- node, currentState.getTextualDifference(reportedState)));
+ log.log(FINE, () -> String.format("Not altering state of %s in cluster state because new state is too similar: %s",
+ node, currentState.getTextualDifference(reportedState)));
}
stateMayHaveChanged = true;
@@ -162,10 +165,8 @@ public class StateChangeHandler {
eventLog.add(NodeEvent.forBaseline(node, message, NodeEvent.Type.REPORTED, timer.getCurrentTimeInMillis()), isMaster);
}
- public void handleMissingNode(final ClusterState currentClusterState,
- final NodeInfo node,
- final NodeListener nodeListener) {
- final long timeNow = timer.getCurrentTimeInMillis();
+ public void handleMissingNode(ClusterState currentClusterState, NodeInfo node, NodeListener nodeListener) {
+ long timeNow = timer.getCurrentTimeInMillis();
if (node.getLatestNodeStateRequestTime() != null) {
eventLog.add(NodeEvent.forBaseline(node, "Node is no longer in slobrok, but we still have a pending state request.", NodeEvent.Type.REPORTED, timeNow), isMaster);
@@ -173,13 +174,13 @@ public class StateChangeHandler {
eventLog.add(NodeEvent.forBaseline(node, "Node is no longer in slobrok. No pending state request to node.", NodeEvent.Type.REPORTED, timeNow), isMaster);
}
- if (node.getReportedState().getState().equals(State.STOPPING)) {
- log.log(Level.FINE, () -> "Node " + node.getNode() + " is no longer in slobrok. Was in stopping state, so assuming it has shut down normally. Setting node down");
+ if (node.getReportedState().getState().equals(STOPPING)) {
+ log.log(FINE, () -> "Node " + node.getNode() + " is no longer in slobrok. Was in stopping state, so assuming it has shut down normally. Setting node down");
NodeState ns = node.getReportedState().clone();
- ns.setState(State.DOWN);
+ ns.setState(DOWN);
handleNewReportedNodeState(currentClusterState, node, ns.clone(), nodeListener);
} else {
- log.log(Level.FINE, () -> "Node " + node.getNode() + " no longer in slobrok was in state " + node.getReportedState() + ". Waiting to see if it reappears in slobrok");
+ log.log(FINE, () -> "Node " + node.getNode() + " no longer in slobrok was in state " + node.getReportedState() + ". Waiting to see if it reappears in slobrok");
}
stateMayHaveChanged = true;
@@ -192,19 +193,19 @@ public class StateChangeHandler {
* If the newly proposed state differs from the state the node currently has in the system,
* a cluster state regeneration will be triggered.
*/
- public void proposeNewNodeState(final ClusterState currentClusterState, final NodeInfo node, final NodeState proposedState) {
- final NodeState currentState = currentClusterState.getNodeState(node.getNode());
- final NodeState currentReported = node.getReportedState();
+ public void proposeNewNodeState(ClusterState currentClusterState, NodeInfo node, NodeState proposedState) {
+ NodeState currentState = currentClusterState.getNodeState(node.getNode());
- if (currentState.getState().equals(proposedState.getState())) {
+ if (currentState.getState().equals(proposedState.getState()))
return;
- }
+
stateMayHaveChanged = true;
- log.log(Level.FINE, () -> String.format("Got new wanted nodestate for %s: %s", node, currentState.getTextualDifference(proposedState)));
+ log.log(FINE, () -> String.format("Got new wanted nodestate for %s: %s", node, currentState.getTextualDifference(proposedState)));
// Should be checked earlier before state was set in cluster
assert(proposedState.getState().validWantedNodeState(node.getNode().getType()));
long timeNow = timer.getCurrentTimeInMillis();
+ NodeState currentReported = node.getReportedState();
if (proposedState.above(currentReported)) {
eventLog.add(NodeEvent.forBaseline(node, String.format("Wanted state %s, but we cannot force node into that " +
"state yet as it is currently in %s", proposedState, currentReported),
@@ -239,12 +240,9 @@ public class StateChangeHandler {
// generated cluster state. Still a bit of a mine field...
// TODO remove all node state mutation from this function entirely in favor of ClusterStateGenerator!
// `--> this will require adding more event edges and premature crash handling to it. Which is fine.
- public boolean watchTimers(final ContentCluster cluster,
- final ClusterState currentClusterState,
- final NodeListener nodeListener)
- {
+ public boolean watchTimers(ContentCluster cluster, ClusterState currentClusterState, NodeListener nodeListener) {
boolean triggeredAnyTimers = false;
- final long currentTime = timer.getCurrentTimeInMillis();
+ long currentTime = timer.getCurrentTimeInMillis();
for(NodeInfo node : cluster.getNodeInfos()) {
triggeredAnyTimers |= handleTimeDependentOpsForNode(currentClusterState, nodeListener, currentTime, node);
@@ -256,23 +254,17 @@ public class StateChangeHandler {
return triggeredAnyTimers;
}
- private boolean handleTimeDependentOpsForNode(final ClusterState currentClusterState,
- final NodeListener nodeListener,
- final long currentTime,
- final NodeInfo node)
- {
- final NodeState currentStateInSystem = currentClusterState.getNodeState(node.getNode());
- final NodeState lastReportedState = node.getReportedState();
- boolean triggeredAnyTimers = false;
+ private boolean handleTimeDependentOpsForNode(ClusterState currentClusterState,
+ NodeListener nodeListener,
+ long currentTime,
+ NodeInfo node) {
+ NodeState currentStateInSystem = currentClusterState.getNodeState(node.getNode());
+ NodeState lastReportedState = node.getReportedState();
+ boolean triggeredAnyTimers =
+ reportDownIfOutdatedSlobrokNode(currentClusterState, nodeListener, currentTime, node, lastReportedState);
- triggeredAnyTimers = reportDownIfOutdatedSlobrokNode(
- currentClusterState, nodeListener, currentTime, node, lastReportedState);
-
- if (nodeStillUnavailableAfterTransitionTimeExceeded(
- currentTime, node, currentStateInSystem, lastReportedState))
- {
+ if (nodeStillUnavailableAfterTransitionTimeExceeded(currentTime, node, currentStateInSystem, lastReportedState))
triggeredAnyTimers = true;
- }
if (nodeInitProgressHasTimedOut(currentTime, node, currentStateInSystem, lastReportedState)) {
eventLog.add(NodeEvent.forBaseline(node, String.format(
@@ -287,11 +279,11 @@ public class StateChangeHandler {
if (mayResetCrashCounterOnStableUpNode(currentTime, node, lastReportedState)) {
node.setPrematureCrashCount(0);
- log.log(Level.FINE, () -> "Resetting premature crash count on node " + node + " as it has been up for a long time.");
+ log.log(FINE, () -> "Resetting premature crash count on node " + node + " as it has been up for a long time.");
triggeredAnyTimers = true;
} else if (mayResetCrashCounterOnStableDownNode(currentTime, node, lastReportedState)) {
node.setPrematureCrashCount(0);
- log.log(Level.FINE, () -> "Resetting premature crash count on node " + node + " as it has been down for a long time.");
+ log.log(FINE, () -> "Resetting premature crash count on node " + node + " as it has been down for a long time.");
triggeredAnyTimers = true;
}
@@ -299,17 +291,18 @@ public class StateChangeHandler {
}
private boolean nodeInitProgressHasTimedOut(long currentTime, NodeInfo node, NodeState currentStateInSystem, NodeState lastReportedState) {
- return !currentStateInSystem.getState().equals(State.DOWN)
- && node.getWantedState().above(new NodeState(node.getNode().getType(), State.DOWN))
- && lastReportedState.getState().equals(State.INITIALIZING)
+ return !currentStateInSystem.getState().equals(DOWN)
+ && node.getWantedState().above(new NodeState(node.getNode().getType(), DOWN))
+ && lastReportedState.getState().equals(INITIALIZING)
&& maxInitProgressTime != 0
&& node.getInitProgressTime() + maxInitProgressTime <= currentTime
&& node.getNode().getType().equals(NodeType.STORAGE);
}
+ // TODO: Merge this and the below method
private boolean mayResetCrashCounterOnStableDownNode(long currentTime, NodeInfo node, NodeState lastReportedState) {
return node.getDownStableStateTime() + stableStateTimePeriod <= currentTime
- && lastReportedState.getState().equals(State.DOWN)
+ && lastReportedState.getState().equals(DOWN)
&& node.getPrematureCrashCount() <= maxPrematureCrashes
&& node.getPrematureCrashCount() != 0;
}
@@ -328,8 +321,8 @@ public class StateChangeHandler {
NodeState lastReportedState)
{
return currentStateInSystem.getState().equals(State.MAINTENANCE)
- && node.getWantedState().above(new NodeState(node.getNode().getType(), State.DOWN))
- && (lastReportedState.getState().equals(State.DOWN) || node.isNotInSlobrok())
+ && node.getWantedState().above(new NodeState(node.getNode().getType(), DOWN))
+ && (lastReportedState.getState().equals(DOWN) || node.isNotInSlobrok())
&& node.getTransitionTime() + maxTransitionTime.get(node.getNode().getType()) < currentTime;
}
@@ -340,7 +333,7 @@ public class StateChangeHandler {
NodeState lastReportedState)
{
if (node.isNotInSlobrok()
- && !lastReportedState.getState().equals(State.DOWN)
+ && !lastReportedState.getState().equals(DOWN)
&& node.lastSeenInSlobrok() + maxSlobrokDisconnectGracePeriod <= currentTime)
{
final String desc = String.format(
@@ -350,7 +343,7 @@ public class StateChangeHandler {
maxSlobrokDisconnectGracePeriod);
node.abortCurrentNodeStateRequests();
NodeState state = lastReportedState.clone();
- state.setState(State.DOWN);
+ state.setState(DOWN);
if (!state.hasDescription()) {
state.setDescription(desc);
}
@@ -362,10 +355,10 @@ public class StateChangeHandler {
return false;
}
- private boolean isControlledShutdown(NodeState state) {
- return (state.getState() == State.STOPPING
- && (state.getDescription().contains("Received signal 15 (SIGTERM - Termination signal)")
- || state.getDescription().contains("controlled shutdown")));
+ private boolean isNotControlledShutdown(NodeState state) {
+ return (state.getState() != STOPPING
+ || (!state.getDescription().contains("Received signal 15 (SIGTERM - Termination signal)")
+ && !state.getDescription().contains("controlled shutdown")));
}
/**
@@ -381,14 +374,14 @@ public class StateChangeHandler {
final NodeState reportedState,
final NodeListener nodeListener) {
final long timeNow = timer.getCurrentTimeInMillis();
- log.log(Level.FINE, () -> String.format("Finding new cluster state entry for %s switching state %s", node, currentState.getTextualDifference(reportedState)));
+ log.log(FINE, () -> String.format("Finding new cluster state entry for %s switching state %s", node, currentState.getTextualDifference(reportedState)));
if (handleReportedNodeCrashEdge(node, currentState, reportedState, nodeListener, timeNow)) {
return;
}
if (initializationProgressHasIncreased(currentState, reportedState)) {
node.setInitProgressTime(timeNow);
- log.log(Level.FINEST, () -> "Reset initialize timer on " + node + " to " + node.getInitProgressTime());
+ log.log(FINEST, () -> "Reset initialize timer on " + node + " to " + node.getInitProgressTime());
}
if (handleImplicitCrashEdgeFromReverseInitProgress(node, currentState, reportedState, nodeListener, timeNow)) {
return;
@@ -402,9 +395,9 @@ public class StateChangeHandler {
final NodeState reportedState,
final NodeListener nodeListener,
final long timeNow) {
- if (currentState.getState().equals(State.INITIALIZING)
+ if (currentState.getState().equals(INITIALIZING)
&& reportedState.getState().oneOf("ds")
- && !isControlledShutdown(reportedState))
+ && isNotControlledShutdown(reportedState))
{
eventLog.add(NodeEvent.forBaseline(node, String.format("Stop or crash during initialization. " +
"Premature crash count is now %d.", node.getPrematureCrashCount() + 1),
@@ -421,8 +414,8 @@ public class StateChangeHandler {
final NodeState reportedState,
final NodeListener nodeListener,
final long timeNow) {
- if (currentState.getState().equals(State.INITIALIZING) &&
- (reportedState.getState().equals(State.INITIALIZING) && reportedState.getInitProgress() < currentState.getInitProgress()))
+ if (currentState.getState().equals(INITIALIZING) &&
+ (reportedState.getState().equals(INITIALIZING) && reportedState.getInitProgress() < currentState.getInitProgress()))
{
eventLog.add(NodeEvent.forBaseline(node, String.format(
"Stop or crash during initialization detected from reverse initializing progress." +
@@ -442,8 +435,8 @@ public class StateChangeHandler {
long timeNow) {
if (nodeUpToDownEdge(node, currentState, reportedState)) {
node.setTransitionTime(timeNow);
- if (node.getUpStableStateTime() + stableStateTimePeriod > timeNow && !isControlledShutdown(reportedState)) {
- log.log(Level.FINE, () -> "Stable state: " + node.getUpStableStateTime() + " + " + stableStateTimePeriod + " > " + timeNow);
+ if (node.getUpStableStateTime() + stableStateTimePeriod > timeNow && isNotControlledShutdown(reportedState)) {
+ log.log(FINE, () -> "Stable state: " + node.getUpStableStateTime() + " + " + stableStateTimePeriod + " > " + timeNow);
eventLog.add(NodeEvent.forBaseline(node,
String.format("Stopped or possibly crashed after %d ms, which is before " +
"stable state time period. Premature crash count is now %d.",
@@ -457,20 +450,20 @@ public class StateChangeHandler {
}
private boolean initializationProgressHasIncreased(NodeState currentState, NodeState reportedState) {
- return reportedState.getState().equals(State.INITIALIZING) &&
- (!currentState.getState().equals(State.INITIALIZING) ||
+ return reportedState.getState().equals(INITIALIZING) &&
+ (!currentState.getState().equals(INITIALIZING) ||
reportedState.getInitProgress() > currentState.getInitProgress());
}
private boolean nodeUpToDownEdge(NodeInfo node, NodeState currentState, NodeState reportedState) {
return currentState.getState().oneOf("ur") && reportedState.getState().oneOf("dis")
- && (node.getWantedState().getState().equals(State.RETIRED) || !reportedState.getState().equals(State.INITIALIZING));
+ && (node.getWantedState().getState().equals(State.RETIRED) || !reportedState.getState().equals(INITIALIZING));
}
private boolean handlePrematureCrash(NodeInfo node, NodeListener changeListener) {
node.setPrematureCrashCount(node.getPrematureCrashCount() + 1);
if (disableUnstableNodes && node.getPrematureCrashCount() > maxPrematureCrashes) {
- NodeState wantedState = new NodeState(node.getNode().getType(), State.DOWN)
+ NodeState wantedState = new NodeState(node.getNode().getType(), DOWN)
.setDescription("Disabled by fleet controller as it prematurely shut down " + node.getPrematureCrashCount() + " times in a row");
NodeState oldState = node.getWantedState();
node.setWantedState(wantedState);
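
The isControlledShutdown to isNotControlledShutdown change above is a De Morgan inversion: !(p && (q || r)) becomes !p || (!q && !r). A short sketch that checks this equivalence over all boolean combinations, with stand-in boolean parameters instead of the real NodeState fields:

class DeMorganSketch {
    // Shape of the old predicate: p && (q || r)
    static boolean isControlledShutdown(boolean stopping, boolean sigterm, boolean controlledMsg) {
        return stopping && (sigterm || controlledMsg);
    }

    // Shape of the new predicate from the diff: !p || (!q && !r)
    static boolean isNotControlledShutdown(boolean stopping, boolean sigterm, boolean controlledMsg) {
        return !stopping || (!sigterm && !controlledMsg);
    }

    public static void main(String[] args) {
        boolean[] values = { false, true };
        for (boolean p : values)
            for (boolean q : values)
                for (boolean r : values)
                    // The new method must be the exact negation of the old one.
                    if (isNotControlledShutdown(p, q, r) == isControlledShutdown(p, q, r))
                        throw new AssertionError("Forms disagree for " + p + ", " + q + ", " + r);
        System.out.println("The inverted form is the exact negation in all 8 cases.");
    }
}
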
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java
index 1c72594377a..01a75034ddf 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java
@@ -72,14 +72,13 @@ public class SetNodeStateRequest extends Request<SetResponse> {
static NodeState getRequestedNodeState(Map<String, UnitState> newStates, Node n) throws StateRestApiException {
UnitState newState = newStates.get("user");
if (newState == null) throw new InvalidContentException("No new user state given in request");
- State state;
- switch (newState.getId().toLowerCase()) {
- case "up": state = State.UP; break;
- case "retired": state = State.RETIRED; break;
- case "maintenance": state = State.MAINTENANCE; break;
- case "down": state = State.DOWN; break;
- default: throw new InvalidContentException("Invalid user state '" + newState.getId() + "' given.");
- }
+ State state = switch (newState.getId().toLowerCase()) {
+ case "up" -> State.UP;
+ case "retired" -> State.RETIRED;
+ case "maintenance" -> State.MAINTENANCE;
+ case "down" -> State.DOWN;
+ default -> throw new InvalidContentException("Invalid user state '" + newState.getId() + "' given.");
+ };
return new NodeState(n.getType(), state).setDescription(newState.getReason());
}
@@ -191,25 +190,18 @@ public class SetNodeStateRequest extends Request<SetResponse> {
boolean probe) {
Node distributorNode = new Node(NodeType.DISTRIBUTOR, index);
NodeInfo nodeInfo = cluster.getNodeInfo(distributorNode);
- if (nodeInfo == null) {
- throw new IllegalStateException("Missing distributor at index " +
- distributorNode.getIndex());
- }
+ if (nodeInfo == null)
+ throw new IllegalStateException("Missing distributor at index " + distributorNode.getIndex());
State newState;
switch (newStorageWantedState.getState()) {
- case MAINTENANCE:
- newState = State.DOWN;
- break;
- case RETIRED:
- newState = State.UP;
- break;
- default:
+ case MAINTENANCE -> newState = State.DOWN;
+ case RETIRED -> newState = State.UP;
+ default -> {
newState = newStorageWantedState.getState();
- if (!newState.validWantedNodeState(distributorNode.getType())) {
- throw new IllegalStateException("Distributor cannot be set to wanted state " +
- newState);
- }
+ if (!newState.validWantedNodeState(distributorNode.getType()))
+ throw new IllegalStateException("Distributor cannot be set to wanted state " + newState);
+ }
}
NodeState newWantedState = new NodeState(distributorNode.getType(), newState);
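
The getRequestedNodeState hunk shows that a branch of a switch expression may itself throw, which removes the declare-assign-break boilerplate of the old statement form. A condensed sketch of the same shape, using a stand-in State enum and an unchecked exception in place of InvalidContentException:

class WantedStateSketch {
    // Stand-in for com.yahoo.vdslib.state.State, illustration only.
    enum State { UP, RETIRED, MAINTENANCE, DOWN }

    static State parse(String id) {
        return switch (id.toLowerCase()) {
            case "up" -> State.UP;
            case "retired" -> State.RETIRED;
            case "maintenance" -> State.MAINTENANCE;
            case "down" -> State.DOWN;
            // Throwing in the default branch keeps the expression total.
            default -> throw new IllegalArgumentException("Invalid user state '" + id + "' given.");
        };
    }

    public static void main(String[] args) {
        System.out.println(parse("Maintenance")); // prints MAINTENANCE
    }
}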