diff options
6 files changed, 87 insertions, 64 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index da115e56fbb..72765d24771 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -601,42 +601,42 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd public void tick() throws Exception { synchronized (monitor) { boolean didWork; - didWork = database.doNextZooKeeperTask(databaseContext); - didWork |= updateMasterElectionState(); - didWork |= handleLeadershipEdgeTransitions(); + didWork = metricUpdater.forWork("doNextZooKeeperTask", () -> database.doNextZooKeeperTask(databaseContext)); + didWork |= metricUpdater.forWork("updateMasterElectionState", this::updateMasterElectionState); + didWork |= metricUpdater.forWork("handleLeadershipEdgeTransitions", this::handleLeadershipEdgeTransitions); stateChangeHandler.setMaster(isMaster); if ( ! isRunning()) { return; } // Process zero or more getNodeState responses that we have received. - didWork |= stateGatherer.processResponses(this); + didWork |= metricUpdater.forWork("stateGatherer-processResponses", () -> stateGatherer.processResponses(this)); if ( ! isRunning()) { return; } if (masterElectionHandler.isAmongNthFirst(options.stateGatherCount)) { - didWork |= resyncLocallyCachedState(); + didWork |= resyncLocallyCachedState(); // Calls to metricUpdate.forWork inside method } else { stepDownAsStateGatherer(); } if ( ! isRunning()) { return; } - didWork |= systemStateBroadcaster.processResponses(); + didWork |= metricUpdater.forWork("systemStateBroadcaster-processResponses", systemStateBroadcaster::processResponses); if ( ! isRunning()) { return; } if (isMaster) { - didWork |= broadcastClusterStateToEligibleNodes(); + didWork |= metricUpdater.forWork("broadcastClusterStateToEligibleNodes", this::broadcastClusterStateToEligibleNodes); systemStateBroadcaster.checkIfClusterStateIsAckedByAllDistributors(database, databaseContext, this); } if ( ! isRunning()) { return; } - didWork |= processAnyPendingStatusPageRequest(); + didWork |= metricUpdater.forWork("processAnyPendingStatusPageRequest", this::processAnyPendingStatusPageRequest); if ( ! isRunning()) { return; } if (rpcServer != null) { - didWork |= rpcServer.handleRpcRequests(cluster, consolidatedClusterState(), this, this); + didWork |= metricUpdater.forWork("handleRpcRequests", () -> rpcServer.handleRpcRequests(cluster, consolidatedClusterState(), this, this)); } if ( ! isRunning()) { return; } - didWork |= processNextQueuedRemoteTask(); - didWork |= completeSatisfiedVersionDependentTasks(); - didWork |= maybePublishOldMetrics(); + didWork |= metricUpdater.forWork("processNextQueuedRemoteTask", this::processNextQueuedRemoteTask); + didWork |= metricUpdater.forWork("completeSatisfiedVersionDependentTasks", this::completeSatisfiedVersionDependentTasks); + didWork |= metricUpdater.forWork("maybePublishOldMetrics", this::maybePublishOldMetrics); processingCycle = false; ++cycleCount; @@ -644,9 +644,8 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd if (tickStopTime >= tickStartTime) { metricUpdater.addTickTime(tickStopTime - tickStartTime, didWork); } - if ( ! didWork && ! waitingForCycle) { - monitor.wait(options.cycleWaitTime); - } + // Always sleep some to use avoid using too much CPU and avoid starving waiting threads + monitor.wait(didWork || waitingForCycle ? 1 : options.cycleWaitTime); if ( ! isRunning()) { return; } tickStartTime = timer.getCurrentTimeInMillis(); processingCycle = true; @@ -659,11 +658,11 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd } } - private boolean updateMasterElectionState() throws InterruptedException { + private boolean updateMasterElectionState() { try { return masterElectionHandler.watchMasterElection(database, databaseContext); } catch (InterruptedException e) { - throw (InterruptedException) new InterruptedException("Interrupted").initCause(e); + throw new RuntimeException(e); } catch (Exception e) { log.log(Level.WARNING, "Failed to watch master election: " + e.toString()); } @@ -891,23 +890,25 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd - a node that stops normally (U -> S) then goes down erroneously triggers premature crash handling - long time before content node state convergence (though this seems to be the case for legacy impl as well) */ - private boolean resyncLocallyCachedState() throws InterruptedException { + private boolean resyncLocallyCachedState() { boolean didWork = false; // Let non-master state gatherers update wanted states once in a while, so states generated and shown are close to valid. if ( ! isMaster && cycleCount % 100 == 0) { - didWork = database.loadWantedStates(databaseContext); - didWork |= database.loadStartTimestamps(cluster); + didWork = metricUpdater.forWork("loadWantedStates", () -> database.loadWantedStates(databaseContext)); + didWork |= metricUpdater.forWork("loadStartTimestamps", () -> database.loadStartTimestamps(cluster)); } // If we have new slobrok information, update our cluster. - didWork |= nodeLookup.updateCluster(cluster, this); + didWork |= metricUpdater.forWork("updateCluster", () -> nodeLookup.updateCluster(cluster, this)); // Send getNodeState requests to zero or more nodes. - didWork |= stateGatherer.sendMessages(cluster, communicator, this); + didWork |= metricUpdater.forWork("sendMessages", () -> stateGatherer.sendMessages(cluster, communicator, this)); // Important: timer events must use a state with pending changes visible, or they might // trigger edge events multiple times. - didWork |= stateChangeHandler.watchTimers(cluster, stateVersionTracker.getLatestCandidateState().getClusterState(), this); + didWork |= metricUpdater.forWork( + "watchTimers", + () -> stateChangeHandler.watchTimers(cluster, stateVersionTracker.getLatestCandidateState().getClusterState(), this)); - didWork |= recomputeClusterStateIfRequired(); + didWork |= metricUpdater.forWork("recomputeClusterStateIfRequired", this::recomputeClusterStateIfRequired); if ( ! isStateGatherer) { if ( ! isMaster) { @@ -1070,7 +1071,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd || atFirstClusterStateSendTimeEdge(); } - private boolean handleLeadershipEdgeTransitions() throws InterruptedException { + private boolean handleLeadershipEdgeTransitions() { boolean didWork = false; if (masterElectionHandler.isMaster()) { if ( ! isMaster) { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MetricUpdater.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MetricUpdater.java index 94b6f412ce6..58f40dc3318 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MetricUpdater.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MetricUpdater.java @@ -1,14 +1,20 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.clustercontroller.core; -import com.yahoo.vdslib.state.*; +import com.yahoo.vdslib.state.ClusterState; +import com.yahoo.vdslib.state.Node; +import com.yahoo.vdslib.state.NodeState; +import com.yahoo.vdslib.state.NodeType; +import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.utils.util.ComponentMetricReporter; import com.yahoo.vespa.clustercontroller.utils.util.MetricReporter; +import java.time.Duration; import java.util.HashMap; import java.util.Map; import java.util.SortedSet; import java.util.TreeSet; +import java.util.function.BooleanSupplier; public class MetricUpdater { @@ -103,4 +109,16 @@ public class MetricUpdater { public void updateRemoteTaskQueueSize(int size) { metricReporter.set("remote-task-queue.size", size); } + + public boolean forWork(String workId, BooleanSupplier work) { + long startNanos = System.nanoTime(); + boolean didWork = work.getAsBoolean(); + double seconds = Duration.ofNanos(System.nanoTime() - startNanos).toMillis() / 1000.; + + MetricReporter.Context context = createContext(Map.of("didWork", Boolean.toString(didWork), + "workId", workId)); + metricReporter.set("work-ms", seconds, context); + + return didWork; + } } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/Database.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/Database.java index 06aa9547f4e..7a241dc7ebd 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/Database.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/Database.java @@ -34,7 +34,7 @@ public abstract class Database { * * @return True if request succeeded. False if not. */ - public abstract boolean storeMasterVote(int wantedMasterIndex) throws InterruptedException; + public abstract boolean storeMasterVote(int wantedMasterIndex); /** * Store the latest system state version used. When the fleetcontroller makes a given version official it should @@ -48,7 +48,7 @@ public abstract class Database { * @throws CasWriteFailed if the expected version of the znode did not match what was actually stored in the DB. * In this case, the write has NOT been applied. */ - public abstract boolean storeLatestSystemStateVersion(int version) throws InterruptedException; + public abstract boolean storeLatestSystemStateVersion(int version); /** * Get the latest system state version used. To keep the version rising, a newly elected master will call this @@ -56,7 +56,7 @@ public abstract class Database { * * @return The last system state version used, or null if request failed. */ - public abstract Integer retrieveLatestSystemStateVersion() throws InterruptedException; + public abstract Integer retrieveLatestSystemStateVersion(); /** * Save our current wanted states in the database. Typically called after processing an RPC request for altering @@ -64,7 +64,7 @@ public abstract class Database { * * @return True if the request succeeded. False if not. */ - public abstract boolean storeWantedStates(Map<Node, NodeState> states) throws InterruptedException; + public abstract boolean storeWantedStates(Map<Node, NodeState> states); /** * Read wanted states from the database and set wanted states for all nodes in the cluster accordingly. @@ -72,17 +72,17 @@ public abstract class Database { * * @return True if wanted states was altered, false if not. Null if request failed. */ - public abstract Map<Node, NodeState> retrieveWantedStates() throws InterruptedException; + public abstract Map<Node, NodeState> retrieveWantedStates(); /** * Store start times of distributor and service layer nodes in zookeeper. */ - public abstract boolean storeStartTimestamps(Map<Node, Long> timestamps) throws InterruptedException; + public abstract boolean storeStartTimestamps(Map<Node, Long> timestamps); /** * Fetch the start times of distributor and service layer nodes. */ - public abstract Map<Node, Long> retrieveStartTimestamps() throws InterruptedException; + public abstract Map<Node, Long> retrieveStartTimestamps(); /** * Stores the last published cluster state bundle synchronously into ZooKeeper. @@ -95,8 +95,8 @@ public abstract class Database { * @throws CasWriteFailed if the expected version of the znode did not match what was actually stored in the DB. * In this case, the write has NOT been applied. */ - public abstract boolean storeLastPublishedStateBundle(ClusterStateBundle stateBundle) throws InterruptedException; + public abstract boolean storeLastPublishedStateBundle(ClusterStateBundle stateBundle); - public abstract ClusterStateBundle retrieveLastPublishedStateBundle() throws InterruptedException; + public abstract ClusterStateBundle retrieveLastPublishedStateBundle(); } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java index 2bf86d7056b..b585afebbad 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java @@ -175,7 +175,7 @@ public class DatabaseHandler { private boolean usingZooKeeper() { return (zooKeeperAddress != null); } - private void connect(ContentCluster cluster, long currentTime) throws InterruptedException { + private void connect(ContentCluster cluster, long currentTime) { try { lastZooKeeperConnectionAttempt = currentTime; synchronized (databaseMonitor) { @@ -200,7 +200,7 @@ public class DatabaseHandler { log.log(Level.FINE, () -> "Fleetcontroller " + nodeIndex + ": Cannot create ephemeral fleetcontroller node. ZooKeeper server " + "not seen old fleetcontroller instance disappear? It already exists. Will retry later: " + e.getMessage()); } catch (InterruptedException e) { - throw (InterruptedException) new InterruptedException("Interrupted").initCause(e); + throw new RuntimeException(e); } catch (KeeperException.ConnectionLossException e) { log.log(Level.WARNING, "Fleetcontroller " + nodeIndex + ": Failed to connect to ZooKeeper at " + zooKeeperAddress + " with session timeout " + zooKeeperSessionTimeout + ": " + e.getMessage()); @@ -218,7 +218,7 @@ public class DatabaseHandler { * * @return true if we did or attempted any work. */ - public boolean doNextZooKeeperTask(Context context) throws InterruptedException { + public boolean doNextZooKeeperTask(Context context) { boolean didWork = false; synchronized (monitor) { if (lostZooKeeperConnectionEvent) { @@ -278,7 +278,7 @@ public class DatabaseHandler { reset(context); } - private boolean performZooKeeperWrites() throws InterruptedException { + private boolean performZooKeeperWrites() { boolean didWork = false; if (pendingStore.masterVote != null) { didWork = true; @@ -373,7 +373,7 @@ public class DatabaseHandler { } } - public int getLatestSystemStateVersion() throws InterruptedException { + public int getLatestSystemStateVersion() { log.log(Level.FINE, () -> "Fleetcontroller " + nodeIndex + ": Retrieving latest system state version."); synchronized (databaseMonitor) { if (database != null && !database.isClosed()) { @@ -390,7 +390,7 @@ public class DatabaseHandler { return version; } - public void saveLatestClusterStateBundle(Context context, ClusterStateBundle clusterStateBundle) throws InterruptedException { + public void saveLatestClusterStateBundle(Context context, ClusterStateBundle clusterStateBundle) { log.log(Level.FINE, () -> String.format("Fleetcontroller %d: Scheduling bundle %s to be saved to ZooKeeper", nodeIndex, clusterStateBundle)); pendingStore.clusterStateBundle = clusterStateBundle; doNextZooKeeperTask(context); @@ -413,7 +413,7 @@ public class DatabaseHandler { } } - public ClusterStateBundle getLatestClusterStateBundle() throws InterruptedException { + public ClusterStateBundle getLatestClusterStateBundle() { log.log(Level.FINE, () -> String.format("Fleetcontroller %d: Retrieving latest cluster state bundle from ZooKeeper", nodeIndex)); synchronized (databaseMonitor) { if (database != null && !database.isClosed()) { @@ -424,7 +424,7 @@ public class DatabaseHandler { } } - public void saveWantedStates(Context context) throws InterruptedException { + public void saveWantedStates(Context context) { log.log(Level.FINE, () -> "Fleetcontroller " + nodeIndex + ": Checking whether wanted states have changed compared to zookeeper version."); Map<Node, NodeState> wantedStates = new TreeMap<>(); for (NodeInfo info : context.getCluster().getNodeInfo()) { @@ -445,7 +445,7 @@ public class DatabaseHandler { } } - public boolean loadWantedStates(Context context) throws InterruptedException { + public boolean loadWantedStates(Context context) { log.log(Level.FINE, () -> "Fleetcontroller " + nodeIndex + ": Retrieving node wanted states."); synchronized (databaseMonitor) { if (database != null && !database.isClosed()) { @@ -486,13 +486,13 @@ public class DatabaseHandler { return altered; } - public void saveStartTimestamps(Context context) throws InterruptedException { + public void saveStartTimestamps(Context context) { log.log(Level.FINE, () -> "Fleetcontroller " + nodeIndex + ": Scheduling start timestamps to be stored into zookeeper."); pendingStore.startTimestamps = context.getCluster().getStartTimestamps(); doNextZooKeeperTask(context); } - public boolean loadStartTimestamps(ContentCluster cluster) throws InterruptedException { + public boolean loadStartTimestamps(ContentCluster cluster) { log.log(Level.FINE, () -> "Fleetcontroller " + nodeIndex + ": Retrieving start timestamps"); synchronized (databaseMonitor) { if (database == null || database.isClosed()) { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/ZooKeeperDatabase.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/ZooKeeperDatabase.java index 97f2126e2ff..af58fc4c8e6 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/ZooKeeperDatabase.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/ZooKeeperDatabase.java @@ -193,20 +193,20 @@ public class ZooKeeperDatabase extends Database { } } - public boolean storeMasterVote(int wantedMasterIndex) throws InterruptedException { + public boolean storeMasterVote(int wantedMasterIndex) { byte[] val = String.valueOf(wantedMasterIndex).getBytes(utf8); try{ session.setData(getMyIndexPath(), val, -1); log.log(Level.INFO, "Fleetcontroller " + nodeIndex + ": Stored new vote in ephemeral node. " + nodeIndex + " -> " + wantedMasterIndex); return true; } catch (InterruptedException e) { - throw (InterruptedException) new InterruptedException("Interrupted").initCause(e); + throw new RuntimeException(e); } catch (Exception e) { maybeLogExceptionWarning(e, "Failed to create our ephemeral node and store master vote"); } return false; } - public boolean storeLatestSystemStateVersion(int version) throws InterruptedException { + public boolean storeLatestSystemStateVersion(int version) { byte[] data = Integer.toString(version).getBytes(utf8); try{ log.log(Level.INFO, String.format("Fleetcontroller %d: Storing new cluster state version in ZooKeeper: %d", nodeIndex, version)); @@ -214,7 +214,7 @@ public class ZooKeeperDatabase extends Database { lastKnownStateVersionZNodeVersion = stat.getVersion(); return true; } catch (InterruptedException e) { - throw (InterruptedException) new InterruptedException("Interrupted").initCause(e); + throw new RuntimeException(e); } catch (KeeperException.BadVersionException e) { throw new CasWriteFailed(String.format("version mismatch in cluster state version znode (expected %d): %s", lastKnownStateVersionZNodeVersion, e.getMessage()), e); @@ -224,7 +224,7 @@ public class ZooKeeperDatabase extends Database { } } - public Integer retrieveLatestSystemStateVersion() throws InterruptedException { + public Integer retrieveLatestSystemStateVersion() { Stat stat = new Stat(); try{ log.log(Level.FINE, () -> String.format("Fleetcontroller %d: Fetching latest cluster state at '%slatestversion'", @@ -236,7 +236,7 @@ public class ZooKeeperDatabase extends Database { "(znode version %d)", nodeIndex, versionNumber, stat.getVersion())); return versionNumber; } catch (InterruptedException e) { - throw (InterruptedException) new InterruptedException("Interrupted").initCause(e); + throw new RuntimeException(e); } catch (Exception e) { // If we return a default, empty version, writes dependent on this bundle should only // succeed if the previous znode version is 0, i.e. not yet created. @@ -246,7 +246,7 @@ public class ZooKeeperDatabase extends Database { } } - public boolean storeWantedStates(Map<Node, NodeState> states) throws InterruptedException { + public boolean storeWantedStates(Map<Node, NodeState> states) { if (states == null) states = new TreeMap<>(); StringBuilder sb = new StringBuilder(); for (Node node : states.keySet()) { @@ -266,14 +266,14 @@ public class ZooKeeperDatabase extends Database { session.setData(zooKeeperRoot + "wantedstates", val, -1); return true; } catch (InterruptedException e) { - throw (InterruptedException) new InterruptedException("Interrupted").initCause(e); + throw new RuntimeException(e); } catch (Exception e) { maybeLogExceptionWarning(e, "Failed to store wanted states in ZooKeeper"); return false; } } - public Map<Node, NodeState> retrieveWantedStates() throws InterruptedException { + public Map<Node, NodeState> retrieveWantedStates() { try{ log.log(Level.FINE, () -> "Fleetcontroller " + nodeIndex + ": Fetching wanted states at '" + zooKeeperRoot + "wantedstates'"); Stat stat = new Stat(); @@ -296,7 +296,7 @@ public class ZooKeeperDatabase extends Database { } return wanted; } catch (InterruptedException e) { - throw (InterruptedException) new InterruptedException("Interrupted").initCause(e); + throw new RuntimeException(e); } catch (Exception e) { maybeLogExceptionWarning(e, "Failed to retrieve wanted states from ZooKeeper"); return null; @@ -304,7 +304,7 @@ public class ZooKeeperDatabase extends Database { } @Override - public boolean storeStartTimestamps(Map<Node, Long> timestamps) throws InterruptedException { + public boolean storeStartTimestamps(Map<Node, Long> timestamps) { if (timestamps == null) timestamps = new TreeMap<>(); StringBuilder sb = new StringBuilder(); for (Node n : timestamps.keySet()) { @@ -317,7 +317,7 @@ public class ZooKeeperDatabase extends Database { session.setData(zooKeeperRoot + "starttimestamps", val, -1); return true; } catch (InterruptedException e) { - throw (InterruptedException) new InterruptedException("Interrupted").initCause(e); + throw new RuntimeException(e); } catch (Exception e) { maybeLogExceptionWarning(e, "Failed to store start timestamps in ZooKeeper"); return false; @@ -325,7 +325,7 @@ public class ZooKeeperDatabase extends Database { } @Override - public Map<Node, Long> retrieveStartTimestamps() throws InterruptedException { + public Map<Node, Long> retrieveStartTimestamps() { try{ log.log(Level.FINE, () -> "Fleetcontroller " + nodeIndex + ": Fetching start timestamps at '" + zooKeeperRoot + "starttimestamps'"); Stat stat = new Stat(); @@ -348,7 +348,7 @@ public class ZooKeeperDatabase extends Database { } return wanted; } catch (InterruptedException e) { - throw (InterruptedException) new InterruptedException("Interrupted").initCause(e); + throw new RuntimeException(e); } catch (Exception e) { maybeLogExceptionWarning(e, "Failed to retrieve start timestamps from ZooKeeper"); return null; @@ -356,7 +356,7 @@ public class ZooKeeperDatabase extends Database { } @Override - public boolean storeLastPublishedStateBundle(ClusterStateBundle stateBundle) throws InterruptedException { + public boolean storeLastPublishedStateBundle(ClusterStateBundle stateBundle) { EnvelopedClusterStateBundleCodec envelopedBundleCodec = new SlimeClusterStateBundleCodec(); byte[] encodedBundle = envelopedBundleCodec.encodeWithEnvelope(stateBundle); try{ @@ -366,7 +366,7 @@ public class ZooKeeperDatabase extends Database { var stat = session.setData(zooKeeperRoot + "published_state_bundle", encodedBundle, lastKnownStateBundleZNodeVersion); lastKnownStateBundleZNodeVersion = stat.getVersion(); } catch (InterruptedException e) { - throw (InterruptedException) new InterruptedException("Interrupted").initCause(e); + throw new RuntimeException(e); } catch (KeeperException.BadVersionException e) { throw new CasWriteFailed(String.format("version mismatch in cluster state bundle znode (expected %d): %s", lastKnownStateBundleZNodeVersion, e.getMessage()), e); @@ -378,7 +378,7 @@ public class ZooKeeperDatabase extends Database { } @Override - public ClusterStateBundle retrieveLastPublishedStateBundle() throws InterruptedException { + public ClusterStateBundle retrieveLastPublishedStateBundle() { Stat stat = new Stat(); try { byte[] data = session.getData(zooKeeperRoot + "published_state_bundle", false, stat); @@ -388,7 +388,7 @@ public class ZooKeeperDatabase extends Database { return envelopedBundleCodec.decodeWithEnvelope(data); } } catch (InterruptedException e) { - throw (InterruptedException) new InterruptedException("Interrupted").initCause(e); + throw new RuntimeException(e); } catch (Exception e) { maybeLogExceptionWarning(e, "Failed to retrieve last published cluster state bundle from " + "ZooKeeper, will use an empty state as baseline"); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java index 39a7fabf0d3..760c966a6b2 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java @@ -249,6 +249,10 @@ public class VespaMetricSet { metrics.add(new Metric("cluster-controller.idle-tick-time-ms.sum")); metrics.add(new Metric("cluster-controller.idle-tick-time-ms.count")); + metrics.add(new Metric("cluster-controller.work-ms.last")); + metrics.add(new Metric("cluster-controller.work-ms.sum")); + metrics.add(new Metric("cluster-controller.work-ms.count")); + metrics.add(new Metric("cluster-controller.is-master.last")); metrics.add(new Metric("cluster-controller.remote-task-queue.size.last")); // TODO(hakonhall): Update this name once persistent "count" metrics has been implemented. |