diff options
5 files changed, 13 insertions, 24 deletions
diff --git a/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java b/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java index 9dbe5212f8c..c6a2ecc0c1c 100644 --- a/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java +++ b/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java @@ -79,8 +79,11 @@ public class ClusterControllerClusterConfigurer { options.minRatioOfDistributorNodesUp = config.min_distributor_up_ratio(); options.minRatioOfStorageNodesUp = config.min_storage_up_ratio(); options.cycleWaitTime = (int) (config.cycle_wait_time() * 1000); + options.minTimeBeforeFirstSystemStateBroadcast = (int) (config.min_time_before_first_system_state_broadcast() * 1000); + options.nodeStateRequestTimeoutMS = (int) (config.get_node_state_request_timeout() * 1000); options.showLocalSystemStatesInEventLog = config.show_local_systemstates_in_event_log(); options.minTimeBetweenNewSystemStates = config.min_time_between_new_systemstates(); + options.maxSlobrokDisconnectGracePeriod = (int) (config.max_slobrok_disconnect_grace_period() * 1000); options.distributionBits = config.ideal_distribution_bits(); options.minNodeRatioPerGroup = config.min_node_ratio_per_group(); options.setMaxDeferredTaskVersionWaitTime(Duration.ofMillis((int)(config.max_deferred_task_version_wait_time_sec() * 1000))); @@ -90,20 +93,6 @@ public class ClusterControllerClusterConfigurer { options.clusterFeedBlockEnabled = config.enable_cluster_feed_block(); options.clusterFeedBlockLimit = Map.copyOf(config.cluster_feed_block_limit()); options.clusterFeedBlockNoiseLevel = config.cluster_feed_block_noise_level(); - - // minTimeBeforeFirstSystemStateBroadcast is the minimum time the CC will wait 
for the storage - // nodes and distributors being down in Slobrok and/or getnodestate, before being allowed to - // broadcast a cluster state. We therefore force a longer timeout depending on related settings. - options.maxSlobrokDisconnectGracePeriod = (int) (config.max_slobrok_disconnect_grace_period() * 1000); - options.nodeStateRequestTimeoutMS = (int) (config.get_node_state_request_timeout() * 1000); - options.minTimeBeforeFirstSystemStateBroadcast = max( - options.maxSlobrokDisconnectGracePeriod, - options.nodeStateRequestTimeoutMS, - (int) (config.min_time_before_first_system_state_broadcast() * 1000)); - } - - private static int max(int a, int b, int c) { - return Math.max(a, Math.max(b, c)); } private static void configure(FleetControllerOptions options, SlobroksConfig config) { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java index 9b1498cd809..41c7c985c0b 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java @@ -171,7 +171,8 @@ public class ContentCluster { /** * Checks if a node can be upgraded - * @param node the node to be checked for upgrad + * + * @param node the node to be checked for upgrade * @param clusterState the current cluster state version * @param condition the upgrade condition * @param oldState the old/current wanted state diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java index 7e67cf27cba..c841f4741ba 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java +++ 
b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java @@ -95,7 +95,7 @@ public class FleetControllerOptions implements Cloneable { * but should be a few seconds in a real system to prevent new nodes taking over from disturbing the system by * putting out a different systemstate just because all nodes don't answer witihin a single cycle. * The cluster state is allowed to be broadcasted before this time if all nodes have successfully - * reported their state in Slobrok and getnodestate. This value should typically be at least + * reported their state in Slobrok and getnodestate. This value should typically be in the order of * maxSlobrokDisconnectGracePeriod and nodeStateRequestTimeoutMS. */ public long minTimeBeforeFirstSystemStateBroadcast = 0; diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/WantedStateSetter.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/WantedStateSetter.java index 0336853e6e9..ef9885c7cb4 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/WantedStateSetter.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/WantedStateSetter.java @@ -23,5 +23,6 @@ public interface WantedStateSetter { Node node, NodeStateOrHostInfoChangeHandler stateListener, ClusterState currentClusterState, - boolean inMasterMoratorium, boolean probe) throws StateRestApiException; + boolean inMasterMoratorium, + boolean probe) throws StateRestApiException; } diff --git a/configdefinitions/src/vespa/fleetcontroller.def b/configdefinitions/src/vespa/fleetcontroller.def index 08fbed7f263..b6ea50507ca 100644 --- a/configdefinitions/src/vespa/fleetcontroller.def +++ b/configdefinitions/src/vespa/fleetcontroller.def @@ -112,14 +112,12 @@ min_storage_up_ratio double default=0.01 cycle_wait_time double 
default=0.1 ## Minimum time to pass in seconds before broadcasting our first systemstate as -## a new fleetcontroller. (Will broadcast earlier than this if we have gathered -## state from all before this). To prevent disturbance when taking over as +## a new fleetcontroller. Will broadcast earlier than this if we have gathered +## state from all nodes before this. To prevent disturbance when taking over as ## fleetcontroller, give nodes a bit of time to answer so we dont temporarily -## report nodes as down. The time before the first broadcast may be increased -## further by other settings like max_slobrok_disconnect_grace_period and -## get_node_state_request_timeout, but may be shorter if all nodes have -## reported their state. -min_time_before_first_system_state_broadcast double default=120.0 +## report nodes as down. See also max_slobrok_disconnect_grace_period and +## get_node_state_request_timeout. +min_time_before_first_system_state_broadcast double default=30.0 ## Request timeout of node state requests. Keeping a high timeout allows us to ## always have a pending operation with very low cost. Keeping a low timeout is |