diff options
3 files changed, 22 insertions, 6 deletions
diff --git a/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java b/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java index c6a2ecc0c1c..9dbe5212f8c 100644 --- a/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java +++ b/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java @@ -79,11 +79,8 @@ public class ClusterControllerClusterConfigurer { options.minRatioOfDistributorNodesUp = config.min_distributor_up_ratio(); options.minRatioOfStorageNodesUp = config.min_storage_up_ratio(); options.cycleWaitTime = (int) (config.cycle_wait_time() * 1000); - options.minTimeBeforeFirstSystemStateBroadcast = (int) (config.min_time_before_first_system_state_broadcast() * 1000); - options.nodeStateRequestTimeoutMS = (int) (config.get_node_state_request_timeout() * 1000); options.showLocalSystemStatesInEventLog = config.show_local_systemstates_in_event_log(); options.minTimeBetweenNewSystemStates = config.min_time_between_new_systemstates(); - options.maxSlobrokDisconnectGracePeriod = (int) (config.max_slobrok_disconnect_grace_period() * 1000); options.distributionBits = config.ideal_distribution_bits(); options.minNodeRatioPerGroup = config.min_node_ratio_per_group(); options.setMaxDeferredTaskVersionWaitTime(Duration.ofMillis((int)(config.max_deferred_task_version_wait_time_sec() * 1000))); @@ -93,6 +90,20 @@ public class ClusterControllerClusterConfigurer { options.clusterFeedBlockEnabled = config.enable_cluster_feed_block(); options.clusterFeedBlockLimit = Map.copyOf(config.cluster_feed_block_limit()); options.clusterFeedBlockNoiseLevel = config.cluster_feed_block_noise_level(); + + // minTimeBeforeFirstSystemStateBroadcast is the minimum time the CC will wait 
for the storage + // nodes and distributors being down in Slobrok and/or getnodestate, before being allowed to + // broadcast a cluster state. We therefore force a longer timeout depending on related settings. + options.maxSlobrokDisconnectGracePeriod = (int) (config.max_slobrok_disconnect_grace_period() * 1000); + options.nodeStateRequestTimeoutMS = (int) (config.get_node_state_request_timeout() * 1000); + options.minTimeBeforeFirstSystemStateBroadcast = max( + options.maxSlobrokDisconnectGracePeriod, + options.nodeStateRequestTimeoutMS, + (int) (config.min_time_before_first_system_state_broadcast() * 1000)); + } + + private static int max(int a, int b, int c) { + return Math.max(a, Math.max(b, c)); } private static void configure(FleetControllerOptions options, SlobroksConfig config) { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java index 528a9d79a7b..7e67cf27cba 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java @@ -94,7 +94,9 @@ public class FleetControllerOptions implements Cloneable { * Minimum time to pass (in milliseconds) before broadcasting our first systemstate. Set small in unit tests, * but should be a few seconds in a real system to prevent new nodes taking over from disturbing the system by * putting out a different systemstate just because all nodes don't answer within a single cycle. - * If all nodes have reported before this time, the min time is ignored and system state is broadcasted. + * The cluster state is allowed to be broadcast before this time if all nodes have successfully + * reported their state in Slobrok and getnodestate. 
This value should typically be at least + * maxSlobrokDisconnectGracePeriod and nodeStateRequestTimeoutMS. */ public long minTimeBeforeFirstSystemStateBroadcast = 0; diff --git a/configdefinitions/src/vespa/fleetcontroller.def b/configdefinitions/src/vespa/fleetcontroller.def index d2d746363f0..08fbed7f263 100644 --- a/configdefinitions/src/vespa/fleetcontroller.def +++ b/configdefinitions/src/vespa/fleetcontroller.def @@ -115,8 +115,11 @@ cycle_wait_time double default=0.1 ## a new fleetcontroller. (Will broadcast earlier than this if we have gathered ## state from all before this). To prevent disturbance when taking over as ## fleetcontroller, give nodes a bit of time to answer so we don't temporarily -## report nodes as down. -min_time_before_first_system_state_broadcast double default=5.0 +## report nodes as down. The time before the first broadcast may be increased +## further by other settings like max_slobrok_disconnect_grace_period and +## get_node_state_request_timeout, but may be shorter if all nodes have +## reported their state. +min_time_before_first_system_state_broadcast double default=120.0 ## Request timeout of node state requests. Keeping a high timeout allows us to ## always have a pending operation with very low cost. Keeping a low timeout is |