3 files changed, 22 insertions, 6 deletions
diff --git a/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java b/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java
index c6a2ecc0c1c..9dbe5212f8c 100644
--- a/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java
+++ b/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java
@@ -79,11 +79,8 @@ public class ClusterControllerClusterConfigurer {
         options.minRatioOfDistributorNodesUp = config.min_distributor_up_ratio();
         options.minRatioOfStorageNodesUp = config.min_storage_up_ratio();
         options.cycleWaitTime = (int) (config.cycle_wait_time() * 1000);
-        options.minTimeBeforeFirstSystemStateBroadcast = (int) (config.min_time_before_first_system_state_broadcast() * 1000);
-        options.nodeStateRequestTimeoutMS = (int) (config.get_node_state_request_timeout() * 1000);
         options.showLocalSystemStatesInEventLog = config.show_local_systemstates_in_event_log();
         options.minTimeBetweenNewSystemStates = config.min_time_between_new_systemstates();
-        options.maxSlobrokDisconnectGracePeriod = (int) (config.max_slobrok_disconnect_grace_period() * 1000);
         options.distributionBits = config.ideal_distribution_bits();
         options.minNodeRatioPerGroup = config.min_node_ratio_per_group();
         options.setMaxDeferredTaskVersionWaitTime(Duration.ofMillis((int)(config.max_deferred_task_version_wait_time_sec() * 1000)));
@@ -93,6 +90,20 @@ public class ClusterControllerClusterConfigurer {
         options.clusterFeedBlockEnabled = config.enable_cluster_feed_block();
         options.clusterFeedBlockLimit = Map.copyOf(config.cluster_feed_block_limit());
         options.clusterFeedBlockNoiseLevel = config.cluster_feed_block_noise_level();
+
+        // minTimeBeforeFirstSystemStateBroadcast is the minimum time the CC waits for storage nodes and
+        // distributors that are down in Slobrok and/or getnodestate before it may broadcast a cluster
+        // state. It is therefore raised to at least the Slobrok grace period and the getnodestate timeout.
+        options.maxSlobrokDisconnectGracePeriod = (int) (config.max_slobrok_disconnect_grace_period() * 1000);
+        options.nodeStateRequestTimeoutMS = (int) (config.get_node_state_request_timeout() * 1000);
+        options.minTimeBeforeFirstSystemStateBroadcast = max(
+                options.maxSlobrokDisconnectGracePeriod,
+                options.nodeStateRequestTimeoutMS,
+                (int) (config.min_time_before_first_system_state_broadcast() * 1000));
+    }
+
+    private static int max(int a, int b, int c) { // returns the largest of a, b and c
+        return Math.max(a, Math.max(b, c));
     }
 
     private static void configure(FleetControllerOptions options, SlobroksConfig config) {
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
index 528a9d79a7b..7e67cf27cba 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
@@ -94,7 +94,9 @@ public class FleetControllerOptions implements Cloneable {
      * Minimum time to pass (in milliseconds) before broadcasting our first systemstate. Set small in unit tests,
      * but should be a few seconds in a real system to prevent new nodes taking over from disturbing the system by
      * putting out a different systemstate just because all nodes don't answer witihin a single cycle.
-     * If all nodes have reported before this time, the min time is ignored and system state is broadcasted.
+     * The cluster state may be broadcast before this time has passed if all nodes have successfully
+     * reported their state in Slobrok and getnodestate. This value should typically be at least as
+     * large as both maxSlobrokDisconnectGracePeriod and nodeStateRequestTimeoutMS.
      */
     public long minTimeBeforeFirstSystemStateBroadcast = 0;
 
diff --git a/configdefinitions/src/vespa/fleetcontroller.def b/configdefinitions/src/vespa/fleetcontroller.def
index d2d746363f0..08fbed7f263 100644
--- a/configdefinitions/src/vespa/fleetcontroller.def
+++ b/configdefinitions/src/vespa/fleetcontroller.def
@@ -115,8 +115,11 @@ cycle_wait_time double default=0.1
 ## a new fleetcontroller. (Will broadcast earlier than this if we have gathered
 ## state from all before this). To prevent disturbance when taking over as
 ## fleetcontroller, give nodes a bit of time to answer so we dont temporarily
-## report nodes as down.
-min_time_before_first_system_state_broadcast double default=5.0
+## report nodes as down. The effective minimum wait is the largest of this
+## value, max_slobrok_disconnect_grace_period and
+## get_node_state_request_timeout; the broadcast may still happen earlier if
+## all nodes have reported their state.
+min_time_before_first_system_state_broadcast double default=120.0
 
 ## Request timeout of node state requests. Keeping a high timeout allows us to
 ## always have a pending operation with very low cost. Keeping a low timeout is