summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon.hallingstad@gmail.com>2023-06-02 12:55:24 +0200
committerGitHub <noreply@github.com>2023-06-02 12:55:24 +0200
commit3a847df3386a724be512b748280f466d7b17435b (patch)
tree28209e9fa2e3f3f3b963c447969c65b2a1dd91d6
parent449cf2278a6f7dd5c546a4efe7029766b1450fa6 (diff)
parent19dfcf47e91559ab0b29fe121e58a937aa3bcfb4 (diff)
Merge pull request #27266 from vespa-engine/hmusum/cluster-controller-cleanup-6
Hmusum/cluster controller cleanup 6
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java2
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java2
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java28
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java1
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java50
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java2
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java16
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGeneratorTest.java4
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java2
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java34
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java2
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java26
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java2
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java40
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java231
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicatorTest.java9
16 files changed, 187 insertions, 264 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index 42460b5943e..8027cec4e3c 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -485,11 +485,9 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta
// TODO: remove as many temporal parameter dependencies as possible here. Currently duplication of state.
stateChangeHandler.reconfigureFromOptions(options);
- stateChangeHandler.setStateChangedFlag(); // Always trigger state recomputation after reconfig
masterElectionHandler.setFleetControllerCount(options.fleetControllerCount());
masterElectionHandler.setMasterZooKeeperCooldownPeriod(options.masterZooKeeperCooldownPeriod());
- masterElectionHandler.setUsingZooKeeper(options.zooKeeperServerAddress() != null && !options.zooKeeperServerAddress().isEmpty());
if (rpcServer != null) {
rpcServer.setMasterElectionHandler(masterElectionHandler);
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
index bac6a838300..e116bb28e46 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
@@ -178,7 +178,7 @@ public class FleetControllerOptions {
this.distributionBits = distributionBits;
this.zooKeeperSessionTimeout = zooKeeperSessionTimeout;
this.masterZooKeeperCooldownPeriod = masterZooKeeperCooldownPeriod;
- this.zooKeeperServerAddress = zooKeeperServerAddress;
+ this.zooKeeperServerAddress = Objects.requireNonNull(zooKeeperServerAddress, "zooKeeperServerAddress cannot be null");
this.maxTransitionTime = maxTransitionTime;
this.maxInitProgressTime = maxInitProgressTime;
this.maxPrematureCrashes = maxPrematureCrashes;
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java
index fa303533355..fc8a6a05573 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java
@@ -26,7 +26,6 @@ public class MasterElectionHandler implements MasterInterface {
private Map<Integer, Integer> nextMasterData;
private long masterGoneFromZooKeeperTime; // Set to time master fleet controller disappears from zookeeper
private long masterZooKeeperCooldownPeriod; // The period in ms that we won't take over unless master come back.
- private boolean usingZooKeeper = false; // Unit tests may not use ZooKeeper at all.
public MasterElectionHandler(FleetControllerContext context, int index, int totalCount, Object monitor, Timer timer) {
this.context = context;
@@ -34,7 +33,8 @@ public class MasterElectionHandler implements MasterInterface {
this.timer = timer;
this.index = index;
this.totalCount = totalCount;
- this.nextInLineCount = Integer.MAX_VALUE;
+ // nextInLineCount should/will always be 0 when we have one controller
+ this.nextInLineCount = totalCount == 1 ? 0 : Integer.MAX_VALUE;
if (cannotBecomeMaster())
context.log(logger, Level.FINE, () -> "We can never become master and will always stay a follower.");
// Tag current time as when we have not seen any other master. Make sure we're not taking over at once for master that is on the way down
@@ -43,25 +43,12 @@ public class MasterElectionHandler implements MasterInterface {
public void setFleetControllerCount(int count) {
totalCount = count;
- if (count == 1 && !usingZooKeeper) {
- masterCandidate = 0;
- followers = 1;
- nextInLineCount = 0;
- }
}
public void setMasterZooKeeperCooldownPeriod(int period) {
masterZooKeeperCooldownPeriod = period;
}
- public void setUsingZooKeeper(boolean usingZK) {
- if (!usingZooKeeper && usingZK) {
- // Reset any shortcuts taken by non-ZK election logic.
- resetElectionProgress();
- }
- usingZooKeeper = usingZK;
- }
-
@Override
public boolean isMaster() {
Integer master = getMaster();
@@ -121,15 +108,13 @@ public class MasterElectionHandler implements MasterInterface {
public boolean isFirstInLine() { return (nextInLineCount < 1); }
public boolean watchMasterElection(DatabaseHandler database, DatabaseHandler.DatabaseContext dbContext) {
- if (totalCount == 1 && !usingZooKeeper) {
- return false; // Allow single configured node to become master implicitly if no ZK configured
- }
if (nextMasterData == null) {
if (masterCandidate == null) {
context.log(logger, Level.FINEST, () -> "No current master candidate. Waiting for data to do master election.");
}
return false; // Nothing have happened since last time.
}
+
// Move next data to temporary, such that we don't need to keep lock, and such that we don't retry
// if we happen to fail processing the data.
Map<Integer, Integer> state;
@@ -140,6 +125,7 @@ public class MasterElectionHandler implements MasterInterface {
}
context.log(logger, Level.INFO, "Got master election state " + toString(state) + ".");
if (state.isEmpty()) throw new IllegalStateException("Database has no master data. We should at least have data for ourselves.");
+
Map.Entry<Integer, Integer> first = state.entrySet().iterator().next();
Integer currentMaster = getMaster();
if (currentMaster != null && first.getKey().intValue() != currentMaster.intValue()) {
@@ -238,10 +224,8 @@ public class MasterElectionHandler implements MasterInterface {
}
public void lostDatabaseConnection() {
- if (totalCount > 1 || usingZooKeeper) {
- context.log(logger, Level.INFO, "Clearing master data as we lost connection on node " + index);
- resetElectionProgress();
- }
+ context.log(logger, Level.INFO, "Clearing master data as we lost connection on node " + index);
+ resetElectionProgress();
}
private void resetElectionProgress() {
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java
index 28149477e36..2317777e43d 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java
@@ -234,6 +234,7 @@ public class StateChangeHandler {
setMaxInitProgressTime(options.maxInitProgressTime());
setMaxSlobrokDisconnectGracePeriod(options.maxSlobrokDisconnectGracePeriod());
setMaxTransitionTime(options.maxTransitionTime());
+ setStateChangedFlag(); // Always trigger state recomputation after reconfig
}
// TODO too many hidden behavior dependencies between this and the actually
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
index efb97a4a69e..ed194776d78 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
@@ -15,6 +15,7 @@ import org.apache.zookeeper.KeeperException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Map;
+import java.util.Objects;
import java.util.TreeMap;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -101,8 +102,7 @@ public class DatabaseHandler {
this.timer = timer;
pendingStore.masterVote = fleetControllerContext.id().index(); // To begin with we'll vote for ourselves.
this.monitor = monitor;
- // TODO: Require non-null, not possible now since at least ClusterFeedBlockTest uses null address
- this.zooKeeperAddress = zooKeeperAddress;
+ this.zooKeeperAddress = Objects.requireNonNull(zooKeeperAddress, "zooKeeperAddress cannot be null");
}
private boolean isDatabaseClosedSafe() {
@@ -161,11 +161,9 @@ public class DatabaseHandler {
}
public void setZooKeeperAddress(String address, DatabaseContext databaseContext) {
- if (address == null && zooKeeperAddress == null) return;
- if (address != null && address.equals(zooKeeperAddress)) return;
- if (zooKeeperAddress != null) {
- fleetControllerContext.log(logger, Level.INFO, "" + (address == null ? "Stopped using ZooKeeper." : "Got new ZooKeeper address to use: " + address));
- }
+ Objects.requireNonNull(address, "address cannot be null");
+ if (address.equals(zooKeeperAddress)) return;
+ fleetControllerContext.log(logger, Level.INFO, "Got new ZooKeeper address to use: " + address);
zooKeeperAddress = address;
reset(databaseContext);
}
@@ -177,8 +175,6 @@ public class DatabaseHandler {
reset(databaseContext);
}
- private boolean usingZooKeeper() { return (zooKeeperAddress != null); }
-
private void connect(long currentTime) {
try {
lastZooKeeperConnectionAttempt = currentTime;
@@ -245,7 +241,7 @@ public class DatabaseHandler {
didWork = true;
}
}
- if (isDatabaseClosedSafe() && zooKeeperIsConfigured()) {
+ if (isDatabaseClosedSafe()) {
long currentTime = timer.getCurrentTimeInMillis();
if (currentTime - lastZooKeeperConnectionAttempt < minimumWaitBetweenFailedConnectionAttempts) {
return false; // Not time to attempt connection yet.
@@ -270,11 +266,6 @@ public class DatabaseHandler {
return didWork;
}
- private boolean zooKeeperIsConfigured() {
- // This should only ever be null during unit testing.
- return zooKeeperAddress != null;
- }
-
private void relinquishDatabaseConnectivity(DatabaseContext databaseContext) {
// reset() will handle both session clearing and trigger a database loss callback into the CC.
reset(databaseContext);
@@ -383,9 +374,7 @@ public class DatabaseHandler {
}
Integer version = currentlyStored.lastSystemStateVersion;
if (version == null) {
- if (usingZooKeeper()) {
- fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve latest system state version from ZooKeeper. Returning version 0.");
- }
+ fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve latest system state version from ZooKeeper. Returning version 0.");
return 0; // FIXME "fail-oblivious" is not a good error handling mode for such a critical component!
}
return version;
@@ -395,22 +384,13 @@ public class DatabaseHandler {
fleetControllerContext.log(logger, Level.FINE, () -> "Scheduling bundle " + clusterStateBundle + " to be saved to ZooKeeper");
pendingStore.clusterStateBundle = clusterStateBundle;
doNextZooKeeperTask(databaseContext);
- // FIXME this is a nasty hack to get around the fact that a massive amount of unit tests
- // set up the system with a null ZooKeeper server address. If we don't fake that we have
- // written the state version, the tests will never progress past waiting for state broadcasts.
- if (zooKeeperAddress == null) {
- logger.warning(() -> "Simulating ZK write of version " + clusterStateBundle.getVersion() +
- ". This should not happen in production!");
- lastKnownStateBundleVersionWrittenBySelf = clusterStateBundle.getVersion();
- }
}
// TODO should we expand this to cover _any_ pending ZK write?
public boolean hasPendingClusterStateMetaDataStore() {
synchronized (databaseMonitor) {
- return ((zooKeeperAddress != null) &&
- ((pendingStore.clusterStateBundle != null) ||
- (pendingStore.lastSystemStateVersion != null)));
+ return ((pendingStore.clusterStateBundle != null) ||
+ (pendingStore.lastSystemStateVersion != null));
}
}
@@ -458,11 +438,9 @@ public class DatabaseHandler {
}
Map<Node, NodeState> wantedStates = currentlyStored.wantedStates;
if (wantedStates == null) {
- if (usingZooKeeper()) {
- // We get here if the ZooKeeper client has lost the connection to ZooKeeper.
- // TODO: Should instead fail the tick until connected!?
- fleetControllerContext.log(logger, Level.FINE, () -> "Failed to retrieve wanted states from ZooKeeper. Assuming UP for all nodes.");
- }
+ // We get here if the ZooKeeper client has lost connection to ZooKeeper.
+ // TODO: Should instead fail the tick until connected!?
+ fleetControllerContext.log(logger, Level.FINE, () -> "Failed to retrieve wanted states from ZooKeeper. Assuming UP for all nodes.");
wantedStates = new TreeMap<>();
}
boolean altered = false;
@@ -510,9 +488,7 @@ public class DatabaseHandler {
}
Map<Node, Long> startTimestamps = currentlyStored.startTimestamps;
if (startTimestamps == null) {
- if (usingZooKeeper()) {
- fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve start timestamps from ZooKeeper. Cluster state will be bloated with timestamps until we get them set.");
- }
+ fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve start timestamps from ZooKeeper. Cluster state will be bloated with timestamps until we get them set.");
startTimestamps = new TreeMap<>();
}
for (Map.Entry<Node, Long> e : startTimestamps.entrySet()) {
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java
index 5aae401e157..7a9bea91b9c 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java
@@ -228,7 +228,7 @@ public class LegacyIndexPageRequestHandler implements StatusPageServer.RequestHa
sb.append("<tr><td><nobr>RPC port</nobr></td><td align=\"right\">").append(options.rpcPort() == 0 ? "Pick random available" : options.rpcPort()).append("</td></tr>");
sb.append("<tr><td><nobr>HTTP port</nobr></td><td align=\"right\">").append(options.httpPort() == 0 ? "Pick random available" : options.httpPort()).append("</td></tr>");
sb.append("<tr><td><nobr>Master cooldown period</nobr></td><td align=\"right\">").append(RealTimer.printDuration(options.masterZooKeeperCooldownPeriod())).append("</td></tr>");
- String zooKeeperAddress = (options.zooKeeperServerAddress() == null ? "Not using Zookeeper" : splitZooKeeperAddress(options.zooKeeperServerAddress()));
+ String zooKeeperAddress = splitZooKeeperAddress(options.zooKeeperServerAddress());
sb.append("<tr><td><nobr>Zookeeper server address</nobr></td><td align=\"right\">").append(zooKeeperAddress).append("</td></tr>");
sb.append("<tr><td><nobr>Zookeeper session timeout</nobr></td><td align=\"right\">").append(RealTimer.printDuration(options.zooKeeperSessionTimeout())).append("</td></tr>");
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java
index d4eea261767..55e256cf89c 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java
@@ -32,15 +32,17 @@ public class ClusterFeedBlockTest extends FleetControllerTest {
private FleetController ctrl;
private DummyCommunicator communicator;
- private void initialize(FleetControllerOptions options) throws Exception {
+ private void initialize(FleetControllerOptions.Builder builder) throws Exception {
List<Node> nodes = new ArrayList<>();
- for (int i = 0; i < options.nodes().size(); ++i) {
+ for (int i = 0; i < builder.nodes().size(); ++i) {
nodes.add(new Node(NodeType.STORAGE, i));
nodes.add(new Node(NodeType.DISTRIBUTOR, i));
}
- var context = new TestFleetControllerContext(options);
communicator = new DummyCommunicator(nodes, timer);
+ setUpZooKeeperServer(builder);
+ options = builder.build();
+ var context = new TestFleetControllerContext(options);
boolean start = false;
ctrl = createFleetController(timer, options, context, communicator, communicator, null, start);
@@ -57,16 +59,16 @@ public class ClusterFeedBlockTest extends FleetControllerTest {
ctrl.tick();
}
- private static FleetControllerOptions createOptions(Map<String, Double> feedBlockLimits, double clusterFeedBlockNoiseLevel) {
+ private static FleetControllerOptions.Builder createOptions(Map<String, Double> feedBlockLimits, double clusterFeedBlockNoiseLevel) {
return defaultOptions()
.setStorageDistribution(DistributionBuilder.forFlatCluster(NODE_COUNT))
.setNodes(new HashSet<>(DistributionBuilder.buildConfiguredNodes(NODE_COUNT)))
.setClusterFeedBlockEnabled(true)
.setClusterFeedBlockLimit(feedBlockLimits)
- .setClusterFeedBlockNoiseLevel(clusterFeedBlockNoiseLevel).build();
+ .setClusterFeedBlockNoiseLevel(clusterFeedBlockNoiseLevel);
}
- private static FleetControllerOptions createOptions(Map<String, Double> feedBlockLimits) {
+ private static FleetControllerOptions.Builder createOptions(Map<String, Double> feedBlockLimits) {
return createOptions(feedBlockLimits, 0.0);
}
@@ -109,7 +111,7 @@ public class ClusterFeedBlockTest extends FleetControllerTest {
assertTrue(ctrl.getClusterStateBundle().clusterFeedIsBlocked());
// Increase cheese allowance. Should now automatically unblock since reported usage is lower.
- ctrl.updateOptions(createOptions(mapOf(usage("cheese", 0.9), usage("wine", 0.4))));
+ ctrl.updateOptions(createOptions(mapOf(usage("cheese", 0.9), usage("wine", 0.4))).build());
ctrl.tick(); // Options propagation
ctrl.tick(); // State recomputation
assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked());
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGeneratorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGeneratorTest.java
index 30c90ee0664..b5aebadd82b 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGeneratorTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGeneratorTest.java
@@ -870,7 +870,9 @@ public class ClusterStateGeneratorTest {
.setMinNodeRatioPerGroup(0.6)
.setDistributionBits(7)
.setMaxTransitionTime(NodeType.DISTRIBUTOR, 1000)
- .setMaxTransitionTime(NodeType.STORAGE, 2000).build();
+ .setMaxTransitionTime(NodeType.STORAGE, 2000)
+ .setZooKeeperServerAddress("localhost:2181")
+ .build();
final ClusterStateGenerator.Params params = ClusterStateGenerator.Params.fromOptions(options);
assertThat(params.maxPrematureCrashes, equalTo(options.maxPrematureCrashes()));
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java
index 95b9d13cad5..11bdb6ec1c8 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java
@@ -22,7 +22,7 @@ public class DistributionBitCountTest extends FleetControllerTest {
for (int i = 0 ; i < 10; i++) {
configuredNodes.add(new ConfiguredNode(i, false));
}
- var builder = defaultOptions("mycluster", configuredNodes);
+ var builder = defaultOptions(configuredNodes);
builder.setDistributionBits(17);
Timer timer = new RealTimer();
setUpFleetController(timer, builder);
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
index 238a64459ca..fb59df7e433 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
@@ -25,6 +25,8 @@ import com.yahoo.vespa.clustercontroller.core.testutils.WaitTask;
import com.yahoo.vespa.clustercontroller.core.testutils.Waiter;
import com.yahoo.vespa.clustercontroller.utils.util.NoMetricReporter;
import org.junit.jupiter.api.AfterEach;
+
+import java.io.IOException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
@@ -72,31 +74,24 @@ public abstract class FleetControllerTest implements Waiter {
}
protected static FleetControllerOptions.Builder defaultOptions() {
- return defaultOptions(DEFAULT_NODE_COUNT);
- }
-
- protected static FleetControllerOptions.Builder defaultOptions(int nodeCount) {
- return defaultOptions("mycluster", IntStream.range(0, nodeCount)
- .mapToObj(i -> new ConfiguredNode(i, false))
- .collect(Collectors.toSet()));
+ return defaultOptions(IntStream.range(0, DEFAULT_NODE_COUNT)
+ .mapToObj(i -> new ConfiguredNode(i, false))
+ .collect(Collectors.toSet()));
}
- protected static FleetControllerOptions.Builder defaultOptions(String clusterName, Collection<ConfiguredNode> nodes) {
- var builder = new FleetControllerOptions.Builder(clusterName, nodes);
+ protected static FleetControllerOptions.Builder defaultOptions(Collection<ConfiguredNode> nodes) {
+ var builder = new FleetControllerOptions.Builder("mycluster", nodes);
builder.enableTwoPhaseClusterStateActivation(true); // Enable by default, tests can explicitly disable.
builder.setStorageDistribution(DistributionBuilder.forFlatCluster(builder.nodes().size()));
+ builder.setZooKeeperServerAddress("localhost:2181");
return builder;
}
- private void setUpSystem(FleetControllerOptions.Builder builder) throws Exception {
- log.log(Level.FINE, "Setting up system");
- if (builder.zooKeeperServerAddress() != null) {
- zooKeeperServer = new ZooKeeperTestServer();
- // Need to set zookeeper address again, as port number is not known until ZooKeeperTestServer has been created
- builder.setZooKeeperServerAddress(zooKeeperServer.getAddress());
- log.log(Level.FINE, "Set up new zookeeper server at " + zooKeeperServer.getAddress());
- }
- builder.setSlobrokConnectionSpecs(getSlobrokConnectionSpecs(slobrok));
+ protected void setUpZooKeeperServer(FleetControllerOptions.Builder builder) throws IOException {
+ zooKeeperServer = new ZooKeeperTestServer();
+ // Need to set zookeeper address again, as port number is not known until ZooKeeperTestServer has been created
+ builder.setZooKeeperServerAddress(zooKeeperServer.getAddress());
+ log.log(Level.FINE, "Set up new zookeeper server at " + zooKeeperServer.getAddress());
}
FleetController createFleetController(Timer timer, FleetControllerOptions options) {
@@ -144,7 +139,8 @@ public abstract class FleetControllerTest implements Waiter {
}
protected FleetControllerOptions setUpFleetController(Timer timer, FleetControllerOptions.Builder builder) throws Exception {
- setUpSystem(builder);
+ setUpZooKeeperServer(builder);
+ builder.setSlobrokConnectionSpecs(getSlobrokConnectionSpecs(slobrok));
options = builder.build();
startFleetController(timer);
return options;
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java
index 77c89d77ba5..93a96be71a0 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java
@@ -6,7 +6,6 @@ import com.yahoo.jrt.Spec;
import com.yahoo.jrt.Supervisor;
import com.yahoo.jrt.Target;
import com.yahoo.jrt.Transport;
-import com.yahoo.jrt.slobrok.server.Slobrok;
import com.yahoo.vdslib.state.ClusterState;
import com.yahoo.vdslib.state.NodeState;
import com.yahoo.vdslib.state.NodeType;
@@ -53,7 +52,6 @@ public class MasterElectionTest extends FleetControllerTest {
if (zooKeeperServer == null) {
zooKeeperServer = new ZooKeeperTestServer();
}
- slobrok = new Slobrok();
builder.setZooKeeperSessionTimeout(defaultZkSessionTimeoutInMillis())
.setZooKeeperServerAddress(zooKeeperServer.getAddress())
.setSlobrokConnectionSpecs(getSlobrokConnectionSpecs(slobrok))
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java
deleted file mode 100644
index 3d3a38aacd4..00000000000
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.clustercontroller.core;
-
-import org.junit.jupiter.api.Test;
-
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-public class NoZooKeeperTest extends FleetControllerTest {
-
- @Test
- void testWantedStatesInZooKeeper() throws Exception {
- // Null is the default for zooKeeperServerAddress
- FleetControllerOptions.Builder builder = defaultOptions();
- Timer timer = new FakeTimer();
- setUpFleetController(timer, builder);
- setUpVdsNodes(timer);
- waitForStableSystem();
-
- assertTrue(nodes.get(0).isDistributor());
- nodes.get(0).disconnect();
- waitForState("version:\\d+ distributor:10 .0.s:d storage:10");
-
- nodes.get(0).connect();
- waitForState("version:\\d+ distributor:10 storage:10");
- }
-}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java
index 2c77767d6b4..e432efc1447 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java
@@ -30,7 +30,7 @@ public class NodeSlobrokConfigurationMembershipTest extends FleetControllerTest
}
private FleetControllerOptions.Builder optionsForConfiguredNodes(Set<ConfiguredNode> configuredNodes) {
- return defaultOptions("mycluster", configuredNodes)
+ return defaultOptions(configuredNodes)
.setMaxSlobrokDisconnectGracePeriod(60 * 1000)
.setNodeStateRequestTimeoutMS(10000 * 60 * 1000)
.setMaxTransitionTime(NodeType.DISTRIBUTOR, 0)
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java
index 02e3a4a4125..e75ade7309c 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java
@@ -131,7 +131,7 @@ public class RpcServerTest extends FleetControllerTest {
Set<ConfiguredNode> configuredNodes = new TreeSet<>();
for (int i = 0; i < 10; i++)
configuredNodes.add(new ConfiguredNode(i, false));
- FleetControllerOptions.Builder builder = defaultOptions("mycluster", configuredNodes);
+ FleetControllerOptions.Builder builder = defaultOptions(configuredNodes);
builder.setMinRatioOfStorageNodesUp(0);
builder.setMaxInitProgressTime(30000);
builder.setStableStateTimePeriod(60000);
@@ -224,7 +224,7 @@ public class RpcServerTest extends FleetControllerTest {
for (int i = 0; i < 4; i++)
configuredNodes.add(new ConfiguredNode(i, false));
configuredNodes.add(new ConfiguredNode(4, true)); // Last node is configured retired
- FleetControllerOptions.Builder builder = defaultOptions("mycluster", configuredNodes)
+ FleetControllerOptions.Builder builder = defaultOptions(configuredNodes)
.setMinRatioOfStorageNodesUp(0)
.setMaxInitProgressTime(30000)
.setStableStateTimePeriod(60000);
@@ -257,7 +257,7 @@ public class RpcServerTest extends FleetControllerTest {
List<ConfiguredNode> configuredNodes = new ArrayList<>();
for (int i = 0; i < 5; i++)
configuredNodes.add(new ConfiguredNode(i, false));
- FleetControllerOptions.Builder builder = defaultOptions("mycluster", configuredNodes)
+ FleetControllerOptions.Builder builder = defaultOptions(configuredNodes)
.setMaxInitProgressTime(30000)
.setStableStateTimePeriod(60000);
setUpFleetController(timer, builder);
@@ -281,10 +281,8 @@ public class RpcServerTest extends FleetControllerTest {
configuredNodes.add(new ConfiguredNode(i, true));
configuredNodes.add(new ConfiguredNode(5, false));
configuredNodes.add(new ConfiguredNode(6, false));
- FleetControllerOptions.Builder builder = defaultOptions("mycluster", configuredNodes)
- .setSlobrokConnectionSpecs(this.options.slobrokConnectionSpecs())
- .setMaxInitProgressTime(30000)
- .setStableStateTimePeriod(60000);
+ var builder = FleetControllerOptions.Builder.copy(fleetController().getOptions())
+ .setNodes(configuredNodes);
fleetController().updateOptions(builder.build());
waitForState("version:\\d+ distributor:7 storage:7 .0.s:m .1.s:m .2.s:r .3.s:r .4.s:r");
}
@@ -311,10 +309,8 @@ public class RpcServerTest extends FleetControllerTest {
Set<ConfiguredNode> configuredNodes = new TreeSet<>();
for (int i = 0; i < 7; i++)
configuredNodes.add(new ConfiguredNode(i, false));
- FleetControllerOptions.Builder builder = defaultOptions("mycluster", configuredNodes)
- .setSlobrokConnectionSpecs(this.options.slobrokConnectionSpecs())
- .setMaxInitProgressTime(30000)
- .setStableStateTimePeriod(60000);
+ var builder = FleetControllerOptions.Builder.copy(fleetController().getOptions())
+ .setNodes(configuredNodes);
fleetController().updateOptions(builder.build());
waitForState("version:\\d+ distributor:7 storage:7 .0.s:m .1.s:m");
}
@@ -336,7 +332,7 @@ public class RpcServerTest extends FleetControllerTest {
List<ConfiguredNode> configuredNodes = new ArrayList<>();
for (int i = 0; i < 5; i++)
configuredNodes.add(new ConfiguredNode(i, false));
- FleetControllerOptions.Builder builder = defaultOptions("mycluster", configuredNodes)
+ FleetControllerOptions.Builder builder = defaultOptions(configuredNodes)
.setMaxInitProgressTime(30000)
.setStableStateTimePeriod(60000);
options = builder.build();
@@ -349,10 +345,8 @@ public class RpcServerTest extends FleetControllerTest {
Set<ConfiguredNode> configuredNodes = new TreeSet<>();
for (int i = 0; i < 5; i++)
configuredNodes.add(new ConfiguredNode(i, false));
- FleetControllerOptions.Builder builder = defaultOptions("mycluster", configuredNodes)
- .setSlobrokConnectionSpecs(options.slobrokConnectionSpecs())
- .setMaxInitProgressTime(30000)
- .setStableStateTimePeriod(60000);
+ var builder = FleetControllerOptions.Builder.copy(fleetController().getOptions())
+ .setNodes(configuredNodes);
fleetController().updateOptions(builder.build());
waitForState("version:\\d+ distributor:5 storage:5");
}
@@ -364,10 +358,8 @@ public class RpcServerTest extends FleetControllerTest {
configuredNodes.add(new ConfiguredNode(i, true));
configuredNodes.add(new ConfiguredNode(5, false));
configuredNodes.add(new ConfiguredNode(6, false));
- FleetControllerOptions.Builder builder = defaultOptions("mycluster", configuredNodes)
- .setSlobrokConnectionSpecs(options.slobrokConnectionSpecs())
- .setMaxInitProgressTime(30000)
- .setStableStateTimePeriod(60000);
+ var builder = FleetControllerOptions.Builder.copy(fleetController().getOptions())
+ .setNodes(configuredNodes);
fleetController().updateOptions(builder.build());
waitForState("version:\\d+ distributor:7 storage:7 .0.s:r .1.s:r .2.s:r .3.s:r .4.s:r");
}
@@ -378,10 +370,8 @@ public class RpcServerTest extends FleetControllerTest {
configuredNodes.add(new ConfiguredNode(i, true));
configuredNodes.add(new ConfiguredNode(5, false));
configuredNodes.add(new ConfiguredNode(6, false));
- FleetControllerOptions.Builder builder = defaultOptions("mycluster", configuredNodes)
- .setSlobrokConnectionSpecs(options.slobrokConnectionSpecs())
- .setMaxInitProgressTime(30000)
- .setStableStateTimePeriod(60000);
+ var builder = FleetControllerOptions.Builder.copy(fleetController().getOptions())
+ .setNodes(configuredNodes);
fleetController().updateOptions(builder.build());
waitForState("version:\\d+ distributor:7 storage:7 .0.s:r .1.s:r .2.s:r .3.s:r .4.s:r");
}
@@ -411,7 +401,7 @@ public class RpcServerTest extends FleetControllerTest {
void testSetNodeState() throws Exception {
Set<Integer> nodeIndexes = new TreeSet<>(List.of(4, 6, 9, 10, 14, 16, 21, 22, 23, 25));
Set<ConfiguredNode> configuredNodes = nodeIndexes.stream().map(i -> new ConfiguredNode(i, false)).collect(Collectors.toSet());
- FleetControllerOptions.Builder options = defaultOptions("mycluster", configuredNodes);
+ FleetControllerOptions.Builder options = defaultOptions(configuredNodes);
//options.setStorageDistribution(new Distribution(getDistConfig(nodeIndexes)));
setUpFleetController(timer, options);
setUpVdsNodes(timer, false, nodeIndexes);
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
index c0e116ef5fe..f2261794b75 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
@@ -31,16 +31,18 @@ public class StateChangeTest extends FleetControllerTest {
private FleetController ctrl;
private DummyCommunicator communicator;
- private void initialize(FleetControllerOptions options) throws Exception {
+ private void initialize(FleetControllerOptions.Builder builder) throws Exception {
List<Node> nodes = new ArrayList<>();
- for (int i = 0; i < options.nodes().size(); ++i) {
+ for (int i = 0; i < builder.nodes().size(); ++i) {
nodes.add(new Node(NodeType.STORAGE, i));
nodes.add(new Node(NodeType.DISTRIBUTOR, i));
}
- var context = new TestFleetControllerContext(options);
+ setUpZooKeeperServer(builder);
communicator = new DummyCommunicator(nodes, timer);
boolean start = false;
+ FleetControllerOptions options = builder.build();
+ var context = new TestFleetControllerContext(options);
ctrl = createFleetController(timer, options, context, communicator, communicator, null, start);
ctrl.tick();
@@ -72,7 +74,7 @@ public class StateChangeTest extends FleetControllerTest {
FleetControllerOptions.Builder options = defaultOptions();
options.setMaxInitProgressTime(50000);
- initialize(options.build());
+ initialize(options);
// Should now pick up previous node states
ctrl.tick();
@@ -95,7 +97,7 @@ public class StateChangeTest extends FleetControllerTest {
// Regular init progress does not update the cluster state until the node is done initializing (or goes down,
// whichever comes first).
- assertEquals("version:6 distributor:10 .0.s:i .0.i:0.0 .1.s:i .1.i:0.0 .2.s:i .2.i:0.0 .3.s:i .3.i:0.0 " +
+ assertEquals("version:5 distributor:10 .0.s:i .0.i:0.0 .1.s:i .1.i:0.0 .2.s:i .2.i:0.0 .3.s:i .3.i:0.0 " +
".4.s:i .4.i:0.0 .5.s:i .5.i:0.0 .6.s:i .6.i:0.0 .7.s:i .7.i:0.0 .8.s:i .8.i:0.0 " +
".9.s:i .9.i:0.0 storage:10 .0.s:i .0.i:0.1 .1.s:i .1.i:0.1 .2.s:i .2.i:0.1 .3.s:i .3.i:0.1 " +
".4.s:i .4.i:0.1 .5.s:i .5.i:0.1 .6.s:i .6.i:0.1 .7.s:i .7.i:0.1 .8.s:i .8.i:0.1 .9.s:i .9.i:0.1",
@@ -118,12 +120,12 @@ public class StateChangeTest extends FleetControllerTest {
timer.advanceTime(options.maxInitProgressTime() / 20);
ctrl.tick();
- assertEquals("version:8 distributor:10 storage:10", ctrl.getSystemState().toString());
+ assertEquals("version:7 distributor:10 storage:10", ctrl.getSystemState().toString());
verifyNodeEvents(new Node(NodeType.DISTRIBUTOR, 0),
"""
Event: distributor.0: Now reporting state U
- Event: distributor.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'
+ Event: distributor.0: Altered node state in cluster state from 'D' to 'U'
Event: distributor.0: Now reporting state I, i 0.00
Event: distributor.0: Altered node state in cluster state from 'U' to 'I, i 0.00'
Event: distributor.0: Now reporting state U
@@ -133,7 +135,7 @@ public class StateChangeTest extends FleetControllerTest {
verifyNodeEvents(new Node(NodeType.STORAGE, 0),
"""
Event: storage.0: Now reporting state U
- Event: storage.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'
+ Event: storage.0: Altered node state in cluster state from 'D' to 'U'
Event: storage.0: Now reporting state I, i 0.00 (ls)
Event: storage.0: Altered node state in cluster state from 'U' to 'D'
Event: storage.0: Now reporting state I, i 0.100 (read)
@@ -153,7 +155,7 @@ public class StateChangeTest extends FleetControllerTest {
// Two-phase cluster state activation changes this quite a bit, so disable it. At least for now.
.enableTwoPhaseClusterStateActivation(false);
- initialize(builder.build());
+ initialize(builder);
ctrl.tick();
@@ -164,7 +166,7 @@ public class StateChangeTest extends FleetControllerTest {
String desc = ctrl.getReportedNodeState(new Node(NodeType.DISTRIBUTOR, 0)).getDescription();
assertTrue(desc.contains("Closed at other end"), desc);
- assertEquals("version:4 distributor:10 .0.s:d storage:10", ctrl.getSystemState().toString());
+ assertEquals("version:3 distributor:10 .0.s:d storage:10", ctrl.getSystemState().toString());
timer.advanceTime(1000);
@@ -175,7 +177,7 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:5 distributor:10 .0.t:12345678 storage:10 .0.s:m", ctrl.getSystemState().toString());
+ assertEquals("version:4 distributor:10 .0.t:12345678 storage:10 .0.s:m", ctrl.getSystemState().toString());
assert(!ctrl.getReportedNodeState(new Node(NodeType.DISTRIBUTOR, 0)).hasDescription());
desc = ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).getDescription();
@@ -185,7 +187,7 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:6 distributor:10 .0.t:12345678 storage:10 .0.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:5 distributor:10 .0.t:12345678 storage:10 .0.s:d", ctrl.getSystemState().toString());
desc = ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).getDescription();
assertTrue(desc.contains("Closed at other end"), desc);
@@ -198,14 +200,14 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:7 distributor:10 storage:10 .0.t:12345679", ctrl.getSystemState().toString());
+ assertEquals("version:6 distributor:10 storage:10 .0.t:12345679", ctrl.getSystemState().toString());
assert(!ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).hasDescription());
verifyNodeEvents(new Node(NodeType.DISTRIBUTOR, 0),
"""
Event: distributor.0: Now reporting state U
- Event: distributor.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'
+ Event: distributor.0: Altered node state in cluster state from 'D' to 'U'
Event: distributor.0: Failed to get node state: D: Closed at other end
Event: distributor.0: Stopped or possibly crashed after 0 ms, which is before stable state time period. Premature crash count is now 1.
Event: distributor.0: Altered node state in cluster state from 'U' to 'D: Closed at other end'
@@ -217,7 +219,7 @@ public class StateChangeTest extends FleetControllerTest {
verifyNodeEvents(new Node(NodeType.STORAGE, 0),
"""
Event: storage.0: Now reporting state U
- Event: storage.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'
+ Event: storage.0: Altered node state in cluster state from 'D' to 'U'
Event: storage.0: Failed to get node state: D: Closed at other end
Event: storage.0: Stopped or possibly crashed after 1000 ms, which is before stable state time period. Premature crash count is now 1.
Event: storage.0: Altered node state in cluster state from 'U' to 'M: Closed at other end'
@@ -243,7 +245,7 @@ public class StateChangeTest extends FleetControllerTest {
.setNodeStateRequestTimeoutMS(60 * 60 * 1000)
.setMaxSlobrokDisconnectGracePeriod(100000);
- initialize(builder.build());
+ initialize(builder);
ctrl.tick();
@@ -264,7 +266,7 @@ public class StateChangeTest extends FleetControllerTest {
tick(1000);
- assertEquals("version:5 distributor:10 storage:10 .0.s:m", ctrl.getSystemState().toString());
+ assertEquals("version:4 distributor:10 storage:10 .0.s:m", ctrl.getSystemState().toString());
assert(!ctrl.getReportedNodeState(new Node(NodeType.DISTRIBUTOR, 0)).hasDescription());
desc = ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).getDescription();
@@ -273,7 +275,7 @@ public class StateChangeTest extends FleetControllerTest {
tick(builder.maxTransitionTime().get(NodeType.STORAGE) + 1);
- assertEquals("version:6 distributor:10 storage:10 .0.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:5 distributor:10 storage:10 .0.s:d", ctrl.getSystemState().toString());
desc = ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).getDescription();
assertTrue(desc.contains("Received signal 15 (SIGTERM - Termination signal)")
|| desc.contains("controlled shutdown"), desc);
@@ -282,7 +284,7 @@ public class StateChangeTest extends FleetControllerTest {
tick(1000);
- assertEquals("version:7 distributor:10 storage:10", ctrl.getSystemState().toString());
+ assertEquals("version:6 distributor:10 storage:10", ctrl.getSystemState().toString());
assert(!ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).hasDescription());
assertEquals(0, ctrl.getCluster().getNodeInfo(new Node(NodeType.DISTRIBUTOR, 0)).getPrematureCrashCount());
@@ -291,7 +293,7 @@ public class StateChangeTest extends FleetControllerTest {
verifyNodeEvents(new Node(NodeType.DISTRIBUTOR, 0),
"""
Event: distributor.0: Now reporting state U
- Event: distributor.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'
+ Event: distributor.0: Altered node state in cluster state from 'D' to 'U'
Event: distributor.0: Failed to get node state: D: controlled shutdown
Event: distributor.0: Altered node state in cluster state from 'U' to 'D: controlled shutdown'
Event: distributor.0: Now reporting state U
@@ -301,7 +303,7 @@ public class StateChangeTest extends FleetControllerTest {
verifyNodeEvents(new Node(NodeType.STORAGE, 0),
"""
Event: storage.0: Now reporting state U
- Event: storage.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'
+ Event: storage.0: Altered node state in cluster state from 'D' to 'U'
Event: storage.0: Failed to get node state: D: controlled shutdown
Event: storage.0: Altered node state in cluster state from 'U' to 'M: controlled shutdown'
Event: storage.0: Exceeded implicit maintenance mode grace period of 5000 milliseconds. Marking node down.
@@ -317,7 +319,7 @@ public class StateChangeTest extends FleetControllerTest {
FleetControllerOptions.Builder builder = defaultOptions()
.setMaxSlobrokDisconnectGracePeriod(60 * 1000);
- initialize(builder.build());
+ initialize(builder);
ctrl.tick();
@@ -333,7 +335,7 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
ctrl.tick();
- assertEquals("version:3 distributor:10 storage:10", ctrl.getSystemState().toString());
+ assertEquals("version:2 distributor:10 storage:10", ctrl.getSystemState().toString());
nodes = new ArrayList<>();
for (int i = 0; i < 10; ++i) {
@@ -345,12 +347,12 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:3 distributor:10 storage:10", ctrl.getSystemState().toString());
+ assertEquals("version:2 distributor:10 storage:10", ctrl.getSystemState().toString());
verifyNodeEvents(new Node(NodeType.STORAGE, 0),
"""
Event: storage.0: Now reporting state U
- Event: storage.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'
+ Event: storage.0: Altered node state in cluster state from 'D' to 'U'
Event: storage.0: Node is no longer in slobrok, but we still have a pending state request.
""");
}
@@ -360,13 +362,13 @@ public class StateChangeTest extends FleetControllerTest {
FleetControllerOptions.Builder builder = defaultOptions()
.setMaxSlobrokDisconnectGracePeriod(60 * 1000);
- initialize(builder.build());
+ initialize(builder);
communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
ctrl.tick();
- assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+ assertEquals("version:3 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
NodeState ns = ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 6));
assertTrue(ns.getDescription().contains("Connection error: Closed at other end"), ns.toString());
@@ -378,14 +380,14 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
// Still maintenance since .i progress 0.0 is really down.
- assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+ assertEquals("version:3 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.6f), "");
ctrl.tick();
// Now it's OK
- assertEquals("version:5 distributor:10 storage:10 .6.s:i .6.i:0.6", ctrl.getSystemState().toString());
+ assertEquals("version:4 distributor:10 storage:10 .6.s:i .6.i:0.6", ctrl.getSystemState().toString());
tick(1000);
@@ -393,13 +395,13 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:6 distributor:10 storage:10", ctrl.getSystemState().toString());
+ assertEquals("version:5 distributor:10 storage:10", ctrl.getSystemState().toString());
assert(!ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 6)).hasDescription());
verifyNodeEvents(new Node(NodeType.STORAGE, 6),
"""
Event: storage.6: Now reporting state U
- Event: storage.6: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'
+ Event: storage.6: Altered node state in cluster state from 'D' to 'U'
Event: storage.6: Failed to get node state: D: Connection error: Closed at other end
Event: storage.6: Stopped or possibly crashed after 0 ms, which is before stable state time period. Premature crash count is now 1.
Event: storage.6: Altered node state in cluster state from 'U' to 'M: Connection error: Closed at other end'
@@ -419,16 +421,16 @@ public class StateChangeTest extends FleetControllerTest {
nodes.add(new ConfiguredNode(i, retired));
}
- FleetControllerOptions.Builder builder = defaultOptions("mycluster", nodes)
+ FleetControllerOptions.Builder builder = defaultOptions(nodes)
.setMaxSlobrokDisconnectGracePeriod(60 * 1000);
- initialize(builder.build());
+ initialize(builder);
communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
ctrl.tick();
- assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+ assertEquals("version:3 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
NodeState ns = ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 6));
assertTrue(ns.getDescription().contains("Connection error: Closed at other end"), ns.toString());
@@ -440,14 +442,14 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
// Still maintenance since .i progress 0.0 is really down.
- assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+ assertEquals("version:3 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.6f), "");
ctrl.tick();
// Still maintenance since configured.
- assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+ assertEquals("version:3 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
tick(1000);
@@ -455,13 +457,13 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:5 distributor:10 storage:10 .6.s:r", ctrl.getSystemState().toString());
+ assertEquals("version:4 distributor:10 storage:10 .6.s:r", ctrl.getSystemState().toString());
assert(!ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 6)).hasDescription());
verifyNodeEvents(new Node(NodeType.STORAGE, 6),
"""
Event: storage.6: Now reporting state U
- Event: storage.6: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'R'
+ Event: storage.6: Altered node state in cluster state from 'D' to 'R'
Event: storage.6: Failed to get node state: D: Connection error: Closed at other end
Event: storage.6: Stopped or possibly crashed after 0 ms, which is before stable state time period. Premature crash count is now 1.
Event: storage.6: Altered node state in cluster state from 'R' to 'M: Connection error: Closed at other end'
@@ -480,21 +482,21 @@ public class StateChangeTest extends FleetControllerTest {
nodes.add(new ConfiguredNode(i, retired));
}
- FleetControllerOptions.Builder builder = defaultOptions("mycluster", nodes)
+ FleetControllerOptions.Builder builder = defaultOptions(nodes)
.setMaxSlobrokDisconnectGracePeriod(60 * 1000);
- initialize(builder.build());
+ initialize(builder);
communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.DOWN, "Connection error: Closed at other end");
ctrl.tick();
- assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+ assertEquals("version:3 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
timer.advanceTime(100000);
ctrl.tick();
- assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:4 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
}
// Test that a node that has been down for a long time (above steady state period), actually alters cluster state to
@@ -509,7 +511,7 @@ public class StateChangeTest extends FleetControllerTest {
.setNodeStateRequestTimeoutMS(1000000)
.setMaxSlobrokDisconnectGracePeriod(1000000);
- initialize(builder.build());
+ initialize(builder);
timer.advanceTime(100000); // Node has been in steady state up
ctrl.tick();
@@ -518,40 +520,40 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+ assertEquals("version:3 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
timer.advanceTime(100000); // Node has been in steady state down
ctrl.tick();
- assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:4 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.001f), "");
ctrl.tick();
- assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:4 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.1f), "");
ctrl.tick();
- assertEquals("version:6 distributor:10 storage:10 .6.s:i .6.i:0.1", ctrl.getSystemState().toString());
+ assertEquals("version:5 distributor:10 storage:10 .6.s:i .6.i:0.1", ctrl.getSystemState().toString());
ctrl.tick();
- assertEquals("version:6 distributor:10 storage:10 .6.s:i .6.i:0.1", ctrl.getSystemState().toString());
+ assertEquals("version:5 distributor:10 storage:10 .6.s:i .6.i:0.1", ctrl.getSystemState().toString());
communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.UP), "");
ctrl.tick();
- assertEquals("version:7 distributor:10 storage:10", ctrl.getSystemState().toString());
+ assertEquals("version:6 distributor:10 storage:10", ctrl.getSystemState().toString());
verifyNodeEvents(new Node(NodeType.STORAGE, 6),
"""
Event: storage.6: Now reporting state U
- Event: storage.6: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'
+ Event: storage.6: Altered node state in cluster state from 'D' to 'U'
Event: storage.6: Failed to get node state: D: Connection error: Closed at other end
Event: storage.6: Altered node state in cluster state from 'U' to 'M: Connection error: Closed at other end'
Event: storage.6: Exceeded implicit maintenance mode grace period of 5000 milliseconds. Marking node down.
@@ -573,7 +575,7 @@ public class StateChangeTest extends FleetControllerTest {
.setStableStateTimePeriod(1000000)
.setMaxSlobrokDisconnectGracePeriod(10000000);
- initialize(builder.build());
+ initialize(builder);
timer.advanceTime(1000000); // Node has been in steady state up
@@ -583,26 +585,26 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+ assertEquals("version:3 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
timer.advanceTime(1000000); // Node has been in steady state down
ctrl.tick();
- assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:4 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.1f), "");
ctrl.tick();
- assertEquals("version:6 distributor:10 storage:10 .6.s:i .6.i:0.1", ctrl.getSystemState().toString());
+ assertEquals("version:5 distributor:10 storage:10 .6.s:i .6.i:0.1", ctrl.getSystemState().toString());
timer.advanceTime(builder.maxInitProgressTime() + 1);
ctrl.tick();
// We should now get the node marked down.
- assertEquals("version:7 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:6 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
tick(1000);
@@ -621,7 +623,7 @@ public class StateChangeTest extends FleetControllerTest {
tick(1000);
// Still down since it seemingly crashed during last init.
- assertEquals("version:7 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:6 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
ctrl.tick();
@@ -629,12 +631,12 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:8 distributor:10 storage:10", ctrl.getSystemState().toString());
+ assertEquals("version:7 distributor:10 storage:10", ctrl.getSystemState().toString());
verifyNodeEvents(new Node(NodeType.STORAGE, 6),
"""
Event: storage.6: Now reporting state U
- Event: storage.6: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'
+ Event: storage.6: Altered node state in cluster state from 'D' to 'U'
Event: storage.6: Failed to get node state: D: Connection error: Closed at other end
Event: storage.6: Altered node state in cluster state from 'U' to 'M: Connection error: Closed at other end'
Event: storage.6: Exceeded implicit maintenance mode grace period of 5000 milliseconds. Marking node down.
@@ -662,7 +664,7 @@ public class StateChangeTest extends FleetControllerTest {
// Set long so we don't time out RPC requests and mark nodes down due to advancing time to get in steady state
builder.setNodeStateRequestTimeoutMS((int) builder.stableStateTimePeriod() * 2);
- initialize(builder.build());
+ initialize(builder);
timer.advanceTime(1000000); // Node has been in steady state up
@@ -672,19 +674,19 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+ assertEquals("version:3 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
timer.advanceTime(1000000); // Node has been in steady state down
ctrl.tick();
- assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:4 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
communicator.setNodeState(new Node(NodeType.STORAGE, 6), new NodeState(NodeType.STORAGE, State.INITIALIZING).setInitProgress(0.3f), "");
ctrl.tick();
- assertEquals("version:6 distributor:10 storage:10 .6.s:i .6.i:0.3", ctrl.getSystemState().toString());
+ assertEquals("version:5 distributor:10 storage:10 .6.s:i .6.i:0.3", ctrl.getSystemState().toString());
ctrl.tick();
@@ -692,7 +694,7 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:7 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:6 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
}
@Test
@@ -705,7 +707,7 @@ public class StateChangeTest extends FleetControllerTest {
// Set very high so the advanceTime don't start sending state replies right before we disconnect.
.setNodeStateRequestTimeoutMS(365 * 24 * 60 * 1000);
- initialize(builder.build());
+ initialize(builder);
timer.advanceTime(1000000); // Node has been in steady state up
@@ -715,13 +717,13 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+ assertEquals("version:3 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
timer.advanceTime(1000000); // Node has been in steady state down
ctrl.tick();
- assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:4 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
ctrl.tick();
@@ -729,7 +731,7 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:6 distributor:10 storage:10 .6.s:i .6.i:0.3", ctrl.getSystemState().toString());
+ assertEquals("version:5 distributor:10 storage:10 .6.s:i .6.i:0.3", ctrl.getSystemState().toString());
ctrl.tick();
@@ -737,7 +739,7 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:7 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:6 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
tick(1000);
@@ -745,13 +747,13 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:7 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:6 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
communicator.setNodeState(new Node(NodeType.STORAGE, 6), State.UP, "");
ctrl.tick();
- assertEquals("version:8 distributor:10 storage:10", ctrl.getSystemState().toString());
+ assertEquals("version:7 distributor:10 storage:10", ctrl.getSystemState().toString());
}
@Test
@@ -764,7 +766,7 @@ public class StateChangeTest extends FleetControllerTest {
.setStableStateTimePeriod(1000000)
.setMaxSlobrokDisconnectGracePeriod(10000000);
- initialize(builder.build());
+ initialize(builder);
timer.advanceTime(1000000); // Node has been in steady state up
@@ -774,13 +776,13 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:4 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
+ assertEquals("version:3 distributor:10 storage:10 .6.s:m", ctrl.getSystemState().toString());
timer.advanceTime(1000000); // Node has been in steady state down
ctrl.tick();
- assertEquals("version:5 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:4 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
for (int j = 0; j <= builder.maxPrematureCrashes(); ++j) {
ctrl.tick();
@@ -804,7 +806,7 @@ public class StateChangeTest extends FleetControllerTest {
tick(1000);
}
- assertEquals("version:7 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:6 distributor:10 storage:10 .6.s:d", ctrl.getSystemState().toString());
}
@Test
@@ -818,13 +820,13 @@ public class StateChangeTest extends FleetControllerTest {
.setMinRatioOfDistributorNodesUp(0.0)
.setMinRatioOfStorageNodesUp(0.0);
- initialize(builder.build());
+ initialize(builder);
timer.advanceTime(1000000); // Node has been in steady state up
ctrl.tick();
- assertEquals("version:3 distributor:10 storage:10", ctrl.getSystemState().toString());
+ assertEquals("version:2 distributor:10 storage:10", ctrl.getSystemState().toString());
communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 0), State.DOWN, "Connection error: Closed at other end");
communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 1), State.DOWN, "Connection error: Closed at other end");
@@ -836,13 +838,13 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:4 distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:3 distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 4), State.DOWN, "Connection error: Closed at other end");
ctrl.tick();
- assertEquals("version:5 cluster:d distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d .4.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:4 cluster:d distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d .4.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
tick(1000);
@@ -850,7 +852,7 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:6 distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:5 distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
tick(1000);
@@ -858,7 +860,7 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:7 cluster:d distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d .2.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:6 cluster:d distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d .2.s:d", ctrl.getSystemState().toString());
}
@Test
@@ -872,13 +874,13 @@ public class StateChangeTest extends FleetControllerTest {
options.setMinRatioOfDistributorNodesUp(0.6);
options.setMinRatioOfStorageNodesUp(0.8);
- initialize(options.build());
+ initialize(options);
timer.advanceTime(1000000); // Node has been in steady state up
ctrl.tick();
- assertEquals("version:3 distributor:10 storage:10", ctrl.getSystemState().toString());
+ assertEquals("version:2 distributor:10 storage:10", ctrl.getSystemState().toString());
communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 0), State.DOWN, "Connection error: Closed at other end");
communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 1), State.DOWN, "Connection error: Closed at other end");
@@ -890,13 +892,13 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:4 distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:3 distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, 4), State.DOWN, "Connection error: Closed at other end");
ctrl.tick();
- assertEquals("version:5 cluster:d distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d .4.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:4 cluster:d distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d .4.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
tick(1000);
@@ -904,7 +906,7 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:6 distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:5 distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d", ctrl.getSystemState().toString());
tick(1000);
@@ -912,7 +914,7 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:7 cluster:d distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d .2.s:d", ctrl.getSystemState().toString());
+ assertEquals("version:6 cluster:d distributor:10 .0.s:d .1.s:d .2.s:d .3.s:d storage:10 .0.s:d .1.s:d .2.s:d", ctrl.getSystemState().toString());
}
/**
@@ -1044,7 +1046,7 @@ public class StateChangeTest extends FleetControllerTest {
FleetControllerOptions.Builder options = defaultOptions();
options.setDistributionBits(17);
- initialize(options.build());
+ initialize(options);
timer.advanceTime(1000000); // Node has been in steady state up
@@ -1054,7 +1056,7 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:4 bits:15 distributor:10 storage:10", ctrl.getSystemState().toString());
+ assertEquals("version:3 bits:15 distributor:10 storage:10", ctrl.getSystemState().toString());
tick(1000);
@@ -1062,7 +1064,7 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
- assertEquals("version:5 bits:13 distributor:10 storage:10", ctrl.getSystemState().toString());
+ assertEquals("version:4 bits:13 distributor:10 storage:10", ctrl.getSystemState().toString());
tick(1000);
setMinUsedBitsForAllNodes(16);
@@ -1070,13 +1072,13 @@ public class StateChangeTest extends FleetControllerTest {
// Don't increase dist bits until we've reached at least the wanted
// level, in order to avoid multiple full redistributions of data.
- assertEquals("version:5 bits:13 distributor:10 storage:10", ctrl.getSystemState().toString());
+ assertEquals("version:4 bits:13 distributor:10 storage:10", ctrl.getSystemState().toString());
tick(1000);
setMinUsedBitsForAllNodes(19);
ctrl.tick();
- assertEquals("version:6 bits:17 distributor:10 storage:10", ctrl.getSystemState().toString());
+ assertEquals("version:5 bits:17 distributor:10 storage:10", ctrl.getSystemState().toString());
}
private void setMinUsedBitsForAllNodes(int bits) {
@@ -1140,16 +1142,16 @@ public class StateChangeTest extends FleetControllerTest {
options.setMaxTransitionTime(NodeType.STORAGE, 0);
options.setMinStorageNodesUp(10);
options.setMinDistributorNodesUp(10);
- initialize(options.build());
+ initialize(options);
ctrl.tick();
- assertThat(ctrl.consolidatedClusterState().toString(), equalTo("version:3 distributor:10 storage:10"));
+ assertThat(ctrl.consolidatedClusterState().toString(), equalTo("version:2 distributor:10 storage:10"));
communicator.setNodeState(new Node(NodeType.STORAGE, 2), State.DOWN, "foo");
ctrl.tick();
assertThat(ctrl.consolidatedClusterState().toString(),
- equalTo("version:4 cluster:d distributor:10 storage:10 .2.s:d"));
+ equalTo("version:3 cluster:d distributor:10 storage:10 .2.s:d"));
// After this point, any further node changes while the cluster is still down won't be published.
// This is because cluster state similarity checks are short-circuited if both are Down, as no other parts
@@ -1162,7 +1164,7 @@ public class StateChangeTest extends FleetControllerTest {
// NOTE: _same_ version, different node state content. Overall cluster down-state is still the same.
assertThat(ctrl.consolidatedClusterState().toString(),
- equalTo("version:4 cluster:d distributor:10 storage:10 .2.s:d .5.s:d"));
+ equalTo("version:3 cluster:d distributor:10 storage:10 .2.s:d .5.s:d"));
}
// Related to the above test, watchTimer invocations must receive the _current_ state and not the
@@ -1174,7 +1176,7 @@ public class StateChangeTest extends FleetControllerTest {
options.setMaxTransitionTime(NodeType.STORAGE, 1000);
options.setMinStorageNodesUp(10);
options.setMinDistributorNodesUp(10);
- initialize(options.build());
+ initialize(options);
ctrl.tick();
communicator.setNodeState(new Node(NodeType.STORAGE, 2), State.DOWN, "foo");
@@ -1182,7 +1184,7 @@ public class StateChangeTest extends FleetControllerTest {
ctrl.tick();
communicator.setNodeState(new Node(NodeType.STORAGE, 3), State.DOWN, "foo");
ctrl.tick();
- assertThat(ctrl.consolidatedClusterState().toString(), equalTo("version:4 cluster:d distributor:10 storage:10 .2.s:m .3.s:m"));
+ assertThat(ctrl.consolidatedClusterState().toString(), equalTo("version:3 cluster:d distributor:10 storage:10 .2.s:m .3.s:m"));
// Subsequent timer tick should _not_ trigger additional events. Providing published state
// only would result in "Marking node down" events for node 2 emitted per tick.
@@ -1194,7 +1196,7 @@ public class StateChangeTest extends FleetControllerTest {
verifyNodeEvents(new Node(NodeType.STORAGE, 2),
"""
Event: storage.2: Now reporting state U
- Event: storage.2: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'
+ Event: storage.2: Altered node state in cluster state from 'D' to 'U'
Event: storage.2: Failed to get node state: D: foo
Event: storage.2: Stopped or possibly crashed after 500 ms, which is before stable state time period. Premature crash count is now 1.
Event: storage.2: Altered node state in cluster state from 'U' to 'M: foo'
@@ -1208,7 +1210,7 @@ public class StateChangeTest extends FleetControllerTest {
@Test
void do_not_emit_multiple_events_when_node_state_does_not_match_versioned_state() throws Exception {
FleetControllerOptions.Builder options = defaultOptions();
- initialize(options.build());
+ initialize(options);
ctrl.tick();
communicator.setNodeState(
@@ -1240,7 +1242,7 @@ public class StateChangeTest extends FleetControllerTest {
verifyNodeEvents(new Node(NodeType.STORAGE, 2),
"""
Event: storage.2: Now reporting state U
- Event: storage.2: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'
+ Event: storage.2: Altered node state in cluster state from 'D' to 'U'
Event: storage.2: Now reporting state I, i 0.100 (read)
Event: storage.2: Altered node state in cluster state from 'U' to 'I, i 0.100 (read)'
Event: storage.2: Altered min distribution bit count from 16 to 17
@@ -1307,7 +1309,7 @@ public class StateChangeTest extends FleetControllerTest {
// TODO ideally we'd break this out so it doesn't depend on fields in the parent test instance, but
// fleet controller tests have a _lot_ of state, so risk of duplicating a lot of that...
class RemoteTaskFixture {
- RemoteTaskFixture(FleetControllerOptions options) throws Exception {
+ RemoteTaskFixture(FleetControllerOptions.Builder options) throws Exception {
initialize(options);
ctrl.tick();
}
@@ -1392,12 +1394,12 @@ public class StateChangeTest extends FleetControllerTest {
return options;
}
- private RemoteTaskFixture createFixtureWith(FleetControllerOptions options) throws Exception {
+ private RemoteTaskFixture createFixtureWith(FleetControllerOptions.Builder options) throws Exception {
return new RemoteTaskFixture(options);
}
private RemoteTaskFixture createDefaultFixture() throws Exception {
- return new RemoteTaskFixture(defaultOptions().build());
+ return new RemoteTaskFixture(defaultOptions());
}
@Test
@@ -1430,7 +1432,7 @@ public class StateChangeTest extends FleetControllerTest {
@Test
void no_op_synchronous_remote_task_can_complete_immediately_if_current_state_already_acked() throws Exception {
- RemoteTaskFixture fixture = createFixtureWith(optionsWithZeroTransitionTime().build());
+ RemoteTaskFixture fixture = createFixtureWith(optionsWithZeroTransitionTime());
fixture.markStorageNodeDown(0);
MockTask task = fixture.scheduleNoOpVersionDependentTask(); // Tries to set node 0 into Down; already in that state
@@ -1443,7 +1445,7 @@ public class StateChangeTest extends FleetControllerTest {
@Test
void no_op_synchronous_remote_task_waits_until_current_state_is_acked() throws Exception {
- RemoteTaskFixture fixture = createFixtureWith(optionsWithZeroTransitionTime().build());
+ RemoteTaskFixture fixture = createFixtureWith(optionsWithZeroTransitionTime());
communicator.setShouldDeferDistributorClusterStateAcks(true);
fixture.markStorageNodeDown(0);
@@ -1467,7 +1469,7 @@ public class StateChangeTest extends FleetControllerTest {
// the cluster down-state to have been published.
@Test
void immediately_complete_sync_remote_task_when_cluster_is_down() throws Exception {
- RemoteTaskFixture fixture = createFixtureWith(optionsAllowingZeroNodesDown().build());
+ RemoteTaskFixture fixture = createFixtureWith(optionsAllowingZeroNodesDown());
// Controller options require 10/10 nodes up, so take one down to trigger a cluster Down edge.
fixture.markStorageNodeDown(1);
MockTask task = fixture.scheduleVersionDependentTaskWithSideEffects();
@@ -1501,7 +1503,7 @@ public class StateChangeTest extends FleetControllerTest {
void synchronous_task_immediately_failed_when_leadership_lost() throws Exception {
FleetControllerOptions.Builder options = optionsWithZeroTransitionTime();
options.setCount(3);
- RemoteTaskFixture fixture = createFixtureWith(options.build());
+ RemoteTaskFixture fixture = createFixtureWith(options);
fixture.winLeadership();
markAllNodesAsUp(options.build());
@@ -1526,7 +1528,7 @@ public class StateChangeTest extends FleetControllerTest {
void cluster_state_ack_is_not_dependent_on_state_send_grace_period() throws Exception {
FleetControllerOptions.Builder options = defaultOptions();
options.setMinTimeBetweenNewSystemStates(10_000);
- RemoteTaskFixture fixture = createFixtureWith(options.build());
+ RemoteTaskFixture fixture = createFixtureWith(options);
// Have to increment timer here to be able to send state generated by the scheduled task
timer.advanceTime(10_000);
@@ -1546,11 +1548,10 @@ public class StateChangeTest extends FleetControllerTest {
void synchronous_task_immediately_answered_when_not_leader() throws Exception {
FleetControllerOptions.Builder builder = optionsWithZeroTransitionTime();
builder.setCount(3);
- var options = builder.build();
- RemoteTaskFixture fixture = createFixtureWith(options);
+ RemoteTaskFixture fixture = createFixtureWith(builder);
fixture.loseLeadership();
- markAllNodesAsUp(options);
+ markAllNodesAsUp(ctrl.getOptions());
MockTask task = fixture.scheduleVersionDependentTaskWithSideEffects();
@@ -1562,7 +1563,7 @@ public class StateChangeTest extends FleetControllerTest {
void task_not_completed_within_deadline_is_failed_with_deadline_exceeded_error() throws Exception {
FleetControllerOptions.Builder builder = defaultOptions();
builder.setMaxDeferredTaskVersionWaitTime(Duration.ofSeconds(60));
- RemoteTaskFixture fixture = createFixtureWith(builder.build());
+ RemoteTaskFixture fixture = createFixtureWith(builder);
MockTask task = fixture.scheduleVersionDependentTaskWithSideEffects();
communicator.setShouldDeferDistributorClusterStateAcks(true);
@@ -1588,7 +1589,7 @@ public class StateChangeTest extends FleetControllerTest {
options.setMaxDeferredTaskVersionWaitTime(Duration.ofSeconds(60));
options.enableTwoPhaseClusterStateActivation(deferredActivation);
options.setMaxDivergentNodesPrintedInTaskErrorMessages(10);
- RemoteTaskFixture fixture = createFixtureWith(options.build());
+ RemoteTaskFixture fixture = createFixtureWith(options);
MockTask task = fixture.scheduleVersionDependentTaskWithSideEffects();
communicator.setShouldDeferDistributorClusterStateAcks(true);
@@ -1610,14 +1611,14 @@ public class StateChangeTest extends FleetControllerTest {
@Test
void task_not_completed_within_deadline_lists_nodes_not_converged_in_error_message() throws Exception {
doTestTaskDeadlineExceeded(false, "the following nodes have not converged to " +
- "at least version 4: distributor.0, distributor.1, distributor.2, distributor.3, " +
+ "at least version 3: distributor.0, distributor.1, distributor.2, distributor.3, " +
"distributor.4, distributor.5, distributor.6, distributor.7, distributor.8, distributor.9");
}
@Test
void task_not_completed_within_deadline_with_deferred_activation_checks_activation_version() throws Exception {
doTestTaskDeadlineExceeded(true, "the following nodes have not converged to " +
- "at least version 4: distributor.0, distributor.1, distributor.2, distributor.3, " +
+ "at least version 3: distributor.0, distributor.1, distributor.2, distributor.3, " +
"distributor.4, distributor.5, distributor.6, distributor.7, distributor.8, distributor.9 " +
"(... and 10 more)");
}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicatorTest.java
index b533168e61a..1018515cbfa 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicatorTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicatorTest.java
@@ -76,10 +76,11 @@ public class RPCCommunicatorTest {
@Test
void testGenerateNodeStateRequestTimeoutMsWithUpdates() {
final RPCCommunicator communicator = new RPCCommunicator(RPCCommunicator.createRealSupervisor(), null /* Timer */, INDEX, 1, 1, 100, 0);
- FleetControllerOptions.Builder builder = new FleetControllerOptions.Builder(null /*clustername*/, Set.of(new ConfiguredNode(0, false)));
- builder.setNodeStateRequestTimeoutEarliestPercentage(100);
- builder.setNodeStateRequestTimeoutLatestPercentage(100);
- builder.setNodeStateRequestTimeoutMS(NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS);
+ FleetControllerOptions.Builder builder = new FleetControllerOptions.Builder(null /*clustername*/, Set.of(new ConfiguredNode(0, false)))
+ .setNodeStateRequestTimeoutEarliestPercentage(100)
+ .setNodeStateRequestTimeoutLatestPercentage(100)
+ .setNodeStateRequestTimeoutMS(NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS)
+ .setZooKeeperServerAddress("localhost:2181");
communicator.propagateOptions(builder.build());
long timeOutMs = communicator.generateNodeStateRequestTimeout().toMillis();
assertEquals(timeOutMs, NODE_STATE_REQUEST_TIMEOUT_INTERVAL_MAX_MS);