aboutsummaryrefslogtreecommitdiffstats
path: root/clustercontroller-core
diff options
context:
space:
mode:
authorHÃ¥kon Hallingstad <hakon.hallingstad@gmail.com>2022-09-20 12:12:09 +0200
committerGitHub <noreply@github.com>2022-09-20 12:12:09 +0200
commit5b894c7ae68032ad490a4c236c915be92f9b6c9d (patch)
tree59fb37cd1474289145935efae55511d441b54c1d /clustercontroller-core
parent93f0f40704dc6338279f8469a21d992c2c89efc8 (diff)
parent7d145e39d3d043fe696c4e999040fb6d1c24b0c6 (diff)
Merge pull request #24132 from vespa-engine/hmusum/cleanup-34
Cluster controller cleanup, part 16 [run-systemtest]
Diffstat (limited to 'clustercontroller-core')
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java8
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java24
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java3
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java59
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java17
5 files changed, 52 insertions, 59 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index 53dc71b2a90..80b79f4eb13 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -32,7 +32,6 @@ import java.time.Instant;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
@@ -99,7 +98,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta
// Legacy behavior is an empty set of explicitly configured bucket spaces, which means that
// only a baseline cluster state will be sent from the controller and no per-space state
// deriving is done.
- private Set<String> configuredBucketSpaces = Collections.emptySet();
+ private final Set<String> configuredBucketSpaces = Set.of(FixedBucketSpaces.defaultSpace(), FixedBucketSpaces.globalSpace());
public FleetController(FleetControllerContext context,
Timer timer,
@@ -168,7 +167,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta
options.nodeStateRequestTimeoutLatestPercentage(),
options.nodeStateRequestRoundTripTimeMaxSeconds());
var database = new DatabaseHandler(context, new ZooKeeperDatabaseFactory(context), timer, options.zooKeeperServerAddress(), timer);
- var lookUp = new SlobrokClient(context, timer);
+ var lookUp = new SlobrokClient(context, timer, options.slobrokConnectionSpecs());
var stateGenerator = new StateChangeHandler(context, timer, log);
var stateBroadcaster = new SystemStateBroadcaster(context, timer, timer);
var masterElectionHandler = new MasterElectionHandler(context, options.fleetControllerIndex(), options.fleetControllerCount(), timer, timer);
@@ -487,7 +486,6 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta
cluster.setSlobrokGenerationCount(0);
}
- configuredBucketSpaces = Set.of(FixedBucketSpaces.defaultSpace(), FixedBucketSpaces.globalSpace());
stateVersionTracker.setMinMergeCompletionRatio(options.minMergeCompletionRatio());
communicator.propagateOptions(options);
@@ -625,7 +623,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta
if (tickStopTime >= tickStartTime) {
metricUpdater.addTickTime(tickStopTime - tickStartTime, didWork);
}
- // Always sleep some to use avoid using too much CPU and avoid starving waiting threads
+ // Always sleep some to avoid using too much CPU and avoid starving waiting threads
monitor.wait(didWork || waitingForCycle ? 1 : options.cycleWaitTime());
if ( ! isRunning()) { return; }
tickStartTime = timer.getCurrentTimeInMillis();
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java
index 8393e776fc2..d87e0e20908 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java
@@ -35,30 +35,37 @@ public class SlobrokClient implements NodeLookup {
private Supervisor supervisor;
private boolean freshMirror = false;
- public SlobrokClient(FleetControllerContext context, Timer timer) {
+ public SlobrokClient(FleetControllerContext context, Timer timer, String[] connectionSpecs) {
this.context = context;
this.timer = timer;
+ this.connectionSpecs = connectionSpecs;
+ setup();
}
- public boolean equalsExistingSpec(String spec[]) {
+ public boolean equalsExistingSpec(String[] spec) {
if (spec == null && connectionSpecs == null) return true;
- if (spec == null && connectionSpecs != null) return false;
- if (spec != null && connectionSpecs == null) return false;
+ if (spec == null) return false;
+ if (connectionSpecs == null) return false;
if (spec.length != connectionSpecs.length) return false;
- for (int i=0, n=spec.length; i<n; ++i) {
+ for (int i = 0, n = spec.length; i < n; ++i) {
if (!spec[i].equals(connectionSpecs[i])) return false;
}
return true;
}
- public void setSlobrokConnectionSpecs(String slobrokConnectionSpecs[]) {
+ public void setSlobrokConnectionSpecs(String[] slobrokConnectionSpecs) {
if (equalsExistingSpec(slobrokConnectionSpecs)) return;
+
this.connectionSpecs = slobrokConnectionSpecs;
shutdown();
+ setup();
+ }
+
+ private void setup() {
supervisor = new Supervisor(new Transport("slobrok-client"));
supervisor.setDropEmptyBuffers(true);
SlobrokList slist = new SlobrokList();
- slist.setup(slobrokConnectionSpecs);
+ slist.setup(connectionSpecs);
mirror = new Mirror(supervisor, slist);
freshMirror = true;
}
@@ -80,6 +87,7 @@ public class SlobrokClient implements NodeLookup {
@Override
public boolean updateCluster(ContentCluster cluster, SlobrokListener listener) {
if (mirror == null) return false;
+
int mirrorVersion = mirror.updates();
if (freshMirror) {
freshMirror = false;
@@ -124,7 +132,7 @@ public class SlobrokClient implements NodeLookup {
// XXX we really would like to cross-check the actual RPC address against what's configured,
// but this information does not seem to be available to the cluster controller currently.
NodeInfo nodeInfo = cluster.clusterInfo().getNodeInfo(data.node);
- if (nodeInfo == null) continue; // slobrok may contain nonconfigured nodes during state transitions
+ if (nodeInfo == null) continue; // slobrok may contain non-configured nodes during state transitions
cluster.clusterInfo().setRpcAddress(data.node, data.rpcAddress);
if (listener != null)
listener.handleNewNode(nodeInfo); // TODO: We'll never add new nodes here, move this to where clusterInfo.setNodes is called?
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
index d2ae4a8623f..f7efbd2cc3b 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
@@ -125,8 +125,7 @@ public abstract class FleetControllerTest implements Waiter {
options.nodeStateRequestTimeoutEarliestPercentage(),
options.nodeStateRequestTimeoutLatestPercentage(),
options.nodeStateRequestRoundTripTimeMaxSeconds());
- var lookUp = new SlobrokClient(context, timer);
- lookUp.setSlobrokConnectionSpecs(new String[0]);
+ var lookUp = new SlobrokClient(context, timer, new String[0]);
var rpcServer = new RpcServer(timer, timer, options.clusterName(), options.fleetControllerIndex(), options.slobrokBackOffPolicy());
var database = new DatabaseHandler(context, new ZooKeeperDatabaseFactory(context), timer, options.zooKeeperServerAddress(), timer);
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java
index be06ea5a0d5..d120dc06c9f 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java
@@ -21,6 +21,7 @@ import com.yahoo.vdslib.state.State;
import com.yahoo.vespa.clustercontroller.core.rpc.RpcServer;
import com.yahoo.vespa.clustercontroller.core.testutils.LogFormatter;
import com.yahoo.vespa.clustercontroller.core.testutils.WaitCondition;
+import com.yahoo.vespa.clustercontroller.core.testutils.WaitTask;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@@ -77,26 +78,6 @@ public class RpcServerTest extends FleetControllerTest {
slobrok.stop();
}
- /**
- * For some reason, the first test trying to set up a stable system here occasionally times out.
- * The theory is that some test run before it does something that is not cleaned up in time.
- * Trying to add a test that should provoke the failure, but not fail due to it to see if we can verify that
- * assumption.
- *
- * (testRebinding() does not seem to be that test. Tests in StateChangeTest that runs before this test tests very
- * similar things, so strange if it should be from them too though. Maybe last test there.
- */
- @Test
- void testFailOccasionallyAndIgnoreToSeeIfOtherTestsThenWork() {
- try {
- startingTest("RpcServerTest::testFailOccasionallyAndIgnoreToSeeIfOtherTestsThenWork");
- setUpFleetController(true, defaultOptions("mycluster"));
- setUpVdsNodes(true);
- waitForStableSystem();
- } catch (Throwable t) {
- }
- }
-
@Test
void testGetSystemState() throws Exception {
LogFormatter.initializeLogging();
@@ -114,22 +95,28 @@ public class RpcServerTest extends FleetControllerTest {
timer.advanceTime(options.nodeStateRequestTimeoutMS() + options.maxSlobrokDisconnectGracePeriod());
wait(new WaitCondition.StateWait(fleetController(), fleetController().getMonitor()) {
- @Override
- public String isConditionMet() {
- if (currentState == null) {
- return "No cluster state defined yet";
- }
- NodeState distState = currentState.getNodeState(new Node(NodeType.DISTRIBUTOR, 0));
- if (distState.getState() != State.DOWN) {
- return "Distributor not detected down yet: " + currentState.toString();
- }
- NodeState storState = currentState.getNodeState(new Node(NodeType.STORAGE, 9));
- if (!storState.getState().oneOf("md")) {
- return "Storage node not detected down yet: " + currentState.toString();
- }
- return null;
- }
- }, null, timeout());
+ @Override
+ public String isConditionMet() {
+ if (currentState == null) {
+ return "No cluster state defined yet";
+ }
+ NodeState distState = currentState.getNodeState(new Node(NodeType.DISTRIBUTOR, 0));
+ if (distState.getState() != State.DOWN) {
+ return "Distributor not detected down yet: " + currentState.toString();
+ }
+ NodeState storState = currentState.getNodeState(new Node(NodeType.STORAGE, 9));
+ if (!storState.getState().oneOf("md")) {
+ return "Storage node not detected down yet: " + currentState.toString();
+ }
+ return null;
+ }
+ }, new WaitTask() {
+ @Override
+ public boolean performWaitTask() {
+ return false;
+ }
+ },
+ timeout());
int rpcPort = fleetController().getRpcPort();
Target connection = supervisor.connect(new Spec("localhost", rpcPort));
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java
index a7de2f0eedb..5bff101906d 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java
@@ -10,6 +10,7 @@ import java.time.Instant;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
+import java.util.Objects;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -88,7 +89,9 @@ public interface Waiter {
}
public final void wait(WaitCondition c, WaitTask wt, Duration timeout) {
- log.log(Level.INFO, "Waiting for " + c + (wt == null ? "" : " with wait task " + wt));
+ Objects.requireNonNull(wt, "wait task cannot be null");
+
+ log.log(Level.INFO, "Waiting for " + c + " with wait task " + wt);
Instant endTime = Instant.now().plus(timeout);
String lastReason = null;
while (true) {
@@ -104,17 +107,15 @@ public interface Waiter {
}
try {
boolean allowWait = true;
- if (wt != null) {
- if (wt.performWaitTask()) {
- data.getMonitor().notifyAll();
- allowWait = false;
- }
+ if (wt.performWaitTask()) {
+ data.getMonitor().notifyAll();
+ allowWait = false;
}
Duration timeLeft = Duration.between(Instant.now(), endTime);
if (timeLeft.isNegative())
- throw new IllegalStateException("Timed out waiting max " + timeout + " ms for " + c + (wt == null ? "" : "\n with wait task " + wt) + ",\n reason: " + reason);
+ throw new IllegalStateException("Timed out waiting max " + timeout + " ms for " + c + "\n with wait task " + wt + ",\n reason: " + reason);
if (allowWait)
- data.getMonitor().wait(wt == null ? WaitTask.defaultTaskFrequencyMillis : Math.min(wt.getWaitTaskFrequencyInMillis(), timeLeft.toMillis()));
+ data.getMonitor().wait(Math.min(wt.getWaitTaskFrequencyInMillis(), timeLeft.toMillis()));
} catch (InterruptedException e) {
throw new RuntimeException(e);
}