diff options
author | HÃ¥kon Hallingstad <hakon.hallingstad@gmail.com> | 2022-09-20 12:12:09 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-20 12:12:09 +0200 |
commit | 5b894c7ae68032ad490a4c236c915be92f9b6c9d (patch) | |
tree | 59fb37cd1474289145935efae55511d441b54c1d /clustercontroller-core | |
parent | 93f0f40704dc6338279f8469a21d992c2c89efc8 (diff) | |
parent | 7d145e39d3d043fe696c4e999040fb6d1c24b0c6 (diff) |
Merge pull request #24132 from vespa-engine/hmusum/cleanup-34
Cluster controller cleanup, part 16 [run-systemtest]
Diffstat (limited to 'clustercontroller-core')
5 files changed, 52 insertions, 59 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 53dc71b2a90..80b79f4eb13 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -32,7 +32,6 @@ import java.time.Instant; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; import java.util.HashSet; import java.util.LinkedList; import java.util.List; @@ -99,7 +98,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta // Legacy behavior is an empty set of explicitly configured bucket spaces, which means that // only a baseline cluster state will be sent from the controller and no per-space state // deriving is done. - private Set<String> configuredBucketSpaces = Collections.emptySet(); + private final Set<String> configuredBucketSpaces = Set.of(FixedBucketSpaces.defaultSpace(), FixedBucketSpaces.globalSpace()); public FleetController(FleetControllerContext context, Timer timer, @@ -168,7 +167,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta options.nodeStateRequestTimeoutLatestPercentage(), options.nodeStateRequestRoundTripTimeMaxSeconds()); var database = new DatabaseHandler(context, new ZooKeeperDatabaseFactory(context), timer, options.zooKeeperServerAddress(), timer); - var lookUp = new SlobrokClient(context, timer); + var lookUp = new SlobrokClient(context, timer, options.slobrokConnectionSpecs()); var stateGenerator = new StateChangeHandler(context, timer, log); var stateBroadcaster = new SystemStateBroadcaster(context, timer, timer); var masterElectionHandler = new MasterElectionHandler(context, options.fleetControllerIndex(), options.fleetControllerCount(), timer, timer); @@ -487,7 +486,6 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta cluster.setSlobrokGenerationCount(0); } - configuredBucketSpaces = Set.of(FixedBucketSpaces.defaultSpace(), FixedBucketSpaces.globalSpace()); stateVersionTracker.setMinMergeCompletionRatio(options.minMergeCompletionRatio()); communicator.propagateOptions(options); @@ -625,7 +623,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta if (tickStopTime >= tickStartTime) { metricUpdater.addTickTime(tickStopTime - tickStartTime, didWork); } - // Always sleep some to use avoid using too much CPU and avoid starving waiting threads + // Always sleep some to avoid using too much CPU and avoid starving waiting threads monitor.wait(didWork || waitingForCycle ? 1 : options.cycleWaitTime()); if ( ! isRunning()) { return; } tickStartTime = timer.getCurrentTimeInMillis(); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java index 8393e776fc2..d87e0e20908 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java @@ -35,30 +35,37 @@ public class SlobrokClient implements NodeLookup { private Supervisor supervisor; private boolean freshMirror = false; - public SlobrokClient(FleetControllerContext context, Timer timer) { + public SlobrokClient(FleetControllerContext context, Timer timer, String[] connectionSpecs) { this.context = context; this.timer = timer; + this.connectionSpecs = connectionSpecs; + setup(); } - public boolean equalsExistingSpec(String spec[]) { + public boolean equalsExistingSpec(String[] spec) { if (spec == null && connectionSpecs == null) return true; - if (spec == null && connectionSpecs != null) return false; - if (spec != null && connectionSpecs == null) return false; + if (spec == null) return false; + if (connectionSpecs == null) return false; if (spec.length != connectionSpecs.length) return false; - for (int i=0, n=spec.length; i<n; ++i) { + for (int i = 0, n = spec.length; i < n; ++i) { if (!spec[i].equals(connectionSpecs[i])) return false; } return true; } - public void setSlobrokConnectionSpecs(String slobrokConnectionSpecs[]) { + public void setSlobrokConnectionSpecs(String[] slobrokConnectionSpecs) { if (equalsExistingSpec(slobrokConnectionSpecs)) return; + this.connectionSpecs = slobrokConnectionSpecs; shutdown(); + setup(); + } + + private void setup() { supervisor = new Supervisor(new Transport("slobrok-client")); supervisor.setDropEmptyBuffers(true); SlobrokList slist = new SlobrokList(); - slist.setup(slobrokConnectionSpecs); + slist.setup(connectionSpecs); mirror = new Mirror(supervisor, slist); freshMirror = true; } @@ -80,6 +87,7 @@ public class SlobrokClient implements NodeLookup { @Override public boolean updateCluster(ContentCluster cluster, SlobrokListener listener) { if (mirror == null) return false; + int mirrorVersion = mirror.updates(); if (freshMirror) { freshMirror = false; @@ -124,7 +132,7 @@ public class SlobrokClient implements NodeLookup { // XXX we really would like to cross-check the actual RPC address against what's configured, // but this information does not seem to be available to the cluster controller currently. NodeInfo nodeInfo = cluster.clusterInfo().getNodeInfo(data.node); - if (nodeInfo == null) continue; // slobrok may contain nonconfigured nodes during state transitions + if (nodeInfo == null) continue; // slobrok may contain non-configured nodes during state transitions cluster.clusterInfo().setRpcAddress(data.node, data.rpcAddress); if (listener != null) listener.handleNewNode(nodeInfo); // TODO: We'll never add new nodes here, move this to where clusterInfo.setNodes is called? diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java index d2ae4a8623f..f7efbd2cc3b 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java @@ -125,8 +125,7 @@ public abstract class FleetControllerTest implements Waiter { options.nodeStateRequestTimeoutEarliestPercentage(), options.nodeStateRequestTimeoutLatestPercentage(), options.nodeStateRequestRoundTripTimeMaxSeconds()); - var lookUp = new SlobrokClient(context, timer); - lookUp.setSlobrokConnectionSpecs(new String[0]); + var lookUp = new SlobrokClient(context, timer, new String[0]); var rpcServer = new RpcServer(timer, timer, options.clusterName(), options.fleetControllerIndex(), options.slobrokBackOffPolicy()); var database = new DatabaseHandler(context, new ZooKeeperDatabaseFactory(context), timer, options.zooKeeperServerAddress(), timer); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java index be06ea5a0d5..d120dc06c9f 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java @@ -21,6 +21,7 @@ import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.rpc.RpcServer; import com.yahoo.vespa.clustercontroller.core.testutils.LogFormatter; import com.yahoo.vespa.clustercontroller.core.testutils.WaitCondition; +import com.yahoo.vespa.clustercontroller.core.testutils.WaitTask; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -77,26 +78,6 @@ public class RpcServerTest extends FleetControllerTest { slobrok.stop(); } - /** - * For some reason, the first test trying to set up a stable system here occasionally times out. - * The theory is that some test run before it does something that is not cleaned up in time. - * Trying to add a test that should provoke the failure, but not fail due to it to see if we can verify that - * assumption. - * - * (testRebinding() does not seem to be that test. Tests in StateChangeTest that runs before this test tests very - * similar things, so strange if it should be from them too though. Maybe last test there. - */ - @Test - void testFailOccasionallyAndIgnoreToSeeIfOtherTestsThenWork() { - try { - startingTest("RpcServerTest::testFailOccasionallyAndIgnoreToSeeIfOtherTestsThenWork"); - setUpFleetController(true, defaultOptions("mycluster")); - setUpVdsNodes(true); - waitForStableSystem(); - } catch (Throwable t) { - } - } - @Test void testGetSystemState() throws Exception { LogFormatter.initializeLogging(); @@ -114,22 +95,28 @@ public class RpcServerTest extends FleetControllerTest { timer.advanceTime(options.nodeStateRequestTimeoutMS() + options.maxSlobrokDisconnectGracePeriod()); wait(new WaitCondition.StateWait(fleetController(), fleetController().getMonitor()) { - @Override - public String isConditionMet() { - if (currentState == null) { - return "No cluster state defined yet"; - } - NodeState distState = currentState.getNodeState(new Node(NodeType.DISTRIBUTOR, 0)); - if (distState.getState() != State.DOWN) { - return "Distributor not detected down yet: " + currentState.toString(); - } - NodeState storState = currentState.getNodeState(new Node(NodeType.STORAGE, 9)); - if (!storState.getState().oneOf("md")) { - return "Storage node not detected down yet: " + currentState.toString(); - } - return null; - } - }, null, timeout()); + @Override + public String isConditionMet() { + if (currentState == null) { + return "No cluster state defined yet"; + } + NodeState distState = currentState.getNodeState(new Node(NodeType.DISTRIBUTOR, 0)); + if (distState.getState() != State.DOWN) { + return "Distributor not detected down yet: " + currentState.toString(); + } + NodeState storState = currentState.getNodeState(new Node(NodeType.STORAGE, 9)); + if (!storState.getState().oneOf("md")) { + return "Storage node not detected down yet: " + currentState.toString(); + } + return null; + } + }, new WaitTask() { + @Override + public boolean performWaitTask() { + return false; + } + }, + timeout()); int rpcPort = fleetController().getRpcPort(); Target connection = supervisor.connect(new Spec("localhost", rpcPort)); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java index a7de2f0eedb..5bff101906d 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java @@ -10,6 +10,7 @@ import java.time.Instant; import java.util.Collections; import java.util.LinkedList; import java.util.List; +import java.util.Objects; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; @@ -88,7 +89,9 @@ public interface Waiter { } public final void wait(WaitCondition c, WaitTask wt, Duration timeout) { - log.log(Level.INFO, "Waiting for " + c + (wt == null ? "" : " with wait task " + wt)); + Objects.requireNonNull(wt, "wait task cannot be null"); + + log.log(Level.INFO, "Waiting for " + c + " with wait task " + wt); Instant endTime = Instant.now().plus(timeout); String lastReason = null; while (true) { @@ -104,17 +107,15 @@ public interface Waiter { } try { boolean allowWait = true; - if (wt != null) { - if (wt.performWaitTask()) { - data.getMonitor().notifyAll(); - allowWait = false; - } + if (wt.performWaitTask()) { + data.getMonitor().notifyAll(); + allowWait = false; } Duration timeLeft = Duration.between(Instant.now(), endTime); if (timeLeft.isNegative()) - throw new IllegalStateException("Timed out waiting max " + timeout + " ms for " + c + (wt == null ? "" : "\n with wait task " + wt) + ",\n reason: " + reason); + throw new IllegalStateException("Timed out waiting max " + timeout + " ms for " + c + "\n with wait task " + wt + ",\n reason: " + reason); if (allowWait) - data.getMonitor().wait(wt == null ? WaitTask.defaultTaskFrequencyMillis : Math.min(wt.getWaitTaskFrequencyInMillis(), timeLeft.toMillis())); + data.getMonitor().wait(Math.min(wt.getWaitTaskFrequencyInMillis(), timeLeft.toMillis())); } catch (InterruptedException e) { throw new RuntimeException(e); } |