From e7a23602da63627f8b30e094bc41469c301891dd Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Mon, 4 Dec 2023 13:46:05 +0100 Subject: Use fake ZooKeeper database implementation for subset of CC tests The fake impl acts "as if" a single-node ZK quorum is present, so it cannot be directly used with most multi-node tests that require multiple nodes to actually participate in leader elections. --- .../clustercontroller/core/FleetController.java | 3 +- .../core/FleetControllerOptions.java | 23 +++- .../core/database/DatabaseFactory.java | 6 +- .../core/DistributionBitCountTest.java | 5 +- .../core/FakeZooKeeperDatabase.java | 136 +++++++++++++++++++++ .../core/FleetControllerTest.java | 18 ++- .../clustercontroller/core/StateChangeTest.java | 8 +- 7 files changed, 184 insertions(+), 15 deletions(-) create mode 100644 clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FakeZooKeeperDatabase.java diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index dd2ba5bf5f1..3e520d95d2c 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -8,7 +8,6 @@ import com.yahoo.vdslib.state.Node; import com.yahoo.vdslib.state.NodeState; import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler; -import com.yahoo.vespa.clustercontroller.core.database.ZooKeeperDatabaseFactory; import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; import com.yahoo.vespa.clustercontroller.core.listeners.NodeListener; import com.yahoo.vespa.clustercontroller.core.listeners.SlobrokListener; @@ -152,7 +151,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta 
options.nodeStateRequestTimeoutEarliestPercentage(), options.nodeStateRequestTimeoutLatestPercentage(), options.nodeStateRequestRoundTripTimeMaxSeconds()); - var database = new DatabaseHandler(context, new ZooKeeperDatabaseFactory(context), timer, options.zooKeeperServerAddress(), timer); + var database = new DatabaseHandler(context, options.dbFactoryFn().apply(context), timer, options.zooKeeperServerAddress(), timer); var lookUp = new SlobrokClient(context, timer, options.slobrokConnectionSpecs()); var stateGenerator = new StateChangeHandler(context, timer, log); var stateBroadcaster = new SystemStateBroadcaster(context, timer, timer); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java index d3b2bdf3d8d..a0efaa70b58 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java @@ -5,6 +5,9 @@ import ai.vespa.validation.Validation; import com.yahoo.vdslib.distribution.ConfiguredNode; import com.yahoo.vdslib.distribution.Distribution; import com.yahoo.vdslib.state.NodeType; +import com.yahoo.vespa.clustercontroller.core.database.DatabaseFactory; +import com.yahoo.vespa.clustercontroller.core.database.ZooKeeperDatabaseFactory; + import java.time.Duration; import java.util.Collection; import java.util.Collections; @@ -13,6 +16,7 @@ import java.util.Objects; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; +import java.util.function.Function; /** * Immutable class representing all the options that can be set in the fleetcontroller. 
@@ -128,6 +132,9 @@ public class FleetControllerOptions { private final int maxNumberOfGroupsAllowedToBeDown; + private final Function dbFactoryFn; + + // TODO less impressive length...! private FleetControllerOptions(String clusterName, int fleetControllerIndex, int fleetControllerCount, @@ -168,7 +175,8 @@ public class FleetControllerOptions { boolean clusterFeedBlockEnabled, Map clusterFeedBlockLimit, double clusterFeedBlockNoiseLevel, - int maxNumberOfGroupsAllowedToBeDown) { + int maxNumberOfGroupsAllowedToBeDown, + Function dbFactoryFn) { this.clusterName = clusterName; this.fleetControllerIndex = fleetControllerIndex; this.fleetControllerCount = fleetControllerCount; @@ -210,6 +218,7 @@ public class FleetControllerOptions { this.clusterFeedBlockLimit = clusterFeedBlockLimit; this.clusterFeedBlockNoiseLevel = clusterFeedBlockNoiseLevel; this.maxNumberOfGroupsAllowedToBeDown = maxNumberOfGroupsAllowedToBeDown; + this.dbFactoryFn = dbFactoryFn; } public Duration getMaxDeferredTaskVersionWaitTime() { @@ -382,6 +391,8 @@ public class FleetControllerOptions { public int maxNumberOfGroupsAllowedToBeDown() { return maxNumberOfGroupsAllowedToBeDown; } + public Function dbFactoryFn() { return dbFactoryFn; } + public static class Builder { private String clusterName; @@ -425,6 +436,7 @@ public class FleetControllerOptions { private Map clusterFeedBlockLimit = Collections.emptyMap(); private double clusterFeedBlockNoiseLevel = 0.01; private int maxNumberOfGroupsAllowedToBeDown = 1; + private Function dbFactoryFn = ZooKeeperDatabaseFactory::new; public Builder(String clusterName, Collection nodes) { this.clusterName = clusterName; @@ -677,6 +689,11 @@ public class FleetControllerOptions { return this; } + public Builder setDbFactoryFn(Function fn) { + this.dbFactoryFn = fn; + return this; + } + public FleetControllerOptions build() { return new FleetControllerOptions(clusterName, index, @@ -718,7 +735,8 @@ public class FleetControllerOptions { clusterFeedBlockEnabled, 
clusterFeedBlockLimit, clusterFeedBlockNoiseLevel, - maxNumberOfGroupsAllowedToBeDown); + maxNumberOfGroupsAllowedToBeDown, + dbFactoryFn); } public static Builder copy(FleetControllerOptions options) { @@ -764,6 +782,7 @@ public class FleetControllerOptions { builder.clusterFeedBlockLimit = Map.copyOf(options.clusterFeedBlockLimit); builder.clusterFeedBlockNoiseLevel = options.clusterFeedBlockNoiseLevel; builder.maxNumberOfGroupsAllowedToBeDown = options.maxNumberOfGroupsAllowedToBeDown; + builder.dbFactoryFn = options.dbFactoryFn; return builder; } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseFactory.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseFactory.java index a77c22da835..6419e5f05e7 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseFactory.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseFactory.java @@ -9,9 +9,9 @@ package com.yahoo.vespa.clustercontroller.core.database; public interface DatabaseFactory { class Params { - String dbAddress; - int dbSessionTimeout; - Database.DatabaseListener listener; + public String dbAddress; + public int dbSessionTimeout; + public Database.DatabaseListener listener; Params databaseAddress(String address) { this.dbAddress = address; return this; } Params databaseSessionTimeout(int timeout) { this.dbSessionTimeout = timeout; return this; } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java index 0e48a9bbc45..1e13ed4eef5 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java +++ 
b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java @@ -14,9 +14,12 @@ import java.util.List; import static org.junit.jupiter.api.Assertions.assertEquals; -@ExtendWith(CleanupZookeeperLogsOnSuccess.class) public class DistributionBitCountTest extends FleetControllerTest { + DistributionBitCountTest() { + useRealZooKeeperInTest(false); + } + private FleetControllerOptions setUpSystem() throws Exception { List configuredNodes = new ArrayList<>(); for (int i = 0 ; i < 10; i++) { diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FakeZooKeeperDatabase.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FakeZooKeeperDatabase.java new file mode 100644 index 00000000000..057d137650f --- /dev/null +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FakeZooKeeperDatabase.java @@ -0,0 +1,136 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.core; + +import com.yahoo.vdslib.state.Node; +import com.yahoo.vdslib.state.NodeState; +import com.yahoo.vespa.clustercontroller.core.database.Database; +import com.yahoo.vespa.clustercontroller.core.database.DatabaseFactory; + +import java.util.Map; +import java.util.TreeMap; + +/** + * Memory-backed fake DB implementation that tries to mirror the semantics of the + * (synchronous) ZooKeeper DB implementation. By itself this fake acts as if a quorum + * with a _single_, local ZK instance has been configured. This DB instance cannot be + * used across multiple cluster controller instances. + * + * Threading note: we expect all invocations on this instance to happen from the + * main cluster controller thread (i.e. 
"as-if" single threaded), but we wrap everything + * in a mutex to stay on the safe side since this isn't explicitly documented as + * part of the API. + */ +public class FakeZooKeeperDatabase extends Database { + + public static class Factory implements DatabaseFactory { + private final FleetControllerContext context; + public Factory(FleetControllerContext context) { + this.context = context; + } + @Override + public Database create(Params params) { + return new FakeZooKeeperDatabase(context, params.listener); + } + } + + private final FleetControllerContext context; + private final Database.DatabaseListener listener; + + private final Object mutex = new Object(); + private boolean closed = false; + private Integer persistedLatestStateVersion = null; + private Map persistedLeaderVotes = new TreeMap<>(); + private Map persistedWantedStates = new TreeMap<>(); + private Map persistedStartTimestamps = new TreeMap<>(); + private ClusterStateBundle persistedBundle = ClusterStateBundle.ofBaselineOnly(AnnotatedClusterState.emptyState()); + + public FakeZooKeeperDatabase(FleetControllerContext context, DatabaseListener listener) { + this.context = context; + this.listener = listener; + } + + @Override + public void close() { + synchronized (mutex) { + closed = true; + } + } + + @Override + public boolean isClosed() { + synchronized (mutex) { + return closed; + } + } + + @Override + public boolean storeMasterVote(int voteForNode) { + Map voteState; + synchronized (mutex) { + persistedLeaderVotes.put(context.id().index(), voteForNode); + voteState = Map.copyOf(persistedLeaderVotes); + } + listener.handleMasterData(voteState); + return true; + } + + @Override + public boolean storeLatestSystemStateVersion(int version) { + synchronized (mutex) { + persistedLatestStateVersion = version; + return true; + } + } + + @Override + public Integer retrieveLatestSystemStateVersion() { + synchronized (mutex) { + return persistedLatestStateVersion; + } + } + + @Override + public boolean 
storeWantedStates(Map states) { + synchronized (mutex) { + persistedWantedStates = Map.copyOf(states); + } + return true; + } + + @Override + public Map retrieveWantedStates() { + synchronized (mutex) { + return Map.copyOf(persistedWantedStates); + } + } + + @Override + public boolean storeStartTimestamps(Map timestamps) { + synchronized (mutex) { + persistedStartTimestamps = Map.copyOf(timestamps); + return true; + } + } + + @Override + public Map retrieveStartTimestamps() { + synchronized (mutex) { + return Map.copyOf(persistedStartTimestamps); + } + } + + @Override + public boolean storeLastPublishedStateBundle(ClusterStateBundle stateBundle) { + synchronized (mutex) { + persistedBundle = stateBundle; + return true; + } + } + + @Override + public ClusterStateBundle retrieveLastPublishedStateBundle() { + synchronized (mutex) { + return persistedBundle; + } + } +} diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java index 400f4a14d24..ee0506070b5 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java @@ -16,7 +16,6 @@ import com.yahoo.vdslib.state.NodeState; import com.yahoo.vdslib.state.NodeType; import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler; -import com.yahoo.vespa.clustercontroller.core.database.ZooKeeperDatabaseFactory; import com.yahoo.vespa.clustercontroller.core.rpc.RPCCommunicator; import com.yahoo.vespa.clustercontroller.core.rpc.RpcServer; import com.yahoo.vespa.clustercontroller.core.rpc.SlobrokClient; @@ -49,13 +48,14 @@ import static org.junit.jupiter.api.Assertions.fail; */ public abstract class FleetControllerTest implements Waiter { - private static final Logger 
log = Logger.getLogger(FleetControllerTest.class.getName()); + protected static final Logger log = Logger.getLogger(FleetControllerTest.class.getName()); private static final int DEFAULT_NODE_COUNT = 10; private final Duration timeout = Duration.ofSeconds(30); protected Slobrok slobrok; protected FleetControllerOptions options; + private boolean useRealZooKeeperInTest = true; ZooKeeperTestServer zooKeeperServer; protected final List fleetControllers = new ArrayList<>(); protected List nodes = new ArrayList<>(); @@ -73,6 +73,10 @@ public abstract class FleetControllerTest implements Waiter { LogSetup.initVespaLogging("fleetcontroller"); } + protected void useRealZooKeeperInTest(boolean useRealZk) { + this.useRealZooKeeperInTest = useRealZk; + } + protected static FleetControllerOptions.Builder defaultOptions() { return defaultOptions(IntStream.range(0, DEFAULT_NODE_COUNT) .mapToObj(i -> new ConfiguredNode(i, false)) @@ -121,7 +125,7 @@ public abstract class FleetControllerTest implements Waiter { var log = new EventLog(timer, metricUpdater); var cluster = new ContentCluster(options.clusterName(), options.nodes(), options.storageDistribution()); var stateGatherer = new NodeStateGatherer(timer, timer, log); - var database = new DatabaseHandler(context, new ZooKeeperDatabaseFactory(context), timer, options.zooKeeperServerAddress(), timer); + var database = new DatabaseHandler(context, options.dbFactoryFn().apply(context), timer, options.zooKeeperServerAddress(), timer); // Setting this <1000 ms causes ECONNREFUSED on socket trying to connect to ZK server, in ZooKeeper, // after creating a new ZooKeeper (session). This causes ~10s extra time to connect after connection loss. // Reasons unknown. Larger values like the default 10_000 causes that much additional running time for some tests. 
@@ -139,7 +143,13 @@ public abstract class FleetControllerTest implements Waiter { } protected FleetControllerOptions setUpFleetController(Timer timer, FleetControllerOptions.Builder builder) throws Exception { - setUpZooKeeperServer(builder); + // TODO consolidate CC setup in tests; currently partial duplication of + // setup/init code across test subclasses. + if (useRealZooKeeperInTest) { + setUpZooKeeperServer(builder); + } else { + builder.setDbFactoryFn(FakeZooKeeperDatabase.Factory::new); + } builder.setSlobrokConnectionSpecs(getSlobrokConnectionSpecs(slobrok)); options = builder.build(); startFleetController(timer); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java index 01987cf1f4c..9e9206526e5 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java @@ -9,7 +9,6 @@ import com.yahoo.vdslib.state.NodeType; import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.testutils.StateWaiter; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; import java.time.Duration; import java.util.ArrayList; @@ -23,7 +22,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; -@ExtendWith(CleanupZookeeperLogsOnSuccess.class) public class StateChangeTest extends FleetControllerTest { private final FakeTimer timer = new FakeTimer(); @@ -31,6 +29,10 @@ public class StateChangeTest extends FleetControllerTest { private FleetController ctrl; private DummyCommunicator communicator; + StateChangeTest() { + useRealZooKeeperInTest(false); + } + private void 
initialize(FleetControllerOptions.Builder builder) throws Exception { List nodes = new ArrayList<>(); for (int i = 0; i < builder.nodes().size(); ++i) { @@ -38,7 +40,7 @@ public class StateChangeTest extends FleetControllerTest { nodes.add(new Node(NodeType.DISTRIBUTOR, i)); } - setUpZooKeeperServer(builder); + builder.setDbFactoryFn(FakeZooKeeperDatabase.Factory::new); communicator = new DummyCommunicator(nodes, timer); boolean start = false; FleetControllerOptions options = builder.build(); -- cgit v1.2.3