aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tor Brede Vekterli <vekterli@vespa.ai> 2023-12-04 14:54:52 +0100
committer GitHub <noreply@github.com> 2023-12-04 14:54:52 +0100
commit 70d6cc8ac0ef95d839054a7bf6a8c013cc3cb618 (patch)
tree f3ed763ad4aab92ff57b9aff9365af8faeb01dd9
parent dd6b76833d795ca987c838dadd8c5da973ce32ef (diff)
parent e7a23602da63627f8b30e094bc41469c301891dd (diff)
Merge pull request #29546 from vespa-engine/vekterli/use-fake-zk-database-for-subset-of-cc-tests (tag: v8.268.18)
Use fake ZooKeeper database implementation for subset of CC tests
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java 3
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java 23
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseFactory.java 6
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java 5
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FakeZooKeeperDatabase.java 136
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java 18
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java 8
7 files changed, 184 insertions, 15 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index dd2ba5bf5f1..3e520d95d2c 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -8,7 +8,6 @@ import com.yahoo.vdslib.state.Node;
import com.yahoo.vdslib.state.NodeState;
import com.yahoo.vdslib.state.State;
import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler;
-import com.yahoo.vespa.clustercontroller.core.database.ZooKeeperDatabaseFactory;
import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
import com.yahoo.vespa.clustercontroller.core.listeners.NodeListener;
import com.yahoo.vespa.clustercontroller.core.listeners.SlobrokListener;
@@ -152,7 +151,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta
options.nodeStateRequestTimeoutEarliestPercentage(),
options.nodeStateRequestTimeoutLatestPercentage(),
options.nodeStateRequestRoundTripTimeMaxSeconds());
- var database = new DatabaseHandler(context, new ZooKeeperDatabaseFactory(context), timer, options.zooKeeperServerAddress(), timer);
+ var database = new DatabaseHandler(context, options.dbFactoryFn().apply(context), timer, options.zooKeeperServerAddress(), timer);
var lookUp = new SlobrokClient(context, timer, options.slobrokConnectionSpecs());
var stateGenerator = new StateChangeHandler(context, timer, log);
var stateBroadcaster = new SystemStateBroadcaster(context, timer, timer);
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
index d3b2bdf3d8d..a0efaa70b58 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
@@ -5,6 +5,9 @@ import ai.vespa.validation.Validation;
import com.yahoo.vdslib.distribution.ConfiguredNode;
import com.yahoo.vdslib.distribution.Distribution;
import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vespa.clustercontroller.core.database.DatabaseFactory;
+import com.yahoo.vespa.clustercontroller.core.database.ZooKeeperDatabaseFactory;
+
import java.time.Duration;
import java.util.Collection;
import java.util.Collections;
@@ -13,6 +16,7 @@ import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
+import java.util.function.Function;
/**
* Immutable class representing all the options that can be set in the fleetcontroller.
@@ -128,6 +132,9 @@ public class FleetControllerOptions {
private final int maxNumberOfGroupsAllowedToBeDown;
+ private final Function<FleetControllerContext, DatabaseFactory> dbFactoryFn;
+
+ // TODO less impressive length...!
private FleetControllerOptions(String clusterName,
int fleetControllerIndex,
int fleetControllerCount,
@@ -168,7 +175,8 @@ public class FleetControllerOptions {
boolean clusterFeedBlockEnabled,
Map<String, Double> clusterFeedBlockLimit,
double clusterFeedBlockNoiseLevel,
- int maxNumberOfGroupsAllowedToBeDown) {
+ int maxNumberOfGroupsAllowedToBeDown,
+ Function<FleetControllerContext, DatabaseFactory> dbFactoryFn) {
this.clusterName = clusterName;
this.fleetControllerIndex = fleetControllerIndex;
this.fleetControllerCount = fleetControllerCount;
@@ -210,6 +218,7 @@ public class FleetControllerOptions {
this.clusterFeedBlockLimit = clusterFeedBlockLimit;
this.clusterFeedBlockNoiseLevel = clusterFeedBlockNoiseLevel;
this.maxNumberOfGroupsAllowedToBeDown = maxNumberOfGroupsAllowedToBeDown;
+ this.dbFactoryFn = dbFactoryFn;
}
public Duration getMaxDeferredTaskVersionWaitTime() {
@@ -382,6 +391,8 @@ public class FleetControllerOptions {
public int maxNumberOfGroupsAllowedToBeDown() { return maxNumberOfGroupsAllowedToBeDown; }
+ public Function<FleetControllerContext, DatabaseFactory> dbFactoryFn() { return dbFactoryFn; }
+
public static class Builder {
private String clusterName;
@@ -425,6 +436,7 @@ public class FleetControllerOptions {
private Map<String, Double> clusterFeedBlockLimit = Collections.emptyMap();
private double clusterFeedBlockNoiseLevel = 0.01;
private int maxNumberOfGroupsAllowedToBeDown = 1;
+ private Function<FleetControllerContext, DatabaseFactory> dbFactoryFn = ZooKeeperDatabaseFactory::new;
public Builder(String clusterName, Collection<ConfiguredNode> nodes) {
this.clusterName = clusterName;
@@ -677,6 +689,11 @@ public class FleetControllerOptions {
return this;
}
+ public Builder setDbFactoryFn(Function<FleetControllerContext, DatabaseFactory> fn) {
+ this.dbFactoryFn = fn; // overrides the ZooKeeperDatabaseFactory::new default; fn is not null-checked here
+ return this;
+ }
+
public FleetControllerOptions build() {
return new FleetControllerOptions(clusterName,
index,
@@ -718,7 +735,8 @@ public class FleetControllerOptions {
clusterFeedBlockEnabled,
clusterFeedBlockLimit,
clusterFeedBlockNoiseLevel,
- maxNumberOfGroupsAllowedToBeDown);
+ maxNumberOfGroupsAllowedToBeDown,
+ dbFactoryFn);
}
public static Builder copy(FleetControllerOptions options) {
@@ -764,6 +782,7 @@ public class FleetControllerOptions {
builder.clusterFeedBlockLimit = Map.copyOf(options.clusterFeedBlockLimit);
builder.clusterFeedBlockNoiseLevel = options.clusterFeedBlockNoiseLevel;
builder.maxNumberOfGroupsAllowedToBeDown = options.maxNumberOfGroupsAllowedToBeDown;
+ builder.dbFactoryFn = options.dbFactoryFn;
return builder;
}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseFactory.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseFactory.java
index a77c22da835..6419e5f05e7 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseFactory.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseFactory.java
@@ -9,9 +9,9 @@ package com.yahoo.vespa.clustercontroller.core.database;
public interface DatabaseFactory {
class Params {
- String dbAddress;
- int dbSessionTimeout;
- Database.DatabaseListener listener;
+ public String dbAddress;
+ public int dbSessionTimeout;
+ public Database.DatabaseListener listener;
Params databaseAddress(String address) { this.dbAddress = address; return this; }
Params databaseSessionTimeout(int timeout) { this.dbSessionTimeout = timeout; return this; }
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java
index 0e48a9bbc45..1e13ed4eef5 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java
@@ -14,9 +14,12 @@ import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
-@ExtendWith(CleanupZookeeperLogsOnSuccess.class)
public class DistributionBitCountTest extends FleetControllerTest {
+ DistributionBitCountTest() {
+ useRealZooKeeperInTest(false);
+ }
+
private FleetControllerOptions setUpSystem() throws Exception {
List<ConfiguredNode> configuredNodes = new ArrayList<>();
for (int i = 0 ; i < 10; i++) {
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FakeZooKeeperDatabase.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FakeZooKeeperDatabase.java
new file mode 100644
index 00000000000..057d137650f
--- /dev/null
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FakeZooKeeperDatabase.java
@@ -0,0 +1,136 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.clustercontroller.core;
+
+import com.yahoo.vdslib.state.Node;
+import com.yahoo.vdslib.state.NodeState;
+import com.yahoo.vespa.clustercontroller.core.database.Database;
+import com.yahoo.vespa.clustercontroller.core.database.DatabaseFactory;
+
+import java.util.Map;
+import java.util.TreeMap;
+
+/**
+ * Memory-backed fake DB implementation that tries to mirror the semantics of the
+ * (synchronous) ZooKeeper DB implementation. By itself this fake acts as if a quorum
+ * with a _single_, local ZK instance has been configured. This DB instance cannot be
+ * used across multiple cluster controller instances.
+ *
+ * Threading note: we expect all invocations on this instance to happen from the
+ * main cluster controller thread (i.e. "as-if" single threaded), but we wrap all
+ * state access in a mutex to stay on the safe side since this isn't explicitly
+ * documented as part of the API. The listener callback is invoked outside the mutex.
+ */
+public class FakeZooKeeperDatabase extends Database {
+
+ public static class Factory implements DatabaseFactory { // hands out fakes bound to a single controller context
+ private final FleetControllerContext context;
+ public Factory(FleetControllerContext context) {
+ this.context = context;
+ }
+ @Override
+ public Database create(Params params) {
+ return new FakeZooKeeperDatabase(context, params.listener); // only the listener is used; dbAddress and dbSessionTimeout are ignored
+ }
+ }
+
+ private final FleetControllerContext context;
+ private final Database.DatabaseListener listener;
+
+ private final Object mutex = new Object(); // guards every mutable field declared below
+ private boolean closed = false; // set by close(), read only by isClosed()
+ private Integer persistedLatestStateVersion = null; // stays null until storeLatestSystemStateVersion() is called
+ private Map<Integer, Integer> persistedLeaderVotes = new TreeMap<>(); // voting controller index -> index voted for
+ private Map<Node, NodeState> persistedWantedStates = new TreeMap<>();
+ private Map<Node, Long> persistedStartTimestamps = new TreeMap<>();
+ private ClusterStateBundle persistedBundle = ClusterStateBundle.ofBaselineOnly(AnnotatedClusterState.emptyState());
+
+ public FakeZooKeeperDatabase(FleetControllerContext context, DatabaseListener listener) {
+ this.context = context;
+ this.listener = listener;
+ }
+
+ @Override
+ public void close() {
+ synchronized (mutex) {
+ closed = true; // only flips the flag; the store/retrieve methods below do not check it
+ }
+ }
+
+ @Override
+ public boolean isClosed() {
+ synchronized (mutex) {
+ return closed;
+ }
+ }
+
+ @Override
+ public boolean storeMasterVote(int voteForNode) {
+ Map<Integer, Integer> voteState;
+ synchronized (mutex) {
+ persistedLeaderVotes.put(context.id().index(), voteForNode); // vote is recorded under this controller's own index
+ voteState = Map.copyOf(persistedLeaderVotes); // unmodifiable snapshot taken while holding the mutex
+ }
+ listener.handleMasterData(voteState); // listener is notified after the mutex has been released
+ return true; // always reports success
+ }
+
+ @Override
+ public boolean storeLatestSystemStateVersion(int version) {
+ synchronized (mutex) {
+ persistedLatestStateVersion = version;
+ return true; // always reports success
+ }
+ }
+
+ @Override
+ public Integer retrieveLatestSystemStateVersion() {
+ synchronized (mutex) {
+ return persistedLatestStateVersion; // null if no version has been stored yet
+ }
+ }
+
+ @Override
+ public boolean storeWantedStates(Map<Node, NodeState> states) {
+ synchronized (mutex) {
+ persistedWantedStates = Map.copyOf(states); // unmodifiable snapshot replaces the initial TreeMap; iteration order is unspecified
+ }
+ return true; // always reports success
+ }
+
+ @Override
+ public Map<Node, NodeState> retrieveWantedStates() {
+ synchronized (mutex) {
+ return Map.copyOf(persistedWantedStates); // callers receive an unmodifiable map
+ }
+ }
+
+ @Override
+ public boolean storeStartTimestamps(Map<Node, Long> timestamps) {
+ synchronized (mutex) {
+ persistedStartTimestamps = Map.copyOf(timestamps); // unmodifiable snapshot replaces the initial TreeMap; iteration order is unspecified
+ return true; // always reports success
+ }
+ }
+
+ @Override
+ public Map<Node, Long> retrieveStartTimestamps() {
+ synchronized (mutex) {
+ return Map.copyOf(persistedStartTimestamps); // callers receive an unmodifiable map
+ }
+ }
+
+ @Override
+ public boolean storeLastPublishedStateBundle(ClusterStateBundle stateBundle) {
+ synchronized (mutex) {
+ persistedBundle = stateBundle; // reference is kept as-is, no defensive copy
+ return true; // always reports success
+ }
+ }
+
+ @Override
+ public ClusterStateBundle retrieveLastPublishedStateBundle() {
+ synchronized (mutex) {
+ return persistedBundle; // the initial baseline-only empty-state bundle until one is stored
+ }
+ }
+}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
index 400f4a14d24..ee0506070b5 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
@@ -16,7 +16,6 @@ import com.yahoo.vdslib.state.NodeState;
import com.yahoo.vdslib.state.NodeType;
import com.yahoo.vdslib.state.State;
import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler;
-import com.yahoo.vespa.clustercontroller.core.database.ZooKeeperDatabaseFactory;
import com.yahoo.vespa.clustercontroller.core.rpc.RPCCommunicator;
import com.yahoo.vespa.clustercontroller.core.rpc.RpcServer;
import com.yahoo.vespa.clustercontroller.core.rpc.SlobrokClient;
@@ -49,13 +48,14 @@ import static org.junit.jupiter.api.Assertions.fail;
*/
public abstract class FleetControllerTest implements Waiter {
- private static final Logger log = Logger.getLogger(FleetControllerTest.class.getName());
+ protected static final Logger log = Logger.getLogger(FleetControllerTest.class.getName());
private static final int DEFAULT_NODE_COUNT = 10;
private final Duration timeout = Duration.ofSeconds(30);
protected Slobrok slobrok;
protected FleetControllerOptions options;
+ private boolean useRealZooKeeperInTest = true;
ZooKeeperTestServer zooKeeperServer;
protected final List<FleetController> fleetControllers = new ArrayList<>();
protected List<DummyVdsNode> nodes = new ArrayList<>();
@@ -73,6 +73,10 @@ public abstract class FleetControllerTest implements Waiter {
LogSetup.initVespaLogging("fleetcontroller");
}
+ protected void useRealZooKeeperInTest(boolean useRealZk) {
+ this.useRealZooKeeperInTest = useRealZk; // field defaults to true; setUpFleetController() reads it to choose a real ZK server or FakeZooKeeperDatabase
+ }
+
protected static FleetControllerOptions.Builder defaultOptions() {
return defaultOptions(IntStream.range(0, DEFAULT_NODE_COUNT)
.mapToObj(i -> new ConfiguredNode(i, false))
@@ -121,7 +125,7 @@ public abstract class FleetControllerTest implements Waiter {
var log = new EventLog(timer, metricUpdater);
var cluster = new ContentCluster(options.clusterName(), options.nodes(), options.storageDistribution());
var stateGatherer = new NodeStateGatherer(timer, timer, log);
- var database = new DatabaseHandler(context, new ZooKeeperDatabaseFactory(context), timer, options.zooKeeperServerAddress(), timer);
+ var database = new DatabaseHandler(context, options.dbFactoryFn().apply(context), timer, options.zooKeeperServerAddress(), timer);
// Setting this <1000 ms causes ECONNREFUSED on socket trying to connect to ZK server, in ZooKeeper,
// after creating a new ZooKeeper (session). This causes ~10s extra time to connect after connection loss.
// Reasons unknown. Larger values like the default 10_000 causes that much additional running time for some tests.
@@ -139,7 +143,13 @@ public abstract class FleetControllerTest implements Waiter {
}
protected FleetControllerOptions setUpFleetController(Timer timer, FleetControllerOptions.Builder builder) throws Exception {
- setUpZooKeeperServer(builder);
+ // TODO consolidate CC setup in tests; currently partial duplication of
+ // setup/init code across test subclasses.
+ if (useRealZooKeeperInTest) {
+ setUpZooKeeperServer(builder);
+ } else {
+ builder.setDbFactoryFn(FakeZooKeeperDatabase.Factory::new);
+ }
builder.setSlobrokConnectionSpecs(getSlobrokConnectionSpecs(slobrok));
options = builder.build();
startFleetController(timer);
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
index 01987cf1f4c..9e9206526e5 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java
@@ -9,7 +9,6 @@ import com.yahoo.vdslib.state.NodeType;
import com.yahoo.vdslib.state.State;
import com.yahoo.vespa.clustercontroller.core.testutils.StateWaiter;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.extension.ExtendWith;
import java.time.Duration;
import java.util.ArrayList;
@@ -23,7 +22,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
-@ExtendWith(CleanupZookeeperLogsOnSuccess.class)
public class StateChangeTest extends FleetControllerTest {
private final FakeTimer timer = new FakeTimer();
@@ -31,6 +29,10 @@ public class StateChangeTest extends FleetControllerTest {
private FleetController ctrl;
private DummyCommunicator communicator;
+ StateChangeTest() {
+ useRealZooKeeperInTest(false);
+ }
+
private void initialize(FleetControllerOptions.Builder builder) throws Exception {
List<Node> nodes = new ArrayList<>();
for (int i = 0; i < builder.nodes().size(); ++i) {
@@ -38,7 +40,7 @@ public class StateChangeTest extends FleetControllerTest {
nodes.add(new Node(NodeType.DISTRIBUTOR, i));
}
- setUpZooKeeperServer(builder);
+ builder.setDbFactoryFn(FakeZooKeeperDatabase.Factory::new);
communicator = new DummyCommunicator(nodes, timer);
boolean start = false;
FleetControllerOptions options = builder.build();