summaryrefslogtreecommitdiffstats
path: root/clustercontroller-core
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahoo-inc.com>2017-05-12 14:22:26 +0200
committerTor Brede Vekterli <vekterli@yahoo-inc.com>2017-05-12 14:38:36 +0200
commit1a17bb806379046a6ef513c3ef05e45d65028e19 (patch)
tree96bbdf3c26c9c375cd181cc1974bd55ff35e3b93 /clustercontroller-core
parentd25a365c86303637ae6db9a25d3149f87121ca83 (diff)
Always write new cluster state versions to ZooKeeper
Previously, the controller would not write the version to ZK unless the version was published to at least one node. This could lead to problems due to un-written version numbers being visible via the controller's REST APIs. External observers could see versions that were not present in ZK and that would not be stable across reelections. As a consequence, invariants for strictly increasing version numbers would be violated from the perspective of these external observers (in particular, our system test framework).
Diffstat (limited to 'clustercontroller-core')
-rw-r--r--clustercontroller-core/pom.xml5
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java2
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java14
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java33
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java55
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java38
6 files changed, 95 insertions, 52 deletions
diff --git a/clustercontroller-core/pom.xml b/clustercontroller-core/pom.xml
index 053f4efd2a7..42af701ac63 100644
--- a/clustercontroller-core/pom.xml
+++ b/clustercontroller-core/pom.xml
@@ -27,6 +27,11 @@
<scope>test</scope>
</dependency>
<dependency>
+ <groupId>org.hamcrest</groupId>
+ <artifactId>hamcrest-library</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<groupId>org.codehaus.jettison</groupId>
<artifactId>jettison</artifactId>
</dependency>
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java
index 0f3bc6ada88..ec07a83c65e 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java
@@ -113,7 +113,7 @@ public class SystemStateBroadcaster {
// Store new version in ZooKeeper _before_ publishing to any nodes so that a
// cluster controller crash after publishing but before a successful ZK store
// will not risk reusing the same version number.
- if (!recipients.isEmpty() && !systemState.isOfficial()) {
+ if (!systemState.isOfficial()) {
database.saveLatestSystemStateVersion(dbContext, systemState.getVersion());
systemState.setOfficial(true);
}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java
index 2b37d29e2ec..355166f6a8d 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java
@@ -12,7 +12,6 @@ import com.yahoo.vdslib.state.Node;
import com.yahoo.vdslib.state.NodeState;
import com.yahoo.vdslib.state.NodeType;
import com.yahoo.vdslib.state.State;
-import org.junit.After;
import org.junit.Ignore;
import org.junit.Test;
@@ -27,17 +26,8 @@ public class DatabaseTest extends FleetControllerTest {
private static Logger log = Logger.getLogger(DatabaseTest.class.getName());
- protected Supervisor supervisor;
-
- @After
- public void tearDown() throws Exception {
- if (supervisor != null) {
- supervisor.transport().shutdown().join();
- }
- super.tearDown();
- }
-
- private void setWantedState(Node n, NodeState ns, Map<Node, NodeState> wantedStates) {
+ // Note: different semantics than FleetControllerTest.setWantedState
+ protected void setWantedState(Node n, NodeState ns, Map<Node, NodeState> wantedStates) {
int rpcPort = fleetController.getRpcPort();
if (supervisor == null) {
supervisor = new Supervisor(new Transport());
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
index d0aa0bceba9..59e850e0485 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java
@@ -1,6 +1,12 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.clustercontroller.core;
+import com.yahoo.jrt.Request;
+import com.yahoo.jrt.Spec;
+import com.yahoo.jrt.StringValue;
+import com.yahoo.jrt.Supervisor;
+import com.yahoo.jrt.Target;
+import com.yahoo.jrt.Transport;
import com.yahoo.jrt.slobrok.api.BackOffPolicy;
import com.yahoo.jrt.slobrok.server.Slobrok;
import com.yahoo.log.LogLevel;
@@ -10,6 +16,7 @@ import com.yahoo.vdslib.state.ClusterState;
import com.yahoo.vdslib.state.Node;
import com.yahoo.vdslib.state.NodeState;
import com.yahoo.vdslib.state.NodeType;
+import com.yahoo.vdslib.state.State;
import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler;
import com.yahoo.vespa.clustercontroller.core.rpc.RPCCommunicator;
import com.yahoo.vespa.clustercontroller.core.rpc.RpcServer;
@@ -26,6 +33,7 @@ import org.junit.rules.TestRule;
import org.junit.rules.TestWatcher;
import org.junit.runner.Description;
+import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.ByteArrayOutputStream;
@@ -38,13 +46,14 @@ import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
- * @author humbe
+ * @author HÃ¥kon Humberset
*/
public abstract class FleetControllerTest implements Waiter {
private static Logger log = Logger.getLogger(FleetControllerTest.class.getName());
protected static final int DEFAULT_NODE_COUNT = 10;
+ protected Supervisor supervisor;
protected FakeTimer timer = new FakeTimer();
protected boolean usingFakeTimer = false;
protected Slobrok slobrok;
@@ -276,6 +285,9 @@ public abstract class FleetControllerTest implements Waiter {
System.err.println("STOPPING TEST " + testName);
testName = null;
}
+ if (supervisor != null) {
+ supervisor.transport().shutdown().join();
+ }
if (fleetController != null) {
fleetController.shutdown();
fleetController = null;
@@ -504,4 +516,23 @@ public abstract class FleetControllerTest implements Waiter {
.collect(Collectors.toSet());
}
+ protected void setWantedState(DummyVdsNode node, State state, String reason) {
+ if (supervisor == null) {
+ supervisor = new Supervisor(new Transport());
+ }
+ NodeState ns = new NodeState(node.getType(), state);
+ if (reason != null) ns.setDescription(reason);
+ Target connection = supervisor.connect(new Spec("localhost", fleetController.getRpcPort()));
+ Request req = new Request("setNodeState");
+ req.parameters().add(new StringValue(node.getSlobrokName()));
+ req.parameters().add(new StringValue(ns.serialize()));
+ connection.invokeSync(req, timeoutS);
+ if (req.isError()) {
+ assertTrue("Failed to invoke setNodeState(): " + req.errorCode() + ": " + req.errorMessage(), false);
+ }
+ if (!req.checkReturnTypes("s")) {
+ assertTrue("Failed to invoke setNodeState(): Invalid return types.", false);
+ }
+ }
+
}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java
index d4cb988a986..a298be2a035 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java
@@ -9,6 +9,7 @@ import com.yahoo.jrt.Transport;
import com.yahoo.jrt.slobrok.server.Slobrok;
import com.yahoo.log.LogLevel;
import com.yahoo.vdslib.state.ClusterState;
+import com.yahoo.vdslib.state.State;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
@@ -20,8 +21,10 @@ import java.util.List;
import java.util.concurrent.TimeoutException;
import java.util.logging.Logger;
+import static org.hamcrest.Matchers.greaterThan;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
public class MasterElectionTest extends FleetControllerTest {
@@ -440,4 +443,56 @@ public class MasterElectionTest extends FleetControllerTest {
waitForMaster(1);
}
+ /**
+ * Should always write new version to ZooKeeper, even if the version will not
+ * be published to any nodes. External services may still observe the version
+ * number via the cluster REST API, and we should therefore ensure that we never
+ * risk rolling back the version number in the face of a reelection.
+ */
+ @Test
+ public void cluster_state_version_written_to_zookeeper_even_with_empty_send_set() throws Exception {
+ startingTest("MasterElectionTest::cluster_state_version_written_to_zookeeper_even_with_empty_send_set");
+ FleetControllerOptions options = new FleetControllerOptions("mycluster");
+ options.masterZooKeeperCooldownPeriod = 1;
+ options.minRatioOfDistributorNodesUp = 0;
+ options.minRatioOfStorageNodesUp = 0;
+ options.minDistributorNodesUp = 0;
+ options.minStorageNodesUp = 1;
+ setUpFleetController(3, true, options);
+ setUpVdsNodes(true, new DummyVdsNodeOptions());
+ fleetController = fleetControllers.get(0); // Required to prevent waitForStableSystem from NPE'ing
+ waitForStableSystem();
+ waitForMaster(0);
+
+ // Explanation for this convoluted sequence of actions: we want to trigger a scenario where
+ // we have a cluster state version bump _without_ there being any nodes to send the new state
+ // to. If there's an "optimization" to skip writing the version to ZooKeeper if there are no
+ // nodes in the version send set, a newly elected, different master will end up reusing the
+ // very same version number.
+ // We mark all nodes' Reported states as down (which means an empty send set, as no nodes are
+ // online), then mark one storage node as Wanted state as Maintenance. This forces a cluster
+ // state change.
+ this.nodes.forEach(n -> {
+ n.disconnectImmediately();
+ waitForCompleteCycle(0);
+ });
+ setWantedState(this.nodes.get(2*10 - 1), State.MAINTENANCE, "bar");
+ waitForCompleteCycle(0);
+
+ // This receives the version number of the highest _working_ cluster state, with
+ // no guarantees that it has been published to any nodes yet.
+ final long preElectionVersionNumber = fleetControllers.get(0).getSystemState().getVersion();
+
+ // Nuke controller 1, leaving controller 1 in charge.
+ // It should have observed the most recently written version number and increase this
+ // number before publishing its own new state.
+ fleetControllers.get(0).shutdown();
+ waitForMaster(1);
+ waitForCompleteCycle(1);
+
+ final long postElectionVersionNumber = fleetControllers.get(1).getSystemState().getVersion();
+
+ assertThat(postElectionVersionNumber, greaterThan(preElectionVersionNumber));
+ }
+
}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java
index 3c4084d304c..9c928262321 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java
@@ -1,50 +1,12 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.clustercontroller.core;
-import com.yahoo.jrt.*;
-import com.yahoo.jrt.StringValue;
-import com.yahoo.vdslib.state.NodeState;
import com.yahoo.vdslib.state.State;
-import org.junit.After;
-import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
public class WantedStateTest extends FleetControllerTest {
- private Supervisor supervisor;
-
- @Before
- public void setUp() {
- supervisor = new Supervisor(new Transport());
- }
-
- @After
- public void tearDown() throws Exception {
- if (supervisor != null) {
- supervisor.transport().shutdown().join();
- supervisor = null;
- }
- super.tearDown();
- }
-
- public void setWantedState(DummyVdsNode node, State state, String reason) {
- NodeState ns = new NodeState(node.getType(), state);
- if (reason != null) ns.setDescription(reason);
- Target connection = supervisor.connect(new Spec("localhost", fleetController.getRpcPort()));
- Request req = new Request("setNodeState");
- req.parameters().add(new StringValue(node.getSlobrokName()));
- req.parameters().add(new StringValue(ns.serialize()));
- connection.invokeSync(req, timeoutS);
- if (req.isError()) {
- assertTrue("Failed to invoke setNodeState(): " + req.errorCode() + ": " + req.errorMessage(), false);
- }
- if (!req.checkReturnTypes("s")) {
- assertTrue("Failed to invoke setNodeState(): Invalid return types.", false);
- }
- }
-
@Test
public void testSettingStorageNodeMaintenanceAndBack() throws Exception {
startingTest("WantedStateTest::testSettingStorageNodeMaintenanceAndBack()");