diff options
Diffstat (limited to 'clustercontroller-core/src')
5 files changed, 90 insertions, 52 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java index 0f3bc6ada88..ec07a83c65e 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java @@ -113,7 +113,7 @@ public class SystemStateBroadcaster { // Store new version in ZooKeeper _before_ publishing to any nodes so that a // cluster controller crash after publishing but before a successful ZK store // will not risk reusing the same version number. - if (!recipients.isEmpty() && !systemState.isOfficial()) { + if (!systemState.isOfficial()) { database.saveLatestSystemStateVersion(dbContext, systemState.getVersion()); systemState.setOfficial(true); } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java index 2b37d29e2ec..355166f6a8d 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java @@ -12,7 +12,6 @@ import com.yahoo.vdslib.state.Node; import com.yahoo.vdslib.state.NodeState; import com.yahoo.vdslib.state.NodeType; import com.yahoo.vdslib.state.State; -import org.junit.After; import org.junit.Ignore; import org.junit.Test; @@ -27,17 +26,8 @@ public class DatabaseTest extends FleetControllerTest { private static Logger log = Logger.getLogger(DatabaseTest.class.getName()); - protected Supervisor supervisor; - - @After - public void tearDown() throws Exception { - if (supervisor != null) { - supervisor.transport().shutdown().join(); - } - super.tearDown(); - } - - private void setWantedState(Node n, NodeState ns, Map<Node, NodeState> wantedStates) { + // Note: different semantics than FleetControllerTest.setWantedState + protected void setWantedState(Node n, NodeState ns, Map<Node, NodeState> wantedStates) { int rpcPort = fleetController.getRpcPort(); if (supervisor == null) { supervisor = new Supervisor(new Transport()); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java index d0aa0bceba9..59e850e0485 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java @@ -1,6 +1,12 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.clustercontroller.core; +import com.yahoo.jrt.Request; +import com.yahoo.jrt.Spec; +import com.yahoo.jrt.StringValue; +import com.yahoo.jrt.Supervisor; +import com.yahoo.jrt.Target; +import com.yahoo.jrt.Transport; import com.yahoo.jrt.slobrok.api.BackOffPolicy; import com.yahoo.jrt.slobrok.server.Slobrok; import com.yahoo.log.LogLevel; @@ -10,6 +16,7 @@ import com.yahoo.vdslib.state.ClusterState; import com.yahoo.vdslib.state.Node; import com.yahoo.vdslib.state.NodeState; import com.yahoo.vdslib.state.NodeType; +import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler; import com.yahoo.vespa.clustercontroller.core.rpc.RPCCommunicator; import com.yahoo.vespa.clustercontroller.core.rpc.RpcServer; @@ -26,6 +33,7 @@ import org.junit.rules.TestRule; import org.junit.rules.TestWatcher; import org.junit.runner.Description; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.ByteArrayOutputStream; @@ -38,13 +46,14 @@ import java.util.regex.Pattern; import java.util.stream.Collectors; /** - * @author humbe + * @author HÃ¥kon Humberset */ public abstract class FleetControllerTest implements Waiter { private static Logger log = Logger.getLogger(FleetControllerTest.class.getName()); protected static final int DEFAULT_NODE_COUNT = 10; + protected Supervisor supervisor; protected FakeTimer timer = new FakeTimer(); protected boolean usingFakeTimer = false; protected Slobrok slobrok; @@ -276,6 +285,9 @@ public abstract class FleetControllerTest implements Waiter { System.err.println("STOPPING TEST " + testName); testName = null; } + if (supervisor != null) { + supervisor.transport().shutdown().join(); + } if (fleetController != null) { fleetController.shutdown(); fleetController = null; @@ -504,4 +516,23 @@ public abstract class FleetControllerTest implements Waiter { .collect(Collectors.toSet()); } + protected void setWantedState(DummyVdsNode node, State state, String reason) { + if (supervisor == null) { + supervisor = new Supervisor(new Transport()); + } + NodeState ns = new NodeState(node.getType(), state); + if (reason != null) ns.setDescription(reason); + Target connection = supervisor.connect(new Spec("localhost", fleetController.getRpcPort())); + Request req = new Request("setNodeState"); + req.parameters().add(new StringValue(node.getSlobrokName())); + req.parameters().add(new StringValue(ns.serialize())); + connection.invokeSync(req, timeoutS); + if (req.isError()) { + assertTrue("Failed to invoke setNodeState(): " + req.errorCode() + ": " + req.errorMessage(), false); + } + if (!req.checkReturnTypes("s")) { + assertTrue("Failed to invoke setNodeState(): Invalid return types.", false); + } + } + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java index d4cb988a986..a298be2a035 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java @@ -9,6 +9,7 @@ import com.yahoo.jrt.Transport; import com.yahoo.jrt.slobrok.server.Slobrok; import com.yahoo.log.LogLevel; import com.yahoo.vdslib.state.ClusterState; +import com.yahoo.vdslib.state.State; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; @@ -20,8 +21,10 @@ import java.util.List; import java.util.concurrent.TimeoutException; import java.util.logging.Logger; +import static org.hamcrest.Matchers.greaterThan; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; public class MasterElectionTest extends FleetControllerTest { @@ -440,4 +443,56 @@ public class MasterElectionTest extends FleetControllerTest { waitForMaster(1); } + /** + * Should always write new version to ZooKeeper, even if the version will not + * be published to any nodes. External services may still observe the version + * number via the cluster REST API, and we should therefore ensure that we never + * risk rolling back the version number in the face of a reelection. + */ + @Test + public void cluster_state_version_written_to_zookeeper_even_with_empty_send_set() throws Exception { + startingTest("MasterElectionTest::cluster_state_version_written_to_zookeeper_even_with_empty_send_set"); + FleetControllerOptions options = new FleetControllerOptions("mycluster"); + options.masterZooKeeperCooldownPeriod = 1; + options.minRatioOfDistributorNodesUp = 0; + options.minRatioOfStorageNodesUp = 0; + options.minDistributorNodesUp = 0; + options.minStorageNodesUp = 1; + setUpFleetController(3, true, options); + setUpVdsNodes(true, new DummyVdsNodeOptions()); + fleetController = fleetControllers.get(0); // Required to prevent waitForStableSystem from NPE'ing + waitForStableSystem(); + waitForMaster(0); + + // Explanation for this convoluted sequence of actions: we want to trigger a scenario where + // we have a cluster state version bump _without_ there being any nodes to send the new state + // to. If there's an "optimization" to skip writing the version to ZooKeeper if there are no + // nodes in the version send set, a newly elected, different master will end up reusing the + // very same version number. + // We mark all nodes' Reported states as down (which means an empty send set, as no nodes are + // online), then mark one storage node as Wanted state as Maintenance. This forces a cluster + // state change. + this.nodes.forEach(n -> { + n.disconnectImmediately(); + waitForCompleteCycle(0); + }); + setWantedState(this.nodes.get(2*10 - 1), State.MAINTENANCE, "bar"); + waitForCompleteCycle(0); + + // This receives the version number of the highest _working_ cluster state, with + // no guarantees that it has been published to any nodes yet. + final long preElectionVersionNumber = fleetControllers.get(0).getSystemState().getVersion(); + + // Nuke controller 1, leaving controller 1 in charge. + // It should have observed the most recently written version number and increase this + // number before publishing its own new state. + fleetControllers.get(0).shutdown(); + waitForMaster(1); + waitForCompleteCycle(1); + + final long postElectionVersionNumber = fleetControllers.get(1).getSystemState().getVersion(); + + assertThat(postElectionVersionNumber, greaterThan(preElectionVersionNumber)); + } + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java index 3c4084d304c..9c928262321 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java @@ -1,50 +1,12 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.clustercontroller.core; -import com.yahoo.jrt.*; -import com.yahoo.jrt.StringValue; -import com.yahoo.vdslib.state.NodeState; import com.yahoo.vdslib.state.State; -import org.junit.After; -import org.junit.Before; import org.junit.Test; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; public class WantedStateTest extends FleetControllerTest { - private Supervisor supervisor; - - @Before - public void setUp() { - supervisor = new Supervisor(new Transport()); - } - - @After - public void tearDown() throws Exception { - if (supervisor != null) { - supervisor.transport().shutdown().join(); - supervisor = null; - } - super.tearDown(); - } - - public void setWantedState(DummyVdsNode node, State state, String reason) { - NodeState ns = new NodeState(node.getType(), state); - if (reason != null) ns.setDescription(reason); - Target connection = supervisor.connect(new Spec("localhost", fleetController.getRpcPort())); - Request req = new Request("setNodeState"); - req.parameters().add(new StringValue(node.getSlobrokName())); - req.parameters().add(new StringValue(ns.serialize())); - connection.invokeSync(req, timeoutS); - if (req.isError()) { - assertTrue("Failed to invoke setNodeState(): " + req.errorCode() + ": " + req.errorMessage(), false); - } - if (!req.checkReturnTypes("s")) { - assertTrue("Failed to invoke setNodeState(): Invalid return types.", false); - } - } - @Test public void testSettingStorageNodeMaintenanceAndBack() throws Exception { startingTest("WantedStateTest::testSettingStorageNodeMaintenanceAndBack()"); |