summaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorMartin Polden <martin.polden@gmail.com>2017-02-08 13:04:00 +0100
committerMartin Polden <martin.polden@gmail.com>2017-02-08 13:15:24 +0100
commit3a1362d08027b36ec20e29a4ab570864f793abdf (patch)
treef9a6fba2981dc3ab98131d0c5b209b9ea6d9b1ed /node-repository
parent1d2fda3acf3035d628aa71bcb82bbcb88f09d5ad (diff)
Fix race condition in application maintainer
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java37
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainerTest.java69
2 files changed, 86 insertions, 20 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
index 32478473111..2d5b5be4f37 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
@@ -4,13 +4,14 @@ package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.Deployment;
-import com.yahoo.config.provision.NodeType;
+import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import java.time.Duration;
import java.util.Optional;
import java.util.Set;
+import java.util.function.Function;
import java.util.logging.Level;
import java.util.stream.Collectors;
@@ -26,24 +27,36 @@ import java.util.stream.Collectors;
public class ApplicationMaintainer extends Maintainer {
private final Deployer deployer;
+ private final Function<NodeRepository, Set<ApplicationId>> activeApplicationsGetter;
public ApplicationMaintainer(Deployer deployer, NodeRepository nodeRepository, Duration rate) {
+ this(deployer, nodeRepository, rate, ApplicationMaintainer::getActiveApplications);
+ }
+
+ ApplicationMaintainer(Deployer deployer, NodeRepository nodeRepository, Duration rate,
+ Function<NodeRepository, Set<ApplicationId>> activeApplicationsGetter) {
super(nodeRepository, rate);
this.deployer = deployer;
+ this.activeApplicationsGetter = activeApplicationsGetter;
}
@Override
protected void maintain() {
- Set<ApplicationId> applications =
- nodeRepository().getNodes(Node.State.active).stream().map(node -> node.allocation().get().owner()).collect(Collectors.toSet());
+ Set<ApplicationId> applications = activeApplicationsGetter.apply(nodeRepository());
for (ApplicationId application : applications) {
try {
- Optional<Deployment> deployment = deployer.deployFromLocalActive(application, Duration.ofMinutes(30));
- if ( ! deployment.isPresent()) continue; // this will be done at another config server
+ // An application might change it's state between the time the set of applications is retrieved and the
+ // time deployment happens. Lock on application and check if it's still active
+ try (Mutex lock = nodeRepository().lock(application)) {
+ if (isApplicationActive(application)) {
+ Optional<Deployment> deployment = deployer.deployFromLocalActive(application, Duration.ofMinutes(30));
+ if ( ! deployment.isPresent()) continue; // this will be done at another config server
- deployment.get().prepare();
- deployment.get().activate();
+ deployment.get().prepare();
+ deployment.get().activate();
+ }
+ }
}
catch (RuntimeException e) {
log.log(Level.WARNING, "Exception on maintenance redeploy of " + application, e);
@@ -54,4 +67,14 @@ public class ApplicationMaintainer extends Maintainer {
@Override
public String toString() { return "Periodic application redeployer"; }
+ private boolean isApplicationActive(ApplicationId application) {
+ return !nodeRepository().getNodes(application, Node.State.active).isEmpty();
+ }
+
+ static Set<ApplicationId> getActiveApplications(NodeRepository nodeRepository) {
+ return nodeRepository.getNodes(Node.State.active).stream()
+ .map(node -> node.allocation().get().owner())
+ .collect(Collectors.toSet());
+ }
+
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainerTest.java
index e2bf9ede545..549ea90a4e2 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainerTest.java
@@ -8,6 +8,7 @@ import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.HostSpec;
import com.yahoo.config.provision.InstanceName;
+import com.yahoo.config.provision.NodeFlavors;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.TenantName;
@@ -16,14 +17,14 @@ import com.yahoo.test.ManualClock;
import com.yahoo.transaction.NestedTransaction;
import com.yahoo.vespa.curator.Curator;
import com.yahoo.vespa.curator.mock.MockCurator;
+import com.yahoo.vespa.curator.transaction.CuratorTransaction;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
-import com.yahoo.config.provision.NodeFlavors;
import com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner;
-import com.yahoo.vespa.curator.transaction.CuratorTransaction;
import com.yahoo.vespa.hosted.provision.testutils.FlavorConfigBuilder;
import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver;
+import org.junit.Before;
import org.junit.Test;
import java.time.Duration;
@@ -32,6 +33,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals;
@@ -40,21 +44,25 @@ import static org.junit.Assert.assertEquals;
*/
public class ApplicationMaintainerTest {
- private Curator curator = new MockCurator();
+ private static final NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("default");
- @Test
- public void test_application_maintenance() throws InterruptedException {
- ManualClock clock = new ManualClock();
+ private NodeRepository nodeRepository;
+ private Fixture fixture;
+
+ @Before
+ public void before() {
+ Curator curator = new MockCurator();
Zone zone = new Zone(Environment.prod, RegionName.from("us-east"));
- NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("default");
- NodeRepository nodeRepository = new NodeRepository(nodeFlavors, curator, clock, zone,
- new MockNameResolver().mockAnyLookup());
+ this.nodeRepository = new NodeRepository(nodeFlavors, curator, new ManualClock(), zone,
+ new MockNameResolver().mockAnyLookup());
+ this.fixture = new Fixture(zone, nodeRepository, nodeFlavors, curator);
+ }
+ @Test
+ public void test_application_maintenance() throws InterruptedException {
createReadyNodes(15, nodeRepository, nodeFlavors);
createHostNodes(2, nodeRepository, nodeFlavors);
- Fixture fixture = new Fixture(zone, nodeRepository, nodeFlavors);
-
// Create applications
fixture.activate();
@@ -96,6 +104,29 @@ public class ApplicationMaintainerTest {
reactivatedInApp1, fixture.getNodes(Node.State.inactive).size());
}
+ @Test
+ public void deleted_application_is_not_reactivated() {
+ createReadyNodes(15, nodeRepository, nodeFlavors);
+ createHostNodes(2, nodeRepository, nodeFlavors);
+
+ // Create applications
+ fixture.activate();
+
+ // Freeze active nodes to simulate an application being deleted during a maintenance run
+ Set<ApplicationId> frozenActiveApplications = nodeRepository.getNodes(Node.State.active).stream()
+ .map(n -> n.allocation().get().owner())
+ .collect(Collectors.toSet());
+
+ // Remove one application without letting the application maintainer know about it
+ fixture.remove(fixture.app2);
+ assertEquals(fixture.wantedNodesApp2, nodeRepository.getNodes(fixture.app2, Node.State.inactive).size());
+
+ // Nodes belonging to app2 are inactive after maintenance
+ fixture.runApplicationMaintainer((ignored) -> frozenActiveApplications);
+ assertEquals("Inactive nodes were incorrectly activated after maintenance", fixture.wantedNodesApp2,
+ nodeRepository.getNodes(fixture.app2, Node.State.inactive).size());
+ }
+
private void createReadyNodes(int count, NodeRepository nodeRepository, NodeFlavors nodeFlavors) {
List<Node> nodes = new ArrayList<>(count);
for (int i = 0; i < count; i++)
@@ -118,6 +149,7 @@ public class ApplicationMaintainerTest {
final NodeRepository nodeRepository;
final NodeRepositoryProvisioner provisioner;
+ final Curator curator;
final ApplicationId app1 = ApplicationId.from(TenantName.from("foo1"), ApplicationName.from("bar"), InstanceName.from("fuz"));
final ApplicationId app2 = ApplicationId.from(TenantName.from("foo2"), ApplicationName.from("bar"), InstanceName.from("fuz"));
@@ -126,8 +158,9 @@ public class ApplicationMaintainerTest {
final int wantedNodesApp1 = 5;
final int wantedNodesApp2 = 7;
- Fixture(Zone zone, NodeRepository nodeRepository, NodeFlavors flavors) {
+ Fixture(Zone zone, NodeRepository nodeRepository, NodeFlavors flavors, Curator curator) {
this.nodeRepository = nodeRepository;
+ this.curator = curator;
this.provisioner = new NodeRepositoryProvisioner(nodeRepository, flavors, zone);
}
@@ -145,14 +178,24 @@ public class ApplicationMaintainerTest {
transaction.commit();
}
+ void remove(ApplicationId application) {
+ NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(curator));
+ provisioner.remove(transaction, application);
+ transaction.commit();
+ }
+
void runApplicationMaintainer() {
+ runApplicationMaintainer(ApplicationMaintainer::getActiveApplications);
+ }
+
+ void runApplicationMaintainer(Function<NodeRepository, Set<ApplicationId>> activeApplicationsGetter) {
Map<ApplicationId, MockDeployer.ApplicationContext> apps = new HashMap<>();
apps.put(app1, new MockDeployer.ApplicationContext(app1, clusterApp1,
Capacity.fromNodeCount(wantedNodesApp1, Optional.of("default")), 1));
apps.put(app2, new MockDeployer.ApplicationContext(app2, clusterApp2,
Capacity.fromNodeCount(wantedNodesApp2, Optional.of("default")), 1));
MockDeployer deployer = new MockDeployer(provisioner, apps);
- new ApplicationMaintainer(deployer, nodeRepository, Duration.ofMinutes(30)).run();
+ new ApplicationMaintainer(deployer, nodeRepository, Duration.ofMinutes(30), activeApplicationsGetter).run();
}
NodeList getNodes(Node.State ... states) {