summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/JobControl.java | 27
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Maintainer.java | 28
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java | 6
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/JobsResponse.java | 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesApiHandler.java | 8
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/JobControlTest.java | 20
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java | 37
7 files changed, 98 insertions, 30 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/JobControl.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/JobControl.java
index 6596d2abb1d..2d641ef57ab 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/JobControl.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/JobControl.java
@@ -1,41 +1,43 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
-import com.yahoo.vespa.hosted.provision.persistence.CuratorDatabaseClient;
import com.yahoo.vespa.curator.Lock;
+import com.yahoo.vespa.hosted.provision.persistence.CuratorDatabaseClient;
-import java.util.HashSet;
+import java.util.Collections;
+import java.util.Map;
import java.util.Set;
-import java.util.concurrent.ConcurrentSkipListSet;
+import java.util.concurrent.ConcurrentSkipListMap;
/**
* Provides status and control over running maintenance jobs.
- * This is multithread safe.
+ *
+ * This is multi-thread safe.
*
* @author bratseth
*/
public class JobControl {
/** This is not stored in ZooKeeper as all nodes start all jobs */
- private final Set<String> startedJobs = new ConcurrentSkipListSet<>();
+ private final Map<String, Maintainer> startedJobs = new ConcurrentSkipListMap<>();
/** Used to store deactivation in ZooKeeper to make changes take effect on all nodes */
private final CuratorDatabaseClient db;
-
+
public JobControl(CuratorDatabaseClient db) {
this.db = db;
}
/** Notifies this that a job was started */
- public void started(String jobSimpleClassName) {
- startedJobs.add(jobSimpleClassName);
+ public void started(String jobSimpleClassName, Maintainer maintainer) {
+ // Keep the maintainer instance, keyed by job name, so run(String) can look it up and trigger it on demand
+ startedJobs.put(jobSimpleClassName, maintainer);
}
/**
* Returns a snapshot of the set of jobs started on this system (whether deactivated or not).
* Each job is represented by its simple (omitting package) class name.
*/
- public Set<String> jobs() { return new HashSet<>(startedJobs); }
+    public Set<String> jobs() {
+        // keySet() alone is a live view of the underlying map; copy it so the result really is
+        // the snapshot promised above. The copy is sorted because callers iterate the result
+        // directly when listing job names.
+        return Collections.unmodifiableSet(new java.util.TreeSet<>(startedJobs.keySet()));
+    }
/** Returns a snapshot containing the currently inactive jobs in this */
public Set<String> inactiveJobs() { return db.readInactiveJobs(); }
@@ -56,5 +58,12 @@ public class JobControl {
db.writeInactiveJobs(inactiveJobs);
}
}
+
+    /**
+     * Runs the given job immediately, whether or not it is currently marked inactive.
+     *
+     * @param jobSimpleClassName the name the job was registered under when it was started
+     * @throws IllegalArgumentException if no job with this name has been started
+     */
+    public void run(String jobSimpleClassName) {
+        Maintainer maintainer = startedJobs.get(jobSimpleClassName);
+        if (maintainer == null) {
+            throw new IllegalArgumentException("No such job '" + jobSimpleClassName + "'");
+        }
+        maintainer.runWithLock();
+    }
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Maintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Maintainer.java
index 71de5931e28..e01f7ea7bf5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Maintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Maintainer.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
+import com.google.common.util.concurrent.UncheckedTimeoutException;
import com.yahoo.component.AbstractComponent;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.HostName;
@@ -43,7 +44,7 @@ public abstract class Maintainer extends AbstractComponent implements Runnable {
long delay = staggeredDelay(nodeRepository.database().cluster(), hostname, nodeRepository.clock().instant(), interval);
service = new ScheduledThreadPoolExecutor(1);
service.scheduleAtFixedRate(this, delay, interval.toMillis(), TimeUnit.MILLISECONDS);
- jobControl.started(name());
+ jobControl.started(name(), this);
}
/** Returns the node repository */
@@ -59,8 +60,11 @@ public abstract class Maintainer extends AbstractComponent implements Runnable {
@Override
public void run() {
try {
- if (jobControl.isActive(name()))
- maintain();
+ if (jobControl.isActive(name())) {
+ runWithLock();
+ }
+ } catch (UncheckedTimeoutException ignored) {
+ // Another config server or operator is running this job
} catch (Throwable e) {
log.log(Level.WARNING, this + " failed. Will retry in " + interval.toMinutes() + " minutes", e);
}
@@ -68,13 +72,29 @@ public abstract class Maintainer extends AbstractComponent implements Runnable {
@Override
public void deconstruct() {
- this.service.shutdown();
+        // Stop scheduling further runs, then wait a bounded time for the executor to terminate
+        var timeout = Duration.ofSeconds(30);
+        service.shutdown();
+        try {
+            if (!service.awaitTermination(timeout.toMillis(), TimeUnit.MILLISECONDS)) {
+                log.log(Level.WARNING, "Maintainer " + name() + " failed to shutdown " +
+                                       "within " + timeout);
+            }
+        } catch (InterruptedException e) {
+            // Catching InterruptedException clears the interrupt flag; restore it so callers
+            // further up the stack can still observe that this thread was interrupted
+            Thread.currentThread().interrupt();
+            throw new RuntimeException(e);
+        }
}
/** Returns the simple name of this job */
@Override
public final String toString() { return name(); }
+    /**
+     * Runs one round of maintenance while holding the maintenance job lock keyed by this job's name.
+     * The lock is released when maintenance returns or throws.
+     * NOTE(review): acquiring the lock presumably throws UncheckedTimeoutException when it is held
+     * elsewhere for too long (run() catches that type) - confirm against the lock implementation.
+     */
+    public void runWithLock() {
+        try (var jobLock = nodeRepository.database().lockMaintenanceJob(name())) {
+            maintain();
+        }
+    }
+
/** Called once each time this maintenance job should run */
protected abstract void maintain();
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
index 87fc2c6323a..8ecdb0cbb1f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
@@ -366,8 +366,11 @@ public class CuratorDatabaseClient {
return curatorDatabase.getData(path).filter(data -> data.length > 0).map(mapper);
}
-
// Maintenance jobs
+    /** Acquires the lock for the maintenance job with the given name, using the default lock timeout */
+    public Lock lockMaintenanceJob(String jobName) {
+        var jobLockPath = lockRoot.append("maintenanceJobLocks").append(jobName);
+        return lock(jobLockPath, defaultLockTimeout);
+    }
+
public Set<String> readInactiveJobs() {
try {
return read(inactiveJobsPath(), stringSetSerializer::fromJson).orElseGet(HashSet::new);
@@ -554,4 +557,5 @@ public class CuratorDatabaseClient {
.mapToObj(i -> firstProvisionIndex + i)
.collect(Collectors.toList());
}
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/JobsResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/JobsResponse.java
index f3d8f42f3b7..4dfdef742d6 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/JobsResponse.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/JobsResponse.java
@@ -26,7 +26,7 @@ public class JobsResponse extends HttpResponse {
Slime slime = new Slime();
Cursor root = slime.setObject();
Cursor jobArray = root.setArray("jobs");
- for (String jobName : new TreeSet<>(jobControl.jobs()))
+ for (String jobName : jobControl.jobs())
jobArray.addObject().setString("name", jobName);
Cursor inactiveArray = root.setArray("inactive");
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesApiHandler.java
index afea92f3c60..809e4200e7e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesApiHandler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesApiHandler.java
@@ -20,8 +20,8 @@ import com.yahoo.restapi.ResourceResponse;
import com.yahoo.slime.ArrayTraverser;
import com.yahoo.slime.Inspector;
import com.yahoo.slime.Slime;
-import com.yahoo.slime.Type;
import com.yahoo.slime.SlimeUtils;
+import com.yahoo.slime.Type;
import com.yahoo.vespa.hosted.provision.NoSuchNodeException;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
@@ -176,11 +176,17 @@ public class NodesApiHandler extends LoggingRequestHandler {
return new MessageResponse("Added " + addedNodes + " nodes to the provisioned state");
}
if (path.matches("/nodes/v2/maintenance/inactive/{job}")) return setJobActive(path.get("job"), false);
+ if (path.matches("/nodes/v2/maintenance/run/{job}")) return runJob(path.get("job"));
if (path.matches("/nodes/v2/upgrade/firmware")) return requestFirmwareCheckResponse();
throw new NotFoundException("Nothing at path '" + request.getUri().getPath() + "'");
}
+    /** Runs the named maintenance job through job control and responds with a confirmation message */
+    private HttpResponse runJob(String job) {
+        nodeRepository.jobControl().run(job);
+        String confirmation = "Executed job '" + job + "'";
+        return new MessageResponse(confirmation);
+    }
+
private HttpResponse handleDELETE(HttpRequest request) {
Path path = new Path(request.getUri());
if (path.matches("/nodes/v2/node/{hostname}")) {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/JobControlTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/JobControlTest.java
index 729e7f4cd94..396fcd67034 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/JobControlTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/JobControlTest.java
@@ -20,14 +20,16 @@ public class JobControlTest {
public void testJobControl() {
NodeRepositoryTester tester = new NodeRepositoryTester();
JobControl jobControl = new JobControl(tester.nodeRepository().database());
-
+
+ MockMaintainer maintainer1 = new MockMaintainer(tester.nodeRepository());
+ MockMaintainer maintainer2 = new MockMaintainer(tester.nodeRepository());
assertTrue(jobControl.jobs().isEmpty());
String job1 = "Job1";
String job2 = "Job2";
- jobControl.started(job1);
- jobControl.started(job2);
+ jobControl.started(job1, maintainer1);
+ jobControl.started(job2, maintainer2);
assertEquals(2, jobControl.jobs().size());
assertTrue(jobControl.jobs().contains(job1));
assertTrue(jobControl.jobs().contains(job2));
@@ -50,6 +52,18 @@ public class JobControlTest {
jobControl.setActive(job2, true);
assertTrue(jobControl.isActive(job1));
assertTrue(jobControl.isActive(job2));
+
+ // Run jobs on-demand
+ jobControl.run(job1);
+ jobControl.run(job1);
+ assertEquals(2, maintainer1.maintenanceInvocations);
+ jobControl.run(job2);
+ assertEquals(1, maintainer2.maintenanceInvocations);
+
+ // Running jobs on-demand ignores inactive flag
+ jobControl.setActive(job1, false);
+ jobControl.run(job1);
+ assertEquals(3, maintainer1.maintenanceInvocations);
}
@Test
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java
index fe7c09863e2..0001c344dd3 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java
@@ -95,17 +95,6 @@ public class RestApiTest {
assertResponseContains(new Request("http://localhost:8080/nodes/v2/node/host2.yahoo.com"),
"\"rebootGeneration\":4");
- // POST deactivation of a maintenance job
- assertResponse(new Request("http://localhost:8080/nodes/v2/maintenance/inactive/NodeFailer",
- new byte[0], Request.Method.POST),
- "{\"message\":\"Deactivated job 'NodeFailer'\"}");
- // GET a list of all maintenance jobs
- assertFile(new Request("http://localhost:8080/nodes/v2/maintenance/"), "maintenance.json");
- // DELETE deactivation of a maintenance job
- assertResponse(new Request("http://localhost:8080/nodes/v2/maintenance/inactive/NodeFailer",
- new byte[0], Request.Method.DELETE),
- "{\"message\":\"Re-activated job 'NodeFailer'\"}");
-
// POST new nodes
assertResponse(new Request("http://localhost:8080/nodes/v2/node",
("[" + asNodeJson("host8.yahoo.com", "default", "127.0.8.1") + "," + // test with only 1 ip address
@@ -243,6 +232,32 @@ public class RestApiTest {
}
@Test
+    public void maintenance_requests() throws Exception {
+        String maintenanceUrl = "http://localhost:8080/nodes/v2/maintenance/";
+        byte[] emptyBody = new byte[0];
+
+        // Deactivating a job is done by POSTing to its 'inactive' resource
+        assertResponse(new Request(maintenanceUrl + "inactive/NodeFailer", emptyBody, Request.Method.POST),
+                       "{\"message\":\"Deactivated job 'NodeFailer'\"}");
+
+        // The job listing is compared against the stored expected response
+        assertFile(new Request(maintenanceUrl), "maintenance.json");
+
+        // Deleting the 'inactive' resource re-activates the job
+        assertResponse(new Request(maintenanceUrl + "inactive/NodeFailer", emptyBody, Request.Method.DELETE),
+                       "{\"message\":\"Re-activated job 'NodeFailer'\"}");
+
+        // A known job can be run on demand
+        assertResponse(new Request(maintenanceUrl + "run/PeriodicApplicationMaintainer", emptyBody, Request.Method.POST),
+                       "{\"message\":\"Executed job 'PeriodicApplicationMaintainer'\"}");
+
+        // Running an unknown job is rejected as a bad request
+        assertResponse(new Request(maintenanceUrl + "run/foo", emptyBody, Request.Method.POST),
+                       400,
+                       "{\"error-code\":\"BAD_REQUEST\",\"message\":\"No such job 'foo'\"}");
+    }
+
+ @Test
public void post_with_patch_method_override_in_header_is_handled_as_patch() throws Exception {
Request req = new Request("http://localhost:8080/nodes/v2/node/host4.yahoo.com",
Utf8.toBytes("{\"currentRestartGeneration\": 1}"), Request.Method.POST);