diff options
7 files changed, 98 insertions, 30 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/JobControl.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/JobControl.java index 6596d2abb1d..2d641ef57ab 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/JobControl.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/JobControl.java @@ -1,41 +1,43 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; -import com.yahoo.vespa.hosted.provision.persistence.CuratorDatabaseClient; import com.yahoo.vespa.curator.Lock; +import com.yahoo.vespa.hosted.provision.persistence.CuratorDatabaseClient; -import java.util.HashSet; +import java.util.Collections; +import java.util.Map; import java.util.Set; -import java.util.concurrent.ConcurrentSkipListSet; +import java.util.concurrent.ConcurrentSkipListMap; /** * Provides status and control over running maintenance jobs. - * This is multithread safe. + * + * This is multi-thread safe. * * @author bratseth */ public class JobControl { /** This is not stored in ZooKeeper as all nodes start all jobs */ - private final Set<String> startedJobs = new ConcurrentSkipListSet<>(); + private final Map<String, Maintainer> startedJobs = new ConcurrentSkipListMap<>(); /** Used to store deactivation in ZooKeeper to make changes take effect on all nodes */ private final CuratorDatabaseClient db; - + public JobControl(CuratorDatabaseClient db) { this.db = db; } /** Notifies this that a job was started */ - public void started(String jobSimpleClassName) { - startedJobs.add(jobSimpleClassName); + public void started(String jobSimpleClassName, Maintainer maintainer) { + startedJobs.put(jobSimpleClassName, maintainer); } /** * Returns a snapshot of the set of jobs started on this system (whether deactivated or not). * Each job is represented by its simple (omitting package) class name. */ - public Set<String> jobs() { return new HashSet<>(startedJobs); } + public Set<String> jobs() { return Collections.unmodifiableSet(startedJobs.keySet()); } /** Returns a snapshot containing the currently inactive jobs in this */ public Set<String> inactiveJobs() { return db.readInactiveJobs(); } @@ -56,5 +58,12 @@ public class JobControl { db.writeInactiveJobs(inactiveJobs); } } + + /** Run given job (inactive or not) immediately */ + public void run(String jobSimpleClassName) { + var job = startedJobs.get(jobSimpleClassName); + if (job == null) throw new IllegalArgumentException("No such job '" + jobSimpleClassName + "'"); + job.runWithLock(); + } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Maintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Maintainer.java index 71de5931e28..e01f7ea7bf5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Maintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Maintainer.java @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; +import com.google.common.util.concurrent.UncheckedTimeoutException; import com.yahoo.component.AbstractComponent; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.HostName; @@ -43,7 +44,7 @@ public abstract class Maintainer extends AbstractComponent implements Runnable { long delay = staggeredDelay(nodeRepository.database().cluster(), hostname, nodeRepository.clock().instant(), interval); service = new ScheduledThreadPoolExecutor(1); service.scheduleAtFixedRate(this, delay, interval.toMillis(), TimeUnit.MILLISECONDS); - jobControl.started(name()); + jobControl.started(name(), this); } /** Returns the node repository */ @@ -59,8 +60,11 @@ public abstract class Maintainer extends AbstractComponent implements Runnable { @Override public void run() { try { - if (jobControl.isActive(name())) - maintain(); + if (jobControl.isActive(name())) { + runWithLock(); + } + } catch (UncheckedTimeoutException ignored) { + // Another config server or operator is running this job } catch (Throwable e) { log.log(Level.WARNING, this + " failed. Will retry in " + interval.toMinutes() + " minutes", e); } @@ -68,13 +72,29 @@ public abstract class Maintainer extends AbstractComponent implements Runnable { @Override public void deconstruct() { - this.service.shutdown(); + var timeout = Duration.ofSeconds(30); + service.shutdown(); + try { + if (!service.awaitTermination(timeout.toMillis(), TimeUnit.MILLISECONDS)) { + log.log(Level.WARNING, "Maintainer " + name() + " failed to shutdown " + + "within " + timeout); + } + } catch (InterruptedException e) { + throw new RuntimeException(e); + } } /** Returns the simple name of this job */ @Override public final String toString() { return name(); } + /** Run this while holding the job lock */ + public void runWithLock() { + try (var lock = nodeRepository.database().lockMaintenanceJob(name())) { + maintain(); + } + } + /** Called once each time this maintenance job should run */ protected abstract void maintain(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java index 87fc2c6323a..8ecdb0cbb1f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java @@ -366,8 +366,11 @@ public class CuratorDatabaseClient { return curatorDatabase.getData(path).filter(data -> data.length > 0).map(mapper); } - // Maintenance jobs + public Lock lockMaintenanceJob(String jobName) { + return lock(lockRoot.append("maintenanceJobLocks").append(jobName), defaultLockTimeout); + } + public Set<String> readInactiveJobs() { try { return read(inactiveJobsPath(), stringSetSerializer::fromJson).orElseGet(HashSet::new); @@ -554,4 +557,5 @@ public class CuratorDatabaseClient { .mapToObj(i -> firstProvisionIndex + i) .collect(Collectors.toList()); } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/JobsResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/JobsResponse.java index f3d8f42f3b7..4dfdef742d6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/JobsResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/JobsResponse.java @@ -26,7 +26,7 @@ public class JobsResponse extends HttpResponse { Slime slime = new Slime(); Cursor root = slime.setObject(); Cursor jobArray = root.setArray("jobs"); - for (String jobName : new TreeSet<>(jobControl.jobs())) + for (String jobName : jobControl.jobs()) jobArray.addObject().setString("name", jobName); Cursor inactiveArray = root.setArray("inactive"); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesApiHandler.java index afea92f3c60..809e4200e7e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesApiHandler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesApiHandler.java @@ -20,8 +20,8 @@ import com.yahoo.restapi.ResourceResponse; import com.yahoo.slime.ArrayTraverser; import com.yahoo.slime.Inspector; import com.yahoo.slime.Slime; -import com.yahoo.slime.Type; import com.yahoo.slime.SlimeUtils; +import com.yahoo.slime.Type; import com.yahoo.vespa.hosted.provision.NoSuchNodeException; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; @@ -176,11 +176,17 @@ public class NodesApiHandler extends LoggingRequestHandler { return new MessageResponse("Added " + addedNodes + " nodes to the provisioned state"); } if (path.matches("/nodes/v2/maintenance/inactive/{job}")) return setJobActive(path.get("job"), false); + if (path.matches("/nodes/v2/maintenance/run/{job}")) return runJob(path.get("job")); if (path.matches("/nodes/v2/upgrade/firmware")) return requestFirmwareCheckResponse(); throw new NotFoundException("Nothing at path '" + request.getUri().getPath() + "'"); } + private HttpResponse runJob(String job) { + nodeRepository.jobControl().run(job); + return new MessageResponse("Executed job '" + job + "'"); + } + private HttpResponse handleDELETE(HttpRequest request) { Path path = new Path(request.getUri()); if (path.matches("/nodes/v2/node/{hostname}")) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/JobControlTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/JobControlTest.java index 729e7f4cd94..396fcd67034 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/JobControlTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/JobControlTest.java @@ -20,14 +20,16 @@ public class JobControlTest { public void testJobControl() { NodeRepositoryTester tester = new NodeRepositoryTester(); JobControl jobControl = new JobControl(tester.nodeRepository().database()); - + + MockMaintainer maintainer1 = new MockMaintainer(tester.nodeRepository()); + MockMaintainer maintainer2 = new MockMaintainer(tester.nodeRepository()); assertTrue(jobControl.jobs().isEmpty()); String job1 = "Job1"; String job2 = "Job2"; - jobControl.started(job1); - jobControl.started(job2); + jobControl.started(job1, maintainer1); + jobControl.started(job2, maintainer2); assertEquals(2, jobControl.jobs().size()); assertTrue(jobControl.jobs().contains(job1)); assertTrue(jobControl.jobs().contains(job2)); @@ -50,6 +52,18 @@ public class JobControlTest { jobControl.setActive(job2, true); assertTrue(jobControl.isActive(job1)); assertTrue(jobControl.isActive(job2)); + + // Run jobs on-demand + jobControl.run(job1); + jobControl.run(job1); + assertEquals(2, maintainer1.maintenanceInvocations); + jobControl.run(job2); + assertEquals(1, maintainer2.maintenanceInvocations); + + // Running jobs on-demand ignores inactive flag + jobControl.setActive(job1, false); + jobControl.run(job1); + assertEquals(3, maintainer1.maintenanceInvocations); } @Test diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java index fe7c09863e2..0001c344dd3 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java @@ -95,17 +95,6 @@ public class RestApiTest { assertResponseContains(new Request("http://localhost:8080/nodes/v2/node/host2.yahoo.com"), "\"rebootGeneration\":4"); - // POST deactivation of a maintenance job - assertResponse(new Request("http://localhost:8080/nodes/v2/maintenance/inactive/NodeFailer", - new byte[0], Request.Method.POST), - "{\"message\":\"Deactivated job 'NodeFailer'\"}"); - // GET a list of all maintenance jobs - assertFile(new Request("http://localhost:8080/nodes/v2/maintenance/"), "maintenance.json"); - // DELETE deactivation of a maintenance job - assertResponse(new Request("http://localhost:8080/nodes/v2/maintenance/inactive/NodeFailer", - new byte[0], Request.Method.DELETE), - "{\"message\":\"Re-activated job 'NodeFailer'\"}"); - // POST new nodes assertResponse(new Request("http://localhost:8080/nodes/v2/node", ("[" + asNodeJson("host8.yahoo.com", "default", "127.0.8.1") + "," + // test with only 1 ip address @@ -243,6 +232,32 @@ public class RestApiTest { } @Test + public void maintenance_requests() throws Exception { + // POST deactivation of a maintenance job + assertResponse(new Request("http://localhost:8080/nodes/v2/maintenance/inactive/NodeFailer", + new byte[0], Request.Method.POST), + "{\"message\":\"Deactivated job 'NodeFailer'\"}"); + // GET a list of all maintenance jobs + assertFile(new Request("http://localhost:8080/nodes/v2/maintenance/"), "maintenance.json"); + + // DELETE deactivation of a maintenance job + assertResponse(new Request("http://localhost:8080/nodes/v2/maintenance/inactive/NodeFailer", + new byte[0], Request.Method.DELETE), + "{\"message\":\"Re-activated job 'NodeFailer'\"}"); + + // POST run of a maintenance job + assertResponse(new Request("http://localhost:8080/nodes/v2/maintenance/run/PeriodicApplicationMaintainer", + new byte[0], Request.Method.POST), + "{\"message\":\"Executed job 'PeriodicApplicationMaintainer'\"}"); + + // POST run of unknown maintenance job + assertResponse(new Request("http://localhost:8080/nodes/v2/maintenance/run/foo", + new byte[0], Request.Method.POST), + 400, + "{\"error-code\":\"BAD_REQUEST\",\"message\":\"No such job 'foo'\"}"); + } + + @Test public void post_with_patch_method_override_in_header_is_handled_as_patch() throws Exception { Request req = new Request("http://localhost:8080/nodes/v2/node/host4.yahoo.com", Utf8.toBytes("{\"currentRestartGeneration\": 1}"), Request.Method.POST); |