summary | refs | log | tree | commit | diff | stats
path: root/controller-server
diff options
context:
space:
mode:
author Jon Marius Venstad <venstad@gmail.com> 2020-01-20 16:02:53 +0100
committer Jon Marius Venstad <venstad@gmail.com> 2020-01-20 16:02:53 +0100
commit 79002706787ca4c1def5e5ef50c4ee1340e06dd0 (patch)
tree 59cc27192e075471b8890427d83785094023588c /controller-server
parent 5ee42fd430e45766def38d1c0d412781e393f2e2 (diff)
Emit metrics on job starts and ends, with the run status as the metric name and the job id as dimensions.
Diffstat (limited to 'controller-server')
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java 29
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java 5
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java 64
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java 4
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java 43
5 files changed, 128 insertions, 17 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java
index 4f6fe2ac2db..d3e21f0d399 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java
@@ -9,6 +9,7 @@ import com.yahoo.config.provision.CloudName;
import com.yahoo.config.provision.HostName;
import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.zone.ZoneApi;
+import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.curator.Lock;
import com.yahoo.vespa.flags.FlagSource;
import com.yahoo.vespa.hosted.controller.api.integration.ApplicationIdSnapshot;
@@ -70,6 +71,7 @@ public class Controller extends AbstractComponent implements ApplicationIdSource
private final FlagSource flagSource;
private final NameServiceForwarder nameServiceForwarder;
private final MavenRepository mavenRepository;
+ private final Metric metric;
/**
* Creates a controller
@@ -77,22 +79,15 @@ public class Controller extends AbstractComponent implements ApplicationIdSource
* @param curator the curator instance storing the persistent state of the controller.
*/
@Inject
- public Controller(CuratorDb curator, RotationsConfig rotationsConfig,
- AccessControl accessControl,
- FlagSource flagSource,
- MavenRepository mavenRepository,
- ServiceRegistry serviceRegistry) {
- this(curator, rotationsConfig,
- accessControl,
- com.yahoo.net.HostName::getLocalhost, flagSource,
- mavenRepository, serviceRegistry);
+ public Controller(CuratorDb curator, RotationsConfig rotationsConfig, AccessControl accessControl, FlagSource flagSource,
+ MavenRepository mavenRepository, ServiceRegistry serviceRegistry, Metric metric) {
+ this(curator, rotationsConfig, accessControl, com.yahoo.net.HostName::getLocalhost, flagSource,
+ mavenRepository, serviceRegistry, metric);
}
- public Controller(CuratorDb curator, RotationsConfig rotationsConfig,
- AccessControl accessControl,
- Supplier<String> hostnameSupplier,
- FlagSource flagSource, MavenRepository mavenRepository,
- ServiceRegistry serviceRegistry) {
+ public Controller(CuratorDb curator, RotationsConfig rotationsConfig, AccessControl accessControl,
+ Supplier<String> hostnameSupplier, FlagSource flagSource, MavenRepository mavenRepository,
+ ServiceRegistry serviceRegistry, Metric metric) {
this.hostnameSupplier = Objects.requireNonNull(hostnameSupplier, "HostnameSupplier cannot be null");
this.curator = Objects.requireNonNull(curator, "Curator cannot be null");
@@ -101,7 +96,7 @@ public class Controller extends AbstractComponent implements ApplicationIdSource
this.clock = Objects.requireNonNull(serviceRegistry.clock(), "Clock cannot be null");
this.flagSource = Objects.requireNonNull(flagSource, "FlagSource cannot be null");
this.mavenRepository = Objects.requireNonNull(mavenRepository, "MavenRepository cannot be null");
-
+ this.metric = Objects.requireNonNull(metric, "Metric cannot be null");
metrics = new ConfigServerMetrics(serviceRegistry.configServer());
nameServiceForwarder = new NameServiceForwarder(curator);
@@ -265,6 +260,10 @@ public class Controller extends AbstractComponent implements ApplicationIdSource
return auditLogger;
}
+ public Metric metric() {
+ return metric;
+ }
+
private Set<CloudName> clouds() {
return zoneRegistry.zones().all().zones().stream()
.map(ZoneApi::getCloudName)
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
index 811daed256e..b23d16767be 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
@@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.controller.deployment;
import com.yahoo.component.Version;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.zone.ZoneId;
+import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.curator.Lock;
import com.yahoo.vespa.hosted.controller.Application;
import com.yahoo.vespa.hosted.controller.Controller;
@@ -79,6 +80,7 @@ public class JobController {
private final BufferedLogStore logs;
private final TesterCloud cloud;
private final Badges badges;
+ private final JobMetrics metric;
private AtomicReference<Consumer<Run>> runner = new AtomicReference<>(__ -> { });
@@ -88,6 +90,7 @@ public class JobController {
this.logs = new BufferedLogStore(curator, controller.serviceRegistry().runDataStore());
this.cloud = controller.serviceRegistry().testerCloud();
this.badges = new Badges(controller.zoneRegistry().badgeUrl());
+ this.metric = new JobMetrics(controller.metric(), controller.system());
}
public TesterCloud cloud() { return cloud; }
@@ -360,6 +363,7 @@ public class JobController {
}
});
logs.flush(id);
+ metric.jobFinished(run.id().job(), finishedRun.status());
return finishedRun;
});
}
@@ -416,6 +420,7 @@ public class JobController {
RunId newId = new RunId(id, type, last.map(run -> run.id().number()).orElse(0L) + 1);
curator.writeLastRun(Run.initial(newId, versions, controller.clock().instant()));
+ metric.jobStarted(newId.job());
});
});
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java
new file mode 100644
index 00000000000..2468167d488
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java
@@ -0,0 +1,64 @@
+package com.yahoo.vespa.hosted.controller.deployment;
+
+import com.yahoo.config.provision.SystemName;
+import com.yahoo.jdisc.Metric;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId;
+
+import java.util.Map;
+
+/**
+ * Records metrics related to deployment jobs: adds 1 to a start metric when a job starts, and 1 to a metric named after the run status when a job finishes.
+ *
+ * @author jommv
+ */
+public class JobMetrics {
+
+ public static final String start = "deployment.start"; // Metric name used by jobStarted.
+ public static final String outOfCapacity = "deployment.outOfCapacity"; // This and the names below are selected by valueOf(RunStatus).
+ public static final String deploymentFailure = "deployment.deploymentFailure";
+ public static final String convergenceFailure = "deployment.convergenceFailure"; // Reported for RunStatus.installationFailed.
+ public static final String testFailure = "deployment.testFailure";
+ public static final String error = "deployment.error";
+ public static final String abort = "deployment.abort";
+ public static final String success = "deployment.success";
+
+ private final Metric metric; // Sink that all metric values are added to.
+ private final SystemName system; // Only used by contextOf, to resolve the zone of a job type.
+
+ public JobMetrics(Metric metric, SystemName system) { // Stores both arguments as given; no null checks are done here.
+ this.metric = metric;
+ this.system = system;
+ }
+
+ public void jobStarted(JobId id) { // Adds 1 to the start metric, with the dimensions from contextOf(id) as context.
+ metric.add(start, 1, metric.createContext(contextOf(id)));
+ }
+
+ public void jobFinished(JobId id, RunStatus status) { // Adds 1 to the metric named by valueOf(status); throws IllegalArgumentException when valueOf rejects the status.
+ metric.add(valueOf(status), 1, metric.createContext(contextOf(id)));
+ }
+
+ Map<String, String> contextOf(JobId id) { // Builds the metric dimensions for a job: tenant, application, instance, job, environment and region.
+ return Map.of("tenant", id.application().tenant().value(),
+ "application", id.application().application().value(),
+ "instance", id.application().instance().value(),
+ "job", id.type().jobName(),
+ "environment", id.type().environment().value(),
+ "region", id.type().zone(system).region().value()); // The region is looked up through the zone of the job type in this system.
+ }
+
+ static String valueOf(RunStatus status) { // Maps a run status to the name of the metric counting runs with that status.
+ switch (status) {
+ case outOfCapacity: return outOfCapacity;
+ case deploymentFailed: return deploymentFailure;
+ case installationFailed: return convergenceFailure;
+ case testFailure: return testFailure;
+ case error: return error;
+ case aborted: return abort;
+ case success: return success;
+ case running: throw new IllegalArgumentException("Not supposed to get this value"); // No metric name is defined for running.
+ default: throw new IllegalArgumentException("Unexpected run status '" + status + "'"); // Guards against statuses not listed above.
+ }
+ }
+
+}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java
index 84bdedba33c..dbeb96337d1 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java
@@ -32,6 +32,7 @@ import com.yahoo.vespa.hosted.controller.application.SystemApplication;
import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId;
import com.yahoo.vespa.hosted.controller.athenz.impl.AthenzFacade;
import com.yahoo.vespa.hosted.controller.integration.ConfigServerMock;
+import com.yahoo.vespa.hosted.controller.integration.MetricsMock;
import com.yahoo.vespa.hosted.controller.integration.ServiceRegistryMock;
import com.yahoo.vespa.hosted.controller.integration.ZoneRegistryMock;
import com.yahoo.vespa.hosted.controller.persistence.CuratorDb;
@@ -392,7 +393,8 @@ public final class ControllerTester {
() -> "test-controller",
new InMemoryFlagSource(),
new MockMavenRepository(),
- serviceRegistry);
+ serviceRegistry,
+ new MetricsMock());
// Calculate initial versions
controller.updateVersionStatus(VersionStatus.compute(controller));
return controller;
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java
index 21e4735f7bf..9de0020ce4a 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java
@@ -11,12 +11,14 @@ import com.yahoo.vespa.hosted.controller.application.ApplicationPackage;
import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId;
import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester;
import com.yahoo.vespa.hosted.controller.deployment.JobController;
+import com.yahoo.vespa.hosted.controller.deployment.JobMetrics;
import com.yahoo.vespa.hosted.controller.deployment.Run;
import com.yahoo.vespa.hosted.controller.deployment.RunStatus;
import com.yahoo.vespa.hosted.controller.deployment.Step;
import com.yahoo.vespa.hosted.controller.deployment.Step.Status;
import com.yahoo.vespa.hosted.controller.deployment.StepRunner;
import com.yahoo.vespa.hosted.controller.deployment.Versions;
+import com.yahoo.vespa.hosted.controller.integration.MetricsMock;
import org.junit.Test;
import java.time.Duration;
@@ -43,6 +45,7 @@ import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobTy
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.aborted;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.error;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.running;
+import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.success;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.testFailure;
import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.failed;
import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.succeeded;
@@ -59,6 +62,7 @@ import static com.yahoo.vespa.hosted.controller.deployment.Step.report;
import static com.yahoo.vespa.hosted.controller.deployment.Step.startTests;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -348,7 +352,44 @@ public class JobRunnerTest {
jobs.start(id, systemTest, versions);
tester.clock().advance(JobRunner.jobTimeout.plus(Duration.ofSeconds(1)));
runner.run();
- assertTrue(jobs.last(id, systemTest).get().status() == aborted);
+ assertSame(aborted, jobs.last(id, systemTest).get().status());
+ }
+
+ @Test
+ public void jobMetrics() { // Checks the start and per-status metrics recorded while running system-test once per configured outcome.
+ DeploymentTester tester = new DeploymentTester();
+ JobController jobs = tester.controller().jobController();
+ Map<Step, RunStatus> outcomes = new EnumMap<>(Step.class); // Handed to mappedRunner below; presumably decides the outcome of each step (TODO confirm against mappedRunner).
+ JobRunner runner = new JobRunner(tester.controller(), Duration.ofDays(1), new JobControl(tester.controller().curator()),
+ inThreadExecutor(), mappedRunner(outcomes));
+
+ TenantAndApplicationId appId = tester.createApplication("tenant", "real", "default").id();
+ ApplicationId id = appId.defaultInstance();
+ jobs.submit(appId, versions.targetApplication().source(), Optional.empty(), Optional.empty(), Optional.empty(), 2, applicationPackage, new byte[0]);
+
+ for (RunStatus status : RunStatus.values()) { // One started and finished run per status, except success.
+ if (status == success) continue; // Status not used for steps.
+ outcomes.put(deployTester, status);
+ jobs.start(id, systemTest, versions);
+ runner.run();
+ jobs.finish(jobs.last(id, systemTest).get().id());
+ }
+
+ Map<String, String> context = Map.of("tenant", "tenant", // Expected dimensions; the keys match those built by JobMetrics.contextOf.
+ "application", "real",
+ "instance", "default",
+ "job", "system-test",
+ "environment", "test",
+ "region", "us-east-1");
+ MetricsMock metric = ((MetricsMock) tester.controller().metric());
+ assertEquals(RunStatus.values().length - 1, metric.getMetric(context::equals, JobMetrics.start).get().intValue()); // One start per loop iteration, i.e., every status but success.
+ assertEquals(1, metric.getMetric(context::equals, JobMetrics.abort).get().intValue());
+ assertEquals(1, metric.getMetric(context::equals, JobMetrics.error).get().intValue());
+ assertEquals(1, metric.getMetric(context::equals, JobMetrics.success).get().intValue()); // Presumably from the iteration with step outcome running (TODO confirm).
+ assertEquals(1, metric.getMetric(context::equals, JobMetrics.convergenceFailure).get().intValue());
+ assertEquals(1, metric.getMetric(context::equals, JobMetrics.deploymentFailure).get().intValue());
+ assertEquals(1, metric.getMetric(context::equals, JobMetrics.outOfCapacity).get().intValue());
+ assertEquals(1, metric.getMetric(context::equals, JobMetrics.testFailure).get().intValue());
}
public static ExecutorService inThreadExecutor() {