diff options
author | Jon Marius Venstad <venstad@gmail.com> | 2020-01-20 16:02:53 +0100 |
---|---|---|
committer | Jon Marius Venstad <venstad@gmail.com> | 2020-01-20 16:02:53 +0100 |
commit | 79002706787ca4c1def5e5ef50c4ee1340e06dd0 (patch) | |
tree | 59cc27192e075471b8890427d83785094023588c /controller-server | |
parent | 5ee42fd430e45766def38d1c0d412781e393f2e2 (diff) |
Emit metrics on job starts and ends, with the run status as the metric name and the job id as dimensions.
Diffstat (limited to 'controller-server')
5 files changed, 128 insertions, 17 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java index 4f6fe2ac2db..d3e21f0d399 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java @@ -9,6 +9,7 @@ import com.yahoo.config.provision.CloudName; import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.zone.ZoneApi; +import com.yahoo.jdisc.Metric; import com.yahoo.vespa.curator.Lock; import com.yahoo.vespa.flags.FlagSource; import com.yahoo.vespa.hosted.controller.api.integration.ApplicationIdSnapshot; @@ -70,6 +71,7 @@ public class Controller extends AbstractComponent implements ApplicationIdSource private final FlagSource flagSource; private final NameServiceForwarder nameServiceForwarder; private final MavenRepository mavenRepository; + private final Metric metric; /** * Creates a controller @@ -77,22 +79,15 @@ public class Controller extends AbstractComponent implements ApplicationIdSource * @param curator the curator instance storing the persistent state of the controller. 
*/ @Inject - public Controller(CuratorDb curator, RotationsConfig rotationsConfig, - AccessControl accessControl, - FlagSource flagSource, - MavenRepository mavenRepository, - ServiceRegistry serviceRegistry) { - this(curator, rotationsConfig, - accessControl, - com.yahoo.net.HostName::getLocalhost, flagSource, - mavenRepository, serviceRegistry); + public Controller(CuratorDb curator, RotationsConfig rotationsConfig, AccessControl accessControl, FlagSource flagSource, + MavenRepository mavenRepository, ServiceRegistry serviceRegistry, Metric metric) { + this(curator, rotationsConfig, accessControl, com.yahoo.net.HostName::getLocalhost, flagSource, + mavenRepository, serviceRegistry, metric); } - public Controller(CuratorDb curator, RotationsConfig rotationsConfig, - AccessControl accessControl, - Supplier<String> hostnameSupplier, - FlagSource flagSource, MavenRepository mavenRepository, - ServiceRegistry serviceRegistry) { + public Controller(CuratorDb curator, RotationsConfig rotationsConfig, AccessControl accessControl, + Supplier<String> hostnameSupplier, FlagSource flagSource, MavenRepository mavenRepository, + ServiceRegistry serviceRegistry, Metric metric) { this.hostnameSupplier = Objects.requireNonNull(hostnameSupplier, "HostnameSupplier cannot be null"); this.curator = Objects.requireNonNull(curator, "Curator cannot be null"); @@ -101,7 +96,7 @@ public class Controller extends AbstractComponent implements ApplicationIdSource this.clock = Objects.requireNonNull(serviceRegistry.clock(), "Clock cannot be null"); this.flagSource = Objects.requireNonNull(flagSource, "FlagSource cannot be null"); this.mavenRepository = Objects.requireNonNull(mavenRepository, "MavenRepository cannot be null"); - + this.metric = Objects.requireNonNull(metric, "Metric cannot be null"); metrics = new ConfigServerMetrics(serviceRegistry.configServer()); nameServiceForwarder = new NameServiceForwarder(curator); @@ -265,6 +260,10 @@ public class Controller extends AbstractComponent 
implements ApplicationIdSource return auditLogger; } + public Metric metric() { + return metric; + } + private Set<CloudName> clouds() { return zoneRegistry.zones().all().zones().stream() .map(ZoneApi::getCloudName) diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java index 811daed256e..b23d16767be 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.controller.deployment; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.zone.ZoneId; +import com.yahoo.jdisc.Metric; import com.yahoo.vespa.curator.Lock; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.Controller; @@ -79,6 +80,7 @@ public class JobController { private final BufferedLogStore logs; private final TesterCloud cloud; private final Badges badges; + private final JobMetrics metric; private AtomicReference<Consumer<Run>> runner = new AtomicReference<>(__ -> { }); @@ -88,6 +90,7 @@ public class JobController { this.logs = new BufferedLogStore(curator, controller.serviceRegistry().runDataStore()); this.cloud = controller.serviceRegistry().testerCloud(); this.badges = new Badges(controller.zoneRegistry().badgeUrl()); + this.metric = new JobMetrics(controller.metric(), controller.system()); } public TesterCloud cloud() { return cloud; } @@ -360,6 +363,7 @@ public class JobController { } }); logs.flush(id); + metric.jobFinished(run.id().job(), finishedRun.status()); return finishedRun; }); } @@ -416,6 +420,7 @@ public class JobController { RunId newId = new RunId(id, type, last.map(run -> run.id().number()).orElse(0L) + 1); 
curator.writeLastRun(Run.initial(newId, versions, controller.clock().instant())); + metric.jobStarted(newId.job()); }); }); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java new file mode 100644 index 00000000000..2468167d488 --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java @@ -0,0 +1,64 @@ +package com.yahoo.vespa.hosted.controller.deployment; + +import com.yahoo.config.provision.SystemName; +import com.yahoo.jdisc.Metric; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId; + +import java.util.Map; + +/** + * Records metrics related to deployment jobs. + * + * @author jommv + */ +public class JobMetrics { + + public static final String start = "deployment.start"; + public static final String outOfCapacity = "deployment.outOfCapacity"; + public static final String deploymentFailure = "deployment.deploymentFailure"; + public static final String convergenceFailure = "deployment.convergenceFailure"; + public static final String testFailure = "deployment.testFailure"; + public static final String error = "deployment.error"; + public static final String abort = "deployment.abort"; + public static final String success = "deployment.success"; + + private final Metric metric; + private final SystemName system; + + public JobMetrics(Metric metric, SystemName system) { + this.metric = metric; + this.system = system; + } + + public void jobStarted(JobId id) { + metric.add(start, 1, metric.createContext(contextOf(id))); + } + + public void jobFinished(JobId id, RunStatus status) { + metric.add(valueOf(status), 1, metric.createContext(contextOf(id))); + } + + Map<String, String> contextOf(JobId id) { + return Map.of("tenant", id.application().tenant().value(), + "application", id.application().application().value(), + "instance", 
id.application().instance().value(), + "job", id.type().jobName(), + "environment", id.type().environment().value(), + "region", id.type().zone(system).region().value()); + } + + static String valueOf(RunStatus status) { + switch (status) { + case outOfCapacity: return outOfCapacity; + case deploymentFailed: return deploymentFailure; + case installationFailed: return convergenceFailure; + case testFailure: return testFailure; + case error: return error; + case aborted: return abort; + case success: return success; + case running: throw new IllegalArgumentException("Not supposed to get this value"); + default: throw new IllegalArgumentException("Unexpected run status '" + status + "'"); + } + } + +} diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java index 84bdedba33c..dbeb96337d1 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java @@ -32,6 +32,7 @@ import com.yahoo.vespa.hosted.controller.application.SystemApplication; import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; import com.yahoo.vespa.hosted.controller.athenz.impl.AthenzFacade; import com.yahoo.vespa.hosted.controller.integration.ConfigServerMock; +import com.yahoo.vespa.hosted.controller.integration.MetricsMock; import com.yahoo.vespa.hosted.controller.integration.ServiceRegistryMock; import com.yahoo.vespa.hosted.controller.integration.ZoneRegistryMock; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; @@ -392,7 +393,8 @@ public final class ControllerTester { () -> "test-controller", new InMemoryFlagSource(), new MockMavenRepository(), - serviceRegistry); + serviceRegistry, + new MetricsMock()); // Calculate initial versions controller.updateVersionStatus(VersionStatus.compute(controller)); 
return controller; diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java index 21e4735f7bf..9de0020ce4a 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java @@ -11,12 +11,14 @@ import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester; import com.yahoo.vespa.hosted.controller.deployment.JobController; +import com.yahoo.vespa.hosted.controller.deployment.JobMetrics; import com.yahoo.vespa.hosted.controller.deployment.Run; import com.yahoo.vespa.hosted.controller.deployment.RunStatus; import com.yahoo.vespa.hosted.controller.deployment.Step; import com.yahoo.vespa.hosted.controller.deployment.Step.Status; import com.yahoo.vespa.hosted.controller.deployment.StepRunner; import com.yahoo.vespa.hosted.controller.deployment.Versions; +import com.yahoo.vespa.hosted.controller.integration.MetricsMock; import org.junit.Test; import java.time.Duration; @@ -43,6 +45,7 @@ import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobTy import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.aborted; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.error; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.running; +import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.success; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.testFailure; import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.failed; import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.succeeded; @@ -59,6 +62,7 
@@ import static com.yahoo.vespa.hosted.controller.deployment.Step.report; import static com.yahoo.vespa.hosted.controller.deployment.Step.startTests; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -348,7 +352,44 @@ public class JobRunnerTest { jobs.start(id, systemTest, versions); tester.clock().advance(JobRunner.jobTimeout.plus(Duration.ofSeconds(1))); runner.run(); - assertTrue(jobs.last(id, systemTest).get().status() == aborted); + assertSame(aborted, jobs.last(id, systemTest).get().status()); + } + + @Test + public void jobMetrics() { + DeploymentTester tester = new DeploymentTester(); + JobController jobs = tester.controller().jobController(); + Map<Step, RunStatus> outcomes = new EnumMap<>(Step.class); + JobRunner runner = new JobRunner(tester.controller(), Duration.ofDays(1), new JobControl(tester.controller().curator()), + inThreadExecutor(), mappedRunner(outcomes)); + + TenantAndApplicationId appId = tester.createApplication("tenant", "real", "default").id(); + ApplicationId id = appId.defaultInstance(); + jobs.submit(appId, versions.targetApplication().source(), Optional.empty(), Optional.empty(), Optional.empty(), 2, applicationPackage, new byte[0]); + + for (RunStatus status : RunStatus.values()) { + if (status == success) continue; // Status not used for steps. 
+ outcomes.put(deployTester, status); + jobs.start(id, systemTest, versions); + runner.run(); + jobs.finish(jobs.last(id, systemTest).get().id()); + } + + Map<String, String> context = Map.of("tenant", "tenant", + "application", "real", + "instance", "default", + "job", "system-test", + "environment", "test", + "region", "us-east-1"); + MetricsMock metric = ((MetricsMock) tester.controller().metric()); + assertEquals(RunStatus.values().length - 1, metric.getMetric(context::equals, JobMetrics.start).get().intValue()); + assertEquals(1, metric.getMetric(context::equals, JobMetrics.abort).get().intValue()); + assertEquals(1, metric.getMetric(context::equals, JobMetrics.error).get().intValue()); + assertEquals(1, metric.getMetric(context::equals, JobMetrics.success).get().intValue()); + assertEquals(1, metric.getMetric(context::equals, JobMetrics.convergenceFailure).get().intValue()); + assertEquals(1, metric.getMetric(context::equals, JobMetrics.deploymentFailure).get().intValue()); + assertEquals(1, metric.getMetric(context::equals, JobMetrics.outOfCapacity).get().intValue()); + assertEquals(1, metric.getMetric(context::equals, JobMetrics.testFailure).get().intValue()); } public static ExecutorService inThreadExecutor() { |