diff options
author | Ola Aunrønning <olaa@verizonmedia.com> | 2020-08-04 08:46:02 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-08-04 08:46:02 +0200 |
commit | 8e823940929ac0907b3c98e9437b6496ef5d8a6a (patch) | |
tree | 510f5874c3ea1a7a969dc88b4572e3643946cce9 /controller-server/src/main | |
parent | d39eea00961a00db40f37b8ec2c55e791ce295c2 (diff) |
Add metric for number of infrastructure instance events (#13970)
Diffstat (limited to 'controller-server/src/main')
2 files changed, 13 insertions, 3 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java index c0d79861fae..402a82a9ca1 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.config.provision.CloudName; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.zone.ZoneApi; +import com.yahoo.jdisc.Metric; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.aws.AwsEventFetcher; import com.yahoo.vespa.hosted.controller.api.integration.aws.CloudEvent; @@ -14,6 +15,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.organization.IssueHandl import java.time.Duration; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.logging.Level; @@ -35,17 +37,22 @@ public class CloudEventReporter extends ControllerMaintainer { private final AwsEventFetcher eventFetcher; private final Map<String, List<ZoneApi>> zonesByCloudNativeRegion; private final NodeRepository nodeRepository; + private final Metric metric; - CloudEventReporter(Controller controller, Duration interval) { + private static final String INFRASTRUCTURE_INSTANCE_EVENTS = "infrastructure_instance_events"; + + CloudEventReporter(Controller controller, Duration interval, Metric metric) { super(controller, interval); this.issueHandler = controller.serviceRegistry().issueHandler(); this.eventFetcher = controller.serviceRegistry().eventFetcherService(); this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository(); this.zonesByCloudNativeRegion = getZonesByCloudNativeRegion(); + this.metric = metric; } @Override protected boolean maintain() { + int numberOfInfrastructureEvents = 0; for (var awsRegion : zonesByCloudNativeRegion.keySet()) { List<CloudEvent> events = eventFetcher.getEvents(awsRegion); for (var event : events) { @@ -53,10 +60,13 @@ public class CloudEventReporter extends ControllerMaintainer { event.instanceEventId, event.affectedInstances)); List<Node> needsManualIntervention = handleInstances(awsRegion, event); - if (!needsManualIntervention.isEmpty()) + if (!needsManualIntervention.isEmpty()) { + numberOfInfrastructureEvents += needsManualIntervention.size(); submitIssue(event); + } } } + metric.set(INFRASTRUCTURE_INSTANCE_EVENTS, numberOfInfrastructureEvents, metric.createContext(Collections.emptyMap())); return true; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java index 10b21ece233..336dc5ddd04 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java @@ -69,7 +69,7 @@ public class ControllerMaintenance extends AbstractComponent { nameServiceDispatcher = new NameServiceDispatcher(controller, Duration.ofSeconds(10)); costReportMaintainer = new CostReportMaintainer(controller, Duration.ofHours(2), controller.serviceRegistry().costReportConsumer()); resourceMeterMaintainer = new ResourceMeterMaintainer(controller, Duration.ofMinutes(1), metric, controller.serviceRegistry().meteringService()); - cloudEventReporter = new CloudEventReporter(controller, Duration.ofMinutes(30)); + cloudEventReporter = new CloudEventReporter(controller, Duration.ofMinutes(30), metric); rotationStatusUpdater = new RotationStatusUpdater(controller, maintenanceInterval); resourceTagMaintainer = new ResourceTagMaintainer(controller, Duration.ofMinutes(30), controller.serviceRegistry().resourceTagger()); systemRoutingPolicyMaintainer = new SystemRoutingPolicyMaintainer(controller, Duration.ofMinutes(10)); |