diff options
author | gjoranv <gv@verizonmedia.com> | 2019-06-14 14:19:10 +0200 |
---|---|---|
committer | gjoranv <gv@verizonmedia.com> | 2019-06-14 15:34:54 +0200 |
commit | aa81d01c38f89f29a9aa592921229f0f9ee40e93 (patch) | |
tree | 51adbdbd4a8826785c3612c9f963e0f30ea2ac83 /metrics-proxy/src/main | |
parent | 699b6aab27990ee4c6aca0a7241ecb8f42d86d00 (diff) |
Propagate service health when metrics could not be retrieved.
- The default status message in a MetricsPacket is now the empty string,
to prevent the default message from being included in JSON output.
Diffstat (limited to 'metrics-proxy/src/main')
9 files changed, 116 insertions, 59 deletions
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/MetricsManager.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/MetricsManager.java index fe823c72127..14d1203824b 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/MetricsManager.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/MetricsManager.java @@ -81,6 +81,8 @@ public class MetricsManager { */ public List<MetricsPacket> getMetrics(List<VespaService> services, Instant startTime) { if (services.isEmpty()) return Collections.emptyList(); + + log.log(DEBUG, () -> "Updating services prior to fetching metrics, number of services= " + services.size()); vespaServices.updateServices(services); List<MetricsPacket.Builder> result = vespaMetrics.getMetrics(services); diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java index 054fa704ecb..2ca24dad1e2 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java @@ -14,7 +14,6 @@ import ai.vespa.metricsproxy.metric.model.ConsumerId; import ai.vespa.metricsproxy.metric.model.DimensionId; import ai.vespa.metricsproxy.metric.model.MetricsPacket; import ai.vespa.metricsproxy.service.VespaService; -import ai.vespa.metricsproxy.service.VespaServices; import java.util.ArrayList; import java.util.Collections; @@ -32,7 +31,6 @@ import static ai.vespa.metricsproxy.metric.model.ConsumerId.toConsumerId; import static ai.vespa.metricsproxy.metric.model.DimensionId.toDimensionId; import static ai.vespa.metricsproxy.metric.model.ServiceId.toServiceId; import static com.google.common.base.Strings.isNullOrEmpty; -import static com.yahoo.log.LogLevel.DEBUG; /** * @author Unknown @@ -77,8 +75,6 @@ public class VespaMetrics { public List<MetricsPacket.Builder> getMetrics(List<VespaService> services) { 
List<MetricsPacket.Builder> metricsPackets = new ArrayList<>(); - log.log(DEBUG, () -> "Updating services prior to fetching metrics, number of services= " + services.size()); - Map<ConsumersConfig.Consumer.Metric, List<ConsumerId>> consumersByMetric = metricsConsumers.getConsumersByMetric(); for (VespaService service : services) { @@ -86,42 +82,58 @@ public class VespaMetrics { Optional<MetricsPacket.Builder> systemCheck = getSystemMetrics(service); systemCheck.ifPresent(metricsPackets::add); - // One metrics packet per set of metrics that share the same dimensions+consumers - // TODO: Move aggregation into MetricsPacket itself? - Metrics serviceMetrics = getServiceMetrics(service, consumersByMetric); - Map<AggregationKey, List<Metric>> aggregatedMetrics = - aggregateMetrics(service.getDimensions(), serviceMetrics); - - aggregatedMetrics.forEach((aggregationKey, metrics) -> { - MetricsPacket.Builder builder = new MetricsPacket.Builder(toServiceId(service.getMonitoringName())) - .putMetrics(metrics) - .putDimension(METRIC_TYPE_DIMENSION_ID, "standard") - .putDimension(INSTANCE_DIMENSION_ID, service.getInstanceName()) - .putDimensions(aggregationKey.getDimensions()); - setMetaInfo(builder, serviceMetrics.getTimeStamp()); - builder.addConsumers(aggregationKey.getConsumers()); - metricsPackets.add(builder); - }); + Metrics allServiceMetrics = service.getMetrics(); + + if (! allServiceMetrics.getMetrics().isEmpty()) { + Metrics serviceMetrics = getServiceMetrics(allServiceMetrics, consumersByMetric); + + // One metrics packet per set of metrics that share the same dimensions+consumers + // TODO: Move aggregation into MetricsPacket itself? 
+ Map<AggregationKey, List<Metric>> aggregatedMetrics = aggregateMetrics(service.getDimensions(), serviceMetrics); + + aggregatedMetrics.forEach((aggregationKey, metrics) -> { + MetricsPacket.Builder builder = new MetricsPacket.Builder(toServiceId(service.getMonitoringName())) + .putMetrics(metrics) + .putDimension(METRIC_TYPE_DIMENSION_ID, "standard") + .putDimension(INSTANCE_DIMENSION_ID, service.getInstanceName()) + .putDimensions(aggregationKey.getDimensions()); + setMetaInfo(builder, serviceMetrics.getTimeStamp()); + builder.addConsumers(aggregationKey.getConsumers()); + metricsPackets.add(builder); + }); + } else { + // Service did not return any metrics, so add metrics packet based on service health. + // TODO: Make VespaService.getMetrics return MetricsPacket and handle health on its own. + metricsPackets.add(getHealth(service)); + } } - return metricsPackets; } + private MetricsPacket.Builder getHealth(VespaService service) { + HealthMetric health = service.getHealth(); + return new MetricsPacket.Builder(toServiceId(service.getMonitoringName())) + .timestamp(System.currentTimeMillis() / 1000) + .statusCode(health.getStatus().ordinal()) // TODO: MetricsPacket should use StatusCode instead of int + .statusMessage(health.getMessage()) + .putDimensions(service.getDimensions()) + .putDimension(INSTANCE_DIMENSION_ID, service.getInstanceName()); + } + /** * Returns the metrics to output for the given service, with updated timestamp * In order to include a metric, it must exist in the given map of metric to consumers. * Each returned metric will contain a collection of consumers that it should be routed to. 
*/ - private Metrics getServiceMetrics(VespaService service, Map<ConsumersConfig.Consumer.Metric, List<ConsumerId>> consumersByMetric) { - Metrics serviceMetrics = new Metrics(); - Metrics allServiceMetrics = service.getMetrics(); - serviceMetrics.setTimeStamp(getMostRecentTimestamp(allServiceMetrics)); + private Metrics getServiceMetrics(Metrics allServiceMetrics, Map<ConsumersConfig.Consumer.Metric, List<ConsumerId>> consumersByMetric) { + Metrics configuredServiceMetrics = new Metrics(); + configuredServiceMetrics.setTimeStamp(getMostRecentTimestamp(allServiceMetrics)); for (Metric candidate : allServiceMetrics.getMetrics()) { getConfiguredMetrics(candidate.getName(), consumersByMetric.keySet()).forEach( - configuredMetric -> serviceMetrics.add( + configuredMetric -> configuredServiceMetrics.add( metricWithConfigProperties(candidate, configuredMetric, consumersByMetric))); } - return serviceMetrics; + return configuredServiceMetrics; } private Map<DimensionId, String> extractDimensions(Map<DimensionId, String> dimensions, List<ConsumersConfig.Consumer.Metric.Dimension> configuredDimensions) { diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/HealthMetric.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/HealthMetric.java index 41a8c3d414e..4961cc8b2a6 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/HealthMetric.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/HealthMetric.java @@ -4,49 +4,51 @@ package ai.vespa.metricsproxy.metric; +import ai.vespa.metricsproxy.metric.model.StatusCode; + +import static ai.vespa.metricsproxy.metric.model.StatusCode.DOWN; +import static ai.vespa.metricsproxy.metric.model.StatusCode.UNKNOWN; +import static ai.vespa.metricsproxy.metric.model.StatusCode.UP; + /** + * TODO: Use MetricsPacket instead of this class. 
+ * * @author Jo Kristian Bergum */ public class HealthMetric { private final String message; - private final String status; + private final StatusCode status; private final boolean isAlive; - private HealthMetric(String status, String message, boolean isAlive) { + private HealthMetric(StatusCode status, String message, boolean isAlive) { this.message = message; this.status = status; this.isAlive = isAlive; } public static HealthMetric get(String status, String message) { - if (status == null) { - status = ""; - } - if (message == null) { - message = ""; - } - status = status.toLowerCase(); + if (message == null) message = ""; + var statusCode = StatusCode.fromString(status); + return new HealthMetric(statusCode, message, statusCode == UP); + } - if (status.equals("up") || status.equals("ok")) { - return new HealthMetric(status, message, true); - } else { - return new HealthMetric(status, message, false); - } + public static HealthMetric getDown(String message) { + return new HealthMetric(DOWN, message, false); } - public static HealthMetric getFailed(String message) { - return new HealthMetric("down", message, false); + public static HealthMetric getUnknown(String message) { + return new HealthMetric(UNKNOWN, message, false); } public static HealthMetric getOk(String message) { - return new HealthMetric("up", message, true); + return new HealthMetric(UP, message, true); } public String getMessage() { return this.message; } - public String getStatus() { + public StatusCode getStatus() { return this.status; } diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/MetricsPacket.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/MetricsPacket.java index fa45c6251f6..098fd48c8b3 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/MetricsPacket.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/MetricsPacket.java @@ -14,7 +14,6 @@ import java.util.LinkedHashSet; import java.util.List; 
import java.util.Map; import java.util.Objects; -import java.util.Optional; import java.util.Set; import java.util.function.Function; @@ -88,7 +87,7 @@ public class MetricsPacket { // Except for 'service' for which we require an explicit non-null value. private ServiceId service; private int statusCode = 0; - private String statusMessage = "<null>"; + private String statusMessage = ""; private long timestamp = 0L; private Map<MetricId, Number> metrics = new LinkedHashMap<>(); private final Map<DimensionId, String> dimensions = new LinkedHashMap<>(); diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/StatusCode.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/StatusCode.java new file mode 100644 index 00000000000..7f5a7d0e64b --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/StatusCode.java @@ -0,0 +1,35 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.metric.model; + +/** + * Status code for a Vespa service. 
+ * + * @author gjoranv + */ +public enum StatusCode { + + UP(0, "up"), + DOWN(1, "down"), + UNKNOWN(2, "unknown"); + + public final int code; + public final String status; + + StatusCode(int code, String status) { + this.code = code; + this.status = status; + } + + public static StatusCode fromString(String statusString) { + if ("ok".equalsIgnoreCase(statusString)) return UP; + try { + return valueOf(statusString.trim().toUpperCase()); + } catch (IllegalArgumentException | NullPointerException e) { + return UNKNOWN; + } + } + +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/GenericJsonUtil.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/GenericJsonUtil.java index 495e3ec1f7d..aadcc1418af 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/GenericJsonUtil.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/GenericJsonUtil.java @@ -6,6 +6,7 @@ package ai.vespa.metricsproxy.metric.model.json; import ai.vespa.metricsproxy.metric.model.MetricsPacket; import ai.vespa.metricsproxy.metric.model.ServiceId; +import ai.vespa.metricsproxy.metric.model.StatusCode; import java.util.ArrayList; import java.util.List; @@ -34,9 +35,13 @@ public class GenericJsonUtil { var genericMetricsList = packets.stream() .map(packet -> new GenericMetrics(packet.metrics(), packet.dimensions())) .collect(toList()); - var genericService = new GenericService(serviceId.id, - packets.get(0).timestamp, - genericMetricsList); + var genericService = packets.stream().findFirst() + .map(firstPacket -> new GenericService(serviceId.id, + firstPacket.timestamp, + StatusCode.values()[firstPacket.statusCode], + firstPacket.statusMessage, + genericMetricsList)) + .get(); if (VESPA_NODE_SERVICE_ID.equals(serviceId)) { jsonModel.node = new GenericNode(genericService.timestamp, genericService.metrics); } else { diff --git 
a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/GenericService.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/GenericService.java index bd3dbf935ed..f348bd4beca 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/GenericService.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/GenericService.java @@ -4,12 +4,12 @@ package ai.vespa.metricsproxy.metric.model.json; +import ai.vespa.metricsproxy.metric.model.StatusCode; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonPropertyOrder; -import java.util.ArrayList; import java.util.List; import static com.fasterxml.jackson.annotation.JsonInclude.Include.NON_ABSENT; @@ -37,10 +37,11 @@ public class GenericService { public GenericService() { } - GenericService(String name, Long timestamp, List<GenericMetrics> metrics) { + // TODO: take StatusCode instead of int + GenericService(String name, Long timestamp, StatusCode statusCode, String message, List<GenericMetrics> metrics) { this.name = name; this.timestamp = timestamp; - status = new Status("up"); + status = new Status(statusCode, message); this.metrics = metrics; } @@ -50,8 +51,9 @@ public class GenericService { public static class Status { public Status() { } - Status(String code) { - this.code = code; + Status(StatusCode statusCode, String description) { + code = statusCode.status; + this.description = description; } @JsonProperty("code") diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/DummyHealthMetricFetcher.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/DummyHealthMetricFetcher.java index f87171a42dc..c9bfc8b365c 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/DummyHealthMetricFetcher.java +++ 
b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/DummyHealthMetricFetcher.java @@ -28,7 +28,7 @@ public class DummyHealthMetricFetcher extends RemoteHealthMetricFetcher { if (service.isAlive()) { return HealthMetric.getOk("Service is running - pid check only"); } else { - return HealthMetric.getFailed("Service is not running - pid check only"); + return HealthMetric.getDown("Service is not running - pid check only"); } } } diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteHealthMetricFetcher.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteHealthMetricFetcher.java index 16f4a5cf05b..068a8faade8 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteHealthMetricFetcher.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteHealthMetricFetcher.java @@ -43,7 +43,7 @@ public class RemoteHealthMetricFetcher extends HttpMetricFetcher { * Connect to remote service over http and fetch metrics */ private HealthMetric createHealthMetrics(String data, int fetchCount) { - HealthMetric healthMetric = HealthMetric.getFailed("Failed fetching status page for service"); + HealthMetric healthMetric = HealthMetric.getDown("Failed fetching status page for service"); try { healthMetric = parse(data); } catch (Exception e) { @@ -54,7 +54,7 @@ public class RemoteHealthMetricFetcher extends HttpMetricFetcher { private HealthMetric parse(String data) { if (data == null || data.isEmpty()) { - return HealthMetric.getFailed("Empty response from status page"); + return HealthMetric.getUnknown("Empty response from status page"); } try { JSONObject o = new JSONObject(data); @@ -68,7 +68,7 @@ public class RemoteHealthMetricFetcher extends HttpMetricFetcher { } catch (JSONException e) { log.log(LogLevel.DEBUG, "Failed to parse json response from metrics page:" + e + ":" + data); - return HealthMetric.getFailed("Not able to parse json from status page"); + return HealthMetric.getUnknown("Not able 
to parse json from status page"); } } } |