summaryrefslogtreecommitdiffstats
path: root/metrics-proxy/src/main/java/ai
diff options
context:
space:
mode:
authorgjoranv <gv@verizonmedia.com>2019-06-14 14:19:10 +0200
committergjoranv <gv@verizonmedia.com>2019-06-14 15:34:54 +0200
commitaa81d01c38f89f29a9aa592921229f0f9ee40e93 (patch)
tree51adbdbd4a8826785c3612c9f963e0f30ea2ac83 /metrics-proxy/src/main/java/ai
parent699b6aab27990ee4c6aca0a7241ecb8f42d86d00 (diff)
Propagate service health when metrics could not be retrieved.
- The default status message in a MetricsPacket is now an empty string, so that the default message is not included in the JSON output.
Diffstat (limited to 'metrics-proxy/src/main/java/ai')
-rw-r--r--metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/MetricsManager.java2
-rw-r--r--metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java66
-rw-r--r--metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/HealthMetric.java38
-rw-r--r--metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/MetricsPacket.java3
-rw-r--r--metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/StatusCode.java35
-rw-r--r--metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/GenericJsonUtil.java11
-rw-r--r--metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/GenericService.java12
-rw-r--r--metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/DummyHealthMetricFetcher.java2
-rw-r--r--metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteHealthMetricFetcher.java6
9 files changed, 116 insertions, 59 deletions
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/MetricsManager.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/MetricsManager.java
index fe823c72127..14d1203824b 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/MetricsManager.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/MetricsManager.java
@@ -81,6 +81,8 @@ public class MetricsManager {
*/
public List<MetricsPacket> getMetrics(List<VespaService> services, Instant startTime) {
if (services.isEmpty()) return Collections.emptyList();
+
+ log.log(DEBUG, () -> "Updating services prior to fetching metrics, number of services= " + services.size());
vespaServices.updateServices(services);
List<MetricsPacket.Builder> result = vespaMetrics.getMetrics(services);
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java
index 054fa704ecb..2ca24dad1e2 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java
@@ -14,7 +14,6 @@ import ai.vespa.metricsproxy.metric.model.ConsumerId;
import ai.vespa.metricsproxy.metric.model.DimensionId;
import ai.vespa.metricsproxy.metric.model.MetricsPacket;
import ai.vespa.metricsproxy.service.VespaService;
-import ai.vespa.metricsproxy.service.VespaServices;
import java.util.ArrayList;
import java.util.Collections;
@@ -32,7 +31,6 @@ import static ai.vespa.metricsproxy.metric.model.ConsumerId.toConsumerId;
import static ai.vespa.metricsproxy.metric.model.DimensionId.toDimensionId;
import static ai.vespa.metricsproxy.metric.model.ServiceId.toServiceId;
import static com.google.common.base.Strings.isNullOrEmpty;
-import static com.yahoo.log.LogLevel.DEBUG;
/**
* @author Unknown
@@ -77,8 +75,6 @@ public class VespaMetrics {
public List<MetricsPacket.Builder> getMetrics(List<VespaService> services) {
List<MetricsPacket.Builder> metricsPackets = new ArrayList<>();
- log.log(DEBUG, () -> "Updating services prior to fetching metrics, number of services= " + services.size());
-
Map<ConsumersConfig.Consumer.Metric, List<ConsumerId>> consumersByMetric = metricsConsumers.getConsumersByMetric();
for (VespaService service : services) {
@@ -86,42 +82,58 @@ public class VespaMetrics {
Optional<MetricsPacket.Builder> systemCheck = getSystemMetrics(service);
systemCheck.ifPresent(metricsPackets::add);
- // One metrics packet per set of metrics that share the same dimensions+consumers
- // TODO: Move aggregation into MetricsPacket itself?
- Metrics serviceMetrics = getServiceMetrics(service, consumersByMetric);
- Map<AggregationKey, List<Metric>> aggregatedMetrics =
- aggregateMetrics(service.getDimensions(), serviceMetrics);
-
- aggregatedMetrics.forEach((aggregationKey, metrics) -> {
- MetricsPacket.Builder builder = new MetricsPacket.Builder(toServiceId(service.getMonitoringName()))
- .putMetrics(metrics)
- .putDimension(METRIC_TYPE_DIMENSION_ID, "standard")
- .putDimension(INSTANCE_DIMENSION_ID, service.getInstanceName())
- .putDimensions(aggregationKey.getDimensions());
- setMetaInfo(builder, serviceMetrics.getTimeStamp());
- builder.addConsumers(aggregationKey.getConsumers());
- metricsPackets.add(builder);
- });
+ Metrics allServiceMetrics = service.getMetrics();
+
+ if (! allServiceMetrics.getMetrics().isEmpty()) {
+ Metrics serviceMetrics = getServiceMetrics(allServiceMetrics, consumersByMetric);
+
+ // One metrics packet per set of metrics that share the same dimensions+consumers
+ // TODO: Move aggregation into MetricsPacket itself?
+ Map<AggregationKey, List<Metric>> aggregatedMetrics = aggregateMetrics(service.getDimensions(), serviceMetrics);
+
+ aggregatedMetrics.forEach((aggregationKey, metrics) -> {
+ MetricsPacket.Builder builder = new MetricsPacket.Builder(toServiceId(service.getMonitoringName()))
+ .putMetrics(metrics)
+ .putDimension(METRIC_TYPE_DIMENSION_ID, "standard")
+ .putDimension(INSTANCE_DIMENSION_ID, service.getInstanceName())
+ .putDimensions(aggregationKey.getDimensions());
+ setMetaInfo(builder, serviceMetrics.getTimeStamp());
+ builder.addConsumers(aggregationKey.getConsumers());
+ metricsPackets.add(builder);
+ });
+ } else {
+ // Service did not return any metrics, so add metrics packet based on service health.
+ // TODO: Make VespaService.getMetrics return MetricsPacket and handle health on its own.
+ metricsPackets.add(getHealth(service));
+ }
}
-
return metricsPackets;
}
+ private MetricsPacket.Builder getHealth(VespaService service) {
+ HealthMetric health = service.getHealth();
+ return new MetricsPacket.Builder(toServiceId(service.getMonitoringName()))
+ .timestamp(System.currentTimeMillis() / 1000)
+ .statusCode(health.getStatus().ordinal()) // TODO: MetricsPacket should use StatusCode instead of int
+ .statusMessage(health.getMessage())
+ .putDimensions(service.getDimensions())
+ .putDimension(INSTANCE_DIMENSION_ID, service.getInstanceName());
+ }
+
/**
* Returns the metrics to output for the given service, with updated timestamp
* In order to include a metric, it must exist in the given map of metric to consumers.
* Each returned metric will contain a collection of consumers that it should be routed to.
*/
- private Metrics getServiceMetrics(VespaService service, Map<ConsumersConfig.Consumer.Metric, List<ConsumerId>> consumersByMetric) {
- Metrics serviceMetrics = new Metrics();
- Metrics allServiceMetrics = service.getMetrics();
- serviceMetrics.setTimeStamp(getMostRecentTimestamp(allServiceMetrics));
+ private Metrics getServiceMetrics(Metrics allServiceMetrics, Map<ConsumersConfig.Consumer.Metric, List<ConsumerId>> consumersByMetric) {
+ Metrics configuredServiceMetrics = new Metrics();
+ configuredServiceMetrics.setTimeStamp(getMostRecentTimestamp(allServiceMetrics));
for (Metric candidate : allServiceMetrics.getMetrics()) {
getConfiguredMetrics(candidate.getName(), consumersByMetric.keySet()).forEach(
- configuredMetric -> serviceMetrics.add(
+ configuredMetric -> configuredServiceMetrics.add(
metricWithConfigProperties(candidate, configuredMetric, consumersByMetric)));
}
- return serviceMetrics;
+ return configuredServiceMetrics;
}
private Map<DimensionId, String> extractDimensions(Map<DimensionId, String> dimensions, List<ConsumersConfig.Consumer.Metric.Dimension> configuredDimensions) {
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/HealthMetric.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/HealthMetric.java
index 41a8c3d414e..4961cc8b2a6 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/HealthMetric.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/HealthMetric.java
@@ -4,49 +4,51 @@
package ai.vespa.metricsproxy.metric;
+import ai.vespa.metricsproxy.metric.model.StatusCode;
+
+import static ai.vespa.metricsproxy.metric.model.StatusCode.DOWN;
+import static ai.vespa.metricsproxy.metric.model.StatusCode.UNKNOWN;
+import static ai.vespa.metricsproxy.metric.model.StatusCode.UP;
+
/**
+ * TODO: Use MetricsPacket instead of this class.
+ *
* @author Jo Kristian Bergum
*/
public class HealthMetric {
private final String message;
- private final String status;
+ private final StatusCode status;
private final boolean isAlive;
- private HealthMetric(String status, String message, boolean isAlive) {
+ private HealthMetric(StatusCode status, String message, boolean isAlive) {
this.message = message;
this.status = status;
this.isAlive = isAlive;
}
public static HealthMetric get(String status, String message) {
- if (status == null) {
- status = "";
- }
- if (message == null) {
- message = "";
- }
- status = status.toLowerCase();
+ if (message == null) message = "";
+ var statusCode = StatusCode.fromString(status);
+ return new HealthMetric(statusCode, message, statusCode == UP);
+ }
- if (status.equals("up") || status.equals("ok")) {
- return new HealthMetric(status, message, true);
- } else {
- return new HealthMetric(status, message, false);
- }
+ public static HealthMetric getDown(String message) {
+ return new HealthMetric(DOWN, message, false);
}
- public static HealthMetric getFailed(String message) {
- return new HealthMetric("down", message, false);
+ public static HealthMetric getUnknown(String message) {
+ return new HealthMetric(UNKNOWN, message, false);
}
public static HealthMetric getOk(String message) {
- return new HealthMetric("up", message, true);
+ return new HealthMetric(UP, message, true);
}
public String getMessage() {
return this.message;
}
- public String getStatus() {
+ public StatusCode getStatus() {
return this.status;
}
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/MetricsPacket.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/MetricsPacket.java
index fa45c6251f6..098fd48c8b3 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/MetricsPacket.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/MetricsPacket.java
@@ -14,7 +14,6 @@ import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
-import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
@@ -88,7 +87,7 @@ public class MetricsPacket {
// Except for 'service' for which we require an explicit non-null value.
private ServiceId service;
private int statusCode = 0;
- private String statusMessage = "<null>";
+ private String statusMessage = "";
private long timestamp = 0L;
private Map<MetricId, Number> metrics = new LinkedHashMap<>();
private final Map<DimensionId, String> dimensions = new LinkedHashMap<>();
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/StatusCode.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/StatusCode.java
new file mode 100644
index 00000000000..7f5a7d0e64b
--- /dev/null
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/StatusCode.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ */
+
+package ai.vespa.metricsproxy.metric.model;
+
+/**
+ * Status code for a Vespa service.
+ *
+ * @author gjoranv
+ */
+public enum StatusCode {
+
+ UP(0, "up"),
+ DOWN(1, "down"),
+ UNKNOWN(2, "unknown");
+
+ public final int code;
+ public final String status;
+
+ StatusCode(int code, String status) {
+ this.code = code;
+ this.status = status;
+ }
+
+ public static StatusCode fromString(String statusString) {
+ if ("ok".equalsIgnoreCase(statusString)) return UP;
+ try {
+ return valueOf(statusString.trim().toUpperCase());
+ } catch (IllegalArgumentException | NullPointerException e) {
+ return UNKNOWN;
+ }
+ }
+
+}
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/GenericJsonUtil.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/GenericJsonUtil.java
index 495e3ec1f7d..aadcc1418af 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/GenericJsonUtil.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/GenericJsonUtil.java
@@ -6,6 +6,7 @@ package ai.vespa.metricsproxy.metric.model.json;
import ai.vespa.metricsproxy.metric.model.MetricsPacket;
import ai.vespa.metricsproxy.metric.model.ServiceId;
+import ai.vespa.metricsproxy.metric.model.StatusCode;
import java.util.ArrayList;
import java.util.List;
@@ -34,9 +35,13 @@ public class GenericJsonUtil {
var genericMetricsList = packets.stream()
.map(packet -> new GenericMetrics(packet.metrics(), packet.dimensions()))
.collect(toList());
- var genericService = new GenericService(serviceId.id,
- packets.get(0).timestamp,
- genericMetricsList);
+ var genericService = packets.stream().findFirst()
+ .map(firstPacket -> new GenericService(serviceId.id,
+ firstPacket.timestamp,
+ StatusCode.values()[firstPacket.statusCode],
+ firstPacket.statusMessage,
+ genericMetricsList))
+ .get();
if (VESPA_NODE_SERVICE_ID.equals(serviceId)) {
jsonModel.node = new GenericNode(genericService.timestamp, genericService.metrics);
} else {
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/GenericService.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/GenericService.java
index bd3dbf935ed..f348bd4beca 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/GenericService.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/GenericService.java
@@ -4,12 +4,12 @@
package ai.vespa.metricsproxy.metric.model.json;
+import ai.vespa.metricsproxy.metric.model.StatusCode;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
-import java.util.ArrayList;
import java.util.List;
import static com.fasterxml.jackson.annotation.JsonInclude.Include.NON_ABSENT;
@@ -37,10 +37,11 @@ public class GenericService {
public GenericService() { }
- GenericService(String name, Long timestamp, List<GenericMetrics> metrics) {
+ // TODO: take StatusCode instead of int
+ GenericService(String name, Long timestamp, StatusCode statusCode, String message, List<GenericMetrics> metrics) {
this.name = name;
this.timestamp = timestamp;
- status = new Status("up");
+ status = new Status(statusCode, message);
this.metrics = metrics;
}
@@ -50,8 +51,9 @@ public class GenericService {
public static class Status {
public Status() { }
- Status(String code) {
- this.code = code;
+ Status(StatusCode statusCode, String description) {
+ code = statusCode.status;
+ this.description = description;
}
@JsonProperty("code")
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/DummyHealthMetricFetcher.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/DummyHealthMetricFetcher.java
index f87171a42dc..c9bfc8b365c 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/DummyHealthMetricFetcher.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/DummyHealthMetricFetcher.java
@@ -28,7 +28,7 @@ public class DummyHealthMetricFetcher extends RemoteHealthMetricFetcher {
if (service.isAlive()) {
return HealthMetric.getOk("Service is running - pid check only");
} else {
- return HealthMetric.getFailed("Service is not running - pid check only");
+ return HealthMetric.getDown("Service is not running - pid check only");
}
}
}
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteHealthMetricFetcher.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteHealthMetricFetcher.java
index 16f4a5cf05b..068a8faade8 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteHealthMetricFetcher.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteHealthMetricFetcher.java
@@ -43,7 +43,7 @@ public class RemoteHealthMetricFetcher extends HttpMetricFetcher {
* Connect to remote service over http and fetch metrics
*/
private HealthMetric createHealthMetrics(String data, int fetchCount) {
- HealthMetric healthMetric = HealthMetric.getFailed("Failed fetching status page for service");
+ HealthMetric healthMetric = HealthMetric.getDown("Failed fetching status page for service");
try {
healthMetric = parse(data);
} catch (Exception e) {
@@ -54,7 +54,7 @@ public class RemoteHealthMetricFetcher extends HttpMetricFetcher {
private HealthMetric parse(String data) {
if (data == null || data.isEmpty()) {
- return HealthMetric.getFailed("Empty response from status page");
+ return HealthMetric.getUnknown("Empty response from status page");
}
try {
JSONObject o = new JSONObject(data);
@@ -68,7 +68,7 @@ public class RemoteHealthMetricFetcher extends HttpMetricFetcher {
} catch (JSONException e) {
log.log(LogLevel.DEBUG, "Failed to parse json response from metrics page:" + e + ":" + data);
- return HealthMetric.getFailed("Not able to parse json from status page");
+ return HealthMetric.getUnknown("Not able to parse json from status page");
}
}
}