diff options
author | gjoranv <gv@verizonmedia.com> | 2019-05-07 15:58:04 +0200 |
---|---|---|
committer | gjoranv <gv@verizonmedia.com> | 2019-05-07 15:58:04 +0200 |
commit | 78db218a48492be512c0eb883b91f402ea380de8 (patch) | |
tree | 8e4ae81932f08448355b2639d8a66e677d604085 /metrics-proxy/src/main/java | |
parent | 192647b7d95905a2ef692a86535f312ec6742edd (diff) |
Reapply Gjoranv/New metrics proxy
This reverts commit 8b0272c3104080d1f293e6a709208d2ea149fc03.
Diffstat (limited to 'metrics-proxy/src/main/java')
37 files changed, 3324 insertions, 0 deletions
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/MetricsConsumers.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/MetricsConsumers.java new file mode 100644 index 00000000000..564de0806ca --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/MetricsConsumers.java @@ -0,0 +1,80 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.core; + +import ai.vespa.metricsproxy.core.ConsumersConfig.Consumer; +import ai.vespa.metricsproxy.metric.model.ConsumerId; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collector; + +import static ai.vespa.metricsproxy.metric.model.ConsumerId.toConsumerId; +import static com.yahoo.stream.CustomCollectors.toLinkedMap; +import static java.util.Collections.unmodifiableSet; +import static java.util.stream.Collectors.collectingAndThen; + +/** + * Contains metrics consumers and their metrics, and mappings between these. + * All collections are final and immutable. + * + * @author gjoranv + */ +public class MetricsConsumers { + + // All metrics for each consumer. + private final Map<ConsumerId, List<Consumer.Metric>> consumerMetrics; + + // All consumers for each metric (more useful than the opposite map). + private final Map<Consumer.Metric, List<ConsumerId>> consumersByMetric; + + public MetricsConsumers(ConsumersConfig config) { + consumerMetrics = config.consumer().stream().collect( + toUnmodifiableLinkedMap(consumer -> toConsumerId(consumer.name()), Consumer::metric)); + + consumersByMetric = createConsumersByMetric(consumerMetrics); + } + + /** + * @param consumer The consumer + * @return The metrics for the given consumer. + */ + public List<Consumer.Metric> getMetricDefinitions(ConsumerId consumer) { + return consumerMetrics.get(consumer); + } + + public Map<Consumer.Metric, List<ConsumerId>> getConsumersByMetric() { + return consumersByMetric; + } + + public Set<ConsumerId> getAllConsumers() { + return unmodifiableSet(consumerMetrics.keySet()); + } + + /** + * Helper function to create mapping from metric to consumers. + * TODO: consider reversing the mapping in metrics-consumers.def instead: metric{}.consumer[] + */ + private static Map<Consumer.Metric, List<ConsumerId>> + createConsumersByMetric(Map<ConsumerId, List<Consumer.Metric>> metricsByConsumer) { + Map<Consumer.Metric, List<ConsumerId>> consumersByMetric = new LinkedHashMap<>(); + metricsByConsumer.forEach( + (consumer, metrics) -> metrics.forEach( + metric -> consumersByMetric.computeIfAbsent(metric, unused -> new ArrayList<>()) + .add(consumer))); + return Collections.unmodifiableMap(consumersByMetric); + } + + public static <T, K, U> Collector<T, ?, Map<K, U>> toUnmodifiableLinkedMap(Function<? super T, ? extends K> keyMapper, + Function<? super T, ? extends U> valueMapper) { + return collectingAndThen(toLinkedMap(keyMapper, valueMapper), Collections::unmodifiableMap); + } + +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/MetricsManager.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/MetricsManager.java new file mode 100644 index 00000000000..fe823c72127 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/MetricsManager.java @@ -0,0 +1,160 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.core; + +import ai.vespa.metricsproxy.metric.ExternalMetrics; +import ai.vespa.metricsproxy.metric.dimensions.ApplicationDimensions; +import ai.vespa.metricsproxy.metric.dimensions.NodeDimensions; +import ai.vespa.metricsproxy.metric.model.ConsumerId; +import ai.vespa.metricsproxy.metric.model.DimensionId; +import ai.vespa.metricsproxy.metric.model.MetricsPacket; +import ai.vespa.metricsproxy.service.VespaService; +import ai.vespa.metricsproxy.service.VespaServices; +import com.yahoo.component.Vtag; + +import java.time.Duration; +import java.time.Instant; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +import static ai.vespa.metricsproxy.metric.ExternalMetrics.extractConfigserverDimensions; +import static ai.vespa.metricsproxy.metric.model.DimensionId.toDimensionId; +import static com.yahoo.log.LogLevel.DEBUG; +import static java.util.stream.Collectors.toList; + +/** + * Retrieves metrics and performs necessary conversions and additions of metadata. + * + * @author gjoranv + */ +public class MetricsManager { + private static Logger log = Logger.getLogger(MetricsManager.class.getName()); + + static final DimensionId VESPA_VERSION = toDimensionId("vespaVersion"); + + private final VespaServices vespaServices; + private final VespaMetrics vespaMetrics; + private final ExternalMetrics externalMetrics; + private final ApplicationDimensions applicationDimensions; + private final NodeDimensions nodeDimensions; + + private volatile Map<DimensionId, String> extraDimensions = new HashMap<>(); + + public MetricsManager(VespaServices vespaServices, + VespaMetrics vespaMetrics, + ExternalMetrics externalMetrics, + ApplicationDimensions applicationDimensions, + NodeDimensions nodeDimensions) { + this.vespaServices = vespaServices; + this.vespaMetrics = vespaMetrics; + this.externalMetrics = externalMetrics; + this.applicationDimensions = applicationDimensions; + this.nodeDimensions = nodeDimensions; + } + + /** + * Returns all metrics for the given service that are whitelisted for the given consumer. + */ + public String getMetricNamesForServiceAndConsumer(String service, ConsumerId consumer) { + return vespaMetrics.getMetricNames(vespaServices.getMonitoringServices(service), consumer); + } + + public String getMetricsByConfigId(String configId) { + List<VespaService> services = vespaServices.getInstancesById(configId); + vespaServices.updateServices(services); + + return vespaMetrics.getMetricsAsString(services); + } + + /** + * Returns the metrics for the given services. The empty list is returned if no services are given. + * + * @param services The services to retrieve metrics for. + * @return Metrics for all matching services. + */ + public List<MetricsPacket> getMetrics(List<VespaService> services, Instant startTime) { + if (services.isEmpty()) return Collections.emptyList(); + vespaServices.updateServices(services); + + List<MetricsPacket.Builder> result = vespaMetrics.getMetrics(services); + log.log(DEBUG, () -> "Got " + result.size() + " metrics packets for vespa services."); + + List<MetricsPacket.Builder> externalPackets = externalMetrics.getMetrics().stream() + .filter(MetricsPacket.Builder::hasMetrics) + .collect(toList()); + log.log(DEBUG, () -> "Got " + externalPackets.size() + " external metrics packets with whitelisted metrics."); + + result.addAll(externalPackets); + + return result.stream() + .map(builder -> builder.putDimensionsIfAbsent(getGlobalDimensions())) + .map(builder -> builder.putDimensionsIfAbsent(extraDimensions)) + .map(builder -> adjustTimestamp(builder, startTime)) + .map(MetricsPacket.Builder::build) + .collect(Collectors.toList()); + } + + /** + * Returns a merged map of all global dimensions. + */ + private Map<DimensionId, String> getGlobalDimensions() { + Map<DimensionId, String> globalDimensions = new LinkedHashMap<>(applicationDimensions.getDimensions()); + globalDimensions.putAll(nodeDimensions.getDimensions()); + globalDimensions.put(VESPA_VERSION, Vtag.currentVersion.toFullString()); + return globalDimensions; + } + + /** + * If the metrics packet is less than one minute newer or older than the given startTime, + * set its timestamp to the given startTime. This is done to ensure that metrics retrieved + * from different sources for this invocation get the same timestamp, and a timestamp as close + * as possible to the invocation from the external metrics retrieving client. The assumption + * is that the client requests metrics periodically every minute. + * <p> + * However, if the timestamp of the packet is too far off in time, we don't adjust it because + * we would otherwise be masking a real problem with retrieving the metrics. + */ + static MetricsPacket.Builder adjustTimestamp(MetricsPacket.Builder builder, Instant startTime) { + Duration age = Duration.between(startTime, builder.getTimestamp()); + if (age.abs().minusMinutes(1).isNegative()) + builder.timestamp(startTime.getEpochSecond()); + return builder; + } + + /** + * Returns the health metrics for the given services. The empty list is returned if no services are given. + * + * @param services The services to retrieve health metrics for. + * @return Health metrics for all matching services. + */ + public List<MetricsPacket> getHealthMetrics(List<VespaService> services) { + if (services.isEmpty()) return Collections.emptyList(); + vespaServices.updateServices(services); + + // TODO: Add global dimensions to health metrics? + return vespaMetrics.getHealthMetrics(services); + } + + public void setExtraMetrics(List<MetricsPacket.Builder> packets) { + extraDimensions = extractConfigserverDimensions(packets); + externalMetrics.setExtraMetrics(packets); + } + + /** + * Returns a space separated list of all distinct service names. + */ + public String getAllVespaServices() { + return vespaServices.getVespaServices().stream() + .map(VespaService::getServiceName) + .distinct() + .collect(Collectors.joining(" ")); + } + +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java new file mode 100644 index 00000000000..becfd9a54ce --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/VespaMetrics.java @@ -0,0 +1,307 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.core; + + +import ai.vespa.metricsproxy.metric.AggregationKey; +import ai.vespa.metricsproxy.metric.HealthMetric; +import ai.vespa.metricsproxy.metric.Metric; +import ai.vespa.metricsproxy.metric.Metrics; +import ai.vespa.metricsproxy.metric.MetricsFormatter; +import ai.vespa.metricsproxy.metric.model.ConsumerId; +import ai.vespa.metricsproxy.metric.model.DimensionId; +import ai.vespa.metricsproxy.metric.model.MetricsPacket; +import ai.vespa.metricsproxy.service.VespaService; +import ai.vespa.metricsproxy.service.VespaServices; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +import static ai.vespa.metricsproxy.metric.model.ConsumerId.toConsumerId; +import static ai.vespa.metricsproxy.metric.model.DimensionId.toDimensionId; +import static ai.vespa.metricsproxy.metric.model.ServiceId.toServiceId; +import static com.google.common.base.Strings.isNullOrEmpty; +import static com.yahoo.log.LogLevel.DEBUG; + +/** + * @author Unknown + * @author gjoranv + */ +public class VespaMetrics { + private static final Logger log = Logger.getLogger(VespaMetrics.class.getPackage().getName()); + + // MUST be the same as the constant defined in config-model + public static final ConsumerId VESPA_CONSUMER_ID = toConsumerId("Vespa"); + + public static final DimensionId METRIC_TYPE_DIMENSION_ID = toDimensionId("metrictype"); + public static final DimensionId INSTANCE_DIMENSION_ID = toDimensionId("instance"); + + private static final Set<ConsumerId> DEFAULT_CONSUMERS = Collections.singleton(VESPA_CONSUMER_ID); + + private final MetricsConsumers metricsConsumers; + + private static final MetricsFormatter formatter = new MetricsFormatter(false, false); + + public VespaMetrics(MetricsConsumers metricsConsumers, VespaServices vespaServices) { + this.metricsConsumers = metricsConsumers; + } + + public List<MetricsPacket> getHealthMetrics(List<VespaService> services) { + List<MetricsPacket> result = new ArrayList<>(); + for (VespaService s : services) { + HealthMetric h = s.getHealth(); + MetricsPacket.Builder builder = new MetricsPacket.Builder(toServiceId(s.getMonitoringName())) + .statusCode(h.isOk() ? 0 : 1) + .statusMessage(h.getMessage()) + .putDimension(METRIC_TYPE_DIMENSION_ID, "health") + .putDimension(INSTANCE_DIMENSION_ID, s.getInstanceName()); + + result.add(builder.build()); + } + + return result; + } + + /** + * @param services The services to get metrics for + * @return A list of metrics packet builders (to allow modification by the caller). + */ + public List<MetricsPacket.Builder> getMetrics(List<VespaService> services) { + List<MetricsPacket.Builder> metricsPackets = new ArrayList<>(); + + log.log(DEBUG, () -> "Updating services prior to fetching metrics, number of services= " + services.size()); + + Map<ConsumersConfig.Consumer.Metric, List<ConsumerId>> consumersByMetric = metricsConsumers.getConsumersByMetric(); + + for (VespaService service : services) { + // One metrics packet for system metrics + Optional<MetricsPacket.Builder> systemCheck = getSystemMetrics(service); + systemCheck.ifPresent(metricsPackets::add); + + // One metrics packet per set of metrics that share the same dimensions+consumers + // TODO: Move aggregation into MetricsPacket itself? + Metrics serviceMetrics = getServiceMetrics(service, consumersByMetric); + Map<AggregationKey, List<Metric>> aggregatedMetrics = + aggregateMetrics(service.getDimensions(), serviceMetrics); + + aggregatedMetrics.forEach((aggregationKey, metrics) -> { + MetricsPacket.Builder builder = new MetricsPacket.Builder(toServiceId(service.getMonitoringName())) + .putMetrics(metrics) + .putDimension(METRIC_TYPE_DIMENSION_ID, "standard") + .putDimension(INSTANCE_DIMENSION_ID, service.getInstanceName()) + .putDimensions(aggregationKey.getDimensions()); + setMetaInfo(builder, serviceMetrics.getTimeStamp()); + builder.addConsumers(aggregationKey.getConsumers()); + metricsPackets.add(builder); + }); + } + + return metricsPackets; + } + + /** + * Returns the metrics to output for the given service, with updated timestamp + * In order to include a metric, it must exist in the given map of metric to consumers. + * Each returned metric will contain a collection of consumers that it should be routed to. + */ + private Metrics getServiceMetrics(VespaService service, Map<ConsumersConfig.Consumer.Metric, List<ConsumerId>> consumersByMetric) { + Metrics serviceMetrics = new Metrics(); + Metrics allServiceMetrics = service.getMetrics(); + serviceMetrics.setTimeStamp(getMostRecentTimestamp(allServiceMetrics)); + for (Metric candidate : allServiceMetrics.getMetrics()) { + getConfiguredMetrics(candidate.getName(), consumersByMetric.keySet()).forEach( + configuredMetric -> serviceMetrics.add( + metricWithConfigProperties(candidate, configuredMetric, consumersByMetric))); + } + return serviceMetrics; + } + + private Map<DimensionId, String> extractDimensions(Map<DimensionId, String> dimensions, List<ConsumersConfig.Consumer.Metric.Dimension> configuredDimensions) { + if ( ! configuredDimensions.isEmpty()) { + Map<DimensionId, String> dims = new HashMap<>(dimensions); + configuredDimensions.forEach(d -> dims.put(toDimensionId(d.key()), d.value())); + dimensions = Collections.unmodifiableMap(dims); + } + return dimensions; + } + + private Set<ConsumerId> extractConsumers(List<ConsumerId> configuredConsumers) { + Set<ConsumerId> consumers = Collections.emptySet(); + if (configuredConsumers != null) { + if ( configuredConsumers.size() == 1) { + consumers = Collections.singleton(configuredConsumers.get(0)); + } else if (configuredConsumers.size() > 1){ + consumers = new HashSet<>(); + consumers.addAll(configuredConsumers); + consumers = Collections.unmodifiableSet(consumers); + } + } + return consumers; + } + + private Metric metricWithConfigProperties(Metric candidate, + ConsumersConfig.Consumer.Metric configuredMetric, + Map<ConsumersConfig.Consumer.Metric, List<ConsumerId>> consumersByMetric) { + Metric metric = candidate.clone(); + metric.setDimensions(extractDimensions(candidate.getDimensions(), configuredMetric.dimension())); + metric.setConsumers(extractConsumers(consumersByMetric.get(configuredMetric))); + + if (!isNullOrEmpty(configuredMetric.outputname())) + metric.setName(configuredMetric.outputname()); + return metric; + } + + /** + * Returns all configured metrics (for any consumer) that have the given id as 'name'. + */ + private static Set<ConsumersConfig.Consumer.Metric> getConfiguredMetrics(String id, + Set<ConsumersConfig.Consumer.Metric> configuredMetrics) { + return configuredMetrics.stream() + .filter(m -> m.name().equals(id)) + .collect(Collectors.toSet()); + } + + private Optional<MetricsPacket.Builder> getSystemMetrics(VespaService service) { + Metrics systemMetrics = service.getSystemMetrics(); + if (systemMetrics.size() == 0) return Optional.empty(); + + MetricsPacket.Builder builder = new MetricsPacket.Builder(toServiceId(service.getMonitoringName())); + setMetaInfo(builder, systemMetrics.getTimeStamp()); + + builder.putDimension(METRIC_TYPE_DIMENSION_ID, "system") + .putDimension(INSTANCE_DIMENSION_ID, service.getInstanceName()) + .putDimensions(service.getDimensions()) + .putMetrics(systemMetrics.getMetrics()); + + builder.addConsumers(metricsConsumers.getAllConsumers()); + return Optional.of(builder); + } + + private long getMostRecentTimestamp(Metrics metrics) { + long mostRecentTimestamp = 0L; + for (Metric metric : metrics.getMetrics()) { + if (metric.getTimeStamp() > mostRecentTimestamp) { + mostRecentTimestamp = metric.getTimeStamp(); + } + } + return mostRecentTimestamp; + } + + private Map<AggregationKey, List<Metric>> aggregateMetrics(Map<DimensionId, String> serviceDimensions, + Metrics metrics) { + Map<AggregationKey, List<Metric>> aggregatedMetrics = new HashMap<>(); + + for (Metric metric : metrics.getMetrics() ) { + Map<DimensionId, String> mergedDimensions = new LinkedHashMap<>(); + mergedDimensions.putAll(metric.getDimensions()); + mergedDimensions.putAll(serviceDimensions); + AggregationKey aggregationKey = new AggregationKey(mergedDimensions, metric.getConsumers()); + + if (aggregatedMetrics.containsKey(aggregationKey)) { + aggregatedMetrics.get(aggregationKey).add(metric); + } else { + List<Metric> ml = new ArrayList<>(); + ml.add(metric); + aggregatedMetrics.put(aggregationKey, ml); + } + } + return aggregatedMetrics; + } + + private List<ConsumersConfig.Consumer.Metric> getMetricDefinitions(ConsumerId consumer) { + if (metricsConsumers == null) return Collections.emptyList(); + + List<ConsumersConfig.Consumer.Metric> definitions = metricsConsumers.getMetricDefinitions(consumer); + return definitions == null ? Collections.emptyList() : definitions; + } + + private static void setMetaInfo(MetricsPacket.Builder builder, long timestamp) { + builder.timestamp(timestamp) + .statusCode(0) + .statusMessage("Data collected successfully"); + } + + /** + * Returns a string representation of metrics for the given services; + * a space separated list of key=value. + */ + public String getMetricsAsString(List<VespaService> services) { + StringBuilder b = new StringBuilder(); + for (VespaService s : services) { + for (Metric metric : s.getMetrics().getMetrics()) { + String key = metric.getName(); + String alias = key; + + boolean isForwarded = false; + for (ConsumersConfig.Consumer.Metric metricConsumer : getMetricDefinitions(VESPA_CONSUMER_ID)) { + if (metricConsumer.name().equals(key)) { + alias = metricConsumer.outputname(); + isForwarded = true; + } + } + if (isForwarded) { + b.append(formatter.format(s, alias, metric.getValue())) + .append(" "); + } + } + } + return b.toString(); + } + + /** + * Get all metric names for the given services + * + * @return String representation + */ + public String getMetricNames(List<VespaService> services, ConsumerId consumer) { + StringBuilder bufferOn = new StringBuilder(); + StringBuilder bufferOff = new StringBuilder(); + for (VespaService s : services) { + + for (Metric m : s.getMetrics().getMetrics()) { + String description = m.getDescription(); + String alias = ""; + boolean isForwarded = false; + + for (ConsumersConfig.Consumer.Metric metric : getMetricDefinitions(consumer)) { + if (metric.name().equals(m.getName())) { + alias = metric.outputname(); + isForwarded = true; + if (description.isEmpty()) { + description = metric.description(); + } + } + } + + String message = "OFF"; + StringBuilder buffer = bufferOff; + if (isForwarded) { + buffer = bufferOn; + message = "ON"; + } + buffer.append(m.getName()).append('=').append(message); + if (!description.isEmpty()) { + buffer.append(";description=").append(description); + } + if (!alias.isEmpty()) { + buffer.append(";output-name=").append(alias); + } + buffer.append(','); + } + } + + return bufferOn.toString() + bufferOff.toString(); + } + +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/package-info.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/package-info.java new file mode 100644 index 00000000000..617cf0a1525 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/core/package-info.java @@ -0,0 +1,8 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +@ExportPackage +package ai.vespa.metricsproxy.core; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/AggregationKey.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/AggregationKey.java new file mode 100644 index 00000000000..9eb1b242535 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/AggregationKey.java @@ -0,0 +1,45 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.metric; + +import ai.vespa.metricsproxy.metric.model.ConsumerId; +import ai.vespa.metricsproxy.metric.model.DimensionId; + +import java.util.Collections; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +/** + * @author gjoranv + */ +public final class AggregationKey { + + private Map<DimensionId, String> dimensions; + private Set<ConsumerId> consumers; + + public AggregationKey(Map<DimensionId, String> dimensions, Set<ConsumerId> consumers) { + this.dimensions = dimensions; + this.consumers = consumers; + } + + public Map<DimensionId, String> getDimensions() { return Collections.unmodifiableMap(dimensions); } + + public Set<ConsumerId> getConsumers() { return Collections.unmodifiableSet(consumers); } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AggregationKey that = (AggregationKey) o; + return Objects.equals(dimensions, that.dimensions) && + Objects.equals(consumers, that.consumers); + } + + @Override + public int hashCode() { + return Objects.hash(dimensions, consumers); + } +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/ExternalMetrics.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/ExternalMetrics.java new file mode 100644 index 00000000000..62465909798 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/ExternalMetrics.java @@ -0,0 +1,99 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.metric; + +import ai.vespa.metricsproxy.core.MetricsConsumers; +import ai.vespa.metricsproxy.core.ConsumersConfig.Consumer; +import ai.vespa.metricsproxy.metric.model.DimensionId; +import ai.vespa.metricsproxy.metric.model.MetricId; +import ai.vespa.metricsproxy.metric.model.MetricsPacket; +import ai.vespa.metricsproxy.metric.model.ServiceId; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.logging.Logger; + +import static ai.vespa.metricsproxy.metric.model.DimensionId.toDimensionId; +import static ai.vespa.metricsproxy.metric.model.MetricId.toMetricId; +import static ai.vespa.metricsproxy.metric.model.ServiceId.toServiceId; +import static com.yahoo.log.LogLevel.DEBUG; +import static java.util.stream.Collectors.toCollection; + +/** + * This class is responsible for handling metrics received from external processes. + * + * @author gjoranv + */ +public class ExternalMetrics { + private static final Logger log = Logger.getLogger(ExternalMetrics.class.getName()); + + public static final DimensionId ROLE_DIMENSION = toDimensionId("role"); + public static final DimensionId STATE_DIMENSION = toDimensionId("state"); + public static final DimensionId ORCHESTRATOR_STATE_DIMENSION = toDimensionId("orchestratorState"); + + static final ServiceId VESPA_NODE_SERVICE_ID = toServiceId("vespa.node"); + + private volatile List<MetricsPacket.Builder> metrics = new ArrayList<>(); + private final MetricsConsumers consumers; + + public ExternalMetrics(MetricsConsumers consumers) { + this.consumers = consumers; + } + + public List<MetricsPacket.Builder> getMetrics() { + return metrics; + } + + public void setExtraMetrics(List<MetricsPacket.Builder> externalPackets) { + log.log(DEBUG, () -> "Setting new external metrics with " + externalPackets.size() + " metrics packets."); + externalPackets.forEach(packet -> { + packet.addConsumers(consumers.getAllConsumers()) + .service(VESPA_NODE_SERVICE_ID) + .retainMetrics(metricsToRetain()) + .applyOutputNames(outputNamesById()); + }); + metrics = List.copyOf(externalPackets); + } + + private Set<MetricId> metricsToRetain() { + return consumers.getConsumersByMetric().keySet().stream() + .map(configuredMetric -> toMetricId(configuredMetric.name())) + .collect(toCollection(LinkedHashSet::new)); + } + + /** + * Returns a mapping from metric id to a list of the metric's output names. + * Metrics that only have their id as output name are included in the output. + */ + private Map<MetricId, List<String>> outputNamesById() { + Map<MetricId, List<String>> outputNamesById = new LinkedHashMap<>(); + for (Consumer.Metric metric : consumers.getConsumersByMetric().keySet()) { + MetricId id = toMetricId(metric.name()); + outputNamesById.computeIfAbsent(id, unused -> new ArrayList<>()) + .add(metric.outputname()); + } + return outputNamesById; + } + + /** + * Extracts the node repository dimensions (role, state etc.) from the given packets. + * If the same dimension exists in multiple packets, this implementation gives no guarantees + * about which value is returned. + */ + public static Map<DimensionId, String> extractConfigserverDimensions(Collection<MetricsPacket.Builder> packets) { + Map<DimensionId, String> dimensions = new HashMap<>(); + for (MetricsPacket.Builder packet : packets) { + dimensions.putAll(packet.build().dimensions()); + } + dimensions.keySet().retainAll(Set.of(ROLE_DIMENSION, STATE_DIMENSION, ORCHESTRATOR_STATE_DIMENSION)); + return dimensions; + } +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/HealthMetric.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/HealthMetric.java new file mode 100644 index 00000000000..41a8c3d414e --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/HealthMetric.java @@ -0,0 +1,56 @@ +/* +* Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.metric; + +/** + * @author Jo Kristian Bergum + */ +public class HealthMetric { + private final String message; + private final String status; + private final boolean isAlive; + + private HealthMetric(String status, String message, boolean isAlive) { + this.message = message; + this.status = status; + this.isAlive = isAlive; + } + + public static HealthMetric get(String status, String message) { + if (status == null) { + status = ""; + } + if (message == null) { + message = ""; + } + status = status.toLowerCase(); + + if (status.equals("up") || status.equals("ok")) { + return new HealthMetric(status, message, true); + } else { + return new HealthMetric(status, message, false); + } + } + + public static HealthMetric getFailed(String message) { + return new HealthMetric("down", message, false); + } + + public static HealthMetric getOk(String message) { + return new HealthMetric("up", message, true); + } + + public String getMessage() { + return this.message; + } + + public String getStatus() { + return this.status; + } + + public boolean isOk() { + return this.isAlive; + } +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/Metric.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/Metric.java new file mode 100644 index 00000000000..59fbe301a49 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/Metric.java @@ -0,0 +1,124 @@ +/* +* Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.metric; + +import ai.vespa.metricsproxy.metric.model.ConsumerId; +import ai.vespa.metricsproxy.metric.model.DimensionId; + +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; + +/** + * @author Jo Kristian Bergum + */ +public class Metric { + private final long time; + private final Number value; + private final String description; + private String name; + private Map<DimensionId, String> dimensions; + private Set<ConsumerId> consumers; + + /** + * Creates a new metric instance + * + * @param name The metric name. E.g 'documents' + * @param value The numeric value + * @param time The timestamp of this metric in seconds + */ + public Metric(String name, Number value, long time, Map<DimensionId, String> dimensions, String description) { + this.time = time; + this.value = value; + this.name = name; + this.dimensions = dimensions; + this.description = description; + } + + public Metric(String name, Number value, long timestamp) { + this(name, value, timestamp, Collections.emptyMap(), ""); + } + + public Metric(String name, Number value) { + this(name, value, System.currentTimeMillis() / 1000); + } + + public void setDimensions(Map<DimensionId, String> dimensions) { + this.dimensions = dimensions; + } + + /** + * @return A map of the dimensions registered for this metric + */ + public Map<DimensionId, String> getDimensions() { return dimensions; } + + public void setConsumers(Set<ConsumerId> consumers) { this.consumers = consumers; } + + /** + * @return The consumers this metric should be routed to. + */ + public Set<ConsumerId> getConsumers() { return consumers; } + + /** + * @return The number that this metric name represent + */ + public Number getValue() { + return value; + } + + /** + * Set the name of this metric + * + * @param name The name to use for this metric + */ + public void setName(String name) { + this.name = name; + } + + /** + * @return The name of the metric + */ + public String getName() { + return name; + } + + /** + * @return The UTC timestamp for when this metric was collected + */ + public long getTimeStamp() { + return this.time; + } + + @Override + public String toString() { + return "Metric{" + + "time=" + time + + ", name=" + name + + ", value='" + value + '\'' + + ", dimensions=" + dimensions + + '}'; + } + + @Override + public Metric clone() { + return new Metric(name, value, time, new LinkedHashMap<>(dimensions), getDescription()); + } + + /** + * @return the description of this metric + */ + public String getDescription() { + return this.description; + } + + /** Return an adjusted (rounded up) time if necessary */ + public static long adjustTime(long timestamp, long now) { + if ((now == (timestamp+1)) && ((now % 60) == 0)) { + return now; + } + return timestamp; + } +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/Metrics.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/Metrics.java new file mode 100644 index 00000000000..ca611368730 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/Metrics.java @@ -0,0 +1,111 @@ +/* +* Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.metric; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * Once a getter is called, the instance is frozen and no more metrics can be added. + * + * @author Unknown + */ +// TODO: remove timestamp, only used as temporary storage. +// TODO: instances of this class can probably be replaced by a simple freezable map. +public class Metrics { + private final List<Metric> metrics = new ArrayList<>(); + private long timestamp; + private boolean isFrozen = false; + + public Metrics() { + this(System.currentTimeMillis() / 1000L); + } + + public Metrics(long timestamp) { + this.timestamp = timestamp; + } + + private void ensureNotFrozen() { + if (isFrozen) throw new IllegalStateException("Frozen Metrics cannot be modified!"); + + } + + public long getTimeStamp() { + return this.timestamp; + } + + /** + * Update the timestamp + * + * @param timestamp IN UTC seconds resolution + */ + public void setTimeStamp(long timestamp) { + ensureNotFrozen(); + this.timestamp = timestamp; + } + + public void add(Metric m) { + ensureNotFrozen(); + this.timestamp = m.getTimeStamp(); + this.metrics.add(m); + } + + /** + * Get the size of the metrics covered. Note that this might also contain expired metrics + * + * @return size of metrics + */ + public int size() { + return this.metrics.size(); + } + + /** + * TODO: Remove, might be multiple metrics with same name but different dimensions + * + * @param key metric name + * @return the metric, or null + */ + public Metric getMetric(String key) { + isFrozen = true; + for (Metric m: metrics) { + if (m.getName().equals(key)) { + return m; + } + } + return null; + } + + public List<Metric> getMetrics() { + isFrozen = true; + return Collections.unmodifiableList(metrics); + } + + + /** + * Get a single metric based on the metric name + * TODO: Remove, might be multiple metrics with same name, but different + * + * @param key metric name + * @return The value or null if metric was not found or expired + */ + public Number get(String key) { + isFrozen = true; + Metric m = getMetric(key); + if (m != null) { + return m.getValue(); + } + return null; + } + + public String toString() { + StringBuilder sb = new StringBuilder(); + for (Metric m : metrics) { + sb.append(m.getName()).append(":").append(m.getValue()).append("\n"); + } + return sb.toString(); + } + +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/MetricsFormatter.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/MetricsFormatter.java new file mode 100644 index 00000000000..8858e21486a --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/MetricsFormatter.java @@ -0,0 +1,71 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.metric; + +import ai.vespa.metricsproxy.service.VespaService; + +import java.text.DecimalFormat; +import java.text.DecimalFormatSymbols; +import java.util.Locale; + +/** + * Format metrics as required by users of the "getMetricsById" rpc method. + * + * @author Unknown + */ +public class MetricsFormatter { + private final boolean includeServiceName; + private final boolean isSystemMetric; + private final DecimalFormat df = new DecimalFormat("0.000", new DecimalFormatSymbols(Locale.ENGLISH)); + + public MetricsFormatter(boolean includeServiceName, boolean isSystemMetric) { + this.includeServiceName = includeServiceName; + this.isSystemMetric = isSystemMetric; + } + + public String format(VespaService service, String name, Number value) { + StringBuilder sb = new StringBuilder(); + + if (includeServiceName) { + sb.append(service.getServiceName()).append("."); + } + + if (isSystemMetric) + sb.append(toSystemServiceId(service.getConfigId())); + else + sb.append(toServiceId(service.getConfigId())); + + sb.append(".") + .append(formatMetricName(name)) + .append("="); + + if (value instanceof Double) { + sb.append(df.format(value.doubleValue())); + } else { + sb.append(value.toString()); + } + + return sb.toString(); + } + + private static String formatMetricName(String name) { + name = name.replaceAll("\"", ""); + name = name.replaceAll("\\.", "_"); + return name; + } + + // E.g. container/qrserver.1 -> 'container.qrserver.1' + private static String toServiceId(String configId) { + return "'" + configId.replace("/", ".") + "'"; + } + + // E.g. container/qrserver.1 -> container.'qrserver.1' + private static String toSystemServiceId(String configId) { + String name = configId.replace("/", "."); + name = name.replaceFirst("\\.", ".'") + "'"; + return name; + } + +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/dimensions/ApplicationDimensions.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/dimensions/ApplicationDimensions.java new file mode 100644 index 00000000000..ae40f672a32 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/dimensions/ApplicationDimensions.java @@ -0,0 +1,30 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.metric.dimensions; + +import ai.vespa.metricsproxy.metric.model.DimensionId; + +import java.util.Map; + +import static ai.vespa.metricsproxy.core.MetricsConsumers.toUnmodifiableLinkedMap; +import static ai.vespa.metricsproxy.metric.model.DimensionId.toDimensionId; + +/** + * Application-specific but node-agnostic dimensions. + * + * @author gjoranv + */ +public class ApplicationDimensions { + + private final Map<DimensionId, String> dimensions; + + public ApplicationDimensions(ApplicationDimensionsConfig config) { + dimensions = config.dimensions().entrySet().stream().collect( + toUnmodifiableLinkedMap(e -> toDimensionId(e.getKey()), Map.Entry::getValue)); + } + + public Map<DimensionId, String> getDimensions() { return dimensions; } + +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/dimensions/NodeDimensions.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/dimensions/NodeDimensions.java new file mode 100644 index 00000000000..d2c1799e148 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/dimensions/NodeDimensions.java @@ -0,0 +1,30 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.metric.dimensions; + +import ai.vespa.metricsproxy.metric.model.DimensionId; + +import java.util.Map; + +import static ai.vespa.metricsproxy.core.MetricsConsumers.toUnmodifiableLinkedMap; +import static ai.vespa.metricsproxy.metric.model.DimensionId.toDimensionId; + +/** + * Node-specific metric dimensions. + * + * @author gjoranv + */ +public class NodeDimensions { + + private final Map<DimensionId, String> dimensions; + + public NodeDimensions(NodeDimensionsConfig config) { + dimensions = config.dimensions().entrySet().stream().collect( + toUnmodifiableLinkedMap(e -> toDimensionId(e.getKey()), Map.Entry::getValue)); + } + + public Map<DimensionId, String> getDimensions() { return dimensions; } + +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/dimensions/package-info.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/dimensions/package-info.java new file mode 100644 index 00000000000..f4e5f74313a --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/dimensions/package-info.java @@ -0,0 +1,8 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +@ExportPackage +package ai.vespa.metricsproxy.metric.dimensions; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/ConsumerId.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/ConsumerId.java new file mode 100644 index 00000000000..0d7acd5f354 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/ConsumerId.java @@ -0,0 +1,38 @@ +/* +* Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.metric.model; + +import java.util.Objects; + +/** + * @author gjoranv + */ +public class ConsumerId { + public final String id; + private ConsumerId(String id) { this.id = id; } + + public static ConsumerId toConsumerId(String id) { return new ConsumerId(id); } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ConsumerId that = (ConsumerId) o; + return Objects.equals(id, that.id); + } + + @Override + public int hashCode() { + return Objects.hash(id); + } + + @Override + public String toString() { + return "ConsumerId{" + + "id='" + id + '\'' + + '}'; + } + +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/DimensionId.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/DimensionId.java new file mode 100644 index 00000000000..03f4c2c01ff --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/DimensionId.java @@ -0,0 +1,38 @@ +/* +* Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.metric.model; + +import java.util.Objects; + +/** + * @author gjoranv + */ +public class DimensionId { + + public final String id; + private DimensionId(String id) { this.id = id; } + + public static DimensionId toDimensionId(String id) { return new DimensionId(id); } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DimensionId that = (DimensionId) o; + return Objects.equals(id, that.id); + } + + @Override + public int hashCode() { + return Objects.hash(id); + } + + @Override + public String toString() { + return "DimensionId{" + + "id='" + id + '\'' + + '}'; + } +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/MetricId.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/MetricId.java new file mode 100644 index 00000000000..c93735c7fca --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/MetricId.java @@ -0,0 +1,39 @@ +/* +* Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.metric.model; + +import java.util.Objects; + +/** + * @author gjoranv + */ +public class MetricId { + + public final String id; + private MetricId(String id) { this.id = id; } + + public static MetricId toMetricId(String id) { return new MetricId(id); } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + MetricId metricId = (MetricId) o; + return Objects.equals(id, metricId.id); + } + + @Override + public int hashCode() { + return Objects.hash(id); + } + + @Override + public String toString() { + return "MetricId{" + + "id='" + id + '\'' + + '}'; + } + +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/MetricsPacket.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/MetricsPacket.java new file mode 100644 index 00000000000..fa45c6251f6 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/MetricsPacket.java @@ -0,0 +1,182 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.metric.model; + +import ai.vespa.metricsproxy.metric.Metric; + +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collection; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.function.Function; + +import static ai.vespa.metricsproxy.metric.model.MetricId.toMetricId; +import static java.util.Collections.unmodifiableList; +import static java.util.Collections.unmodifiableMap; +import static java.util.stream.Collectors.joining; + +/** + * Represents a packet of metrics (with meta information) that belong together because they: + * <ul> + * <li>share both the same dimensions and consumers, AND</li> + * <li>represent the same source, e.g. a vespa service or the system hardware.</li> + * </ul> + * + * @author gjoranv + */ +public class MetricsPacket { + public final int statusCode; + public final String statusMessage; + public final long timestamp; + public final ServiceId service; + private final Map<MetricId, Number> metrics; + private final Map<DimensionId, String> dimensions; + private final List<ConsumerId> consumers; + + private MetricsPacket(int statusCode, String statusMessage, long timestamp, ServiceId service, + Map<MetricId, Number> metrics, Map<DimensionId, String> dimensions, Set<ConsumerId> consumers ) { + this.statusCode = statusCode; + this.statusMessage = statusMessage; + this.timestamp = timestamp; + this.service = service; + this.metrics = metrics; + this.dimensions = dimensions; + this.consumers = new ArrayList<>(consumers); + } + + public Map<MetricId, Number> metrics() { + return unmodifiableMap(metrics); + } + + public Map<DimensionId, String> dimensions() { + return unmodifiableMap(dimensions); + } + + public List<ConsumerId> consumers() { + return unmodifiableList(consumers); + } + + @Override + public String toString() { + return "MetricsPacket{" + + "statusCode=" + statusCode + + ", statusMessage='" + statusMessage + '\'' + + ", timestamp=" + timestamp + + ", service=" + service.id + + ", metrics=" + idMapToString(metrics, id -> id.id) + + ", dimensions=" + idMapToString(dimensions, id -> id.id) + + ", consumers=" + consumers.stream().map(id -> id.id).collect(joining(",", "[", "]")) + + '}'; + } + + private static <K,V> String idMapToString(Map<K,V> map, Function<K, String> idMapper) { + return map.entrySet().stream() + .map(entry -> idMapper.apply(entry.getKey()) + "=" + entry.getValue()) + .collect(joining(",", "{", "}")); + } + + public static class Builder { + // Set sensible defaults here, and use null guard in all setters. + // Except for 'service' for which we require an explicit non-null value. + private ServiceId service; + private int statusCode = 0; + private String statusMessage = "<null>"; + private long timestamp = 0L; + private Map<MetricId, Number> metrics = new LinkedHashMap<>(); + private final Map<DimensionId, String> dimensions = new LinkedHashMap<>(); + private final Set<ConsumerId> consumers = new LinkedHashSet<>(); + + public Builder(ServiceId service) { + Objects.requireNonNull(service, "Service cannot be null."); + this.service = service; + } + + public Builder service(ServiceId service) { + if (service == null) throw new IllegalArgumentException("Service cannot be null."); + this.service = service; + return this; + } + + public Builder statusCode(Integer statusCode) { + if (statusCode != null) this.statusCode = statusCode; + return this; + } + + public Builder statusMessage(String statusMessage) { + if (statusMessage != null) this.statusMessage = statusMessage; + return this; + } + + public Builder timestamp(Long timestamp) { + if (timestamp != null) this.timestamp = timestamp; + return this; + } + + public Builder putMetrics(Collection<Metric> extraMetrics) { + if (extraMetrics != null) + extraMetrics.forEach(metric -> metrics.put(toMetricId(metric.getName()), + metric.getValue().doubleValue())); + return this; + } + + public Builder putMetric(MetricId id, Number value) { + metrics.put(id, value); + return this; + } + + public Builder retainMetrics(Set<MetricId> idsToRetain) { + metrics.keySet().retainAll(idsToRetain); + return this; + } + + public Builder applyOutputNames(Map<MetricId, List<String>> outputNamesById) { + Map<MetricId, Number> newMetrics = new LinkedHashMap<>(); + outputNamesById.forEach((id, outputNames) -> { + if (metrics.containsKey(id)) + outputNames.forEach(outputName -> newMetrics.put(toMetricId(outputName), metrics.get(id))); + }); + metrics = newMetrics; + return this; + } + + public Builder putDimension(DimensionId id, String value) { + dimensions.put(id, value); + return this; + } + + public Builder putDimensions(Map<DimensionId, String> extraDimensions) { + if (extraDimensions != null) dimensions.putAll(extraDimensions); + return this; + } + + public Builder putDimensionsIfAbsent(Map<DimensionId, String> extraDimensions) { + if (extraDimensions != null) extraDimensions.forEach(dimensions::putIfAbsent); + return this; + } + + public Builder addConsumers(Set<ConsumerId> extraConsumers) { + if (extraConsumers != null) consumers.addAll(extraConsumers); + return this; + } + + public MetricsPacket build() { + return new MetricsPacket(statusCode, statusMessage, timestamp, service, metrics, dimensions, consumers); + } + + public boolean hasMetrics() { + return ! metrics.isEmpty(); + } + + public Instant getTimestamp() { return Instant.ofEpochSecond(timestamp); } + + } + +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/ServiceId.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/ServiceId.java new file mode 100644 index 00000000000..b61ead75b72 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/ServiceId.java @@ -0,0 +1,39 @@ +/* +* Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.metric.model; + +import java.util.Objects; + +/** + * @author gjoranv + */ +public class ServiceId { + + public final String id; + private ServiceId(String id) { this.id = id; } + + public static ServiceId toServiceId(String id) { return new ServiceId(id); } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ServiceId serviceId = (ServiceId) o; + return Objects.equals(id, serviceId.id); + } + + @Override + public int hashCode() { + return Objects.hash(id); + } + + @Override + public String toString() { + return "ServiceId{" + + "id='" + id + '\'' + + '}'; + } + +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/JsonUtil.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/JsonUtil.java new file mode 100644 index 00000000000..f48e5759528 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/JsonUtil.java @@ -0,0 +1,130 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.metric.model.json; + +import ai.vespa.metricsproxy.metric.model.ConsumerId; +import ai.vespa.metricsproxy.metric.model.MetricsPacket; +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableMap; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +import static ai.vespa.metricsproxy.metric.model.ServiceId.toServiceId; +import static com.yahoo.stream.CustomCollectors.toLinkedMap; +import static java.util.Collections.emptyList; +import static java.util.logging.Level.WARNING; + +/** + * @author gjoranv + */ +public class JsonUtil { + private static final Logger log = Logger.getLogger(JsonUtil.class.getName()); + + static final String YAMAS_ROUTING = "yamas"; + + public static MetricsPacket.Builder toMetricsPacketBuilder(YamasJsonModel jsonModel) { + if (jsonModel.application == null) + throw new IllegalArgumentException("Service id cannot be null"); + + return new MetricsPacket.Builder(toServiceId(jsonModel.application)) + .statusCode(jsonModel.status_code) + .statusMessage(jsonModel.status_msg) + .timestamp(jsonModel.timestamp) + .putMetrics(jsonModel.getMetricsList()) + .putDimensions(jsonModel.getDimensionsById()) + .addConsumers(jsonModel.getYamasConsumers()); + } + + public static YamasArrayJsonModel toYamasArray(Collection<MetricsPacket> metricsPackets) { + YamasArrayJsonModel yamasArray = toYamasArray(metricsPackets, false); + + // Add a single status object at the end + yamasArray.metrics.stream().findFirst().map(YamasJsonModel::getYamasConsumers) + .ifPresent(consumers -> yamasArray.add(getStatusYamasModel("Data collected successfully", 0, consumers))); + return yamasArray; + } + + public static YamasArrayJsonModel toYamasArray(Collection<MetricsPacket> metricsPackets, boolean addStatus) { + YamasArrayJsonModel yamasArray = new YamasArrayJsonModel(); + metricsPackets.forEach(packet -> yamasArray.add(toYamasModel(packet, addStatus))); + return yamasArray; + } + + /** + * Converts the given json formatted string to a list of metrics packet builders. + * Note that this method returns an empty list if an IOException occurs, + * and logs a warning as a side effect. + */ + public static List<MetricsPacket.Builder> toMetricsPackets(String jsonString) { + List<MetricsPacket.Builder> packets = new ArrayList<>(); + try { + JsonParser jp = new JsonFactory().createParser(jsonString); + jp.setCodec(new ObjectMapper()); + while (jp.nextToken() != null) { + YamasJsonModel jsonModel = jp.readValueAs(YamasJsonModel.class); + packets.add(toMetricsPacketBuilder(jsonModel)); + } + return packets; + } catch (IOException e) { + log.log(WARNING, "Could not create metrics packet from string:\n" + jsonString, e); + return emptyList(); + } + } + + private static YamasJsonModel getStatusYamasModel(String statusMessage, int statusCode, Collection<ConsumerId> consumers) { + YamasJsonModel model = new YamasJsonModel(); + model.status_code = statusCode; + model.status_msg = statusMessage; + model.application = "yms_check_vespa"; + model.routing = ImmutableMap.of(YAMAS_ROUTING, toYamasJsonNamespaces(consumers)); + return model; + } + + private static YamasJsonModel toYamasModel(MetricsPacket packet, boolean addStatus) { + YamasJsonModel model = new YamasJsonModel(); + + if (addStatus) { + model.status_code = packet.statusCode; + model.status_msg = packet.statusMessage; + } + + model.application = packet.service.id; + model.timestamp = (packet.timestamp == 0L) ? null : packet.timestamp; + + if (packet.metrics().isEmpty()) model.metrics = null; + else { + model.metrics = packet.metrics().entrySet().stream().collect( + toLinkedMap(id2metric -> id2metric.getKey().id, + id2metric -> id2metric.getValue().doubleValue())); + } + + if (packet.dimensions().isEmpty()) model.dimensions = null; + else { + model.dimensions = packet.dimensions().entrySet().stream().collect( + toLinkedMap(id2dim -> id2dim.getKey().id, + Map.Entry::getValue)); + } + + if (packet.consumers().isEmpty()) model.routing = null; + else model.routing = ImmutableMap.of(YAMAS_ROUTING, toYamasJsonNamespaces(packet.consumers())); + + return model; + } + + private static YamasJsonModel.YamasJsonNamespace toYamasJsonNamespaces(Collection<ConsumerId> consumers) { + YamasJsonModel.YamasJsonNamespace namespaces = new YamasJsonModel.YamasJsonNamespace(); + namespaces.namespaces = consumers.stream().map(consumer -> consumer.id).collect(Collectors.toList()); + return namespaces; + } + +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/YamasArrayJsonModel.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/YamasArrayJsonModel.java new file mode 100644 index 00000000000..fdac0521256 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/YamasArrayJsonModel.java @@ -0,0 +1,80 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.metric.model.json; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.Version; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.module.SimpleModule; + +import java.io.IOException; +import java.text.DecimalFormat; +import java.text.DecimalFormatSymbols; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; + +/** + * Datamodel for the metricsproxy representation of multiple yamas checks. + */ +@JsonIgnoreProperties(ignoreUnknown = true) +@JsonInclude(JsonInclude.Include.NON_NULL) +public class YamasArrayJsonModel { + @JsonProperty("metrics") + public final List<YamasJsonModel> metrics = new ArrayList<>(); + + public void add(List<YamasJsonModel> results) { + metrics.addAll(results); + } + + public void add(YamasJsonModel result) { + metrics.add(result); + } + + public void add(YamasArrayJsonModel array) { + metrics.addAll(array.metrics); + } + + /** + * Convenience method to serialize. + * <p> + * Custom floating point serializer to avoid scientifc notation + * + * @return Serialized json + */ + public String serialize() { + ObjectMapper mapper = new ObjectMapper(); + SimpleModule module = new SimpleModule("DoubleSerializer", + new Version(1, 0, 0, "", null, null)); + module.addSerializer(Double.class, new DoubleSerializer()); + mapper.registerModule(module); + + if (metrics.size() > 0) { + try { + return mapper.writeValueAsString(this); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + } + + return "{}"; // Backwards compatability + } + + public class DoubleSerializer extends JsonSerializer<Double> { + @Override + public void serialize(Double value, JsonGenerator jgen, + SerializerProvider provider) throws IOException, JsonProcessingException { + DecimalFormat df = new DecimalFormat("#.####", new DecimalFormatSymbols(Locale.ENGLISH)); + df.setMaximumFractionDigits(13); + jgen.writeNumber(df.format(value)); + } + } +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/YamasJsonModel.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/YamasJsonModel.java new file mode 100644 index 00000000000..5fdbe9577be --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/model/json/YamasJsonModel.java @@ -0,0 +1,130 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.metric.model.json; + +import ai.vespa.metricsproxy.metric.Metric; +import ai.vespa.metricsproxy.metric.model.ConsumerId; +import ai.vespa.metricsproxy.metric.model.DimensionId; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonPropertyOrder; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import static com.yahoo.stream.CustomCollectors.toLinkedMap; +import static java.util.Collections.emptyList; +import static java.util.Collections.emptyMap; +import static java.util.Collections.emptySet; + +/** + * Datamodel for Yamas execute output + * <p> + * Used to read from original yamas checks and annotate with routing information. + */ +@JsonIgnoreProperties(ignoreUnknown = true) +@JsonInclude(JsonInclude.Include.NON_NULL) +@JsonPropertyOrder({ "status_code", "timestamp", "application", "metrics", "dimensions", "routing", "status_msg"}) +public class YamasJsonModel { + @JsonProperty("status_code") + public Integer status_code; + @JsonProperty("status_msg") + public String status_msg; + @JsonProperty("timestamp") + public Long timestamp; + @JsonProperty("application") + public String application; + @JsonProperty("metrics") + public Map<String, Double> metrics; + @JsonProperty("dimensions") + public Map<String, String> dimensions; + @JsonProperty("routing") + public Map<String, YamasJsonNamespace> routing; + + public static class YamasJsonNamespace { + @JsonProperty("namespaces") + public List<String> namespaces; + } + + // NOTE: do not rename to 'setMetrics', as jackson will try to use it. + public void resetMetrics(List<Metric> newMetrics) { + metrics = new LinkedHashMap<>(); + newMetrics.forEach(metric -> metrics.put(metric.getName(), metric.getValue().doubleValue())); + } + + /** + * Convenience method to add targets to the routing object + * + * @param names Namespaces E.g "Vespa" + */ + public void addRouting(Set<ConsumerId> names) { + // Setup routing structure if not already existing + if (routing == null) { + routing = new HashMap<>(); + } + + if (! routing.containsKey("yamas")) { + routing.put("yamas", new YamasJsonModel.YamasJsonNamespace()); + } + YamasJsonModel.YamasJsonNamespace namespace = routing.get("yamas"); + + if (namespace.namespaces == null) { + namespace.namespaces = new ArrayList<>(); + } + + namespace.namespaces.addAll(names.stream().map(consumer -> consumer.id).collect(Collectors.toList())); + } + + /** + * Convenience method to add dimensions + */ + public void addDimensions(Map<DimensionId, String> additionalDimensions, boolean replace) { + additionalDimensions.forEach((k,v) -> { + addDimension(k.id, v, replace); + }); + } + + /** + * Convenience method to add dimensions + */ + public void addDimension(String key, String value, boolean replace) { + if (dimensions == null) { + dimensions = new HashMap<>(); + } + if (!dimensions.containsKey(key) || replace) { + dimensions.put(key, value); + } + } + + List<Metric> getMetricsList() { + if (metrics == null) return emptyList(); + + return metrics.keySet().stream() + .map(name -> new Metric(name, metrics.get(name))) + .collect(Collectors.toList()); + } + + Map<DimensionId, String> getDimensionsById() { + if (dimensions == null) return emptyMap(); + + return dimensions.keySet().stream().collect(toLinkedMap(DimensionId::toDimensionId, + name -> dimensions.get(name))); + } + + Set<ConsumerId> getYamasConsumers() { + if (routing == null || routing.get("yamas") == null) return emptySet(); + + return routing.get("yamas").namespaces.stream() + .map(ConsumerId::toConsumerId) + .collect(Collectors.toCollection(LinkedHashSet::new)); + } +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/package-info.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/package-info.java new file mode 100644 index 00000000000..c72f2484f8c --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/metric/package-info.java @@ -0,0 +1,8 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +@ExportPackage +package ai.vespa.metricsproxy.metric; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/rpc/RpcConnector.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/rpc/RpcConnector.java new file mode 100644 index 00000000000..e7feab9926d --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/rpc/RpcConnector.java @@ -0,0 +1,62 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.rpc; + +import com.yahoo.component.AbstractComponent; +import com.yahoo.jrt.Acceptor; +import com.yahoo.jrt.ListenFailedException; +import com.yahoo.jrt.Method; +import com.yahoo.jrt.Spec; +import com.yahoo.jrt.Supervisor; +import com.yahoo.jrt.Transport; + +import java.util.logging.Logger; + +import static com.yahoo.log.LogLevel.DEBUG; +import static java.util.logging.Level.INFO; + +/** + * Contains the connector for the rpc server, to prevent it from going down after component reconfiguration. + * This will only be recreated if the rpc port changes, which should never happen under normal circumstances. + * + * @author gjoranv + */ +public class RpcConnector extends AbstractComponent { + private static final Logger log = Logger.getLogger(RpcConnector.class.getName()); + + private final Supervisor supervisor = new Supervisor(new Transport()); + private final Acceptor acceptor; + + public RpcConnector(RpcConnectorConfig config) { + Spec spec = new Spec(config.port()); + try { + acceptor = supervisor.listen(spec); + log.log(DEBUG, "Listening on " + spec.host() + ":" + spec.port()); + } catch (ListenFailedException e) { + stop(); + log.log(INFO, "Failed listening at " + spec.host() + ":" + spec.port()); + throw new RuntimeException("Could not listen at " + spec, e); + } + } + + /** + * Adds a method. If a method with the same name already exists, it will be replaced. + * @param method The method to add. + */ + public void addMethod(Method method) { + supervisor.addMethod(method); + } + + public void stop() { + acceptor.shutdown().join(); + supervisor.transport().shutdown().join(); + } + + @Override + public void deconstruct() { + stop(); + super.deconstruct(); + } +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/rpc/RpcServer.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/rpc/RpcServer.java new file mode 100644 index 00000000000..e0e0e7a3f87 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/rpc/RpcServer.java @@ -0,0 +1,213 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.rpc; + +import ai.vespa.metricsproxy.core.MetricsManager; +import ai.vespa.metricsproxy.metric.model.ConsumerId; +import ai.vespa.metricsproxy.metric.model.MetricsPacket; +import ai.vespa.metricsproxy.service.VespaService; +import ai.vespa.metricsproxy.service.VespaServices; +import com.yahoo.jrt.ErrorCode; +import com.yahoo.jrt.Method; +import com.yahoo.jrt.Request; +import com.yahoo.jrt.Spec; +import com.yahoo.jrt.StringValue; +import com.yahoo.jrt.Supervisor; +import com.yahoo.jrt.Transport; + +import java.time.Instant; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static ai.vespa.metricsproxy.metric.model.ConsumerId.toConsumerId; +import static ai.vespa.metricsproxy.metric.model.json.JsonUtil.toMetricsPackets; +import static ai.vespa.metricsproxy.metric.model.json.JsonUtil.toYamasArray; +import static com.yahoo.collections.CollectionUtil.mkString; +import static com.yahoo.log.LogLevel.DEBUG; +import static java.util.logging.Level.INFO; +import static java.util.logging.Level.WARNING; + +/** + * Rpc server for the metrics proxy. + * + * When a new object is created after reconfiguration, it will claim ownership of the methods + * in the given {@link RpcConnector}. This is ok because at the time this component is created, + * all components it depends on are already created. + * + * @author gjoranv + */ +public class RpcServer { + + private static final Logger log = Logger.getLogger(RpcServer.class.getName()); + + private static int LOG_SPENT_TIME_LIMIT = 10 * 1000; // ms. same as default client RPC timeout used in rpc_invoke + + private final VespaServices vespaServices; + private final MetricsManager metricsManager; + + public RpcServer(RpcConnector connector, VespaServices vespaServices, MetricsManager metricsManager) { + this.vespaServices = vespaServices; + this.metricsManager = metricsManager; + addMethods(connector); + log.log(DEBUG, "RPC server created"); + } + + private void addMethods(RpcConnector connector) { + // Add/replace this method first to increase likelihood of getting extra metrics and global dimensions + connector.addMethod( + new Method("setExtraMetrics", "s", "", this::setExtraMetrics) + .methodDesc("Set extra metrics that will be added to output from getMetricsForYamas.") + .paramDesc(0, "metricsJson", "The metrics in json format")); + + connector.addMethod( + new Method("getMetricsById", "s", "s", this::getMetricsById) + .methodDesc("Get Vespa metrics for the service with the given Id") + .paramDesc(0, "id", "The id of the service") + .returnDesc(0, "ret", "Vespa metrics")); + + connector.addMethod( + new Method("getServices", "", "s", this::getServices) + .methodDesc("Get Vespa services monitored by this metrics proxy") + .returnDesc(0, "ret", "Vespa metrics")); + + connector.addMethod( + new Method("getMetricsForYamas", "s", "s", this::getMetricsForYamas) + .methodDesc("Get JSON formatted Vespa metrics for a given service name or 'all'") + .paramDesc(0, "service", "The vespa service name, or 'all'") + .returnDesc(0, "ret", "Vespa metrics")); + + connector.addMethod( + new Method("getHealthMetricsForYamas", "s", "s", this::getHealthMetricsForYamas) + .methodDesc("Get JSON formatted Health check for a given service name or 'all'") + .paramDesc(0, "service", "The vespa service name") + .returnDesc(0, "ret", "Vespa metrics")); + + connector.addMethod( + new Method("getAllMetricNamesForService", "ss", "s", this::getAllMetricNamesForService) + .methodDesc("Get metric names known for service ") + .paramDesc(0, "service", "The vespa service name'") + .paramDesc(1, "consumer", "The consumer'") + .returnDesc(0, "ret", "Metric names, one metric name per line")); + } + + void getAllMetricNamesForService(Request req) { + String service = req.parameters().get(0).asString(); + ConsumerId consumer = toConsumerId(req.parameters().get(1).asString()); + withExceptionHandling(req, () -> { + String metricNames = metricsManager.getMetricNamesForServiceAndConsumer(service, consumer); + req.returnValues().add(new StringValue(metricNames)); + }); + } + + void getMetricsById(Request req) { + String id = req.parameters().get(0).asString(); + withExceptionHandling(req, () -> { + String metricsString = metricsManager.getMetricsByConfigId(id); + req.returnValues().add(new StringValue(metricsString)); + }); + } + + + void getServices(Request req) { + withExceptionHandling(req, () -> { + String servicesString = metricsManager.getAllVespaServices(); + req.returnValues().add(new StringValue(servicesString)); + }); + } + + void getMetricsForYamas(Request req) { + Instant startTime = Instant.now(); + req.detach(); + String service = req.parameters().get(0).asString(); + log.log(DEBUG, () -> "getMetricsForYamas called at " + startTime + " with argument: " + service); + List<VespaService> services = vespaServices.getMonitoringServices(service); + log.log(DEBUG, () -> "Getting metrics for services: " + mkString(services, "[", ", ", "]")); + if (services.isEmpty()) setNoServiceError(req, service); + else withExceptionHandling(req, () -> { + List<MetricsPacket> packets = metricsManager.getMetrics(services, startTime); + log.log(DEBUG,() -> "Returning metrics packets:\n" + mkString(packets, "\n")); + req.returnValues().add(new StringValue(toYamasArray(packets).serialize())); + }); + req.returnRequest(); + } + + void getHealthMetricsForYamas(Request req) { + req.detach(); + String service = req.parameters().get(0).asString(); + List<VespaService> services = vespaServices.getMonitoringServices(service); + if (services.isEmpty()) setNoServiceError(req, service); + else withExceptionHandling(req, () -> { + List<MetricsPacket> packets = metricsManager.getHealthMetrics(services); + req.returnValues().add(new StringValue(toYamasArray(packets, true).serialize())); + }); + req.returnRequest(); + } + + void setExtraMetrics(Request req) { + String metricsJson = req.parameters().get(0).asString(); + log.log(DEBUG, "setExtraMetrics called with argument: " + metricsJson); + withExceptionHandling(req, () -> metricsManager.setExtraMetrics(toMetricsPackets(metricsJson))); + } + + private static void withExceptionHandling(Request req, ThrowingRunnable runnable) { + try { + TimeTracker timeTracker = new TimeTracker(req); + runnable.run(); + timeTracker.logSpentTime(); + } catch (Exception e) { + log.log(WARNING, "Got exception when running RPC command " + req.methodName(), e); + setMethodFailedError(req, e); + } catch (Error e) { + log.log(WARNING, "Got error when running RPC command " + req.methodName(), e); + setMethodFailedError(req, e); + } catch (Throwable t) { + log.log(WARNING, "Got throwable (non-error, non-exception) when running RPC command " + req.methodName(), t); + setMethodFailedError(req, t); + } + } + + private static void setMethodFailedError(Request req, Throwable t) { + String msg = "Request failed due to internal error: " + t.getClass().getName() + ": " + t.getMessage(); + req.setError(ErrorCode.METHOD_FAILED, msg); + req.returnValues().add(new StringValue("")); + } + + private static void setNoServiceError(Request req, String serviceName) { + String msg = "No service with name '" + serviceName + "'"; + req.setError(ErrorCode.BAD_REQUEST, msg); + req.returnValues().add(new StringValue("")); + } + + + private static class TimeTracker { + private final long startTime = System.currentTimeMillis(); + private final Request request; + + private TimeTracker(Request request) { + this.request = request; + } + + long spentTime() { + return System.currentTimeMillis() - startTime; + } + + private void logSpentTime() { + Level logLevel = DEBUG; + if (spentTime() > LOG_SPENT_TIME_LIMIT) { + logLevel = INFO; + } + if (log.isLoggable(logLevel)) { + log.log(logLevel, "RPC request '" + request.methodName() + "' with parameters '" + + request.parameters() + "' took " + spentTime() + " ms"); + } + } + } + + private interface ThrowingRunnable { + void run() throws Exception; + } + +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/ConfigSentinelClient.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/ConfigSentinelClient.java new file mode 100644 index 00000000000..875b6190763 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/ConfigSentinelClient.java @@ -0,0 +1,176 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.service; + +import com.google.inject.Inject; +import com.yahoo.log.LogLevel; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Logger; + +/** + * Connects to the config sentinel and gets information like pid for the services on the node + */ +public class ConfigSentinelClient { + private final static Logger log = Logger.getLogger(ConfigSentinelClient.class.getName()); + + private final CmdClient client; + + @Inject + public ConfigSentinelClient() { + this.client = new CmdClient(); + } + + /** + * Update all services reading from config sentinel + * + * @param services The list of services + */ + synchronized void updateServiceStatuses(List<VespaService> services) { + try { + setStatus(services); + } catch (Exception e) { + log.log(LogLevel.ERROR, "Unable to update service pids from sentinel", e); + } + } + + + /** + * Update status + * + * @param s The service to update the status for + */ + public synchronized void ping(VespaService s) { + List<VespaService> services = new ArrayList<>(); + services.add(s); + log.log(LogLevel.DEBUG, "Ping for service " + s); + try { + setStatus(services); + } catch (Exception e) { + log.log(LogLevel.ERROR, "Unable to update service pids from sentinel", e); + } + } + + /** + * Update the status (pid check etc) + * + * @param services list of services + * @throws Exception if something went wrong + */ + protected synchronized void setStatus(List<VespaService> services) throws Exception { + InputStream in; + PrintStream out; + client.connect(); + + in = client.getInputStream(); + BufferedReader reader = new BufferedReader(new InputStreamReader(in)); + String line; + List<VespaService> updatedServices = new ArrayList<>(); + while ((line = reader.readLine()) != null) { + if (line.equals("")) { + break; + } + + VespaService s = parseServiceString(line, services); + if (s != null) { + updatedServices.add(s); + } + } + + //Check if there are services that were not found in output + //from the sentinel + for (VespaService s : services) { + if ((!s.getServiceName().equals("configserver")) && !updatedServices.contains(s)) { + log.log(LogLevel.DEBUG,"Service " + s + " is no longer found with sentinel - setting alive = false"); + s.setAlive(false); + } + } + + //Close streams + reader.close(); + client.disconnect(); + } + + protected static VespaService parseServiceString(String line, List<VespaService> services) { + String[] parts = line.split(" "); + if (parts.length < 3) + return null; + + String name = parts[0]; + int pid = -1; + String state = null; + VespaService service = null; + + for (VespaService s : services) { + if (s.getInstanceName().compareToIgnoreCase(name) == 0) { + service = s; + break; + } + } + + //Could not find this service + //nothing wrong with that as the check is invoked per line from sentinel + if (service == null) { + return service; + } + + for (int i = 1; i < parts.length; i++) { + String keyValue[] = parts[i].split("="); + + String key = keyValue[0]; + String value = keyValue[1]; + + if (key.equals("state")) { + state = value; + } else if (key.equals("pid")) { + pid = Integer.parseInt(value); + } + } + + if (state != null) { + service.setState(state); + if (pid >= 0 && "RUNNING".equals(state)) { + service.setAlive(true); + service.setPid(pid); + } else { + service.setAlive(false); + + } + } else { + service.setAlive(false); + } + return service; + } + + static class CmdClient { + Process proc; + // NOTE: hostname/port not used yet + void connect() { + String[] args = new String[]{"vespa-sentinel-cmd", "list"}; + try { + proc = Runtime.getRuntime().exec(args); + } catch (Exception e) { + log.log(LogLevel.WARNING, "could not run vespa-sentinel-cmd: "+e); + proc = null; + } + } + void disconnect() { + if (proc.isAlive()) { + proc.destroy(); + } + proc = null; + } + InputStream getInputStream() { + return (proc != null) + ? proc.getInputStream() + : new java.io.ByteArrayInputStream(new byte[0]); + } + } +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/CpuJiffies.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/CpuJiffies.java new file mode 100644 index 00000000000..312e7d5c0c1 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/CpuJiffies.java @@ -0,0 +1,43 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.service; + +class CpuJiffies { + private int cpuId; + private long jiffies; + + CpuJiffies(String line) { + parseLine(line); + } + + private void parseLine(String line) { + String elems[]; + String cpuId; + long jiffies; + + elems = line.split("\\s+"); + cpuId = elems[0].substring(3); + if (cpuId.length() == 0) { + this.cpuId = -1; + } else { + this.cpuId = Integer.parseInt(cpuId); + } + + jiffies = 0; + for (int i = 1; i < elems.length; i++) { + jiffies += Long.parseLong(elems[i].replaceAll("[\\n\\r]+", "")); + } + + this.jiffies = jiffies; + } + + public int getCpuId() { + return cpuId; + } + + public long getTotalJiffies() { + return jiffies; + } +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/DummyHealthMetricFetcher.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/DummyHealthMetricFetcher.java new file mode 100644 index 00000000000..f87171a42dc --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/DummyHealthMetricFetcher.java @@ -0,0 +1,34 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.service; + +import ai.vespa.metricsproxy.metric.HealthMetric; + +/** + * Dummy class used for getting health status for a vespa service that has no HTTP service + * for getting health status + * + * @author hmusum + */ +public class DummyHealthMetricFetcher extends RemoteHealthMetricFetcher { + + /** + * @param service The service to fetch metrics from + */ + DummyHealthMetricFetcher(VespaService service) { + super(service, 0); + } + + /** + * Connect to remote service over http and fetch metrics + */ + public HealthMetric getHealth(int fetchCount) { + if (service.isAlive()) { + return HealthMetric.getOk("Service is running - pid check only"); + } else { + return HealthMetric.getFailed("Service is not running - pid check only"); + } + } +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/DummyMetricsFetcher.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/DummyMetricsFetcher.java new file mode 100644 index 00000000000..f21d125e279 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/DummyMetricsFetcher.java @@ -0,0 +1,30 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.service; + +import ai.vespa.metricsproxy.metric.Metrics; + +/** + * Dummy class used for getting health status for a vespa service that has no HTTP service + * for getting metrics + * + * @author hmusum + */ +public class DummyMetricsFetcher extends RemoteMetricsFetcher { + + /** + * @param service The service to fetch metrics from + */ + DummyMetricsFetcher(VespaService service) { + super(service, 0); + } + + /** + * Connect to remote service over http and fetch metrics + */ + public Metrics getMetrics(int fetchCount) { + return new Metrics(); + } +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/HttpMetricFetcher.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/HttpMetricFetcher.java new file mode 100644 index 00000000000..9094ef22c20 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/HttpMetricFetcher.java @@ -0,0 +1,94 @@ +/* +* Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.service; + +import ai.vespa.util.http.VespaHttpClientBuilder; +import com.yahoo.log.LogLevel; +import com.yahoo.yolean.Exceptions; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.BasicResponseHandler; +import org.apache.http.impl.client.CloseableHttpClient; + +import java.io.IOException; +import java.net.URI; +import java.util.logging.Logger; + +/** + * HTTP client to get metrics or health data from a service + * + * @author hmusum + * @author bjorncs + */ +public abstract class HttpMetricFetcher { + private final static Logger log = Logger.getLogger(HttpMetricFetcher.class.getPackage().getName()); + public final static String STATE_PATH = "/state/v1/"; + final static String METRICS_PATH = STATE_PATH + "metrics"; + final static String HEALTH_PATH = STATE_PATH + "health"; + // The call to apache will do 3 retries. As long as we check the services in series, we can't have this too high. + public static int CONNECTION_TIMEOUT = 5000; + private final static int SOCKET_TIMEOUT = 60000; + private final URI url; + protected final VespaService service; + private static final CloseableHttpClient httpClient = createHttpClient(); + + + /** + * @param service The service to fetch metrics from + * @param port The port to use + */ + HttpMetricFetcher(VespaService service, int port, String path) { + this.service = service; + + String u = "http://localhost:" + port + path; + this.url = URI.create(u); + log.log(LogLevel.DEBUG, "Fetching metrics from " + u + " with timeout " + CONNECTION_TIMEOUT); + } + + String getJson() throws IOException { + log.log(LogLevel.DEBUG, "Connecting to url " + url + " for service '" + service + "'"); + return httpClient.execute(new HttpGet(url), new BasicResponseHandler()); + } + + public String toString() { + return this.getClass().getSimpleName() + " using " + url; + } + + String errMsgNoResponse(IOException e) { + return "Unable to get response from service '" + service + "': " + + Exceptions.toMessageString(e); + } + + void handleException(Exception e, String data, int timesFetched) { + logMessage("Unable to parse json '" + data + "' for service '" + service + "': " + + Exceptions.toMessageString(e), timesFetched); + } + + private void logMessage(String message, int timesFetched) { + if (service.isAlive() && timesFetched > 5) { + log.log(LogLevel.INFO, message); + } else { + log.log(LogLevel.DEBUG, message); + } + } + + void logMessageNoResponse(String message, int timesFetched) { + if (timesFetched > 5) { + log.log(LogLevel.WARNING, message); + } else { + log.log(LogLevel.INFO, message); + } + } + + private static CloseableHttpClient createHttpClient() { + return VespaHttpClientBuilder.create() + .setUserAgent("metrics-proxy-http-client") + .setDefaultRequestConfig(RequestConfig.custom() + .setConnectTimeout(CONNECTION_TIMEOUT) + .setSocketTimeout(SOCKET_TIMEOUT) + .build()) + .build(); + } +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteHealthMetricFetcher.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteHealthMetricFetcher.java new file mode 100644 index 00000000000..503f582a827 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteHealthMetricFetcher.java @@ -0,0 +1,77 @@ +/* +* Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.service; + +import ai.vespa.metricsproxy.metric.HealthMetric; +import com.yahoo.log.LogLevel; +import org.json.JSONException; +import org.json.JSONObject; + +import java.io.IOException; +import java.util.logging.Logger; + +/** + * Fetch health status for a given vespa service + * + * @author Jo Kristian Bergum + */ +public class RemoteHealthMetricFetcher extends HttpMetricFetcher { + + private final static Logger log = Logger.getLogger(RemoteHealthMetricFetcher.class.getPackage().getName()); + + /** + * @param service The service to fetch metrics from + * @param port The port to use + */ + public RemoteHealthMetricFetcher(VespaService service, int port) { + super(service, port, HEALTH_PATH); + } + + /** + * Connect to remote service over http and fetch metrics + */ + public HealthMetric getHealth(int fetchCount) { + String data = "{}"; + try { + data = getJson(); + } catch (IOException e) { + logMessageNoResponse(errMsgNoResponse(e), fetchCount); + } + return createHealthMetrics(data, fetchCount); + } + + /** + * Connect to remote service over http and fetch metrics + */ + HealthMetric createHealthMetrics(String data, int fetchCount) { + HealthMetric healthMetric = HealthMetric.getFailed("Failed fetching status page for service"); + try { + healthMetric = parse(data); + } catch (Exception e) { + handleException(e, data, fetchCount); + } + return healthMetric; + } + + private HealthMetric parse(String data) { + if (data == null || data.isEmpty()) { + return HealthMetric.getFailed("Empty response from status page"); + } + try { + JSONObject o = new JSONObject(data); + JSONObject status = o.getJSONObject("status"); + String code = status.getString("code"); + String message = ""; + if (status.has("message")) { + message = status.getString("message"); + } + return HealthMetric.get(code, message); + + } catch (JSONException e) { + log.log(LogLevel.DEBUG, "Failed to parse json response from metrics page:" + e + ":" + data); + return HealthMetric.getFailed("Not able to parse json from status page"); + } + } +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteMetricsFetcher.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteMetricsFetcher.java new file mode 100644 index 00000000000..a606ec7d8cd --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/RemoteMetricsFetcher.java @@ -0,0 +1,129 @@ +/* +* Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.service; + +import ai.vespa.metricsproxy.metric.Metric; +import ai.vespa.metricsproxy.metric.Metrics; +import ai.vespa.metricsproxy.metric.model.DimensionId; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import static ai.vespa.metricsproxy.metric.model.DimensionId.toDimensionId; + +/** + * Fetch metrics for a given vespa service + * + * @author Jo Kristian Bergum + */ +public class RemoteMetricsFetcher extends HttpMetricFetcher { + /** + * @param service The service to fetch metrics from + * @param port The port to use + */ + RemoteMetricsFetcher(VespaService service, int port) { + super(service, port, METRICS_PATH); + } + + /** + * Connect to remote service over http and fetch metrics + */ + public Metrics getMetrics(int fetchCount) { + String data = "{}"; + try { + data = getJson(); + } catch (IOException e) { + logMessageNoResponse(errMsgNoResponse(e), fetchCount); + } + + return createMetrics(data, fetchCount); + } + + /** + * Connect to remote service over http and fetch metrics + */ + public Metrics createMetrics(String data, int fetchCount) { + Metrics remoteMetrics = new Metrics(); + try { + remoteMetrics = parse(data); + } catch (Exception e) { + handleException(e, data, fetchCount); + } + + return remoteMetrics; + } + + Metrics parse(String data) throws JSONException { + JSONObject o = new JSONObject(data); + if (!(o.has("metrics"))) { + return new Metrics(); //empty + } + + JSONObject metrics = o.getJSONObject("metrics"); + JSONArray values; + long timestamp; + + try { + JSONObject snapshot = metrics.getJSONObject("snapshot"); + timestamp = (long) snapshot.getDouble("to"); + values = metrics.getJSONArray("values"); + } catch (JSONException e) { + // snapshot might not have been produced. Do not throw exception into log + return new Metrics(); + } + long now = System.currentTimeMillis() / 1000; + timestamp = Metric.adjustTime(timestamp, now); + Metrics m = new Metrics(timestamp); + + Map<DimensionId, String> noDims = Collections.emptyMap(); + Map<String, Map<DimensionId, String>> uniqueDimensions = new HashMap<>(); + for (int i = 0; i < values.length(); i++) { + JSONObject metric = values.getJSONObject(i); + String name = metric.getString("name"); + String description = ""; + + if (metric.has("description")) { + description = metric.getString("description"); + } + + Map<DimensionId, String> dim = noDims; + if (metric.has("dimensions")) { + JSONObject dimensions = metric.getJSONObject("dimensions"); + StringBuilder sb = new StringBuilder(); + for (Iterator<?> it = dimensions.keys(); it.hasNext(); ) { + String k = (String) it.next(); + String v = dimensions.getString(k); + sb.append(toDimensionId(k)).append(v); + } + if ( ! uniqueDimensions.containsKey(sb.toString())) { + dim = new HashMap<>(); + for (Iterator<?> it = dimensions.keys(); it.hasNext(); ) { + String k = (String) it.next(); + String v = dimensions.getString(k); + dim.put(toDimensionId(k), v); + } + uniqueDimensions.put(sb.toString(), Collections.unmodifiableMap(dim)); + } + dim = uniqueDimensions.get(sb.toString()); + } + + JSONObject aggregates = metric.getJSONObject("values"); + for (Iterator<?> it = aggregates.keys(); it.hasNext(); ) { + String aggregator = (String) it.next(); + Number value = (Number) aggregates.get(aggregator); + StringBuilder metricName = (new StringBuilder()).append(name).append(".").append(aggregator); + m.add(new Metric(metricName.toString(), value, timestamp, dim, description)); + } + } + + return m; + } +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/ServiceListener.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/ServiceListener.java new file mode 100644 index 00000000000..810eb5fb908 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/ServiceListener.java @@ -0,0 +1,14 @@ +/* +* Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.service; + +import java.util.List; + +/** + * @author Unknown + */ +public interface ServiceListener { + void setServices(List<VespaService> services); +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java new file mode 100644 index 00000000000..9f6614668a5 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java @@ -0,0 +1,259 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.service; + +import ai.vespa.metricsproxy.metric.Metric; +import ai.vespa.metricsproxy.metric.Metrics; +import com.yahoo.log.LogLevel; + +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Timer; +import java.util.TimerTask; +import java.util.concurrent.ConcurrentHashMap; +import java.util.logging.Logger; + +/** + * Class to get data from the system and update the services at given intervals. + * TODO: rewrite to use ScheduledExecutorService or just call poll() directly. + * + * @author Eirik Nygaard + */ +public class SystemPoller implements ServiceListener { + final private static Logger log = Logger.getLogger(SystemPoller.class.getPackage().getName()); + + private final int pollingIntervalSecs; + private volatile List<VespaService> services; + + private final int memoryTypeVirtual = 0; + private final int memoryTypeResident = 1; + private final Map<VespaService, Long> lastCpuJiffiesMetrics = new ConcurrentHashMap<>(); + private final Timer systemPollTimer; + + private long lastTotalCpuJiffies = -1; + + public SystemPoller(List<VespaService> services, int pollingIntervalSecs) { + this.services = services; + this.pollingIntervalSecs = pollingIntervalSecs; + systemPollTimer = new Timer("systemPollTimer", true); + } + + @Override + public void setServices(List<VespaService> services) { + log.log(LogLevel.DEBUG, "Setting services in SystemPoller to: " + services); + this.services = services; + } + + void stop() { + systemPollTimer.cancel(); + } + + /** + * Return memory usage for a given process, both resident and virtual is + * returned. + * + * @param service The instance to get memory usage for + * @return array[0] = memoryResident, array[1] = memoryVirtual (kB units) + */ + long[] getMemoryUsage(VespaService service) { + long size[] = new long[2]; + BufferedReader br; + int pid = service.getPid(); + + size[0] = 0; + size[1] = 0; + try { + br = new BufferedReader(new FileReader("/proc/" + pid + "/smaps")); + } catch (FileNotFoundException ex) { + markDead(service); + return size; + } + String line; + try { + while ((line = br.readLine()) != null) { + String[] elems = line.split("\\s+"); + /* Memory size is given in kB - convert to bytes by multiply with 1024*/ + if (line.startsWith("Rss:")) { + size[memoryTypeResident] += Long.parseLong(elems[1]) * 1024; + } else if (line.startsWith("Size:")) { + size[memoryTypeVirtual] += Long.parseLong(elems[1]) * 1024; + } + } + + br.close(); + } catch (IOException ex) { + log.log(LogLevel.DEBUG, "Unable to read line from smaps file", ex); + return size; + } + + return size; + } + + /** + * Mark a service as dead. + * + * @param service The service to mark as dead. + */ + private static void markDead(VespaService service) { + service.setAlive(false); + } + + /** + * Poll services for system metrics + */ + void poll() { + long startTime = System.currentTimeMillis(); + boolean someAlive = false; + + /* Don't do any work if there are no known services */ + if (services.isEmpty()) { + schedule(); + return; + } + + log.log(LogLevel.DEBUG, "Monitoring system metrics for " + services.size() + " services"); + + long sysJiffies = getNormalizedSystemJiffies(); + for (VespaService s : services) { + + + if(s.isAlive()) { + someAlive = true; + } + + Metrics metrics = new Metrics(); + log.log(LogLevel.DEBUG, "Current size of system metrics for service " + s + " is " + metrics.size()); + + long[] size = getMemoryUsage(s); + log.log(LogLevel.DEBUG, "Updating memory metric for service " + s); + + metrics.add(new Metric("memory_virt", size[memoryTypeVirtual], startTime / 1000)); + metrics.add(new Metric("memory_rss", size[memoryTypeResident], startTime / 1000)); + + long procJiffies = getPidJiffies(s); + if (lastTotalCpuJiffies >= 0 && lastCpuJiffiesMetrics.containsKey(s)) { + long last = lastCpuJiffiesMetrics.get(s); + long diff = procJiffies - last; + + if (diff >= 0) { + metrics.add(new Metric("cpu", 100 * ((double) diff) / (sysJiffies - lastTotalCpuJiffies), startTime / 1000)); + } + } + lastCpuJiffiesMetrics.put(s, procJiffies); + s.setSystemMetrics(metrics); + } + + lastTotalCpuJiffies = sysJiffies; + + // If none of the services were alive, reschedule in a short time + if (!someAlive) { + reschedule(System.currentTimeMillis() - startTime); + } else { + schedule(); + } + } + + long getPidJiffies(VespaService service) { + BufferedReader in; + String line; + String[] elems; + int pid = service.getPid(); + + try { + in = new BufferedReader(new FileReader("/proc/" + pid + "/stat")); + } catch (FileNotFoundException ex) { + log.log(LogLevel.DEBUG, "Unable to find pid in proc directory " + pid); + service.setAlive(false); + return 0; + } + + try { + line = in.readLine(); + in.close(); + } catch (IOException ex) { + log.log(LogLevel.DEBUG, "Unable to read line from process stat file", ex); + return 0; + } + + elems = line.split(" "); + + /* Add user mode and kernel mode jiffies for the given process */ + return Long.parseLong(elems[13]) + Long.parseLong(elems[14]); + } + + long getNormalizedSystemJiffies() { + BufferedReader in; + String line; + ArrayList<CpuJiffies> jiffies = new ArrayList<>(); + CpuJiffies total = null; + + try { + in = new BufferedReader(new FileReader("/proc/stat")); + } catch (FileNotFoundException ex) { + log.log(LogLevel.ERROR, "Unable to open stat file", ex); + return 0; + } + try { + while ((line = in.readLine()) != null) { + if (line.startsWith("cpu ")) { + total = new CpuJiffies(line); + } else if (line.startsWith("cpu")) { + jiffies.add(new CpuJiffies(line)); + } + } + + in.close(); + } catch (IOException ex) { + log.log(LogLevel.ERROR, "Unable to read line from stat file", ex); + return 0; + } + + /* Normalize so that a process that uses an entire CPU core will get 100% util */ + if (total != null) { + return total.getTotalJiffies() / jiffies.size(); + } else { + return 0; + } + } + + private void schedule(long time) { + try { + systemPollTimer.schedule(new PollTask(this), time); + } catch(IllegalStateException e){ + log.info("Tried to schedule task, but timer was already shut down."); + } + } + + public void schedule() { + schedule(pollingIntervalSecs * 1000); + } + + private void reschedule(long skew) { + long sleep = (pollingIntervalSecs * 1000) - skew; + + // Don't sleep less than 1 min + sleep = Math.max(60 * 1000, sleep); + schedule(sleep); + } + + + private static class PollTask extends TimerTask { + private final SystemPoller poller; + + PollTask(SystemPoller poller) { + this.poller = poller; + } + + @Override + public void run() { + poller.poll(); + } + } +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPollerProvider.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPollerProvider.java new file mode 100644 index 00000000000..dea7b2b4809 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPollerProvider.java @@ -0,0 +1,33 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.service; + +import ai.vespa.metricsproxy.core.MonitoringConfig; +import com.yahoo.container.di.componentgraph.Provider; + +/** + * @author gjoranv + */ +public class SystemPollerProvider implements Provider<SystemPoller> { + + private final SystemPoller poller; + + /** + * @param services The list of VespaService instances to monitor for System metrics + * @param monitoringConfig The interval in seconds between each polling. + */ + public SystemPollerProvider (VespaServices services, MonitoringConfig monitoringConfig) { + poller = new SystemPoller(services.getVespaServices(), 60 * monitoringConfig.intervalMinutes()); + poller.poll(); + } + + public void deconstruct() { + poller.stop(); + } + + public SystemPoller get() { + return poller; + } +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/VespaService.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/VespaService.java new file mode 100644 index 00000000000..d3f674176b3 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/VespaService.java @@ -0,0 +1,216 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.service; + +import ai.vespa.metricsproxy.metric.HealthMetric; +import ai.vespa.metricsproxy.metric.Metrics; +import ai.vespa.metricsproxy.metric.model.DimensionId; + +import java.util.Collections; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; + + +/** + * Represents a Vespa service + * + * @author jobergum + */ +public class VespaService implements Comparable<VespaService> { + + private static final Map<DimensionId, String> EMPTY_DIMENSIONS = Collections.emptyMap(); + private static final String DEFAULT_MONITORING_PREFIX = "vespa"; + public static final String SEPARATOR = "."; + + private final String instanceName; + private final String configId; + private final String serviceName; + private final String monitoringPrefix; + private final Map<DimensionId, String> dimensions; + + + private volatile int pid = -1; + private volatile String state = "UNKNOWN"; + + // Used to keep the last polled system metrics for service + private Metrics systemMetrics; + + private final int statePort; + + private final RemoteHealthMetricFetcher remoteHealthMetricFetcher; + private final RemoteMetricsFetcher remoteMetricsFetcher; + + private boolean isAlive; + + // Used to keep track of log level when health or metrics requests fail + private AtomicInteger metricsFetchCount = new AtomicInteger(0); + private AtomicInteger healthFetchCount = new AtomicInteger(0); + + + public static VespaService create(String name, String id, int statePort) { + return create(name,id, statePort, DEFAULT_MONITORING_PREFIX, EMPTY_DIMENSIONS); + } + + public static VespaService create(String name, String id, int statePort, String monitoringName, Map<DimensionId, String> dimensions) { + String serviceName = name.replaceAll("\\d*$", ""); + return new VespaService(serviceName, name, id, statePort, monitoringName, dimensions); + } + + VespaService(String serviceName, String configId) { + this(serviceName, serviceName, configId); + } + + VespaService(String serviceName, String instanceName, String configId) { + this(serviceName, instanceName, configId, -1, DEFAULT_MONITORING_PREFIX, EMPTY_DIMENSIONS); + } + + private VespaService(String serviceName, String instanceName, String configId, + int statePort, String monitoringPrefix, + Map<DimensionId, String> dimensions) { + this.serviceName = serviceName; + this.instanceName = instanceName; + this.monitoringPrefix = monitoringPrefix; + this.configId = configId; + this.statePort = statePort; + this.dimensions = dimensions; + this.systemMetrics = new Metrics(); + this.isAlive = false; + this.remoteMetricsFetcher = (this.statePort> 0) ? new RemoteMetricsFetcher(this, this.statePort) : new DummyMetricsFetcher(this); + this.remoteHealthMetricFetcher = (this.statePort > 0) ? new RemoteHealthMetricFetcher(this, this.statePort) : new DummyHealthMetricFetcher(this); + } + + /** + * The name used for this service in the monitoring system: + * monitoring-system-name.serviceName + */ + public String getMonitoringName() { + return monitoringPrefix + SEPARATOR + serviceName; + } + + @Override + public int compareTo(VespaService other) { + return this.getInstanceName().compareTo(other.getInstanceName()); + } + + /** + * Get the service name/type. E.g 'searchnode', but not 'searchnode2' + * + * @return the service name + */ + public String getServiceName() { + return this.serviceName; + } + + /** + * Get the instance name. E.g searchnode2 + * + * @return the instance service name + */ + public String getInstanceName() { + return this.instanceName; + } + + public Map<DimensionId, String> getDimensions() { + return dimensions; + } + + /** + * @return The health of this service + */ + public HealthMetric getHealth() { + HealthMetric healthMetric = remoteHealthMetricFetcher.getHealth(healthFetchCount.get()); + healthFetchCount.getAndIncrement(); + return healthMetric; + } + + /** + * Gets the system metrics for this service + * + * @return System metrics + */ + public synchronized Metrics getSystemMetrics() { + return this.systemMetrics; + } + + /** + * Get the Metrics registered for this service. Metrics are fetched over HTTP + * if a metric http port has been defined, otherwise from log file + * + * @return the non-system metrics + */ + public Metrics getMetrics() { + Metrics remoteMetrics = remoteMetricsFetcher.getMetrics(metricsFetchCount.get()); + metricsFetchCount.getAndIncrement(); + return remoteMetrics; + } + + /** + * Gets the config id of this service + * + * @return the config id + */ + public String getConfigId() { + return configId; + } + + /** + * The current pid of this service + * + * @return The pid + */ + public int getPid() { + return this.pid; + } + + /** + * update the pid of this service + * + * @param pid The pid that this service runs as + */ + public void setPid(int pid) { + this.pid = pid; + } + + /** + * Get the string representation of the state of this service + * + * @return string representing the state of this service - obtained from config-sentinel + */ + public String getState() { + return state; + } + + /** + * Update the state of this service + * + * @param state the new state + */ + public void setState(String state) { + this.state = state; + } + + /** + * Check if this pid/service is running + * + * @return true if the service is alive (e.g the pid is running) + */ + public boolean isAlive() { + return (isAlive && (pid >= 0)); + } + + @Override + public String toString() { + return instanceName + ":" + pid + ":" + state + ":" + configId; + } + + public void setAlive(boolean alive) { + this.isAlive = alive; + } + + public synchronized void setSystemMetrics(Metrics systemMetrics) { + this.systemMetrics = systemMetrics; + } + +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/VespaServices.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/VespaServices.java new file mode 100644 index 00000000000..2668c158ed6 --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/VespaServices.java @@ -0,0 +1,123 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +package ai.vespa.metricsproxy.service; + +import ai.vespa.metricsproxy.core.MonitoringConfig; +import ai.vespa.metricsproxy.metric.model.DimensionId; +import ai.vespa.metricsproxy.service.VespaServicesConfig.Service; +import com.google.common.annotations.VisibleForTesting; +import com.google.inject.Inject; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; + +import static ai.vespa.metricsproxy.core.MetricsConsumers.toUnmodifiableLinkedMap; +import static ai.vespa.metricsproxy.metric.model.DimensionId.toDimensionId; +import static com.yahoo.log.LogLevel.DEBUG; + +/** + * Creates representations for the Vespa services running on the node, + * and provides methods for updating and getting them. + * + * @author gjoranv + */ +public class VespaServices { + private static final Logger log = Logger.getLogger(VespaServices.class.getName()); + + public static final String ALL_SERVICES = "all"; + + private final ConfigSentinelClient sentinel; + private final List<VespaService> services; + + @Inject + public VespaServices(VespaServicesConfig config, MonitoringConfig monitoringConfig, ConfigSentinelClient sentinel) { + this.services = createServices(config, monitoringConfig.systemName()); + this.sentinel = sentinel; + } + + @VisibleForTesting + public VespaServices(List<VespaService> services) { + this.services = services; + sentinel = null; + } + + private List<VespaService> createServices(VespaServicesConfig servicesConfig, String monitoringSystemName) { + List<VespaService> services = new ArrayList<>(); + for (Service s : servicesConfig.service()) { + log.log(DEBUG, "Re-configuring service " + s.name()); + VespaService vespaService = VespaService.create(s.name(), s.configId(), s.healthport(), monitoringSystemName, + createServiceDimensions(s)); + services.add(vespaService); + } + log.log(DEBUG, "Created new services: " + services.size()); + updateServices(services); + return services; + } + + /** + * Sets 'alive=false' for services that are no longer running. + * Note that the status is updated in-place for the given services. + */ + public void updateServices(List<VespaService> services) { + if (sentinel != null) { + log.log(DEBUG, "Updating services "); + sentinel.updateServiceStatuses(services); + } + } + + /** + * Get all known vespa services + * + * @return A list of VespaService objects + */ + public List<VespaService> getVespaServices() { + return Collections.unmodifiableList(services); + } + + /** + * @param id The configid + * @return A list with size 1 as there should only be one service with the given configid + */ + public List<VespaService> getInstancesById(String id) { + List<VespaService> myServices = new ArrayList<>(); + for (VespaService s : services) { + if (s.getConfigId().equals(id)) { + myServices.add(s); + } + } + + return myServices; + } + + /** + * Get services matching pattern for the name used in the monitoring system. + * + * @param service name in monitoring system + service name, without index, e.g: vespa.container + * @return A list of VespaServices + */ + public List<VespaService> getMonitoringServices(String service) { + if (service.equalsIgnoreCase(ALL_SERVICES)) + return services; + + List<VespaService> myServices = new ArrayList<>(); + for (VespaService s : services) { + log.log(DEBUG, () -> "getMonitoringServices. service=" + service + ", checking against " + s + ", which has monitoring name " + s.getMonitoringName()); + if (s.getMonitoringName().equalsIgnoreCase(service)) { + myServices.add(s); + } + } + + return myServices; + } + + private static Map<DimensionId, String> createServiceDimensions(Service service) { + return service.dimension().stream().collect( + toUnmodifiableLinkedMap(dim -> toDimensionId(dim.key()), Service.Dimension::value)); + } + +} diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/package-info.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/package-info.java new file mode 100644 index 00000000000..b478cdf8b5b --- /dev/null +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/package-info.java @@ -0,0 +1,8 @@ +/* + * Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + */ + +@ExportPackage +package ai.vespa.metricsproxy.service; + +import com.yahoo.osgi.annotation.ExportPackage; |