From 9c52475534ca65d4cdb2db56f88ee6f04e27edeb Mon Sep 17 00:00:00 2001 From: gjoranv Date: Sat, 5 Nov 2016 23:53:39 +0100 Subject: Add MetricSet abstraction and a VespaMetricSet. - Move all vespa metrics definitions to VespaMetricSet. - MetricsConsumer has a MetricSet instead of a plain map. --- .../admin/monitoring/DefaultMetricConsumers.java | 309 +-------------------- .../yahoo/vespa/model/admin/monitoring/Metric.java | 1 - .../vespa/model/admin/monitoring/MetricSet.java | 93 +++++++ .../model/admin/monitoring/MetricsConsumer.java | 22 +- .../model/admin/monitoring/VespaMetricSet.java | 300 ++++++++++++++++++++ .../builder/xml/dom/DomMetricBuilderHelper.java | 14 +- .../model/admin/monitoring/MetricSetTest.java | 43 +++ .../vespa/model/admin/monitoring/MetricTest.java | 38 +++ 8 files changed, 510 insertions(+), 310 deletions(-) create mode 100644 config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricSet.java create mode 100644 config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java create mode 100644 config-model/src/test/java/com/yahoo/vespa/model/admin/monitoring/MetricSetTest.java create mode 100644 config-model/src/test/java/com/yahoo/vespa/model/admin/monitoring/MetricTest.java (limited to 'config-model') diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/DefaultMetricConsumers.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/DefaultMetricConsumers.java index d96b6af53b1..bfb7730982e 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/DefaultMetricConsumers.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/DefaultMetricConsumers.java @@ -1,320 +1,37 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.model.admin.monitoring; - -import com.yahoo.vespa.model.admin.monitoring.Metric; -import com.yahoo.vespa.model.admin.monitoring.MetricsConsumer; - -import java.util.ArrayList; -import java.util.Collection; import java.util.LinkedHashMap; -import java.util.List; import java.util.Map; /** - * A class to set up the default metrics for all services to be forwarded to Yamas + * This class sets up the default metrics and the default 'vespa' metrics consumer. + * + * TODO: remove for Vespa 7 or when the 'metric-consumers' element in 'admin' has been removed. * * @author Trygve Bolsø Berdal + * @author gjoranv */ +@SuppressWarnings("UnusedDeclaration") // All public apis are used by model amenders public class DefaultMetricConsumers { + private static final MetricSet vespaMetricSet = new VespaMetricSet(); + /** * Populates a map of with consumer as key and metrics for that consumer as value. The metrics - * are to be forwarded to consumers (ymon and yamas are the options at the moment). + * are to be forwarded to consumers. * * @return A map of default metric consumers and default metrics for that consumer. 
*/ - public Map getDefaultMetricsConsumers() { + @SuppressWarnings("UnusedDeclaration") + public static Map getDefaultMetricsConsumers() { Map metricsConsumers = new LinkedHashMap<>(); - metricsConsumers.put("yamas", getDefaultYamasConsumer()); + metricsConsumers.put("yamas", getYamasConsumer()); return metricsConsumers; } - private MetricsConsumer getDefaultYamasConsumer(){ - List metrics = new ArrayList<>(); - - metrics.addAll(getSearchNodeMetrics()); - metrics.addAll(getStorageMetrics()); - metrics.addAll(getDocprocMetrics()); - metrics.addAll(getClusterControllerMetrics()); - metrics.addAll(getQrserverMetrics()); - metrics.addAll(getContainerMetrics()); - metrics.addAll(getConfigServerMetrics()); - - return new MetricsConsumer("yamas", toMapByName(metrics)); - } - - private Map toMapByName(List metrics) { - Map metricMap = new LinkedHashMap<>(); - for (Metric metric : metrics) { - metricMap.put(metric.name, metric); - } - return metricMap; - } - - private Collection getConfigServerMetrics() { - List metrics = new ArrayList<>(); - - metrics.add(new Metric("configserver.requests.count", "configserver.requests")); - metrics.add(new Metric("configserver.failedRequests.count", "configserver.failedRequests")); - metrics.add(new Metric("configserver.latency.average", "configserver.latency")); - metrics.add(new Metric("configserver.cacheConfigElems.last", "configserver.cacheConfigElems")); - metrics.add(new Metric("configserver.cacheChecksumElems.last", "configserver.cacheChecksumElems")); - metrics.add(new Metric("configserver.hosts.last", "configserver.hosts")); - metrics.add(new Metric("configserver.delayedResponses.count", "configserver.delayedResponses")); - metrics.add(new Metric("configserver.sessionChangeErrors.count", "configserver.sessionChangeErrors")); - - return metrics; - } - - private Collection getContainerMetrics() { - List metrics = new ArrayList<>(); - - metrics.add(new Metric("serverRejectedRequests.rate")); - metrics.add(new 
Metric("serverRejectedRequests.count")); - - metrics.add(new Metric("serverThreadPoolSize.average")); - metrics.add(new Metric("serverThreadPoolSize.min")); - metrics.add(new Metric("serverThreadPoolSize.max")); - metrics.add(new Metric("serverThreadPoolSize.rate")); - metrics.add(new Metric("serverThreadPoolSize.count")); - metrics.add(new Metric("serverThreadPoolSize.last")); - - metrics.add(new Metric("serverActiveThreads.average")); - metrics.add(new Metric("serverActiveThreads.min")); - metrics.add(new Metric("serverActiveThreads.max")); - metrics.add(new Metric("serverActiveThreads.rate")); - metrics.add(new Metric("serverActiveThreads.count")); - metrics.add(new Metric("serverActiveThreads.last")); - - metrics.add(new Metric("httpapi_latency.average")); - metrics.add(new Metric("httpapi_pending.average")); - metrics.add(new Metric("httpapi_num_operations.rate")); - metrics.add(new Metric("httpapi_num_updates.rate")); - metrics.add(new Metric("httpapi_num_removes.rate")); - metrics.add(new Metric("httpapi_num_puts.rate")); - metrics.add(new Metric("httpapi_succeeded.rate")); - metrics.add(new Metric("httpapi_failed.rate")); - - metrics.add(new Metric("mem.heap.total.average")); - metrics.add(new Metric("mem.heap.free.average")); - metrics.add(new Metric("mem.heap.used.average")); - - return metrics; - } - - private Collection getClusterControllerMetrics() { - List metrics = new ArrayList<>(); - - metrics.add(new Metric("cluster-controller.down.count.last")); - metrics.add(new Metric("cluster-controller.initializing.count.last")); - metrics.add(new Metric("cluster-controller.maintenance.count.last")); - metrics.add(new Metric("cluster-controller.retired.count.last")); - metrics.add(new Metric("cluster-controller.stopping.count.last")); - metrics.add(new Metric("cluster-controller.up.count.last")); - metrics.add(new Metric("cluster-controller.cluster-state-change.count", "content.cluster-controller.cluster-state-change.count")); - - metrics.add(new 
Metric("cluster-controller.is-master.last")); - // TODO(hakonhall): Update this name once persistent "count" metrics has been implemented. - // DO NOT RELY ON THIS METRIC YET. - metrics.add(new Metric("cluster-controller.node-event.count")); - - return metrics; - } - - private Collection getDocprocMetrics() { - List metrics = new ArrayList<>(); - - // per chain - metrics.add(new Metric("documents_processed.rate", "documents_processed")); - - return metrics; - } - - private Collection getQrserverMetrics() { - List metrics = new ArrayList<>(); - - metrics.add(new Metric("peak_qps.average", "peak_qps")); - metrics.add(new Metric("search_connections.average", "search_connections")); - metrics.add(new Metric("active_queries.average", "active_queries")); - metrics.add(new Metric("queries.rate", "queries")); - metrics.add(new Metric("query_latency.average", "mean_query_latency")); - metrics.add(new Metric("query_latency.max", "max_query_latency")); - metrics.add(new Metric("query_latency.95percentile", "95p_query_latency")); - metrics.add(new Metric("query_latency.99percentile", "99p_query_latency")); - metrics.add(new Metric("failed_queries.rate", "failed_queries")); - metrics.add(new Metric("hits_per_query.average", "hits_per_query")); - metrics.add(new Metric("empty_results.rate", "empty_results")); - metrics.add(new Metric("requestsOverQuota.rate")); - metrics.add(new Metric("requestsOverQuota.count")); - - // Errors from qrserver - metrics.add(new Metric("error.timeout.rate","error.timeout")); - metrics.add(new Metric("error.backends_oos.rate","error.backends_oos")); - metrics.add(new Metric("error.plugin_failure.rate","error.plugin_failure")); - metrics.add(new Metric("error.backend_communication_error.rate","error.backend_communication_error")); - metrics.add(new Metric("error.empty_document_summaries.rate","error.empty_document_summaries")); - metrics.add(new Metric("error.invalid_query_parameter.rate","error.invalid_query_parameter")); - metrics.add(new 
Metric("error.internal_server_error.rate", "error.internal_server_error")); - metrics.add(new Metric("error.misconfigured_server.rate","error.misconfigured_server")); - metrics.add(new Metric("error.invalid_query_transformation.rate","error.invalid_query_transformation")); - metrics.add(new Metric("error.result_with_errors.rate","error.result_with_errors")); - metrics.add(new Metric("error.unspecified.rate","error.unspecified")); - metrics.add(new Metric("error.unhandled_exception.rate","error.unhandled_exception")); - metrics.add(new Metric("http.status.1xx.rate")); - metrics.add(new Metric("http.status.2xx.rate")); - metrics.add(new Metric("http.status.3xx.rate")); - metrics.add(new Metric("http.status.4xx.rate")); - metrics.add(new Metric("http.status.5xx.rate")); - - return metrics; - } - - private Collection getSearchNodeMetrics() { - List metrics = new ArrayList<>(); - - metrics.add(new Metric("proton.numstoreddocs.last", "documents_total")); - metrics.add(new Metric("proton.numindexeddocs.last", "documents_ready")); - metrics.add(new Metric("proton.numactivedocs.last", "documents_active")); - metrics.add(new Metric("proton.numremoveddocs.last", "documents_removed")); - - metrics.add(new Metric("proton.docsinmemory.last", "documents_inmemory")); - metrics.add(new Metric("proton.diskusage.last", "diskusage")); - metrics.add(new Metric("proton.memoryusage.max", "content.proton.memoryusage.max")); - metrics.add(new Metric("proton.transport.query.count.rate", "query_requests")); - metrics.add(new Metric("proton.transport.docsum.docs.rate", "document_requests")); - metrics.add(new Metric("proton.transport.docsum.latency.average", "content.proton.transport.docsum.latency.average")); - metrics.add(new Metric("proton.transport.query.latency.average", "query_latency")); - - // jobs - metrics.add(new Metric("content.proton.documentdb.job.total.average")); - metrics.add(new Metric("content.proton.documentdb.job.attribute_flush.average")); - metrics.add(new 
Metric("content.proton.documentdb.job.memory_index_flush.average")); - metrics.add(new Metric("content.proton.documentdb.job.disk_index_fusion.average")); - metrics.add(new Metric("content.proton.documentdb.job.document_store_flush.average")); - metrics.add(new Metric("content.proton.documentdb.job.document_store_compact.average")); - metrics.add(new Metric("content.proton.documentdb.job.bucket_move.average")); - metrics.add(new Metric("content.proton.documentdb.job.lid_space_compact.average")); - metrics.add(new Metric("content.proton.documentdb.job.removed_documents_prune.average")); - - // lid space - metrics.add(new Metric("content.proton.documentdb.ready.lid_space.lid_bloat_factor.average")); - metrics.add(new Metric("content.proton.documentdb.notready.lid_space.lid_bloat_factor.average")); - metrics.add(new Metric("content.proton.documentdb.removed.lid_space.lid_bloat_factor.average")); - metrics.add(new Metric("content.proton.documentdb.ready.lid_space.lid_fragmentation_factor.average")); - metrics.add(new Metric("content.proton.documentdb.notready.lid_space.lid_fragmentation_factor.average")); - metrics.add(new Metric("content.proton.documentdb.removed.lid_space.lid_fragmentation_factor.average")); - - // resource usage - metrics.add(new Metric("content.proton.resource_usage.disk.average")); - metrics.add(new Metric("content.proton.resource_usage.memory.average")); - metrics.add(new Metric("content.proton.resource_usage.feeding_blocked.last")); - metrics.add(new Metric("content.proton.documentdb.attribute.resource_usage.enum_store.average")); - metrics.add(new Metric("content.proton.documentdb.attribute.resource_usage.multi_value.average")); - metrics.add(new Metric("content.proton.documentdb.attribute.resource_usage.feeding_blocked.last")); - - // transaction log - metrics.add(new Metric("content.proton.transactionlog.entries.average")); - metrics.add(new Metric("content.proton.transactionlog.disk_usage.average")); - - // document store - metrics.add(new 
Metric("content.proton.documentdb.ready.document_store.disk_usage.average")); - metrics.add(new Metric("content.proton.documentdb.ready.document_store.disk_bloat.average")); - metrics.add(new Metric("content.proton.documentdb.ready.document_store.max_bucket_spread.average")); - metrics.add(new Metric("content.proton.documentdb.notready.document_store.disk_usage.average")); - metrics.add(new Metric("content.proton.documentdb.notready.document_store.disk_bloat.average")); - metrics.add(new Metric("content.proton.documentdb.notready.document_store.max_bucket_spread.average")); - metrics.add(new Metric("content.proton.documentdb.removed.document_store.disk_usage.average")); - metrics.add(new Metric("content.proton.documentdb.removed.document_store.disk_bloat.average")); - metrics.add(new Metric("content.proton.documentdb.removed.document_store.max_bucket_spread.average")); - - return metrics; - } - - private Collection getStorageMetrics() { - List metrics = new ArrayList<>(); - - metrics.add(new Metric("vds.datastored.alldisks.docs.average","docs")); - metrics.add(new Metric("vds.datastored.alldisks.bytes.average","bytes")); - metrics.add(new Metric("vds.visitor.allthreads.averagevisitorlifetime.sum.average","visitorlifetime")); - metrics.add(new Metric("vds.visitor.allthreads.averagequeuewait.sum.average","visitorqueuewait")); - metrics.add(new Metric("vds.filestor.alldisks.allthreads.put.sum.count.rate","put")); - metrics.add(new Metric("vds.filestor.alldisks.allthreads.remove.sum.count.rate","remove")); - metrics.add(new Metric("vds.filestor.alldisks.allthreads.get.sum.count.rate","get")); - metrics.add(new Metric("vds.filestor.alldisks.allthreads.update.sum.count.rate","update")); - metrics.add(new Metric("vds.filestor.alldisks.queuesize.average","diskqueuesize")); - metrics.add(new Metric("vds.filestor.alldisks.averagequeuewait.sum.average","diskqueuewait")); - - metrics.add(new Metric("vds.memfilepersistence.cache.files.average")); - metrics.add(new 
Metric("vds.memfilepersistence.cache.body.average")); - metrics.add(new Metric("vds.memfilepersistence.cache.header.average")); - metrics.add(new Metric("vds.memfilepersistence.cache.meta.average")); - metrics.add(new Metric("vds.visitor.allthreads.queuesize.count.average")); - metrics.add(new Metric("vds.visitor.allthreads.completed.sum.average")); - metrics.add(new Metric("vds.visitor.allthreads.created.sum.rate","visit")); - - metrics.add(new Metric("vds.filestor.alldisks.allthreads.put.sum.latency.average")); - metrics.add(new Metric("vds.filestor.alldisks.allthreads.remove.sum.latency.average")); - metrics.add(new Metric("vds.filestor.alldisks.allthreads.get.sum.latency.average")); - metrics.add(new Metric("vds.filestor.alldisks.allthreads.update.sum.latency.average")); - metrics.add(new Metric("vds.filestor.alldisks.allthreads.splitbuckets.count.rate")); - metrics.add(new Metric("vds.filestor.alldisks.allthreads.joinbuckets.count.rate")); - metrics.add(new Metric("vds.filestor.alldisks.allthreads.setbucketstates.count.rate")); - - metrics.add(new Metric("vds.filestor.spi.put.success.average")); - metrics.add(new Metric("vds.filestor.spi.remove.success.average")); - metrics.add(new Metric("vds.filestor.spi.update.success.average")); - metrics.add(new Metric("vds.filestor.spi.get.success.average")); - metrics.add(new Metric("vds.filestor.spi.iterate.success.average")); - metrics.add(new Metric("vds.filestor.spi.put.success.rate")); - metrics.add(new Metric("vds.filestor.spi.remove.success.rate")); - metrics.add(new Metric("vds.filestor.spi.update.success.rate")); - metrics.add(new Metric("vds.filestor.spi.get.success.rate")); - metrics.add(new Metric("vds.filestor.spi.iterate.success.rate")); - - - //Distributor - metrics.add(new Metric("vds.visitor.sum.latency.average")); - metrics.add(new Metric("vds.visitor.sum.failed.rate")); - - metrics.add(new Metric("vds.idealstate.buckets_rechecking.average")); - metrics.add(new 
Metric("vds.idealstate.idealstate_diff.average")); - metrics.add(new Metric("vds.idealstate.buckets_toofewcopies.average")); - metrics.add(new Metric("vds.idealstate.buckets_toomanycopies.average")); - metrics.add(new Metric("vds.idealstate.buckets.average")); - metrics.add(new Metric("vds.idealstate.buckets_notrusted.average")); - metrics.add(new Metric("vds.idealstate.delete_bucket.done_ok.rate","deleteok")); - metrics.add(new Metric("vds.idealstate.delete_bucket.done_failed.rate","deletefailed")); - metrics.add(new Metric("vds.idealstate.delete_bucket.pending.average","deletepending")); - metrics.add(new Metric("vds.idealstate.merge_bucket.done_ok.rate","mergeok")); - metrics.add(new Metric("vds.idealstate.merge_bucket.done_failed.rate","mergefailed")); - metrics.add(new Metric("vds.idealstate.merge_bucket.pending.average","mergepending")); - metrics.add(new Metric("vds.idealstate.split_bucket.done_ok.rate","splitok")); - metrics.add(new Metric("vds.idealstate.split_bucket.done_failed.rate","splitfailed")); - metrics.add(new Metric("vds.idealstate.split_bucket.pending.average","splitpending")); - metrics.add(new Metric("vds.idealstate.join_bucket.done_ok.rate","joinok")); - metrics.add(new Metric("vds.idealstate.join_bucket.done_failed.rate","joinfailed")); - metrics.add(new Metric("vds.idealstate.join_bucket.pending.average","joinpending")); - - metrics.add(new Metric("vds.distributor.puts.sum.latency.average")); - metrics.add(new Metric("vds.distributor.puts.sum.ok.rate")); - metrics.add(new Metric("vds.distributor.puts.sum.failures.total.rate")); - metrics.add(new Metric("vds.distributor.removes.sum.latency.average")); - metrics.add(new Metric("vds.distributor.removes.sum.ok.rate")); - metrics.add(new Metric("vds.distributor.removes.sum.failures.total.rate")); - metrics.add(new Metric("vds.distributor.updates.sum.latency.average")); - metrics.add(new Metric("vds.distributor.updates.sum.ok.rate")); - metrics.add(new 
Metric("vds.distributor.updates.sum.failures.total.rate")); - metrics.add(new Metric("vds.distributor.removelocations.sum.latency.average")); - metrics.add(new Metric("vds.distributor.removelocations.sum.ok.rate")); - metrics.add(new Metric("vds.distributor.removelocations.sum.failures.total.rate")); - metrics.add(new Metric("vds.distributor.gets.sum.latency.average")); - metrics.add(new Metric("vds.distributor.gets.sum.ok.rate")); - metrics.add(new Metric("vds.distributor.gets.sum.failures.total.rate")); - metrics.add(new Metric("vds.distributor.docsstored.average")); - metrics.add(new Metric("vds.distributor.bytesstored.average")); - - return metrics; + private static MetricsConsumer getYamasConsumer(){ + return new MetricsConsumer("yamas", vespaMetricSet); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/Metric.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/Metric.java index 8b5a28f2ab9..14997ed5872 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/Metric.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/Metric.java @@ -19,7 +19,6 @@ public class Metric { public final String description; public final Map dimensions; - public Metric(String name, String outputName, String description, Map dimensions) { this.name = name; this.outputName = outputName; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricSet.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricSet.java new file mode 100644 index 00000000000..5e498ee207a --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricSet.java @@ -0,0 +1,93 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.model.admin.monitoring; + +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +import static java.util.Collections.unmodifiableMap; + +/** + * Models a metric set containing a set of metrics and child metric sets. + * + * @author gjoranv + */ +public class MetricSet { + + private final String id; + private final Map metrics; + private final Set children; + + + public MetricSet(String id, Collection metrics, Collection children) { + Objects.requireNonNull(id, "Id cannot be null or empty."); + + this.id = id; + this.metrics = toMapByName(metrics); + this.children = new LinkedHashSet<>(children); + } + + public MetricSet(String id, Collection metrics) { + this(id, metrics, Collections.emptySet()); + } + + public final String getId() { return id; } + + /** + * Returns all metrics in this set, including all metrics in any contained metric sets. + *
+ * Joins this set's metrics with its child sets into a named flat map of metrics. + * In the case of duplicate metrics, the metrics directly defined in this set + * takes precedence with respect to output name, description and dimension value + * (even if they are empty), while new dimensions from the children will be added. + * + * @return All metrics contained in this set. + */ + public final Map getMetrics() { + return unmodifiableMap(flatten(metrics, children)); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof MetricSet)) return false; + + MetricSet that = (MetricSet) o; + + return Objects.equals(id, that.id); + + } + + @Override + public int hashCode() { + return Objects.hashCode(id); + } + + private Map flatten(Map metrics, Set children) { + Map joinedMetrics = new LinkedHashMap<>(metrics); + + for (MetricSet child : children) { + child.getMetrics().forEach( + (name, metric) -> { + if (joinedMetrics.containsKey(name)) + joinedMetrics.put(name, joinedMetrics.get(name).addDimensionsFrom(metric)); + else + joinedMetrics.put(name, metric); + }); + } + return joinedMetrics; + } + + private Map toMapByName(Collection metrics) { + Map metricMap = new LinkedHashMap<>(); + for (Metric metric : metrics) { + metricMap.put(metric.name, metric); + } + return metricMap; + } + +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricsConsumer.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricsConsumer.java index 00451992de6..d71f3d18e28 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricsConsumer.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricsConsumer.java @@ -9,27 +9,29 @@ import java.util.Map; * @author trygve */ public class MetricsConsumer { - private final String consumer; - private final Map metrics; + private final String id; + private final MetricSet metricSet; /** - * @param 
consumer The consumer - * @param metrics The metrics for the the consumer + * @param id The consumer + * @param metricSet The metrics for this consumer */ - public MetricsConsumer(String consumer, Map metrics) { - this.consumer = consumer; - this.metrics = metrics; + public MetricsConsumer(String id, MetricSet metricSet) { + this.id = id; + this.metricSet = metricSet; } - public String getConsumer() { - return consumer; + public String getId() { + return id; } + public MetricSet getMetricSet() { return metricSet; } + /** * @return Map of metric with metric name as key */ public Map getMetrics() { - return metrics; + return metricSet.getMetrics(); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java new file mode 100644 index 00000000000..dccb61eb655 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java @@ -0,0 +1,300 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.model.admin.monitoring; + +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.Set; + +/** + * @author gjoranv + */ +@SuppressWarnings("UnusedDeclaration") // Used by model amenders +public class VespaMetricSet extends MetricSet { + + private static final Set vespaMetrics = getVespaMetrics(); + + public VespaMetricSet() { + super("vespa", vespaMetrics, Collections.emptySet()); + } + + private static Set getVespaMetrics() { + Set metrics =new LinkedHashSet<>(); + + metrics.addAll(getSearchNodeMetrics()); + metrics.addAll(getStorageMetrics()); + metrics.addAll(getDocprocMetrics()); + metrics.addAll(getClusterControllerMetrics()); + metrics.addAll(getQrserverMetrics()); + metrics.addAll(getContainerMetrics()); + metrics.addAll(getConfigServerMetrics()); + + return Collections.unmodifiableSet(metrics); + } + + private static Set getConfigServerMetrics() { + Set metrics =new LinkedHashSet<>(); + + metrics.add(new Metric("configserver.requests.count", "configserver.requests")); + metrics.add(new Metric("configserver.failedRequests.count", "configserver.failedRequests")); + metrics.add(new Metric("configserver.latency.average", "configserver.latency")); + metrics.add(new Metric("configserver.cacheConfigElems.last", "configserver.cacheConfigElems")); + metrics.add(new Metric("configserver.cacheChecksumElems.last", "configserver.cacheChecksumElems")); + metrics.add(new Metric("configserver.hosts.last", "configserver.hosts")); + metrics.add(new Metric("configserver.delayedResponses.count", "configserver.delayedResponses")); + metrics.add(new Metric("configserver.sessionChangeErrors.count", "configserver.sessionChangeErrors")); + + return metrics; + } + + private static Set getContainerMetrics() { + Set metrics =new LinkedHashSet<>(); + + metrics.add(new Metric("serverRejectedRequests.rate")); + metrics.add(new Metric("serverRejectedRequests.count")); + + metrics.add(new 
Metric("serverThreadPoolSize.average")); + metrics.add(new Metric("serverThreadPoolSize.min")); + metrics.add(new Metric("serverThreadPoolSize.max")); + metrics.add(new Metric("serverThreadPoolSize.rate")); + metrics.add(new Metric("serverThreadPoolSize.count")); + metrics.add(new Metric("serverThreadPoolSize.last")); + + metrics.add(new Metric("serverActiveThreads.average")); + metrics.add(new Metric("serverActiveThreads.min")); + metrics.add(new Metric("serverActiveThreads.max")); + metrics.add(new Metric("serverActiveThreads.rate")); + metrics.add(new Metric("serverActiveThreads.count")); + metrics.add(new Metric("serverActiveThreads.last")); + + metrics.add(new Metric("httpapi_latency.average")); + metrics.add(new Metric("httpapi_pending.average")); + metrics.add(new Metric("httpapi_num_operations.rate")); + metrics.add(new Metric("httpapi_num_updates.rate")); + metrics.add(new Metric("httpapi_num_removes.rate")); + metrics.add(new Metric("httpapi_num_puts.rate")); + metrics.add(new Metric("httpapi_succeeded.rate")); + metrics.add(new Metric("httpapi_failed.rate")); + + metrics.add(new Metric("mem.heap.total.average")); + metrics.add(new Metric("mem.heap.free.average")); + metrics.add(new Metric("mem.heap.used.average")); + + return metrics; + } + + private static Set getClusterControllerMetrics() { + Set metrics =new LinkedHashSet<>(); + + metrics.add(new Metric("cluster-controller.down.count.last")); + metrics.add(new Metric("cluster-controller.initializing.count.last")); + metrics.add(new Metric("cluster-controller.maintenance.count.last")); + metrics.add(new Metric("cluster-controller.retired.count.last")); + metrics.add(new Metric("cluster-controller.stopping.count.last")); + metrics.add(new Metric("cluster-controller.up.count.last")); + metrics.add(new Metric("cluster-controller.cluster-state-change.count", "content.cluster-controller.cluster-state-change.count")); + + metrics.add(new Metric("cluster-controller.is-master.last")); + // TODO(hakonhall): 
Update this name once persistent "count" metrics has been implemented. + // DO NOT RELY ON THIS METRIC YET. + metrics.add(new Metric("cluster-controller.node-event.count")); + + return metrics; + } + + private static Set getDocprocMetrics() { + Set metrics =new LinkedHashSet<>(); + + // per chain + metrics.add(new Metric("documents_processed.rate", "documents_processed")); + + return metrics; + } + + private static Set getQrserverMetrics() { + Set metrics =new LinkedHashSet<>(); + + metrics.add(new Metric("peak_qps.average", "peak_qps")); + metrics.add(new Metric("search_connections.average", "search_connections")); + metrics.add(new Metric("active_queries.average", "active_queries")); + metrics.add(new Metric("queries.rate", "queries")); + metrics.add(new Metric("query_latency.average", "mean_query_latency")); + metrics.add(new Metric("query_latency.max", "max_query_latency")); + metrics.add(new Metric("query_latency.95percentile", "95p_query_latency")); + metrics.add(new Metric("query_latency.99percentile", "99p_query_latency")); + metrics.add(new Metric("failed_queries.rate", "failed_queries")); + metrics.add(new Metric("hits_per_query.average", "hits_per_query")); + metrics.add(new Metric("empty_results.rate", "empty_results")); + metrics.add(new Metric("requestsOverQuota.rate")); + metrics.add(new Metric("requestsOverQuota.count")); + + // Errors from qrserver + metrics.add(new Metric("error.timeout.rate","error.timeout")); + metrics.add(new Metric("error.backends_oos.rate","error.backends_oos")); + metrics.add(new Metric("error.plugin_failure.rate","error.plugin_failure")); + metrics.add(new Metric("error.backend_communication_error.rate","error.backend_communication_error")); + metrics.add(new Metric("error.empty_document_summaries.rate","error.empty_document_summaries")); + metrics.add(new Metric("error.invalid_query_parameter.rate","error.invalid_query_parameter")); + metrics.add(new Metric("error.internal_server_error.rate", 
"error.internal_server_error")); + metrics.add(new Metric("error.misconfigured_server.rate","error.misconfigured_server")); + metrics.add(new Metric("error.invalid_query_transformation.rate","error.invalid_query_transformation")); + metrics.add(new Metric("error.result_with_errors.rate","error.result_with_errors")); + metrics.add(new Metric("error.unspecified.rate","error.unspecified")); + metrics.add(new Metric("error.unhandled_exception.rate","error.unhandled_exception")); + metrics.add(new Metric("http.status.1xx.rate")); + metrics.add(new Metric("http.status.2xx.rate")); + metrics.add(new Metric("http.status.3xx.rate")); + metrics.add(new Metric("http.status.4xx.rate")); + metrics.add(new Metric("http.status.5xx.rate")); + + return metrics; + } + + private static Set getSearchNodeMetrics() { + Set metrics =new LinkedHashSet<>(); + + metrics.add(new Metric("proton.numstoreddocs.last", "documents_total")); + metrics.add(new Metric("proton.numindexeddocs.last", "documents_ready")); + metrics.add(new Metric("proton.numactivedocs.last", "documents_active")); + metrics.add(new Metric("proton.numremoveddocs.last", "documents_removed")); + + metrics.add(new Metric("proton.docsinmemory.last", "documents_inmemory")); + metrics.add(new Metric("proton.diskusage.last", "diskusage")); + metrics.add(new Metric("proton.memoryusage.max", "content.proton.memoryusage.max")); + metrics.add(new Metric("proton.transport.query.count.rate", "query_requests")); + metrics.add(new Metric("proton.transport.docsum.docs.rate", "document_requests")); + metrics.add(new Metric("proton.transport.docsum.latency.average", "content.proton.transport.docsum.latency.average")); + metrics.add(new Metric("proton.transport.query.latency.average", "query_latency")); + + // jobs + metrics.add(new Metric("content.proton.documentdb.job.total.average")); + metrics.add(new Metric("content.proton.documentdb.job.attribute_flush.average")); + metrics.add(new 
Metric("content.proton.documentdb.job.memory_index_flush.average")); + metrics.add(new Metric("content.proton.documentdb.job.disk_index_fusion.average")); + metrics.add(new Metric("content.proton.documentdb.job.document_store_flush.average")); + metrics.add(new Metric("content.proton.documentdb.job.document_store_compact.average")); + metrics.add(new Metric("content.proton.documentdb.job.bucket_move.average")); + metrics.add(new Metric("content.proton.documentdb.job.lid_space_compact.average")); + metrics.add(new Metric("content.proton.documentdb.job.removed_documents_prune.average")); + + // lid space + metrics.add(new Metric("content.proton.documentdb.ready.lid_space.lid_bloat_factor.average")); + metrics.add(new Metric("content.proton.documentdb.notready.lid_space.lid_bloat_factor.average")); + metrics.add(new Metric("content.proton.documentdb.removed.lid_space.lid_bloat_factor.average")); + metrics.add(new Metric("content.proton.documentdb.ready.lid_space.lid_fragmentation_factor.average")); + metrics.add(new Metric("content.proton.documentdb.notready.lid_space.lid_fragmentation_factor.average")); + metrics.add(new Metric("content.proton.documentdb.removed.lid_space.lid_fragmentation_factor.average")); + + // resource usage + metrics.add(new Metric("content.proton.resource_usage.disk.average")); + metrics.add(new Metric("content.proton.resource_usage.memory.average")); + metrics.add(new Metric("content.proton.resource_usage.feeding_blocked.last")); + metrics.add(new Metric("content.proton.documentdb.attribute.resource_usage.enum_store.average")); + metrics.add(new Metric("content.proton.documentdb.attribute.resource_usage.multi_value.average")); + metrics.add(new Metric("content.proton.documentdb.attribute.resource_usage.feeding_blocked.last")); + + // transaction log + metrics.add(new Metric("content.proton.transactionlog.entries.average")); + metrics.add(new Metric("content.proton.transactionlog.disk_usage.average")); + + // document store + metrics.add(new 
Metric("content.proton.documentdb.ready.document_store.disk_usage.average")); + metrics.add(new Metric("content.proton.documentdb.ready.document_store.disk_bloat.average")); + metrics.add(new Metric("content.proton.documentdb.ready.document_store.max_bucket_spread.average")); + metrics.add(new Metric("content.proton.documentdb.notready.document_store.disk_usage.average")); + metrics.add(new Metric("content.proton.documentdb.notready.document_store.disk_bloat.average")); + metrics.add(new Metric("content.proton.documentdb.notready.document_store.max_bucket_spread.average")); + metrics.add(new Metric("content.proton.documentdb.removed.document_store.disk_usage.average")); + metrics.add(new Metric("content.proton.documentdb.removed.document_store.disk_bloat.average")); + metrics.add(new Metric("content.proton.documentdb.removed.document_store.max_bucket_spread.average")); + + return metrics; + } + + private static Set getStorageMetrics() { + Set metrics =new LinkedHashSet<>(); + + metrics.add(new Metric("vds.datastored.alldisks.docs.average","docs")); + metrics.add(new Metric("vds.datastored.alldisks.bytes.average","bytes")); + metrics.add(new Metric("vds.visitor.allthreads.averagevisitorlifetime.sum.average","visitorlifetime")); + metrics.add(new Metric("vds.visitor.allthreads.averagequeuewait.sum.average","visitorqueuewait")); + metrics.add(new Metric("vds.filestor.alldisks.allthreads.put.sum.count.rate","put")); + metrics.add(new Metric("vds.filestor.alldisks.allthreads.remove.sum.count.rate","remove")); + metrics.add(new Metric("vds.filestor.alldisks.allthreads.get.sum.count.rate","get")); + metrics.add(new Metric("vds.filestor.alldisks.allthreads.update.sum.count.rate","update")); + metrics.add(new Metric("vds.filestor.alldisks.queuesize.average","diskqueuesize")); + metrics.add(new Metric("vds.filestor.alldisks.averagequeuewait.sum.average","diskqueuewait")); + + metrics.add(new Metric("vds.memfilepersistence.cache.files.average")); + metrics.add(new 
Metric("vds.memfilepersistence.cache.body.average")); + metrics.add(new Metric("vds.memfilepersistence.cache.header.average")); + metrics.add(new Metric("vds.memfilepersistence.cache.meta.average")); + metrics.add(new Metric("vds.visitor.allthreads.queuesize.count.average")); + metrics.add(new Metric("vds.visitor.allthreads.completed.sum.average")); + metrics.add(new Metric("vds.visitor.allthreads.created.sum.rate","visit")); + + metrics.add(new Metric("vds.filestor.alldisks.allthreads.put.sum.latency.average")); + metrics.add(new Metric("vds.filestor.alldisks.allthreads.remove.sum.latency.average")); + metrics.add(new Metric("vds.filestor.alldisks.allthreads.get.sum.latency.average")); + metrics.add(new Metric("vds.filestor.alldisks.allthreads.update.sum.latency.average")); + metrics.add(new Metric("vds.filestor.alldisks.allthreads.splitbuckets.count.rate")); + metrics.add(new Metric("vds.filestor.alldisks.allthreads.joinbuckets.count.rate")); + metrics.add(new Metric("vds.filestor.alldisks.allthreads.setbucketstates.count.rate")); + + metrics.add(new Metric("vds.filestor.spi.put.success.average")); + metrics.add(new Metric("vds.filestor.spi.remove.success.average")); + metrics.add(new Metric("vds.filestor.spi.update.success.average")); + metrics.add(new Metric("vds.filestor.spi.get.success.average")); + metrics.add(new Metric("vds.filestor.spi.iterate.success.average")); + metrics.add(new Metric("vds.filestor.spi.put.success.rate")); + metrics.add(new Metric("vds.filestor.spi.remove.success.rate")); + metrics.add(new Metric("vds.filestor.spi.update.success.rate")); + metrics.add(new Metric("vds.filestor.spi.get.success.rate")); + metrics.add(new Metric("vds.filestor.spi.iterate.success.rate")); + + + //Distributor + metrics.add(new Metric("vds.visitor.sum.latency.average")); + metrics.add(new Metric("vds.visitor.sum.failed.rate")); + + metrics.add(new Metric("vds.idealstate.buckets_rechecking.average")); + metrics.add(new 
Metric("vds.idealstate.idealstate_diff.average")); + metrics.add(new Metric("vds.idealstate.buckets_toofewcopies.average")); + metrics.add(new Metric("vds.idealstate.buckets_toomanycopies.average")); + metrics.add(new Metric("vds.idealstate.buckets.average")); + metrics.add(new Metric("vds.idealstate.buckets_notrusted.average")); + metrics.add(new Metric("vds.idealstate.delete_bucket.done_ok.rate","deleteok")); + metrics.add(new Metric("vds.idealstate.delete_bucket.done_failed.rate","deletefailed")); + metrics.add(new Metric("vds.idealstate.delete_bucket.pending.average","deletepending")); + metrics.add(new Metric("vds.idealstate.merge_bucket.done_ok.rate","mergeok")); + metrics.add(new Metric("vds.idealstate.merge_bucket.done_failed.rate","mergefailed")); + metrics.add(new Metric("vds.idealstate.merge_bucket.pending.average","mergepending")); + metrics.add(new Metric("vds.idealstate.split_bucket.done_ok.rate","splitok")); + metrics.add(new Metric("vds.idealstate.split_bucket.done_failed.rate","splitfailed")); + metrics.add(new Metric("vds.idealstate.split_bucket.pending.average","splitpending")); + metrics.add(new Metric("vds.idealstate.join_bucket.done_ok.rate","joinok")); + metrics.add(new Metric("vds.idealstate.join_bucket.done_failed.rate","joinfailed")); + metrics.add(new Metric("vds.idealstate.join_bucket.pending.average","joinpending")); + + metrics.add(new Metric("vds.distributor.puts.sum.latency.average")); + metrics.add(new Metric("vds.distributor.puts.sum.ok.rate")); + metrics.add(new Metric("vds.distributor.puts.sum.failures.total.rate")); + metrics.add(new Metric("vds.distributor.removes.sum.latency.average")); + metrics.add(new Metric("vds.distributor.removes.sum.ok.rate")); + metrics.add(new Metric("vds.distributor.removes.sum.failures.total.rate")); + metrics.add(new Metric("vds.distributor.updates.sum.latency.average")); + metrics.add(new Metric("vds.distributor.updates.sum.ok.rate")); + metrics.add(new 
Metric("vds.distributor.updates.sum.failures.total.rate")); + metrics.add(new Metric("vds.distributor.removelocations.sum.latency.average")); + metrics.add(new Metric("vds.distributor.removelocations.sum.ok.rate")); + metrics.add(new Metric("vds.distributor.removelocations.sum.failures.total.rate")); + metrics.add(new Metric("vds.distributor.gets.sum.latency.average")); + metrics.add(new Metric("vds.distributor.gets.sum.ok.rate")); + metrics.add(new Metric("vds.distributor.gets.sum.failures.total.rate")); + metrics.add(new Metric("vds.distributor.docsstored.average")); + metrics.add(new Metric("vds.distributor.bytesstored.average")); + + return metrics; + } + +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomMetricBuilderHelper.java b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomMetricBuilderHelper.java index 0e09ed51414..e98c9e73486 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomMetricBuilderHelper.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomMetricBuilderHelper.java @@ -3,12 +3,15 @@ package com.yahoo.vespa.model.builder.xml.dom; import com.yahoo.text.XML; import com.yahoo.vespa.model.admin.monitoring.Metric; +import com.yahoo.vespa.model.admin.monitoring.MetricSet; import com.yahoo.vespa.model.admin.monitoring.MetricsConsumer; import org.w3c.dom.Element; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Set; /** * Helper class for parsing yamasmetric config. 
@@ -30,18 +33,23 @@ public class DomMetricBuilderHelper { List consumersElem = XML.getChildren(spec, "consumer"); for (Element consumer : consumersElem) { String consumerName = consumer.getAttribute("name"); - Map metrics = new LinkedHashMap<>(); + Set metrics = new LinkedHashSet<>(); List metricsEl = XML.getChildren(consumer, "metric"); if (metricsEl != null) { for (Element metric : metricsEl) { String metricName = metric.getAttribute("name"); String outputName = metric.getAttribute("output-name"); - metrics.put(metricName, new Metric(metricName, outputName)); + metrics.add(new Metric(metricName, outputName)); } } - MetricsConsumer metricsConsumer = new MetricsConsumer(consumerName, metrics); + MetricsConsumer metricsConsumer = new MetricsConsumer(consumerName, + new MetricSet(metricSetId(consumerName), metrics)); metricsConsumers.put(consumerName, metricsConsumer); } return metricsConsumers; } + + private static String metricSetId(String consumerName) { + return "legacy-user-metrics-" + consumerName; + } } diff --git a/config-model/src/test/java/com/yahoo/vespa/model/admin/monitoring/MetricSetTest.java b/config-model/src/test/java/com/yahoo/vespa/model/admin/monitoring/MetricSetTest.java new file mode 100644 index 00000000000..701a7fe0d30 --- /dev/null +++ b/config-model/src/test/java/com/yahoo/vespa/model/admin/monitoring/MetricSetTest.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.model.admin.monitoring;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Sets;
+import org.junit.Test;
+
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+/** Checks that a metric held directly by a {@link MetricSet} wins over a same-named metric from a child set.
+ * @author gjoranv
+ */
+public class MetricSetTest {
+
+    @Test
+    public void internal_metrics_take_precedence_over_metrics_from_children() {
+        String METRIC_NAME = "metric1";
+        String COMMON_DIMENSION_KEY = "commonKey";
+        // Child metric: same name as the parent metric, its own value for the common key, plus one key of its own.
+        Map<String, String> childDimensions = ImmutableMap.<String, String>builder()
+                .put(COMMON_DIMENSION_KEY, "childCommonVal")
+                .put("childKey", "childVal")
+                .build();
+        Metric childMetric = new Metric(METRIC_NAME, "child-output-name", "child-description", childDimensions);
+        // Parent metric: same name, a conflicting value for the common key, plus one key of its own.
+        Map<String, String> parentDimensions = ImmutableMap.<String, String>builder()
+                .put(COMMON_DIMENSION_KEY, "parentCommonVal")
+                .put("parentKey", "parentVal")
+                .build();
+        Metric parentMetric = new Metric(METRIC_NAME, "parent-output-name", "parent-description", parentDimensions);
+        // Both sets are created with id "set1"; the child set is handed to the parent as its third constructor argument.
+        MetricSet child = new MetricSet("set1", Sets.newHashSet(childMetric));
+        MetricSet parent = new MetricSet("set1", Sets.newHashSet(parentMetric), Sets.newHashSet(child));
+        // Expect the parent's output name, description and common-key value, and 3 dimensions in total.
+        Metric combinedMetric = parent.getMetrics().get(METRIC_NAME);
+        assertEquals("parent-output-name", combinedMetric.outputName);
+        assertEquals("parent-description", combinedMetric.description);
+        assertEquals(3, combinedMetric.dimensions.size());
+        assertEquals("parentCommonVal", combinedMetric.dimensions.get(COMMON_DIMENSION_KEY));
+    }
+}
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/admin/monitoring/MetricTest.java b/config-model/src/test/java/com/yahoo/vespa/model/admin/monitoring/MetricTest.java
new file mode 100644
index 00000000000..9e30b5b6d69
--- /dev/null
+++ b/config-model/src/test/java/com/yahoo/vespa/model/admin/monitoring/MetricTest.java
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.model.admin.monitoring;
+
+import com.google.common.collect.ImmutableMap;
+import org.junit.Test;
+
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+/** Checks that {@code addDimensionsFrom} keeps the receiving metric's own values when combined with another metric.
+ * @author gjoranv
+ */
+public class MetricTest {
+
+    @Test
+    public void this_metric_takes_precedence_when_combined_with_another_metric() {
+        String COMMON_DIMENSION_KEY = "commonKey";
+        // "This" metric: the receiver of addDimensionsFrom below.
+        Map<String, String> thisDimensions = ImmutableMap.<String, String>builder()
+                .put(COMMON_DIMENSION_KEY, "thisCommonVal")
+                .put("thisKey", "thisVal")
+                .build();
+        Metric thisMetric = new Metric("thisMetric", "this-output-name", "this-description", thisDimensions);
+        // "That" metric: a conflicting value for the common key, plus one key of its own.
+        Map<String, String> thatDimensions = ImmutableMap.<String, String>builder()
+                .put(COMMON_DIMENSION_KEY, "thatCommonVal")
+                .put("thatKey", "thatVal")
+                .build();
+        Metric thatMetric = new Metric("thatMetric", "that-output-name", "that-description", thatDimensions);
+        // Expect this metric's output name, description and common-key value, and 3 dimensions in total.
+        Metric combinedMetric = thisMetric.addDimensionsFrom(thatMetric);
+        assertEquals("this-output-name", combinedMetric.outputName);
+        assertEquals("this-description", combinedMetric.description);
+        assertEquals(3, combinedMetric.dimensions.size());
+        assertEquals("thisCommonVal", combinedMetric.dimensions.get(COMMON_DIMENSION_KEY));
+    }
+}
-- 
cgit v1.2.3