summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- metrics/src/tests/metricmanagertest.cpp 184
-rw-r--r-- metrics/src/vespa/metrics/CMakeLists.txt 1
-rw-r--r-- metrics/src/vespa/metrics/metric.cpp 8
-rw-r--r-- metrics/src/vespa/metrics/metric.h 5
-rw-r--r-- metrics/src/vespa/metrics/prometheus_writer.cpp 287
-rw-r--r-- metrics/src/vespa/metrics/prometheus_writer.h 77
-rw-r--r-- storage/src/vespa/storage/common/statusmetricconsumer.cpp 13
7 files changed, 543 insertions, 32 deletions
diff --git a/metrics/src/tests/metricmanagertest.cpp b/metrics/src/tests/metricmanagertest.cpp
index be63bed5bec..e5e39bd9dcb 100644
--- a/metrics/src/tests/metricmanagertest.cpp
+++ b/metrics/src/tests/metricmanagertest.cpp
@@ -5,19 +5,21 @@
#include <vespa/metrics/metricmanager.h>
#include <vespa/metrics/state_api_adapter.h>
#include <vespa/metrics/textwriter.h>
+#include <vespa/metrics/prometheus_writer.h>
#include <vespa/vespalib/data/slime/slime.h>
-#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/vespalib/stllike/asciistream.h>
-#include <vespa/vespalib/util/size_literals.h>
#include <vespa/vespalib/util/time.h>
#include <vespa/vespalib/data/simple_buffer.h>
#include <vespa/vespalib/util/atomic.h>
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
#include <mutex>
#include <thread>
#include <vespa/log/log.h>
LOG_SETUP(".test.metricmanager");
+using namespace ::testing;
using namespace vespalib::atomic;
using config::ConfigUri;
@@ -41,16 +43,16 @@ struct SubMetricSet : public MetricSet
DoubleValueMetric val2;
SumMetric<DoubleValueMetric> valsum;
- SubMetricSet(const Metric::String & name, MetricSet* owner);
- ~SubMetricSet();
+ explicit SubMetricSet(const Metric::String& name, MetricSet* owner = nullptr);
+ ~SubMetricSet() override;
};
-SubMetricSet::SubMetricSet(const Metric::String & name, MetricSet* owner)
- : MetricSet(name, {{"sub"}}, "sub desc", owner),
- val1("val1", {{"tag4"},{"snaptest"}}, "val1 desc", this),
+SubMetricSet::SubMetricSet(const Metric::String& name, MetricSet* owner)
+ : MetricSet(name, {{"partofsum"}, {"sub"}}, "sub desc", owner),
+ val1("val1", {{"tag4"}, {"snaptest"}}, "val1 desc", this),
val2("val2", {{"tag5"}}, "val2 desc", this),
- valsum("valsum", {{"tag4"},{"snaptest"}}, "valsum desc", this)
+ valsum("valsum", {{"tag4"}, {"snaptest"}}, "valsum desc", this)
{
valsum.addMetricToSum(val1);
valsum.addMetricToSum(val2);
@@ -65,7 +67,7 @@ struct MultiSubMetricSet
SubMetricSet b;
SumMetric<MetricSet> sum;
- MultiSubMetricSet(MetricSet* owner);
+ explicit MultiSubMetricSet(MetricSet* owner);
~MultiSubMetricSet();
};
@@ -156,13 +158,6 @@ getMatchedMetrics(const vespalib::string& config)
mm.init(ConfigUri(config));
MetricNameVisitor visitor;
- /** Take a copy to verify clone works.
- std::list<Metric::SP> ownerList;
- MetricSet::UP copy(dynamic_cast<MetricSet*>(
- mm.getMetrics().clone(ownerList)));
- mm.visit(*copy, visitor, "consumer");
- */
-
MetricLockGuard g(mm.getMetricLock());
mm.visit(g, mm.getActiveMetrics(g), visitor, "consumer");
const MetricManager::ConsumerSpec * consumerSpec = mm.getConsumerSpec(g, "consumer");
@@ -386,8 +381,8 @@ struct BriefValuePrinter : public MetricVisitor {
bool waitForTimeProcessed(const MetricManager& mm, time_point::duration processtime, uint32_t timeout = 120)
{
- uint32_t lastchance = time(0) + timeout;
- while (time(0) < lastchance) {
+ uint32_t lastchance = time(nullptr) + timeout;
+ while (time(nullptr) < lastchance) {
if (mm.getLastProcessedTime() >= time_point(processtime)) return true;
mm.timeChangedNotification();
std::this_thread::sleep_for(10ms);
@@ -705,6 +700,16 @@ struct MetricSnapshotTestFixture
}
return ss.str();
}
+
+    // Renders the snapshot requested with a 300s period, as seen by the "snapper"
+    // consumer, through a PrometheusWriter and returns the produced text.
+    std::string render_last_snapshot_as_prometheus() const {
+        vespalib::asciistream rendered;
+        PrometheusWriter prom_writer(rendered);
+        {
+            // The metric lock is only held while the snapshot is being visited.
+            MetricLockGuard guard(manager.getMetricLock());
+            const auto& snapshot = manager.getMetricSnapshot(guard, 300s, false);
+            manager.visit(guard, snapshot, prom_writer, "snapper");
+        }
+        return rendered.str();
+    }
};
class JsonMetricWrapper
@@ -762,7 +767,7 @@ struct DimensionTestMetricSet : MetricSet
DoubleValueMetric val1;
LongCountMetric val2;
- DimensionTestMetricSet(MetricSet* owner = nullptr);
+ explicit DimensionTestMetricSet(MetricSet* owner = nullptr);
~DimensionTestMetricSet() override;
};
@@ -801,7 +806,7 @@ struct NestedDimensionTestMetricSet : MetricSet
DimensionTestMetricSet nestedSet;
NestedDimensionTestMetricSet();
- ~NestedDimensionTestMetricSet();
+ ~NestedDimensionTestMetricSet() override;
};
NestedDimensionTestMetricSet::NestedDimensionTestMetricSet()
@@ -839,7 +844,7 @@ struct DimensionOverridableTestMetricSet : MetricSet
{
DoubleValueMetric val;
- DimensionOverridableTestMetricSet(const std::string& dimValue, MetricSet* owner = nullptr);
+ explicit DimensionOverridableTestMetricSet(const std::string& dimValue, MetricSet* owner = nullptr);
~DimensionOverridableTestMetricSet() override;
};
@@ -855,7 +860,7 @@ struct SameNamesTestMetricSet : MetricSet
DimensionOverridableTestMetricSet set2;
SameNamesTestMetricSet();
- ~SameNamesTestMetricSet();
+ ~SameNamesTestMetricSet() override;
};
SameNamesTestMetricSet::SameNamesTestMetricSet()
@@ -894,14 +899,14 @@ TEST_F(MetricManagerTest, test_text_output)
MetricLockGuard lockGuard(mm.getMetricLock());
mm.registerMetric(lockGuard, mySet.set);
}
- // Adding metrics to have some values in them
+ // Adding metrics to have some values in them
mySet.val6.addValue(2);
mySet.val9.val1.addValue(4);
mySet.val10.count.inc();
mySet.val10.a.val1.addValue(7);
mySet.val10.a.val2.addValue(2);
mySet.val10.b.val1.addValue(1);
- // Initialize metric manager to get snapshots created.
+ // Initialize metric manager to get snapshots created.
mm.init(ConfigUri("raw:"
"consumer[2]\n"
"consumer[0].name snapper\n"
@@ -953,6 +958,137 @@ TEST_F(MetricManagerTest, text_output_supports_dimensions)
EXPECT_EQ(expected, actual);
}
+TEST_F(MetricManagerTest, prometheus_output_groups_related_time_series) { // Samples sharing metric name and aggregate are expected under a single TYPE line
+ SameNamesTestMetricSet mset;
+ MetricSnapshotTestFixture fixture(*this, mset);
+
+ mset.set1.val.addValue(2); // set1 matches the foo="bar" series below: count 2, min 2, max 3, sum 5
+ mset.set1.val.addValue(3);
+ mset.set2.val.addValue(5); // set2 matches the foo="baz" series below: count 2, min 5, max 7, sum 12
+ mset.set2.val.addValue(7);
+
+ fixture.takeSnapshotsOnce();
+ std::string actual = fixture.render_last_snapshot_as_prometheus();
+ std::string expected(R"(# NOTE: THIS API IS NOT INTENDED FOR PUBLIC USE
+# TYPE outer_temp_val_count untyped
+outer_temp_val_count{foo="bar",fancy="stuff"} 2 1300000
+outer_temp_val_count{foo="baz",fancy="stuff"} 2 1300000
+# TYPE outer_temp_val_max untyped
+outer_temp_val_max{foo="bar",fancy="stuff"} 3 1300000
+outer_temp_val_max{foo="baz",fancy="stuff"} 7 1300000
+# TYPE outer_temp_val_min untyped
+outer_temp_val_min{foo="bar",fancy="stuff"} 2 1300000
+outer_temp_val_min{foo="baz",fancy="stuff"} 5 1300000
+# TYPE outer_temp_val_sum untyped
+outer_temp_val_sum{foo="bar",fancy="stuff"} 5 1300000
+outer_temp_val_sum{foo="baz",fancy="stuff"} 12 1300000
+)");
+ EXPECT_EQ(expected, actual); // Exact comparison, so the order of groups and of samples within a group is checked too
+}
+
+struct MetricSetWrapper : MetricSet { // Test-only set named "top_level" that wraps a MultiSubMetricSet
+ MultiSubMetricSet sub; // Constructed with this wrapper as its owner
+
+ MetricSetWrapper();
+ ~MetricSetWrapper() override;
+};
+
+MetricSetWrapper::MetricSetWrapper()
+ : MetricSet("top_level", {}, "stuff and junk", nullptr), // No tags, no owner
+ sub(this)
+{
+}
+
+MetricSetWrapper::~MetricSetWrapper() = default;
+
+TEST_F(MetricManagerTest, prometheus_output_only_emits_sum_metric_aggregate_values) { // No per-set series for sub.a / sub.b are expected, only the "multisub_sum" ones
+ MetricSetWrapper mset;
+ MetricSnapshotTestFixture fixture(*this, mset);
+
+ mset.sub.a.val1.addValue(21); // Largest val1 sample; expected as val1_max below
+ mset.sub.a.val2.addValue(17); // Largest val2 sample; expected as val2_max below
+ mset.sub.b.val1.addValue(7); // Smallest val1 sample; expected as val1_min below
+ mset.sub.b.val2.addValue(3); // Smallest val2 sample; expected as val2_min below
+
+ fixture.takeSnapshotsOnce();
+ std::string actual = fixture.render_last_snapshot_as_prometheus();
+ std::string expected = R"(# NOTE: THIS API IS NOT INTENDED FOR PUBLIC USE
+# TYPE top_level_multisub_sum_val1_count untyped
+top_level_multisub_sum_val1_count 2 1300000
+# TYPE top_level_multisub_sum_val1_max untyped
+top_level_multisub_sum_val1_max 21 1300000
+# TYPE top_level_multisub_sum_val1_min untyped
+top_level_multisub_sum_val1_min 7 1300000
+# TYPE top_level_multisub_sum_val1_sum untyped
+top_level_multisub_sum_val1_sum 56 1300000
+# TYPE top_level_multisub_sum_val2_count untyped
+top_level_multisub_sum_val2_count 2 1300000
+# TYPE top_level_multisub_sum_val2_max untyped
+top_level_multisub_sum_val2_max 17 1300000
+# TYPE top_level_multisub_sum_val2_min untyped
+top_level_multisub_sum_val2_min 3 1300000
+# TYPE top_level_multisub_sum_val2_sum untyped
+top_level_multisub_sum_val2_sum 40 1300000
+# TYPE top_level_multisub_sum_valsum_count untyped
+top_level_multisub_sum_valsum_count 4 1300000
+# TYPE top_level_multisub_sum_valsum_max untyped
+top_level_multisub_sum_valsum_max 21 1300000
+# TYPE top_level_multisub_sum_valsum_min untyped
+top_level_multisub_sum_valsum_min 3 1300000
+# TYPE top_level_multisub_sum_valsum_sum untyped
+top_level_multisub_sum_valsum_sum 192 1300000
+)";
+ EXPECT_EQ(expected, actual); // NOTE(review): the *_sum values are not the plain totals of the added samples (e.g. 21 + 7 != 56) - confirm against SumMetric semantics
+}
+
+TEST_F(MetricManagerTest, prometheus_output_can_emit_inf_values_verbatim) {
+    SameNamesTestMetricSet mset;
+    MetricSnapshotTestFixture fixture(*this, mset);
+
+    // Inf/NaN cannot be set directly because of explicit guards. Instead, add the largest
+    // finite magnitudes twice so that the accumulated sums saturate to +/- Inf.
+    // TODO figure out how to provoke NaN as well.
+    constexpr double huge_positive = std::numeric_limits<double>::max();
+    constexpr double huge_negative = std::numeric_limits<double>::lowest();
+    for (int i = 0; i < 2; ++i) {
+        mset.set1.val.addValue(huge_positive);
+    }
+    for (int i = 0; i < 2; ++i) {
+        mset.set2.val.addValue(huge_negative);
+    }
+
+    fixture.takeSnapshotsOnce();
+    const std::string rendered = fixture.render_last_snapshot_as_prometheus();
+    EXPECT_THAT(rendered, HasSubstr("outer_temp_val_sum{foo=\"bar\",fancy=\"stuff\"} +Inf 1300000\n"));
+    EXPECT_THAT(rendered, HasSubstr("outer_temp_val_sum{foo=\"baz\",fancy=\"stuff\"} -Inf 1300000\n"));
+}
+
+struct SneakyNamesMetricSet : public MetricSet { // Test-only set whose names and tag need normalization or escaping when rendered
+ DoubleValueMetric val1;
+ DoubleValueMetric val2;
+
+ SneakyNamesMetricSet();
+ ~SneakyNamesMetricSet() override;
+};
+
+
+SneakyNamesMetricSet::SneakyNamesMetricSet()
+ : MetricSet("sneaky/path", {}, "sub desc", nullptr), // '/' in the set name
+ val1("a.name", {{"foo.bar", "blah\nbaz\"zoid\\"}}, "", this), // '.' in name and tag key; newline, double quote and backslash in tag value
+ val2("another-name", {}, "", this) // '-' in name, no tags
+{
+}
+
+SneakyNamesMetricSet::~SneakyNamesMetricSet() = default;
+
+TEST_F(MetricManagerTest, prometheus_output_normalizes_and_escapes_names_and_labels) {
+    SneakyNamesMetricSet sneaky;
+    MetricSnapshotTestFixture fixture(*this, sneaky);
+
+    sneaky.val1.addValue(123);
+    sneaky.val2.addValue(42);
+
+    fixture.takeSnapshotsOnce();
+    const std::string rendered = fixture.render_last_snapshot_as_prometheus();
+    // Characters outside [a-zA-Z0-9_] in names and label keys are expected as '_', while
+    // backslash, double quote and newline in label values are expected backslash-escaped.
+    EXPECT_THAT(rendered, HasSubstr(R"(sneaky_path_a_name_count{foo_bar="blah\nbaz\"zoid\\"} 1 1300000)"));
+    EXPECT_THAT(rendered, HasSubstr("sneaky_path_another_name_count 1 1300000"));
+}
+
namespace {
struct MyUpdateHook : public UpdateHook {
std::ostringstream& _output;
diff --git a/metrics/src/vespa/metrics/CMakeLists.txt b/metrics/src/vespa/metrics/CMakeLists.txt
index 00e80ddec26..06a5febaea7 100644
--- a/metrics/src/vespa/metrics/CMakeLists.txt
+++ b/metrics/src/vespa/metrics/CMakeLists.txt
@@ -12,6 +12,7 @@ vespa_add_library(metrics
metrictimer.cpp
metricvalueset.cpp
name_repo.cpp
+ prometheus_writer.cpp
state_api_adapter.cpp
summetric.cpp
textwriter.cpp
diff --git a/metrics/src/vespa/metrics/metric.cpp b/metrics/src/vespa/metrics/metric.cpp
index db27ca63839..652a5e6bd5f 100644
--- a/metrics/src/vespa/metrics/metric.cpp
+++ b/metrics/src/vespa/metrics/metric.cpp
@@ -54,7 +54,7 @@ Tag::Tag(vespalib::stringref k, vespalib::stringref v)
Tag::Tag(const Tag &) noexcept = default;
Tag & Tag::operator = (const Tag &) = default;
-Tag::~Tag() {}
+Tag::~Tag() = default;
Metric::Metric(const String& name,
Tags dimensions,
@@ -139,11 +139,11 @@ Metric::createMangledNameWithDimensions() const
void
Metric::verifyConstructionParameters()
{
- if (getName().size() == 0) {
- throw vespalib::IllegalArgumentException(
- "Metric cannot have empty name", VESPA_STRLOC);
+ if (getName().empty()) {
+ throw vespalib::IllegalArgumentException("Metric cannot have empty name", VESPA_STRLOC);
}
const auto &name = getName();
+ // FIXME this is broken (should use std::regex_match instead, but we have metrics that will fail this test...!)
if (!std::regex_search(name.c_str(), name.c_str() + name.size(), name_pattern_regex)) {
throw vespalib::IllegalArgumentException(
"Illegal metric name '" + getName() + "'. Names must match pattern "
diff --git a/metrics/src/vespa/metrics/metric.h b/metrics/src/vespa/metrics/metric.h
index 36f363a8fc5..afa2851ef13 100644
--- a/metrics/src/vespa/metrics/metric.h
+++ b/metrics/src/vespa/metrics/metric.h
@@ -17,7 +17,7 @@ class MemoryConsumption;
/** Implement class to visit metrics. */
struct MetricVisitor {
- virtual ~MetricVisitor() {}
+ virtual ~MetricVisitor() = default;
/**
* Visit a snapshot. Return true to visit content of the snapshot
@@ -32,8 +32,7 @@ struct MetricVisitor {
* fly such as in sum metrics.
* @return True if you want to visit the content of this metric set.
*/
- virtual bool visitMetricSet(const MetricSet&, bool autoGenerated)
- { (void) autoGenerated; return true; }
+ virtual bool visitMetricSet(const MetricSet&, [[maybe_unused]] bool autoGenerated) { return true; }
/**
* Callback visitors can use if they need to know the tree traversal of
diff --git a/metrics/src/vespa/metrics/prometheus_writer.cpp b/metrics/src/vespa/metrics/prometheus_writer.cpp
new file mode 100644
index 00000000000..27c509638b2
--- /dev/null
+++ b/metrics/src/vespa/metrics/prometheus_writer.cpp
@@ -0,0 +1,287 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "countmetric.h"
+#include "metricset.h"
+#include "metricsnapshot.h"
+#include "prometheus_writer.h"
+#include "valuemetric.h"
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/stllike/hash_set.hpp>
+#include <vespa/vespalib/util/small_vector.h>
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+
+VESPALIB_HASH_SET_INSTANTIATE(vespalib::stringref);
+
+using vespalib::ArrayRef;
+using vespalib::ConstArrayRef;
+using vespalib::stringref;
+using vespalib::asciistream;
+
+namespace metrics {
+
+namespace {
+
+// Returns true iff the metric itself, or any of its owners up to the root, carries
+// at least one tag for which hasValue() is true.
+[[nodiscard]] bool any_metric_in_path_has_nonempty_tag(const Metric& m) noexcept {
+    for (const Metric* node = &m; node != nullptr; node = node->getOwner()) {
+        for (const auto& tag : node->getTags()) {
+            if (tag.hasValue()) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+// Tells whether `ch` can be emitted verbatim in a name, i.e. is in [a-zA-Z0-9_].
+// Prometheus itself also accepts ':', which is deliberately not allowed here.
+[[nodiscard]] constexpr bool valid_prometheus_char(char ch) noexcept {
+    if (ch == '_') {
+        return true;
+    }
+    const bool lower = ('a' <= ch) && (ch <= 'z');
+    const bool upper = ('A' <= ch) && (ch <= 'Z');
+    const bool digit = ('0' <= ch) && (ch <= '9');
+    return lower || upper || digit;
+}
+
+// True iff every character of `name` can be emitted verbatim (vacuously true for an empty name).
+[[nodiscard]] bool valid_prometheus_name(stringref name) noexcept {
+    for (const char ch : name) {
+        if (!valid_prometheus_char(ch)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Backslash, line feed and double quote are the characters that get escaped in label values.
+[[nodiscard]] constexpr bool label_char_needs_escaping(char ch) noexcept {
+    switch (ch) {
+    case '\\':
+    case '\n':
+    case '"':
+        return true;
+    default:
+        return false;
+    }
+}
+
+// True iff `value` contains at least one character that must be escaped.
+[[nodiscard]] bool label_value_needs_escaping(stringref value) noexcept {
+    for (const char ch : value) {
+        if (label_char_needs_escaping(ch)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Returns a copy of `str` in which every character that is not valid in a name
+// has been replaced by an underscore.
+[[nodiscard]] vespalib::string prometheus_escaped_name(stringref str) {
+    asciistream sanitized;
+    for (const char ch : str) {
+        sanitized << (valid_prometheus_char(ch) ? ch : '_');
+    }
+    return sanitized.str();
+}
+
+// Element-wise equality of two string reference arrays; arrays of different length are unequal.
+[[nodiscard]] bool arrays_eq(ConstArrayRef<stringref> lhs, ConstArrayRef<stringref> rhs) noexcept {
+    return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
+}
+
+// Lexicographical "less than" over two string reference arrays.
+[[nodiscard]] bool arrays_lt(ConstArrayRef<stringref> lhs, ConstArrayRef<stringref> rhs) noexcept {
+    return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
+}
+
+}
+
+PrometheusWriter::PrometheusWriter(asciistream& out) // `out` is kept by reference, so it must stay alive for as long as the writer is used
+ : MetricVisitor(),
+ _arena(),
+ _timestamp_str(), // Assigned in visitSnapshot()
+ _samples(), // Filled by visitCountMetric()/visitValueMetric()
+ _unique_str_refs(),
+ _path(),
+ _out(out) // Only written to from doneVisiting()
+{}
+
+PrometheusWriter::~PrometheusWriter() = default;
+
+// Orders samples by metric path first, then by aggregate name and finally by labels.
+// Path and labels are compared element by element through their array references.
+bool PrometheusWriter::TimeSeriesSample::operator<(const TimeSeriesSample& rhs) const noexcept {
+    const bool same_path = arrays_eq(metric_path, rhs.metric_path);
+    if (same_path && (aggr == rhs.aggr)) {
+        // Only the labels can still tell the two samples apart.
+        return arrays_lt(labels, rhs.labels);
+    }
+    return same_path ? (aggr < rhs.aggr)
+                     : arrays_lt(metric_path, rhs.metric_path);
+}
+
+// Returns a string reference with the same contents as `str` whose characters are stored
+// in memory allocated from _arena rather than in the buffer `str` points to.
+// References are interned through _unique_str_refs: if an equal string has been seen
+// before, the previously stored reference is returned and nothing new is allocated.
+stringref PrometheusWriter::arena_stable_string_ref(stringref str) {
+    auto maybe_iter = _unique_str_refs.find(str);
+    if (maybe_iter != _unique_str_refs.end()) {
+        return *maybe_iter;
+    }
+    auto buf = _arena.create_uninitialized_array<char>(str.size());
+    // std::copy_n (from the already included <algorithm>) rather than memcpy: this file does not
+    // include <cstring>, and memcpy is undefined for null pointers even when the length is zero.
+    std::copy_n(str.data(), buf.size(), buf.data());
+    stringref ref(buf.data(), buf.size());
+    _unique_str_refs.insert(ref);
+    return ref;
+}
+
+// Interns `raw_name` in the arena, first replacing invalid name characters
+// with '_' if the name contains any.
+stringref PrometheusWriter::stable_name_string_ref(stringref raw_name) {
+    if (!valid_prometheus_name(raw_name)) [[unlikely]] {
+        const vespalib::string sanitized = prometheus_escaped_name(raw_name);
+        return arena_stable_string_ref(sanitized);
+    }
+    return arena_stable_string_ref(raw_name);
+}
+
+// Builds the full path of a leaf metric: the names in _path (the metric sets currently
+// being visited) followed by the sanitized leaf name. The result is copied into the arena.
+ConstArrayRef<stringref> PrometheusWriter::metric_to_path_ref(stringref leaf_metric_name) {
+    vespalib::SmallVector<stringref, 16> components;
+    // Entries in _path were sanitized and arena-allocated when they were pushed.
+    for (size_t i = 0; i < _path.size(); ++i) {
+        components.emplace_back(_path[i]);
+    }
+    components.emplace_back(stable_name_string_ref(leaf_metric_name));
+    const ConstArrayRef<stringref> view(components.data(), components.size());
+    return _arena.copy_array<stringref>(view);
+}
+
+// Returns `value` with backslash, double quote and line feed replaced by the
+// two-character sequences \\, \" and \n respectively. Other characters pass through as-is.
+vespalib::string PrometheusWriter::escaped_label_value(stringref value) {
+    asciistream escaped;
+    for (const char ch : value) {
+        switch (ch) {
+        case '\\':
+            escaped << "\\\\";
+            break;
+        case '"':
+            escaped << "\\\"";
+            break;
+        case '\n':
+            escaped << "\\n";
+            break;
+        default:
+            escaped << ch; // assumed to be part of a valid UTF-8 sequence
+            break;
+        }
+    }
+    return escaped.str();
+}
+
+// Interns a label value in the arena, escaping it first if it contains any
+// character that needs escaping.
+stringref PrometheusWriter::stable_label_value_string_ref(stringref raw_label_value) {
+    if (label_value_needs_escaping(raw_label_value)) [[unlikely]] {
+        const vespalib::string escaped = escaped_label_value(raw_label_value);
+        return arena_stable_string_ref(escaped);
+    }
+    return arena_stable_string_ref(raw_label_value);
+}
+
+// Appends alternating label key / label value references to `out` for every tag with a
+// value, starting at `m` and continuing through each owner until there is none.
+void PrometheusWriter::build_labels_upto_root(vespalib::SmallVector<stringref, 16>& out, const Metric& m) {
+    for (const Metric* node = &m; node != nullptr; node = node->getOwner()) {
+        for (const auto& tag : node->getTags()) {
+            // Tags without a value are not proper labels and are left out.
+            if (tag.hasValue()) {
+                out.emplace_back(stable_name_string_ref(tag.key()));
+                out.emplace_back(stable_label_value_string_ref(tag.value()));
+            }
+        }
+    }
+}
+
+// Returns the arena-allocated key/value label array for `m` and its owners, or an
+// empty array when no metric on the path has a tag with a value.
+ConstArrayRef<stringref> PrometheusWriter::as_prometheus_labels(const Metric& m) {
+    if (any_metric_in_path_has_nonempty_tag(m)) {
+        vespalib::SmallVector<stringref, 16> flattened_pairs;
+        build_labels_upto_root(flattened_pairs, m);
+        return _arena.copy_array<stringref>(flattened_pairs);
+    }
+    return {};
+}
+
+// Caches the snapshot's "to" time as a millisecond count in string form, so that the
+// conversion is not repeated for every rendered time series. Always returns true.
+bool PrometheusWriter::visitSnapshot(const MetricSnapshot& ms) {
+    const auto since_epoch = ms.getToTime().time_since_epoch();
+    const auto millis = std::chrono::duration_cast<std::chrono::milliseconds>(since_epoch).count();
+    _timestamp_str = std::to_string(millis);
+    return true;
+}
+
+void PrometheusWriter::doneVisitingSnapshot(const MetricSnapshot&) {
+    // Intentionally empty; all output is produced by doneVisiting().
+}
+
+// Decides whether the contents of `set` are visited. Sets tagged "partofsum" are skipped
+// since they are aggregated into a separate sum metric; only the aggregate is of interest,
+// not e.g. individual threads. The name of every other set, except the topmost
+// one (which has no owner), is pushed onto _path.
+bool PrometheusWriter::visitMetricSet(const MetricSet& set, bool) {
+    const bool aggregated_elsewhere = set.hasTag("partofsum");
+    if (aggregated_elsewhere) {
+        return false;
+    }
+    const bool is_topmost = !set.getOwner();
+    if (!is_topmost) {
+        _path.emplace_back(stable_name_string_ref(set.getName()));
+    }
+    return true;
+}
+
+// Pops the path component pushed by visitMetricSet(). The topmost set never pushed one.
+void PrometheusWriter::doneVisitingMetricSet(const MetricSet& set) {
+    if (!set.getOwner()) {
+        return;
+    }
+    assert(!_path.empty());
+    _path.pop_back();
+}
+
+// Records a single "count" sample (integer valued) for a count metric.
+bool PrometheusWriter::visitCountMetric(const AbstractCountMetric& m, bool) {
+    const auto path = metric_to_path_ref(m.getName());
+    const auto label_pairs = as_prometheus_labels(m);
+    TimeSeriesSample sample{path, "count", label_pairs, {m.getLongValue("count")}};
+    _samples.push_back(sample);
+    return true;
+}
+
+// Records four samples for a value metric: "count" (integer valued) followed by
+// "sum", "min" and "max" (double valued). Note that "sum" is read from the metric's
+// "total" value.
+bool PrometheusWriter::visitValueMetric(const AbstractValueMetric& m, bool) {
+    struct DoubleAggregate {
+        const char* aggr;     // Aggregate name used in the output
+        const char* value_id; // Name of the value to read from the metric
+    };
+    static constexpr DoubleAggregate double_aggregates[] = {
+        {"sum", "total"},
+        {"min", "min"},
+        {"max", "max"},
+    };
+
+    const auto path = metric_to_path_ref(m.getName());
+    const auto label_pairs = as_prometheus_labels(m);
+    _samples.emplace_back(TimeSeriesSample{path, "count", label_pairs, {m.getLongValue("count")}});
+    for (const auto& entry : double_aggregates) {
+        _samples.emplace_back(TimeSeriesSample{path, entry.aggr, label_pairs, {m.getDoubleValue(entry.value_id)}});
+    }
+    return true;
+}
+
+// Writes every path component followed by '_', so the output ends with an underscore
+// after which the caller appends the aggregate name.
+void PrometheusWriter::render_path_as_metric_name_prefix(asciistream& out, ConstArrayRef<stringref> path) {
+    for (size_t i = 0; i < path.size(); ++i) {
+        out << path[i] << '_';
+    }
+}
+
+// Writes `labels` (alternating keys and values) as {key="value",key="value"}.
+// Nothing is written, not even the braces, when there are no labels.
+void PrometheusWriter::render_label_pairs(asciistream& out, ConstArrayRef<stringref> labels) {
+    if (labels.empty()) {
+        return;
+    }
+    assert((labels.size() % 2) == 0);
+    out << '{';
+    const char* separator = "";
+    for (size_t key_idx = 0; key_idx < labels.size(); key_idx += 2) {
+        // Keys and values are expected to have been normalized/escaped already.
+        out << separator << labels[key_idx] << "=\"" << labels[key_idx + 1] << '"';
+        separator = ",";
+    }
+    out << '}';
+}
+
+// Writes a sample value. Integers are written as-is. Finite doubles are written with
+// a precision of 16, while the special values are written in the forms Prometheus
+// accepts: "+Inf", "-Inf" and "NaN".
+void PrometheusWriter::render_sample_value(asciistream& out, I64OrDouble value) {
+    if (const auto* as_int = std::get_if<int64_t>(&value)) {
+        out << *as_int;
+        return;
+    }
+    const double v = std::get<double>(value);
+    if (std::isnan(v)) {
+        out << "NaN";
+    } else if (std::isinf(v)) {
+        out << (v < 0.0 ? "-Inf" : "+Inf");
+    } else [[likely]] {
+        out << asciistream::Precision(16) << vespalib::automatic << v;
+    }
+}
+
+// Renders all collected samples to the output stream. The samples are sorted so that
+// those sharing metric path and aggregate become adjacent, and each such group is
+// preceded by one "# TYPE <name> untyped" line.
+void PrometheusWriter::doneVisiting() {
+    _out << "# NOTE: THIS API IS NOT INTENDED FOR PUBLIC USE\n";
+    // TimeSeriesSample::operator< orders by path, then aggregate, then labels.
+    std::sort(_samples.begin(), _samples.end());
+
+    const auto render_series_name = [this](const TimeSeriesSample& sample) {
+        render_path_as_metric_name_prefix(_out, sample.metric_path);
+        _out << sample.aggr;
+    };
+
+    ConstArrayRef<stringref> group_path;
+    stringref group_aggr;
+    for (const auto& sample : _samples) {
+        const bool same_group = (sample.aggr == group_aggr) && arrays_eq(sample.metric_path, group_path);
+        if (!same_group) {
+            // First sample of a new (path, aggregate) group.
+            _out << "# TYPE ";
+            render_series_name(sample);
+            _out << " untyped\n";
+            group_path = sample.metric_path;
+            group_aggr = sample.aggr;
+        }
+        render_series_name(sample);
+        render_label_pairs(_out, sample.labels);
+        _out << ' ';
+        render_sample_value(_out, sample.value);
+        _out << ' ' << _timestamp_str << '\n';
+    }
+}
+
+}
diff --git a/metrics/src/vespa/metrics/prometheus_writer.h b/metrics/src/vespa/metrics/prometheus_writer.h
new file mode 100644
index 00000000000..410ea3b2b8e
--- /dev/null
+++ b/metrics/src/vespa/metrics/prometheus_writer.h
@@ -0,0 +1,77 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "metric.h"
+#include <vespa/vespalib/stllike/hash_set.h>
+#include <vespa/vespalib/util/small_vector.h>
+#include <vespa/vespalib/util/stash.h>
+#include <vespa/vespalib/util/time.h>
+#include <variant>
+#include <vector>
+
+namespace vespalib { class asciistream; }
+
+namespace metrics {
+
+/**
+ * Metric snapshot writer which emits text output conforming to the Prometheus
+ * 0.0.4 exposition format.
+ *
+ * Metrics are emitted by effectively "flattening" all paths in the metric tree
+ * and grouping by path and aggregation type (sum, count etc).
+ *
+ * - For CountMetrics, emits `_count` (i64) time series only.
+ * - For ValueMetrics, emits `_count` (i64), `_sum`, `_min` and `_max` (double)
+ * time series (`_last` and `_average` are _not_ included).
+ *
+ * Due to poor compatibility between Prometheus and our internal data model, all
+ * time series are emitted as "untyped".
+ */
+class PrometheusWriter : public MetricVisitor {
+ using I64OrDouble = std::variant<int64_t, double>; // Counts are stored as int64_t, other aggregates as double
+ struct TimeSeriesSample {
+ // All referenced strings shall be either arena-allocated or static
+ vespalib::ConstArrayRef<vespalib::stringref> metric_path; // Names of the enclosing sets (topmost excluded) followed by the metric's own name
+ vespalib::stringref aggr; // One of "count", "sum", "min" or "max"
+ // Labels are laid out in key/value pairs, that is size() is always % 2 == 0
+ vespalib::ConstArrayRef<vespalib::stringref> labels;
+ I64OrDouble value;
+
+ bool operator<(const TimeSeriesSample& rhs) const noexcept; // Orders by metric_path, then aggr, then labels
+ };
+
+ vespalib::Stash _arena; // Backing storage for interned strings and for path/label arrays
+ std::string _timestamp_str; // Snapshot "to" time in milliseconds since epoch; set by visitSnapshot()
+ std::vector<TimeSeriesSample> _samples; // Collected while visiting; sorted and rendered by doneVisiting()
+ vespalib::hash_set<vespalib::stringref> _unique_str_refs; // Strings already copied into _arena
+ std::vector<vespalib::stringref> _path; // Sanitized names of the metric sets currently being visited (topmost excluded)
+ vespalib::asciistream& _out; // Destination of the rendered output
+public:
+ explicit PrometheusWriter(vespalib::asciistream& out);
+ ~PrometheusWriter() override;
+
+private:
+ [[nodiscard]] vespalib::stringref arena_stable_string_ref(vespalib::stringref str); // Interns str in _arena
+ [[nodiscard]] vespalib::ConstArrayRef<vespalib::stringref> as_prometheus_labels(const Metric& m); // Empty if no tag with a value exists on the path to the root
+ [[nodiscard]] vespalib::ConstArrayRef<vespalib::stringref> metric_to_path_ref(vespalib::stringref leaf_metric_name);
+ [[nodiscard]] vespalib::stringref stable_name_string_ref(vespalib::stringref raw_name); // Invalid name characters are replaced by '_'
+ [[nodiscard]] vespalib::stringref stable_label_value_string_ref(vespalib::stringref raw_label_value); // Escapes backslash, double quote and newline
+ void build_labels_upto_root(vespalib::SmallVector<vespalib::stringref, 16>& out, const Metric& m);
+
+ [[nodiscard]] static vespalib::string escaped_label_value(vespalib::stringref value);
+ // Renders name with a tailing '_' character, as the caller is expected to append an aggregate.
+ static void render_path_as_metric_name_prefix(vespalib::asciistream& out, vespalib::ConstArrayRef<vespalib::stringref> path);
+ static void render_label_pairs(vespalib::asciistream& out, vespalib::ConstArrayRef<vespalib::stringref> labels);
+ static void render_sample_value(vespalib::asciistream& out, I64OrDouble value); // Non-finite doubles are rendered as "+Inf", "-Inf" or "NaN"
+
+ // MetricVisitor impl
+ bool visitSnapshot(const MetricSnapshot&) override;
+ void doneVisitingSnapshot(const MetricSnapshot&) override;
+ bool visitMetricSet(const MetricSet&, bool autoGenerated) override; // Returns false for sets tagged "partofsum"
+ void doneVisitingMetricSet(const MetricSet&) override;
+ bool visitCountMetric(const AbstractCountMetric&, bool autoGenerated) override;
+ bool visitValueMetric(const AbstractValueMetric&, bool autoGenerated) override;
+ void doneVisiting() override; // Sorts the collected samples and writes them to _out
+};
+
+}
diff --git a/storage/src/vespa/storage/common/statusmetricconsumer.cpp b/storage/src/vespa/storage/common/statusmetricconsumer.cpp
index 90cd52e27b4..a2cfbf0843b 100644
--- a/storage/src/vespa/storage/common/statusmetricconsumer.cpp
+++ b/storage/src/vespa/storage/common/statusmetricconsumer.cpp
@@ -5,6 +5,7 @@
#include <boost/lexical_cast.hpp>
#include <vespa/metrics/jsonwriter.h>
#include <vespa/metrics/textwriter.h>
+#include <vespa/metrics/prometheus_writer.h>
#include <vespa/metrics/metricmanager.h>
#include <vespa/storageapi/messageapi/storagemessage.h>
#include <vespa/vespalib/stllike/asciistream.h>
@@ -39,6 +40,10 @@ StatusMetricConsumer::getReportContentType(const framework::HttpUrlPath& path) c
return "text/plain";
}
+ if (path.getAttribute("format") == "prometheus") {
+ return "text/plain; version=0.0.4";
+ }
+
if (path.getAttribute("format") == "json") {
return "application/json";
}
@@ -53,7 +58,8 @@ StatusMetricConsumer::reportStatus(std::ostream& out,
_manager.updateMetrics();
vespalib::system_time currentTime = _component.getClock().getSystemTime();
- bool json = (path.getAttribute("format") == "json");
+ const bool json = (path.getAttribute("format") == "json");
+ const bool prometheus = (path.getAttribute("format") == "prometheus");
int verbosity(path.get("verbosity", 0));
// We have to copy unset values if using HTML as HTML version gathers
@@ -126,6 +132,11 @@ StatusMetricConsumer::reportStatus(std::ostream& out,
stream << End();
stream.finalize();
out << jsonStreamData.str();
+ } else if (prometheus) {
+ vespalib::asciistream ps;
+ metrics::PrometheusWriter pw(ps);
+ _manager.visit(metricLock, *snapshot, pw, consumer);
+ out << ps.str();
} else {
std::string pattern = path.getAttribute("pattern", ".*");
metrics::TextWriter textWriter(out, snapshot->getPeriod(), pattern, verbosity > 0);