diff options
-rw-r--r-- | metrics/src/tests/metricmanagertest.cpp | 184 | ||||
-rw-r--r-- | metrics/src/vespa/metrics/CMakeLists.txt | 1 | ||||
-rw-r--r-- | metrics/src/vespa/metrics/metric.cpp | 8 | ||||
-rw-r--r-- | metrics/src/vespa/metrics/metric.h | 5 | ||||
-rw-r--r-- | metrics/src/vespa/metrics/prometheus_writer.cpp | 287 | ||||
-rw-r--r-- | metrics/src/vespa/metrics/prometheus_writer.h | 77 | ||||
-rw-r--r-- | storage/src/vespa/storage/common/statusmetricconsumer.cpp | 13 |
7 files changed, 543 insertions, 32 deletions
diff --git a/metrics/src/tests/metricmanagertest.cpp b/metrics/src/tests/metricmanagertest.cpp index be63bed5bec..e5e39bd9dcb 100644 --- a/metrics/src/tests/metricmanagertest.cpp +++ b/metrics/src/tests/metricmanagertest.cpp @@ -5,19 +5,21 @@ #include <vespa/metrics/metricmanager.h> #include <vespa/metrics/state_api_adapter.h> #include <vespa/metrics/textwriter.h> +#include <vespa/metrics/prometheus_writer.h> #include <vespa/vespalib/data/slime/slime.h> -#include <vespa/vespalib/gtest/gtest.h> #include <vespa/vespalib/stllike/asciistream.h> -#include <vespa/vespalib/util/size_literals.h> #include <vespa/vespalib/util/time.h> #include <vespa/vespalib/data/simple_buffer.h> #include <vespa/vespalib/util/atomic.h> +#include <gtest/gtest.h> +#include <gmock/gmock.h> #include <mutex> #include <thread> #include <vespa/log/log.h> LOG_SETUP(".test.metricmanager"); +using namespace ::testing; using namespace vespalib::atomic; using config::ConfigUri; @@ -41,16 +43,16 @@ struct SubMetricSet : public MetricSet DoubleValueMetric val2; SumMetric<DoubleValueMetric> valsum; - SubMetricSet(const Metric::String & name, MetricSet* owner); - ~SubMetricSet(); + explicit SubMetricSet(const Metric::String& name, MetricSet* owner = nullptr); + ~SubMetricSet() override; }; -SubMetricSet::SubMetricSet(const Metric::String & name, MetricSet* owner) - : MetricSet(name, {{"sub"}}, "sub desc", owner), - val1("val1", {{"tag4"},{"snaptest"}}, "val1 desc", this), +SubMetricSet::SubMetricSet(const Metric::String& name, MetricSet* owner) + : MetricSet(name, {{"partofsum"}, {"sub"}}, "sub desc", owner), + val1("val1", {{"tag4"}, {"snaptest"}}, "val1 desc", this), val2("val2", {{"tag5"}}, "val2 desc", this), - valsum("valsum", {{"tag4"},{"snaptest"}}, "valsum desc", this) + valsum("valsum", {{"tag4"}, {"snaptest"}}, "valsum desc", this) { valsum.addMetricToSum(val1); valsum.addMetricToSum(val2); @@ -65,7 +67,7 @@ struct MultiSubMetricSet SubMetricSet b; SumMetric<MetricSet> sum; - 
MultiSubMetricSet(MetricSet* owner); + explicit MultiSubMetricSet(MetricSet* owner); ~MultiSubMetricSet(); }; @@ -156,13 +158,6 @@ getMatchedMetrics(const vespalib::string& config) mm.init(ConfigUri(config)); MetricNameVisitor visitor; - /** Take a copy to verify clone works. - std::list<Metric::SP> ownerList; - MetricSet::UP copy(dynamic_cast<MetricSet*>( - mm.getMetrics().clone(ownerList))); - mm.visit(*copy, visitor, "consumer"); - */ - MetricLockGuard g(mm.getMetricLock()); mm.visit(g, mm.getActiveMetrics(g), visitor, "consumer"); const MetricManager::ConsumerSpec * consumerSpec = mm.getConsumerSpec(g, "consumer"); @@ -386,8 +381,8 @@ struct BriefValuePrinter : public MetricVisitor { bool waitForTimeProcessed(const MetricManager& mm, time_point::duration processtime, uint32_t timeout = 120) { - uint32_t lastchance = time(0) + timeout; - while (time(0) < lastchance) { + uint32_t lastchance = time(nullptr) + timeout; + while (time(nullptr) < lastchance) { if (mm.getLastProcessedTime() >= time_point(processtime)) return true; mm.timeChangedNotification(); std::this_thread::sleep_for(10ms); @@ -705,6 +700,16 @@ struct MetricSnapshotTestFixture } return ss.str(); } + + std::string render_last_snapshot_as_prometheus() const { + vespalib::asciistream os; + PrometheusWriter writer(os); + { + MetricLockGuard lockGuard(manager.getMetricLock()); + manager.visit(lockGuard, manager.getMetricSnapshot(lockGuard, 300s, false), writer, "snapper"); + } + return os.str(); + } }; class JsonMetricWrapper @@ -762,7 +767,7 @@ struct DimensionTestMetricSet : MetricSet DoubleValueMetric val1; LongCountMetric val2; - DimensionTestMetricSet(MetricSet* owner = nullptr); + explicit DimensionTestMetricSet(MetricSet* owner = nullptr); ~DimensionTestMetricSet() override; }; @@ -801,7 +806,7 @@ struct NestedDimensionTestMetricSet : MetricSet DimensionTestMetricSet nestedSet; NestedDimensionTestMetricSet(); - ~NestedDimensionTestMetricSet(); + ~NestedDimensionTestMetricSet() override; }; 
NestedDimensionTestMetricSet::NestedDimensionTestMetricSet() @@ -839,7 +844,7 @@ struct DimensionOverridableTestMetricSet : MetricSet { DoubleValueMetric val; - DimensionOverridableTestMetricSet(const std::string& dimValue, MetricSet* owner = nullptr); + explicit DimensionOverridableTestMetricSet(const std::string& dimValue, MetricSet* owner = nullptr); ~DimensionOverridableTestMetricSet() override; }; @@ -855,7 +860,7 @@ struct SameNamesTestMetricSet : MetricSet DimensionOverridableTestMetricSet set2; SameNamesTestMetricSet(); - ~SameNamesTestMetricSet(); + ~SameNamesTestMetricSet() override; }; SameNamesTestMetricSet::SameNamesTestMetricSet() @@ -894,14 +899,14 @@ TEST_F(MetricManagerTest, test_text_output) MetricLockGuard lockGuard(mm.getMetricLock()); mm.registerMetric(lockGuard, mySet.set); } - // Adding metrics to have some values in them + // Adding metrics to have some values in them mySet.val6.addValue(2); mySet.val9.val1.addValue(4); mySet.val10.count.inc(); mySet.val10.a.val1.addValue(7); mySet.val10.a.val2.addValue(2); mySet.val10.b.val1.addValue(1); - // Initialize metric manager to get snapshots created. + // Initialize metric manager to get snapshots created. 
mm.init(ConfigUri("raw:" "consumer[2]\n" "consumer[0].name snapper\n" @@ -953,6 +958,137 @@ TEST_F(MetricManagerTest, text_output_supports_dimensions) EXPECT_EQ(expected, actual); } +TEST_F(MetricManagerTest, prometheus_output_groups_related_time_series) { + SameNamesTestMetricSet mset; + MetricSnapshotTestFixture fixture(*this, mset); + + mset.set1.val.addValue(2); + mset.set1.val.addValue(3); + mset.set2.val.addValue(5); + mset.set2.val.addValue(7); + + fixture.takeSnapshotsOnce(); + std::string actual = fixture.render_last_snapshot_as_prometheus(); + std::string expected(R"(# NOTE: THIS API IS NOT INTENDED FOR PUBLIC USE +# TYPE outer_temp_val_count untyped +outer_temp_val_count{foo="bar",fancy="stuff"} 2 1300000 +outer_temp_val_count{foo="baz",fancy="stuff"} 2 1300000 +# TYPE outer_temp_val_max untyped +outer_temp_val_max{foo="bar",fancy="stuff"} 3 1300000 +outer_temp_val_max{foo="baz",fancy="stuff"} 7 1300000 +# TYPE outer_temp_val_min untyped +outer_temp_val_min{foo="bar",fancy="stuff"} 2 1300000 +outer_temp_val_min{foo="baz",fancy="stuff"} 5 1300000 +# TYPE outer_temp_val_sum untyped +outer_temp_val_sum{foo="bar",fancy="stuff"} 5 1300000 +outer_temp_val_sum{foo="baz",fancy="stuff"} 12 1300000 +)"); + EXPECT_EQ(expected, actual); +} + +struct MetricSetWrapper : MetricSet { + MultiSubMetricSet sub; + + MetricSetWrapper(); + ~MetricSetWrapper() override; +}; + +MetricSetWrapper::MetricSetWrapper() + : MetricSet("top_level", {}, "stuff and junk", nullptr), + sub(this) +{ +} + +MetricSetWrapper::~MetricSetWrapper() = default; + +TEST_F(MetricManagerTest, prometheus_output_only_emits_sum_metric_aggregate_values) { + MetricSetWrapper mset; + MetricSnapshotTestFixture fixture(*this, mset); + + mset.sub.a.val1.addValue(21); + mset.sub.a.val2.addValue(17); + mset.sub.b.val1.addValue(7); + mset.sub.b.val2.addValue(3); + + fixture.takeSnapshotsOnce(); + std::string actual = fixture.render_last_snapshot_as_prometheus(); + std::string expected = R"(# NOTE: THIS API IS NOT 
INTENDED FOR PUBLIC USE +# TYPE top_level_multisub_sum_val1_count untyped +top_level_multisub_sum_val1_count 2 1300000 +# TYPE top_level_multisub_sum_val1_max untyped +top_level_multisub_sum_val1_max 21 1300000 +# TYPE top_level_multisub_sum_val1_min untyped +top_level_multisub_sum_val1_min 7 1300000 +# TYPE top_level_multisub_sum_val1_sum untyped +top_level_multisub_sum_val1_sum 56 1300000 +# TYPE top_level_multisub_sum_val2_count untyped +top_level_multisub_sum_val2_count 2 1300000 +# TYPE top_level_multisub_sum_val2_max untyped +top_level_multisub_sum_val2_max 17 1300000 +# TYPE top_level_multisub_sum_val2_min untyped +top_level_multisub_sum_val2_min 3 1300000 +# TYPE top_level_multisub_sum_val2_sum untyped +top_level_multisub_sum_val2_sum 40 1300000 +# TYPE top_level_multisub_sum_valsum_count untyped +top_level_multisub_sum_valsum_count 4 1300000 +# TYPE top_level_multisub_sum_valsum_max untyped +top_level_multisub_sum_valsum_max 21 1300000 +# TYPE top_level_multisub_sum_valsum_min untyped +top_level_multisub_sum_valsum_min 3 1300000 +# TYPE top_level_multisub_sum_valsum_sum untyped +top_level_multisub_sum_valsum_sum 192 1300000 +)"; + EXPECT_EQ(expected, actual); +} + +TEST_F(MetricManagerTest, prometheus_output_can_emit_inf_values_verbatim) { + SameNamesTestMetricSet mset; + MetricSnapshotTestFixture fixture(*this, mset); + + // We have explicit guards against setting Inf/NaN directly, so we have to fudge the numbers + // a bit to get +/- Inf by saturating additions towards infinity. TODO how to test NaN...? 
:o + mset.set1.val.addValue(std::numeric_limits<double>::max()); + mset.set1.val.addValue(std::numeric_limits<double>::max()); + mset.set2.val.addValue(std::numeric_limits<double>::lowest()); + mset.set2.val.addValue(std::numeric_limits<double>::lowest()); + + fixture.takeSnapshotsOnce(); + std::string actual = fixture.render_last_snapshot_as_prometheus(); + EXPECT_THAT(actual, HasSubstr("outer_temp_val_sum{foo=\"bar\",fancy=\"stuff\"} +Inf 1300000\n")); + EXPECT_THAT(actual, HasSubstr("outer_temp_val_sum{foo=\"baz\",fancy=\"stuff\"} -Inf 1300000\n")); +} + +struct SneakyNamesMetricSet : public MetricSet { + DoubleValueMetric val1; + DoubleValueMetric val2; + + SneakyNamesMetricSet(); + ~SneakyNamesMetricSet() override; +}; + + +SneakyNamesMetricSet::SneakyNamesMetricSet() + : MetricSet("sneaky/path", {}, "sub desc", nullptr), + val1("a.name", {{"foo.bar", "blah\nbaz\"zoid\\"}}, "", this), + val2("another-name", {}, "", this) +{ +} + +SneakyNamesMetricSet::~SneakyNamesMetricSet() = default; + +TEST_F(MetricManagerTest, prometheus_output_normalizes_and_escapes_names_and_labels) { + SneakyNamesMetricSet mset; + MetricSnapshotTestFixture fixture(*this, mset); + + mset.val1.addValue(123); + mset.val2.addValue(42); + + fixture.takeSnapshotsOnce(); + std::string actual = fixture.render_last_snapshot_as_prometheus(); + EXPECT_THAT(actual, HasSubstr(R"(sneaky_path_a_name_count{foo_bar="blah\nbaz\"zoid\\"} 1 1300000)")); + EXPECT_THAT(actual, HasSubstr("sneaky_path_another_name_count 1 1300000")); +} + namespace { struct MyUpdateHook : public UpdateHook { std::ostringstream& _output; diff --git a/metrics/src/vespa/metrics/CMakeLists.txt b/metrics/src/vespa/metrics/CMakeLists.txt index 00e80ddec26..06a5febaea7 100644 --- a/metrics/src/vespa/metrics/CMakeLists.txt +++ b/metrics/src/vespa/metrics/CMakeLists.txt @@ -12,6 +12,7 @@ vespa_add_library(metrics metrictimer.cpp metricvalueset.cpp name_repo.cpp + prometheus_writer.cpp state_api_adapter.cpp summetric.cpp textwriter.cpp 
diff --git a/metrics/src/vespa/metrics/metric.cpp b/metrics/src/vespa/metrics/metric.cpp index db27ca63839..652a5e6bd5f 100644 --- a/metrics/src/vespa/metrics/metric.cpp +++ b/metrics/src/vespa/metrics/metric.cpp @@ -54,7 +54,7 @@ Tag::Tag(vespalib::stringref k, vespalib::stringref v) Tag::Tag(const Tag &) noexcept = default; Tag & Tag::operator = (const Tag &) = default; -Tag::~Tag() {} +Tag::~Tag() = default; Metric::Metric(const String& name, Tags dimensions, @@ -139,11 +139,11 @@ Metric::createMangledNameWithDimensions() const void Metric::verifyConstructionParameters() { - if (getName().size() == 0) { - throw vespalib::IllegalArgumentException( - "Metric cannot have empty name", VESPA_STRLOC); + if (getName().empty()) { + throw vespalib::IllegalArgumentException("Metric cannot have empty name", VESPA_STRLOC); } const auto &name = getName(); + // FIXME this is broken (should use std::regex_match instead, but we have metrics that will fail this test...!) if (!std::regex_search(name.c_str(), name.c_str() + name.size(), name_pattern_regex)) { throw vespalib::IllegalArgumentException( "Illegal metric name '" + getName() + "'. Names must match pattern " diff --git a/metrics/src/vespa/metrics/metric.h b/metrics/src/vespa/metrics/metric.h index 36f363a8fc5..afa2851ef13 100644 --- a/metrics/src/vespa/metrics/metric.h +++ b/metrics/src/vespa/metrics/metric.h @@ -17,7 +17,7 @@ class MemoryConsumption; /** Implement class to visit metrics. */ struct MetricVisitor { - virtual ~MetricVisitor() {} + virtual ~MetricVisitor() = default; /** * Visit a snapshot. Return true to visit content of the snapshot @@ -32,8 +32,7 @@ struct MetricVisitor { * fly such as in sum metrics. * @return True if you want to visit the content of this metric set. 
*/ - virtual bool visitMetricSet(const MetricSet&, bool autoGenerated) - { (void) autoGenerated; return true; } + virtual bool visitMetricSet(const MetricSet&, [[maybe_unused]] bool autoGenerated) { return true; } /** * Callback visitors can use if they need to know the tree traversal of diff --git a/metrics/src/vespa/metrics/prometheus_writer.cpp b/metrics/src/vespa/metrics/prometheus_writer.cpp new file mode 100644 index 00000000000..27c509638b2 --- /dev/null +++ b/metrics/src/vespa/metrics/prometheus_writer.cpp @@ -0,0 +1,287 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "countmetric.h" +#include "metricset.h" +#include "metricsnapshot.h" +#include "prometheus_writer.h" +#include "valuemetric.h" +#include <vespa/vespalib/stllike/asciistream.h> +#include <vespa/vespalib/stllike/hash_set.hpp> +#include <vespa/vespalib/util/small_vector.h> +#include <algorithm> +#include <cassert> +#include <cmath> + +VESPALIB_HASH_SET_INSTANTIATE(vespalib::stringref); + +using vespalib::ArrayRef; +using vespalib::ConstArrayRef; +using vespalib::stringref; +using vespalib::asciistream; + +namespace metrics { + +namespace { + +[[nodiscard]] bool any_metric_in_path_has_nonempty_tag(const Metric& m) noexcept { + const Metric* current = &m; + do { + if (std::ranges::any_of(current->getTags(), [](auto& t) noexcept { return t.hasValue(); })) { + return true; + } + current = current->getOwner(); + } while (current != nullptr); + return false; +} + +[[nodiscard]] constexpr bool valid_prometheus_char(char ch) noexcept { + // Prometheus also allows ':', but we don't. 
+ return ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || ch == '_'); +} + +[[nodiscard]] bool valid_prometheus_name(stringref name) noexcept { + return std::ranges::all_of(name, [](char ch) noexcept { return valid_prometheus_char(ch); }); +} + +[[nodiscard]] constexpr bool label_char_needs_escaping(char ch) noexcept { + return (ch == '\\' || ch == '\n' || ch == '"'); +} + +[[nodiscard]] bool label_value_needs_escaping(stringref value) noexcept { + return std::ranges::any_of(value, [](char ch) noexcept { return label_char_needs_escaping(ch); }); +} + +[[nodiscard]] vespalib::string prometheus_escaped_name(stringref str) { + asciistream os; + for (char ch : str) { + if (valid_prometheus_char(ch)) [[likely]] { + os << ch; + } else { + os << '_'; + } + } + return os.str(); +} + +[[nodiscard]] bool arrays_eq(ConstArrayRef<stringref> lhs, ConstArrayRef<stringref> rhs) noexcept { + return std::ranges::equal(lhs, rhs); +} + +[[nodiscard]] bool arrays_lt(ConstArrayRef<stringref> lhs, ConstArrayRef<stringref> rhs) noexcept { + return std::ranges::lexicographical_compare(lhs, rhs); +} + +} + +PrometheusWriter::PrometheusWriter(asciistream& out) + : MetricVisitor(), + _arena(), + _timestamp_str(), + _samples(), + _unique_str_refs(), + _path(), + _out(out) +{} + +PrometheusWriter::~PrometheusWriter() = default; + +bool PrometheusWriter::TimeSeriesSample::operator<(const TimeSeriesSample& rhs) const noexcept { + // Standard multidimensional strict-weak ordering, with an indirection via + // ConstArrayRefs for the first and last dimension. 
+ if (!arrays_eq(metric_path, rhs.metric_path)) { + return arrays_lt(metric_path, rhs.metric_path); + } + if (aggr != rhs.aggr) { + return aggr < rhs.aggr; + } + return arrays_lt(labels, rhs.labels); +} + +stringref PrometheusWriter::arena_stable_string_ref(stringref str) { + auto maybe_iter = _unique_str_refs.find(str); + if (maybe_iter != _unique_str_refs.end()) { + return *maybe_iter; + } + auto buf = _arena.create_uninitialized_array<char>(str.size()); + memcpy(buf.data(), str.data(), buf.size()); + stringref ref(buf.data(), buf.size()); + _unique_str_refs.insert(ref); + return ref; +} + +stringref PrometheusWriter::stable_name_string_ref(stringref raw_name) { + if (valid_prometheus_name(raw_name)) [[likely]] { + return arena_stable_string_ref(raw_name); + } else { + return arena_stable_string_ref(prometheus_escaped_name(raw_name)); + } +} + +ConstArrayRef<stringref> PrometheusWriter::metric_to_path_ref(stringref leaf_metric_name) { + vespalib::SmallVector<stringref, 16> path_refs; + // _path strings are already in canonical (sanitized) form and arena-allocated + for (const auto& p :_path) { + path_refs.emplace_back(p); + } + path_refs.emplace_back(stable_name_string_ref(leaf_metric_name)); + return _arena.copy_array<stringref>({path_refs.data(), path_refs.size()}); +} + +vespalib::string PrometheusWriter::escaped_label_value(stringref value) { + asciistream out; + for (char ch : value) { + if (ch == '\\') { + out << "\\\\"; + } else if (ch == '"') { + out << "\\\""; + } else if (ch == '\n') { + out << "\\n"; + } else [[likely]] { + out << ch; // assumed to be part of a valid UTF-8 sequence + } + } + return out.str(); +} + +stringref PrometheusWriter::stable_label_value_string_ref(stringref raw_label_value) { + if (!label_value_needs_escaping(raw_label_value)) [[likely]] { + return arena_stable_string_ref(raw_label_value); + } else { + return arena_stable_string_ref(escaped_label_value(raw_label_value)); + } +} + +void 
PrometheusWriter::build_labels_upto_root(vespalib::SmallVector<stringref, 16>& out, const Metric& m) { + const Metric* current = &m; + do { + for (const auto& tag : current->getTags()) { + if (!tag.hasValue()) { + continue; // Don't emit value-less tags, as these are not proper labels + } + out.emplace_back(stable_name_string_ref(tag.key())); + out.emplace_back(stable_label_value_string_ref(tag.value())); + } + current = current->getOwner(); + } while (current != nullptr); +} + +ConstArrayRef<stringref> PrometheusWriter::as_prometheus_labels(const Metric& m) { + if (!any_metric_in_path_has_nonempty_tag(m)) { + return {}; + } + vespalib::SmallVector<stringref, 16> kv_refs; + build_labels_upto_root(kv_refs, m); + return _arena.copy_array<stringref>(kv_refs); +} + +bool PrometheusWriter::visitSnapshot(const MetricSnapshot& ms) { + // Pre-cache timestamp in string form to avoid same conversion for every time series + _timestamp_str = std::to_string(std::chrono::duration_cast<std::chrono::milliseconds>( + ms.getToTime().time_since_epoch()).count()); + return true; +} + +void PrometheusWriter::doneVisitingSnapshot(const MetricSnapshot&) { + // No-op +} + +bool PrometheusWriter::visitMetricSet(const MetricSet& set, bool) { + // Don't include metric sets that will be aggregated up into a separate sum metric. + // We don't care about individual threads etc., just their aggregate values. 
+ if (set.hasTag("partofsum")) { + return false; + } + if (set.getOwner()) { + _path.emplace_back(stable_name_string_ref(set.getName())); + } // else: don't add the topmost set + return true; +} + +void PrometheusWriter::doneVisitingMetricSet(const MetricSet& set) { + if (set.getOwner()) { + assert(!_path.empty()); + _path.pop_back(); + } +} + +bool PrometheusWriter::visitCountMetric(const AbstractCountMetric& m, bool) { + auto full_path = metric_to_path_ref(m.getName()); + auto labels = as_prometheus_labels(m); + _samples.emplace_back(TimeSeriesSample{full_path, "count", labels, {m.getLongValue("count")}}); + return true; +} + +bool PrometheusWriter::visitValueMetric(const AbstractValueMetric& m, bool) { + auto full_path = metric_to_path_ref(m.getName()); + auto labels = as_prometheus_labels(m); + _samples.emplace_back(TimeSeriesSample{full_path, "count", labels, {m.getLongValue("count")}}); + _samples.emplace_back(TimeSeriesSample{full_path, "sum", labels, {m.getDoubleValue("total")}}); + _samples.emplace_back(TimeSeriesSample{full_path, "min", labels, {m.getDoubleValue("min")}}); + _samples.emplace_back(TimeSeriesSample{full_path, "max", labels, {m.getDoubleValue("max")}}); + return true; +} + +void PrometheusWriter::render_path_as_metric_name_prefix(asciistream& out, ConstArrayRef<stringref> path) { + for (const auto& p : path) { + out << p << '_'; + } +} + +void PrometheusWriter::render_label_pairs(asciistream& out, ConstArrayRef<stringref> labels) { + if (!labels.empty()) { + assert((labels.size() % 2) == 0); + out << '{'; + for (size_t i = 0; i < labels.size(); i += 2) { + if (i > 0) { + out << ','; + } + // We expect both label key and value to be pre-normalized/sanitized. 
+ out << labels[i] << "=\"" << labels[i + 1] << '"'; + } + out << '}'; + } +} + +void PrometheusWriter::render_sample_value(asciistream& out, I64OrDouble value) { + if (std::holds_alternative<double>(value)) { + const double v = std::get<double>(value); + const bool inf = std::isinf(v); + const bool nan = std::isnan(v); + // Prometheus allows "-Inf", "+Inf" and "NaN" as special values for negative infinity, + // positive infinity and "not a number", respectively. + if (!inf && !nan) [[likely]] { + out << asciistream::Precision(16) << vespalib::automatic << v; + } else if (inf) { + out << (v < 0.0 ? "-Inf" : "+Inf"); + } else { + out << "NaN"; + } + } else { + const int64_t v = std::get<int64_t>(value); + out << v; + } +} + +void PrometheusWriter::doneVisiting() { + _out << "# NOTE: THIS API IS NOT INTENDED FOR PUBLIC USE\n"; + // Sort and implicitly group all related metrics together, ordered by name -> aggregation -> dimensions + std::sort(_samples.begin(), _samples.end()); + ConstArrayRef<stringref> last_metric; + stringref last_aggr; + for (const auto& s : _samples) { + if ((s.aggr != last_aggr) || !arrays_eq(s.metric_path, last_metric)) { + _out << "# TYPE "; + render_path_as_metric_name_prefix(_out, s.metric_path); + _out << s.aggr << " untyped\n"; + last_metric = s.metric_path; + last_aggr = s.aggr; + } + render_path_as_metric_name_prefix(_out, s.metric_path); + _out << s.aggr; + render_label_pairs(_out, s.labels); + _out << ' '; + render_sample_value(_out, s.value); + _out << ' ' << _timestamp_str << '\n'; + } +} + +} diff --git a/metrics/src/vespa/metrics/prometheus_writer.h b/metrics/src/vespa/metrics/prometheus_writer.h new file mode 100644 index 00000000000..410ea3b2b8e --- /dev/null +++ b/metrics/src/vespa/metrics/prometheus_writer.h @@ -0,0 +1,77 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include "metric.h" +#include <vespa/vespalib/stllike/hash_set.h> +#include <vespa/vespalib/util/small_vector.h> +#include <vespa/vespalib/util/stash.h> +#include <vespa/vespalib/util/time.h> +#include <variant> +#include <vector> + +namespace vespalib { class asciistream; } + +namespace metrics { + +/** + * Metric snapshot writer which emits text output conforming to the Prometheus + * 0.0.4 exposition format. + * + * Metrics are emitted by effectively "flattening" all paths in the metric tree + * and grouping by path and aggregation type (sum, count etc). + * + * - For CountMetrics, emits `_count` (i64) time series only. + * - For ValueMetrics, emits `_count` (i64), `_sum`, `_min` and `_max` (double) + * time series (`_last` and `_average` are _not_ included). + * + * Due to poor compatibility between Prometheus and our internal data model, all + * time series are emitted as "untyped". + */ +class PrometheusWriter : public MetricVisitor { + using I64OrDouble = std::variant<int64_t, double>; + struct TimeSeriesSample { + // All referenced strings shall be either arena-allocated or static + vespalib::ConstArrayRef<vespalib::stringref> metric_path; + vespalib::stringref aggr; + // Labels are laid out in key/value pairs, that is size() is always % 2 == 0 + vespalib::ConstArrayRef<vespalib::stringref> labels; + I64OrDouble value; + + bool operator<(const TimeSeriesSample& rhs) const noexcept; + }; + + vespalib::Stash _arena; + std::string _timestamp_str; + std::vector<TimeSeriesSample> _samples; + vespalib::hash_set<vespalib::stringref> _unique_str_refs; + std::vector<vespalib::stringref> _path; + vespalib::asciistream& _out; +public: + explicit PrometheusWriter(vespalib::asciistream& out); + ~PrometheusWriter() override; + +private: + [[nodiscard]] vespalib::stringref arena_stable_string_ref(vespalib::stringref str); + [[nodiscard]] vespalib::ConstArrayRef<vespalib::stringref> as_prometheus_labels(const Metric& m); + [[nodiscard]] 
vespalib::ConstArrayRef<vespalib::stringref> metric_to_path_ref(vespalib::stringref leaf_metric_name); + [[nodiscard]] vespalib::stringref stable_name_string_ref(vespalib::stringref raw_name); + [[nodiscard]] vespalib::stringref stable_label_value_string_ref(vespalib::stringref raw_label_value); + void build_labels_upto_root(vespalib::SmallVector<vespalib::stringref, 16>& out, const Metric& m); + + [[nodiscard]] static vespalib::string escaped_label_value(vespalib::stringref value); + // Renders name with a trailing '_' character, as the caller is expected to append an aggregate. + static void render_path_as_metric_name_prefix(vespalib::asciistream& out, vespalib::ConstArrayRef<vespalib::stringref> path); + static void render_label_pairs(vespalib::asciistream& out, vespalib::ConstArrayRef<vespalib::stringref> labels); + static void render_sample_value(vespalib::asciistream& out, I64OrDouble value); + + // MetricVisitor impl + bool visitSnapshot(const MetricSnapshot&) override; + void doneVisitingSnapshot(const MetricSnapshot&) override; + bool visitMetricSet(const MetricSet&, bool autoGenerated) override; + void doneVisitingMetricSet(const MetricSet&) override; + bool visitCountMetric(const AbstractCountMetric&, bool autoGenerated) override; + bool visitValueMetric(const AbstractValueMetric&, bool autoGenerated) override; + void doneVisiting() override; +}; + +} diff --git a/storage/src/vespa/storage/common/statusmetricconsumer.cpp b/storage/src/vespa/storage/common/statusmetricconsumer.cpp index 90cd52e27b4..a2cfbf0843b 100644 --- a/storage/src/vespa/storage/common/statusmetricconsumer.cpp +++ b/storage/src/vespa/storage/common/statusmetricconsumer.cpp @@ -5,6 +5,7 @@ #include <boost/lexical_cast.hpp> #include <vespa/metrics/jsonwriter.h> #include <vespa/metrics/textwriter.h> +#include <vespa/metrics/prometheus_writer.h> #include <vespa/metrics/metricmanager.h> #include <vespa/storageapi/messageapi/storagemessage.h> #include <vespa/vespalib/stllike/asciistream.h> 
@@ -39,6 +40,10 @@ StatusMetricConsumer::getReportContentType(const framework::HttpUrlPath& path) c return "text/plain"; } + if (path.getAttribute("format") == "prometheus") { + return "text/plain; version=0.0.4"; + } + if (path.getAttribute("format") == "json") { return "application/json"; } @@ -53,7 +58,8 @@ StatusMetricConsumer::reportStatus(std::ostream& out, _manager.updateMetrics(); vespalib::system_time currentTime = _component.getClock().getSystemTime(); - bool json = (path.getAttribute("format") == "json"); + const bool json = (path.getAttribute("format") == "json"); + const bool prometheus = (path.getAttribute("format") == "prometheus"); int verbosity(path.get("verbosity", 0)); // We have to copy unset values if using HTML as HTML version gathers @@ -126,6 +132,11 @@ StatusMetricConsumer::reportStatus(std::ostream& out, stream << End(); stream.finalize(); out << jsonStreamData.str(); + } else if (prometheus) { + vespalib::asciistream ps; + metrics::PrometheusWriter pw(ps); + _manager.visit(metricLock, *snapshot, pw, consumer); + out << ps.str(); } else { std::string pattern = path.getAttribute("pattern", ".*"); metrics::TextWriter textWriter(out, snapshot->getPeriod(), pattern, verbosity > 0); |