diff options
author | Tor Brede Vekterli <vekterli@vespa.ai> | 2024-03-21 11:46:14 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@vespa.ai> | 2024-03-21 12:21:26 +0000 |
commit | 1562ae1726453a0789aabb4baad2754020579b8d (patch) | |
tree | d702db3a02c9b035d4a14ee94b295f27859d6743 | |
parent | 723d6cacbdce4c45e01c92cb3e2eeb71f7b513f2 (diff) |
Wire Prometheus metric export to state V1 APIs
Extend the metric producer classes with a parameter for the requested exposition format.
As a consequence, the State API server has been changed to allow
emitting content types other than just `application/json`.
Add custom Prometheus rendering for Slobrok, as it does its own
domain-specific metric tracking. However, since it has non-destructive
sampling properties, we can actually use proper `counter` types.
17 files changed, 334 insertions, 132 deletions
diff --git a/metrics/src/tests/metricmanagertest.cpp b/metrics/src/tests/metricmanagertest.cpp index e5e39bd9dcb..9629c63f333 100644 --- a/metrics/src/tests/metricmanagertest.cpp +++ b/metrics/src/tests/metricmanagertest.cpp @@ -572,7 +572,7 @@ TEST_F(MetricManagerTest, test_json_output) // No snapshots have been taken yet, so the non-total getMetrics call should return // the empty string (i.e. no metrics produced). metrics::StateApiAdapter adapter(mm); - auto json_str = adapter.getMetrics("snapper"); + auto json_str = adapter.getMetrics("snapper", vespalib::MetricsProducer::ExpositionFormat::JSON); EXPECT_EQ(json_str, ""); } @@ -635,9 +635,9 @@ TEST_F(MetricManagerTest, test_json_output) EXPECT_EQ(10.0, slime.get()["values"][10]["values"]["last"].asDouble()) << jsonData; metrics::StateApiAdapter adapter(mm); - vespalib::string normal = adapter.getMetrics("snapper"); + vespalib::string normal = adapter.getMetrics("snapper", vespalib::MetricsProducer::ExpositionFormat::JSON); EXPECT_EQ(vespalib::string(jsonData), normal); - vespalib::string total = adapter.getTotalMetrics("snapper"); + vespalib::string total = adapter.getTotalMetrics("snapper", vespalib::MetricsProducer::ExpositionFormat::JSON); EXPECT_GT(total.size(), 0); EXPECT_NE(total, normal); } @@ -1058,6 +1058,18 @@ TEST_F(MetricManagerTest, prometheus_output_can_emit_inf_values_verbatim) { EXPECT_THAT(actual, HasSubstr("outer_temp_val_sum{foo=\"baz\",fancy=\"stuff\"} -Inf 1300000\n")); } +TEST_F(MetricManagerTest, state_adapter_can_output_prometheus_format) { + SameNamesTestMetricSet mset; + mset.set1.val.addValue(2); + mset.set2.val.addValue(3); + MetricSnapshotTestFixture fixture(*this, mset); + fixture.takeSnapshotsOnce(); + metrics::StateApiAdapter adapter(fixture.manager); + auto metrics = adapter.getMetrics("snapper", vespalib::MetricsProducer::ExpositionFormat::Prometheus); + EXPECT_THAT(metrics, HasSubstr("outer_temp_val_sum{foo=\"bar\",fancy=\"stuff\"} 2 1300000\n")); + EXPECT_THAT(metrics, 
HasSubstr("outer_temp_val_sum{foo=\"baz\",fancy=\"stuff\"} 3 1300000\n")); +} + struct SneakyNamesMetricSet : public MetricSet { DoubleValueMetric val1; DoubleValueMetric val2; diff --git a/metrics/src/vespa/metrics/state_api_adapter.cpp b/metrics/src/vespa/metrics/state_api_adapter.cpp index 56a04542345..2c92448fe95 100644 --- a/metrics/src/vespa/metrics/state_api_adapter.cpp +++ b/metrics/src/vespa/metrics/state_api_adapter.cpp @@ -1,30 +1,45 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "jsonwriter.h" -#include "state_api_adapter.h" #include "metricmanager.h" +#include "prometheus_writer.h" +#include "state_api_adapter.h" #include <vespa/vespalib/stllike/asciistream.h> namespace metrics { vespalib::string -StateApiAdapter::getMetrics(const vespalib::string &consumer) +StateApiAdapter::getMetrics(const vespalib::string &consumer, ExpositionFormat format) { MetricLockGuard guard(_manager.getMetricLock()); auto periods = _manager.getSnapshotPeriods(guard); if (periods.empty() || !_manager.any_snapshots_taken(guard)) { return ""; // no configuration or snapshots yet } - const MetricSnapshot &snapshot(_manager.getMetricSnapshot(guard, periods[0])); - vespalib::asciistream json; - vespalib::JsonStream stream(json); - metrics::JsonWriter metricJsonWriter(stream); - _manager.visit(guard, snapshot, metricJsonWriter, consumer); - stream.finalize(); - return json.str(); + const MetricSnapshot& snapshot(_manager.getMetricSnapshot(guard, periods[0])); + vespalib::asciistream out; + // Using `switch` instead of `if` so that we fail with a compiler warning -> error if + // we add another enum value and forget to add a case for it here. 
+ switch (format) { + case ExpositionFormat::JSON: + { + vespalib::JsonStream stream(out); + JsonWriter metricJsonWriter(stream); + _manager.visit(guard, snapshot, metricJsonWriter, consumer); + stream.finalize(); + break; + } + case ExpositionFormat::Prometheus: + { + PrometheusWriter writer(out); + _manager.visit(guard, snapshot, writer, consumer); + break; + } + } + return out.str(); } vespalib::string -StateApiAdapter::getTotalMetrics(const vespalib::string &consumer) +StateApiAdapter::getTotalMetrics(const vespalib::string &consumer, ExpositionFormat format) { _manager.updateMetrics(); MetricLockGuard guard(_manager.getMetricLock()); @@ -34,13 +49,26 @@ StateApiAdapter::getTotalMetrics(const vespalib::string &consumer) _manager.getTotalMetricSnapshot(guard).getMetrics(), true); _manager.getActiveMetrics(guard).addToSnapshot(*generated, false, currentTime); generated->setFromTime(_manager.getTotalMetricSnapshot(guard).getFromTime()); - const MetricSnapshot &snapshot = *generated; - vespalib::asciistream json; - vespalib::JsonStream stream(json); - metrics::JsonWriter metricJsonWriter(stream); - _manager.visit(guard, snapshot, metricJsonWriter, consumer); - stream.finalize(); - return json.str(); + const MetricSnapshot& snapshot = *generated; + vespalib::asciistream out; + switch (format) { + case ExpositionFormat::JSON: + { + vespalib::JsonStream stream(out); + metrics::JsonWriter metricJsonWriter(stream); + _manager.visit(guard, snapshot, metricJsonWriter, consumer); + stream.finalize(); + break; + } + case ExpositionFormat::Prometheus: + { + PrometheusWriter writer(out); + _manager.visit(guard, snapshot, writer, consumer); + break; + } + } + + return out.str(); } } // namespace metrics diff --git a/metrics/src/vespa/metrics/state_api_adapter.h b/metrics/src/vespa/metrics/state_api_adapter.h index fd610226fda..39a80099355 100644 --- a/metrics/src/vespa/metrics/state_api_adapter.h +++ b/metrics/src/vespa/metrics/state_api_adapter.h @@ -11,7 +11,7 @@ class 
MetricManager; /** * This is an adapter class that implements the metrics producer * interface defined by the state api implementation in vespalib by - * extracting metrics in json format from a metric manager. + * extracting metrics in JSON or Prometheus format from a metric manager. **/ class StateApiAdapter : public vespalib::MetricsProducer { @@ -19,10 +19,10 @@ private: MetricManager &_manager; public: - StateApiAdapter(MetricManager &manager) : _manager(manager) {} + explicit StateApiAdapter(MetricManager &manager) : _manager(manager) {} - vespalib::string getMetrics(const vespalib::string &consumer) override; - vespalib::string getTotalMetrics(const vespalib::string &consumer) override; + vespalib::string getMetrics(const vespalib::string &consumer, ExpositionFormat format) override; + vespalib::string getTotalMetrics(const vespalib::string &consumer, ExpositionFormat format) override; }; } // namespace metrics diff --git a/slobrok/src/vespa/slobrok/server/metrics_producer.cpp b/slobrok/src/vespa/slobrok/server/metrics_producer.cpp index f25a0681397..b7eca808a75 100644 --- a/slobrok/src/vespa/slobrok/server/metrics_producer.cpp +++ b/slobrok/src/vespa/slobrok/server/metrics_producer.cpp @@ -2,6 +2,7 @@ #include "metrics_producer.h" #include <vespa/vespalib/data/slime/slime.h> +#include <vespa/vespalib/stllike/asciistream.h> #include <vespa/fnet/task.h> #include <vespa/fnet/transport.h> @@ -11,9 +12,12 @@ using namespace std::chrono; namespace { -time_t -secondsSinceEpoch() { - return duration_cast<seconds>(system_clock::now().time_since_epoch()).count(); +[[nodiscard]] constexpr seconds seconds_since_epoch(std::chrono::system_clock::time_point tp) noexcept { + return duration_cast<seconds>(tp.time_since_epoch()); +} + +[[nodiscard]] constexpr milliseconds ms_since_epoch(std::chrono::system_clock::time_point tp) noexcept { + return duration_cast<milliseconds>(tp.time_since_epoch()); } class MetricsSnapshotter : public FNET_Task @@ -32,7 +36,7 @@ public: 
Schedule(60.0); } - ~MetricsSnapshotter() { Kill(); } + ~MetricsSnapshotter() override { Kill(); } }; class MetricSnapshot @@ -45,7 +49,7 @@ private: double _snapLen; public: - MetricSnapshot(uint32_t prevTime, uint32_t currTime); + MetricSnapshot(system_clock::time_point prevTime, system_clock::time_point currTime); void addCount(const char *name, const char *desc, uint32_t count); vespalib::string asString() const { @@ -53,15 +57,15 @@ public: } }; -MetricSnapshot::MetricSnapshot(uint32_t prevTime, uint32_t currTime) +MetricSnapshot::MetricSnapshot(system_clock::time_point prevTime, system_clock::time_point currTime) : _data(), _metrics(_data.setObject()), _snapshot(_metrics.setObject("snapshot")), _values(_metrics.setArray("values")), - _snapLen(currTime - prevTime) + _snapLen(static_cast<double>(seconds_since_epoch(currTime).count() - seconds_since_epoch(prevTime).count())) { - _snapshot.setLong("from", prevTime); - _snapshot.setLong("to", currTime); + _snapshot.setLong("from", seconds_since_epoch(prevTime).count()); + _snapshot.setLong("to", seconds_since_epoch(currTime).count()); if (_snapLen < 1.0) { _snapLen = 1.0; } @@ -81,8 +85,8 @@ MetricSnapshot::addCount(const char *name, const char *desc, uint32_t count) } vespalib::string -makeSnapshot(const RPCHooks::Metrics &prev, const RPCHooks::Metrics &curr, - uint32_t prevTime, uint32_t currTime) +make_json_snapshot(const RPCHooks::Metrics &prev, const RPCHooks::Metrics &curr, + system_clock::time_point prevTime, system_clock::time_point currTime) { MetricSnapshot snapshot(prevTime, currTime); snapshot.addCount("slobrok.heartbeats.failed", @@ -103,44 +107,90 @@ makeSnapshot(const RPCHooks::Metrics &prev, const RPCHooks::Metrics &curr, return snapshot.asString(); } +void emit_prometheus_counter(vespalib::asciistream &out, vespalib::stringref name, + vespalib::stringref description, uint64_t value, + system_clock::time_point now) +{ + // Prometheus naming conventions state that "_total" should be used for counter 
metrics. + out << "# HELP " << name << "_total " << description << '\n'; + out << "# TYPE " << name << "_total counter\n"; + out << name << "_total " << value << ' ' << ms_since_epoch(now).count() << '\n'; +} + +void emit_prometheus_gauge(vespalib::asciistream &out, vespalib::stringref name, + vespalib::stringref description, uint64_t value, + system_clock::time_point now) +{ + // Gauge metrics do not appear to have any convention for name suffixes, so emit name verbatim. + out << "# HELP " << name << ' ' << description << '\n'; + out << "# TYPE " << name << " gauge\n"; + out << name << ' ' << value << ' ' << ms_since_epoch(now).count() << '\n'; +} + +vespalib::string +make_prometheus_snapshot(const RPCHooks::Metrics &curr, system_clock::time_point now) +{ + vespalib::asciistream out; + emit_prometheus_counter(out, "slobrok_heartbeats_failed", + "count of failed heartbeat requests", + curr.heartBeatFails, now); + emit_prometheus_counter(out, "slobrok_requests_register", + "count of register requests received", + curr.registerReqs, now); + emit_prometheus_counter(out, "slobrok_requests_mirror", + "count of mirroring requests received", + curr.mirrorReqs, now); + emit_prometheus_counter(out, "slobrok_requests_admin", + "count of administrative requests received", + curr.adminReqs, now); + emit_prometheus_gauge(out, "slobrok_missing_consensus", + "number of seconds without full consensus with all other brokers", + curr.missingConsensusTime, now); + return out.str(); +} + } // namespace <unnamed> -MetricsProducer::MetricsProducer(const RPCHooks &hooks, - FNET_Transport &transport) +MetricsProducer::MetricsProducer(const RPCHooks &hooks, FNET_Transport &transport) : _rpcHooks(hooks), _lastMetrics(RPCHooks::Metrics::zero()), _producer(), - _startTime(secondsSinceEpoch()), + _startTime(system_clock::now()), _lastSnapshotStart(_startTime), - _snapshotter(new MetricsSnapshotter(transport, *this)) + _snapshotter(std::make_unique<MetricsSnapshotter>(transport, *this)) { } 
MetricsProducer::~MetricsProducer() = default; vespalib::string -MetricsProducer::getMetrics(const vespalib::string &consumer) +MetricsProducer::getMetrics(const vespalib::string &consumer, ExpositionFormat format) { - return _producer.getMetrics(consumer); + return _producer.getMetrics(consumer, format); } vespalib::string -MetricsProducer::getTotalMetrics(const vespalib::string &) +MetricsProducer::getTotalMetrics(const vespalib::string &, ExpositionFormat format) { - uint32_t now = secondsSinceEpoch(); + const auto now = system_clock::now(); RPCHooks::Metrics current = _rpcHooks.getMetrics(); - RPCHooks::Metrics start = RPCHooks::Metrics::zero(); - return makeSnapshot(start, current, _startTime, now); + if (format == ExpositionFormat::Prometheus) { + return make_prometheus_snapshot(current, now); + } else { + RPCHooks::Metrics start = RPCHooks::Metrics::zero(); + return make_json_snapshot(start, current, _startTime, now); + } } void MetricsProducer::snapshot() { - uint32_t now = secondsSinceEpoch(); + const auto now = system_clock::now(); RPCHooks::Metrics current = _rpcHooks.getMetrics(); - _producer.setMetrics(makeSnapshot(_lastMetrics, current, _lastSnapshotStart, now)); + _producer.setMetrics(make_json_snapshot(_lastMetrics, current, _lastSnapshotStart, now), ExpositionFormat::JSON); + _producer.setMetrics(make_prometheus_snapshot(current, now), ExpositionFormat::Prometheus); _lastMetrics = current; _lastSnapshotStart = now; } diff --git a/slobrok/src/vespa/slobrok/server/metrics_producer.h b/slobrok/src/vespa/slobrok/server/metrics_producer.h index fd1fc70651b..0a9dd589a15 100644 --- a/slobrok/src/vespa/slobrok/server/metrics_producer.h +++ b/slobrok/src/vespa/slobrok/server/metrics_producer.h @@ -4,6 +4,7 @@ #include "rpchooks.h" #include <vespa/vespalib/net/http/metrics_producer.h> #include <vespa/vespalib/net/http/simple_metrics_producer.h> +#include <chrono> class FNET_Transport; @@ -15,13 +16,13 @@ private: const RPCHooks &_rpcHooks; RPCHooks::Metrics 
_lastMetrics; vespalib::SimpleMetricsProducer _producer; - uint32_t _startTime; - uint32_t _lastSnapshotStart; + std::chrono::system_clock::time_point _startTime; + std::chrono::system_clock::time_point _lastSnapshotStart; std::unique_ptr<FNET_Task> _snapshotter; public: - vespalib::string getMetrics(const vespalib::string &consumer) override; - vespalib::string getTotalMetrics(const vespalib::string &consumer) override; + vespalib::string getMetrics(const vespalib::string &consumer, ExpositionFormat format) override; + vespalib::string getTotalMetrics(const vespalib::string &consumer, ExpositionFormat format) override; void snapshot(); diff --git a/storage/src/vespa/storage/storageserver/statereporter.cpp b/storage/src/vespa/storage/storageserver/statereporter.cpp index c0d2d4dcc59..93b60cad71d 100644 --- a/storage/src/vespa/storage/storageserver/statereporter.cpp +++ b/storage/src/vespa/storage/storageserver/statereporter.cpp @@ -4,6 +4,7 @@ #include <vespa/storageframework/generic/clock/clock.h> #include <vespa/metrics/jsonwriter.h> #include <vespa/metrics/metricmanager.h> +#include <vespa/metrics/prometheus_writer.h> #include <vespa/storage/common/nodestateupdater.h> #include <vespa/vdslib/state/nodestate.h> #include <vespa/vespalib/net/connection_auth_context.h> @@ -74,7 +75,7 @@ StateReporter::reportStatus(std::ostream& out, } vespalib::string -StateReporter::getMetrics(const vespalib::string &consumer) +StateReporter::getMetrics(const vespalib::string &consumer, ExpositionFormat format) { metrics::MetricLockGuard guard(_manager.getMetricLock()); auto periods = _manager.getSnapshotPeriods(guard); @@ -92,18 +93,30 @@ StateReporter::getMetrics(const vespalib::string &consumer) snapshot.reset(); _manager.getMetricSnapshot(guard, interval).addToSnapshot(snapshot, _component.getClock().getSystemTime()); - vespalib::asciistream json; - vespalib::JsonStream stream(json); - metrics::JsonWriter metricJsonWriter(stream); - _manager.visit(guard, snapshot, 
metricJsonWriter, consumer); - stream.finalize(); - return json.str(); + vespalib::asciistream out; + switch (format) { + case ExpositionFormat::JSON: + { + vespalib::JsonStream stream(out); + metrics::JsonWriter metricJsonWriter(stream); + _manager.visit(guard, snapshot, metricJsonWriter, consumer); + stream.finalize(); + break; + } + case ExpositionFormat::Prometheus: + { + metrics::PrometheusWriter writer(out); + _manager.visit(guard, snapshot, writer, consumer); + break; + } + } + return out.str(); } vespalib::string -StateReporter::getTotalMetrics(const vespalib::string &consumer) +StateReporter::getTotalMetrics(const vespalib::string &consumer, ExpositionFormat format) { - return _metricsAdapter.getTotalMetrics(consumer); + return _metricsAdapter.getTotalMetrics(consumer, format); } vespalib::HealthProducer::Health diff --git a/storage/src/vespa/storage/storageserver/statereporter.h b/storage/src/vespa/storage/storageserver/statereporter.h index 9601d0fc34f..c6fb570ae04 100644 --- a/storage/src/vespa/storage/storageserver/statereporter.h +++ b/storage/src/vespa/storage/storageserver/statereporter.h @@ -57,8 +57,8 @@ private: ApplicationGenerationFetcher& _generationFetcher; std::string _name; - vespalib::string getMetrics(const vespalib::string &consumer) override; - vespalib::string getTotalMetrics(const vespalib::string &consumer) override; + vespalib::string getMetrics(const vespalib::string &consumer, ExpositionFormat format) override; + vespalib::string getTotalMetrics(const vespalib::string &consumer, ExpositionFormat format) override; Health getHealth() const override; void getComponentConfig(Consumer &consumer) override; }; diff --git a/vespalib/src/tests/state_server/state_server_test.cpp b/vespalib/src/tests/state_server/state_server_test.cpp index 2922a6d5069..6c248b54cc8 100644 --- a/vespalib/src/tests/state_server/state_server_test.cpp +++ b/vespalib/src/tests/state_server/state_server_test.cpp @@ -47,7 +47,8 @@ vespalib::string getPage(int port, 
const vespalib::string &path, const vespalib: vespalib::string getFull(int port, const vespalib::string &path) { return getPage(port, path, "-D -"); } -vespalib::string get_json(const JsonGetHandler &handler, +std::pair<vespalib::string, vespalib::string> +get_body_and_content_type(const JsonGetHandler &handler, const vespalib::string &host, const vespalib::string &path, const std::map<vespalib::string,vespalib::string> ¶ms) @@ -55,11 +56,19 @@ vespalib::string get_json(const JsonGetHandler &handler, net::ConnectionAuthContext dummy_ctx(net::tls::PeerCredentials(), net::tls::CapabilitySet::all()); auto res = handler.get(host, path, params, dummy_ctx); if (res.ok()) { - return res.payload(); + return {res.payload(), res.content_type()}; } return {}; } +vespalib::string get_json(const JsonGetHandler &handler, + const vespalib::string &host, + const vespalib::string &path, + const std::map<vespalib::string,vespalib::string> ¶ms) +{ + return get_body_and_content_type(handler, host, path, params).first; +} + //----------------------------------------------------------------------------- struct DummyHandler : JsonGetHandler { @@ -208,7 +217,7 @@ TEST_FFFF("require that the state server wires the appropriate url prefixes", SimpleHealthProducer(), SimpleMetricsProducer(), SimpleComponentConfigProducer(), StateServer(0, f1, f2, f3)) { - f2.setTotalMetrics("{}"); // avoid empty result + f2.setTotalMetrics("{}", MetricsProducer::ExpositionFormat::JSON); // avoid empty result int port = f4.getListenPort(); EXPECT_TRUE(getFull(port, short_root_path).find("HTTP/1.1 200 OK") == 0); EXPECT_TRUE(getFull(port, total_metrics_path).find("HTTP/1.1 200 OK") == 0); @@ -282,7 +291,7 @@ TEST_FFFF("require that state api responds to the expected paths", SimpleHealthProducer(), SimpleMetricsProducer(), SimpleComponentConfigProducer(), StateApi(f1, f2, f3)) { - f2.setTotalMetrics("{}"); // avoid empty result + f2.setTotalMetrics("{}", MetricsProducer::ExpositionFormat::JSON); // avoid empty 
result EXPECT_TRUE(!get_json(f4, host_tag, short_root_path, empty_params).empty()); EXPECT_TRUE(!get_json(f4, host_tag, root_path, empty_params).empty()); EXPECT_TRUE(!get_json(f4, host_tag, health_path, empty_params).empty()); @@ -340,9 +349,20 @@ TEST_FFFF("require that metrics resource works as expected", EXPECT_EQUAL("{\"status\":{\"code\":\"down\",\"message\":\"FAIL MSG\"}}", get_json(f4, host_tag, metrics_path, empty_params)); f1.setOk(); - f2.setMetrics("{\"foo\":\"bar\"}"); - EXPECT_EQUAL("{\"status\":{\"code\":\"up\"},\"metrics\":{\"foo\":\"bar\"}}", - get_json(f4, host_tag, metrics_path, empty_params)); + f2.setMetrics(R"({"foo":"bar"})", MetricsProducer::ExpositionFormat::JSON); + f2.setMetrics(R"(cool_stuff{hello="world"} 1 23456)", MetricsProducer::ExpositionFormat::Prometheus); + + auto result = get_body_and_content_type(f4, host_tag, metrics_path, empty_params); + EXPECT_EQUAL(R"({"status":{"code":"up"},"metrics":{"foo":"bar"}})", result.first); + EXPECT_EQUAL("application/json", result.second); + + result = get_body_and_content_type(f4, host_tag, metrics_path, {{"format", "json"}}); // Explicit JSON + EXPECT_EQUAL(R"({"status":{"code":"up"},"metrics":{"foo":"bar"}})", result.first); + EXPECT_EQUAL("application/json", result.second); + + result = get_body_and_content_type(f4, host_tag, metrics_path, {{"format", "prometheus"}}); // Explicit Prometheus + EXPECT_EQUAL(R"(cool_stuff{hello="world"} 1 23456)", result.first); + EXPECT_EQUAL("text/plain; version=0.0.4", result.second); } TEST_FFFF("require that config resource works as expected", @@ -367,9 +387,12 @@ TEST_FFFF("require that state api also can return total metric", SimpleHealthProducer(), SimpleMetricsProducer(), SimpleComponentConfigProducer(), StateApi(f1, f2, f3)) { - f2.setTotalMetrics("{\"foo\":\"bar\"}"); - EXPECT_EQUAL("{\"foo\":\"bar\"}", + f2.setTotalMetrics(R"({"foo":"bar"})", MetricsProducer::ExpositionFormat::JSON); + f2.setTotalMetrics(R"(cool_stuff{hello="world"} 1 23456)", 
MetricsProducer::ExpositionFormat::Prometheus); + EXPECT_EQUAL(R"({"foo":"bar"})", get_json(f4, host_tag, total_metrics_path, empty_params)); + EXPECT_EQUAL(R"(cool_stuff{hello="world"} 1 23456)", + get_json(f4, host_tag, total_metrics_path, {{"format", "prometheus"}})); } TEST_FFFFF("require that custom handlers can be added to the state server", @@ -384,12 +407,25 @@ TEST_FFFFF("require that custom handlers can be added to the state server", } struct EchoConsumer : MetricsProducer { + static constexpr const char* to_string(ExpositionFormat format) noexcept { + switch (format) { + case ExpositionFormat::JSON: return "JSON"; + case ExpositionFormat::Prometheus: return "Prometheus"; + } + abort(); + } + + static vespalib::string stringify_params(const vespalib::string &consumer, ExpositionFormat format) { + // Not semantically meaningful output if format == Prometheus, but doesn't really matter here. + return vespalib::make_string(R"(["%s", "%s"])", to_string(format), consumer.c_str()); + } + ~EchoConsumer() override; - vespalib::string getMetrics(const vespalib::string &consumer) override { - return "[\"" + consumer + "\"]"; + vespalib::string getMetrics(const vespalib::string &consumer, ExpositionFormat format) override { + return stringify_params(consumer, format); } - vespalib::string getTotalMetrics(const vespalib::string &consumer) override { - return "[\"" + consumer + "\"]"; + vespalib::string getTotalMetrics(const vespalib::string &consumer, ExpositionFormat format) override { + return stringify_params(consumer, format); } }; @@ -399,17 +435,17 @@ TEST_FFFF("require that empty v1 metrics consumer defaults to 'statereporter'", SimpleHealthProducer(), EchoConsumer(), SimpleComponentConfigProducer(), StateApi(f1, f2, f3)) { - std::map<vespalib::string,vespalib::string> my_params; - EXPECT_EQUAL("{\"status\":{\"code\":\"up\"},\"metrics\":[\"statereporter\"]}", + EXPECT_EQUAL(R"({"status":{"code":"up"},"metrics":["JSON", "statereporter"]})", get_json(f4, 
host_tag, metrics_path, empty_params)); + EXPECT_EQUAL(R"(["Prometheus", "statereporter"])", + get_json(f4, host_tag, metrics_path, {{"format", "prometheus"}})); } TEST_FFFF("require that empty total metrics consumer defaults to the empty string", SimpleHealthProducer(), EchoConsumer(), SimpleComponentConfigProducer(), StateApi(f1, f2, f3)) { - std::map<vespalib::string,vespalib::string> my_params; - EXPECT_EQUAL("[\"\"]", get_json(f4, host_tag, total_metrics_path, empty_params)); + EXPECT_EQUAL(R"(["JSON", ""])", get_json(f4, host_tag, total_metrics_path, empty_params)); } TEST_FFFF("require that metrics consumer is passed correctly", @@ -418,8 +454,10 @@ TEST_FFFF("require that metrics consumer is passed correctly", { std::map<vespalib::string,vespalib::string> my_params; my_params["consumer"] = "ME"; - EXPECT_EQUAL("{\"status\":{\"code\":\"up\"},\"metrics\":[\"ME\"]}", get_json(f4, host_tag, metrics_path, my_params)); - EXPECT_EQUAL("[\"ME\"]", get_json(f4, host_tag, total_metrics_path, my_params)); + EXPECT_EQUAL(R"({"status":{"code":"up"},"metrics":["JSON", "ME"]})", get_json(f4, host_tag, metrics_path, my_params)); + EXPECT_EQUAL(R"(["JSON", "ME"])", get_json(f4, host_tag, total_metrics_path, my_params)); + my_params["format"] = "prometheus"; + EXPECT_EQUAL(R"(["Prometheus", "ME"])", get_json(f4, host_tag, total_metrics_path, my_params)); } void check_json(const vespalib::string &expect_json, const vespalib::string &actual_json) { diff --git a/vespalib/src/vespa/vespalib/metrics/producer.cpp b/vespalib/src/vespa/vespalib/metrics/producer.cpp index fe244607f43..ca6d773e129 100644 --- a/vespalib/src/vespa/vespalib/metrics/producer.cpp +++ b/vespalib/src/vespa/vespalib/metrics/producer.cpp @@ -4,15 +4,16 @@ #include "metrics_manager.h" #include "json_formatter.h" -namespace vespalib { -namespace metrics { +namespace vespalib::metrics { Producer::Producer(std::shared_ptr<MetricsManager> m) - : _manager(m) + : _manager(std::move(m)) {} +Producer::~Producer() = 
default; + vespalib::string -Producer::getMetrics(const vespalib::string &) +Producer::getMetrics(const vespalib::string &, ExpositionFormat /*ignored*/) { Snapshot snap = _manager->snapshot(); JsonFormatter fmt(snap); @@ -20,14 +21,11 @@ Producer::getMetrics(const vespalib::string &) } vespalib::string -Producer::getTotalMetrics(const vespalib::string &) +Producer::getTotalMetrics(const vespalib::string &, ExpositionFormat /*ignored*/) { Snapshot snap = _manager->totalSnapshot(); JsonFormatter fmt(snap); return fmt.asString(); } - - } // namespace vespalib::metrics -} // namespace vespalib diff --git a/vespalib/src/vespa/vespalib/metrics/producer.h b/vespalib/src/vespa/vespalib/metrics/producer.h index b0b3e2bc701..95730258f86 100644 --- a/vespalib/src/vespa/vespalib/metrics/producer.h +++ b/vespalib/src/vespa/vespalib/metrics/producer.h @@ -15,9 +15,10 @@ class Producer : public vespalib::MetricsProducer { private: std::shared_ptr<MetricsManager> _manager; public: - Producer(std::shared_ptr<MetricsManager> m); - vespalib::string getMetrics(const vespalib::string &consumer) override; - vespalib::string getTotalMetrics(const vespalib::string &consumer) override; + explicit Producer(std::shared_ptr<MetricsManager> m); + ~Producer() override; + vespalib::string getMetrics(const vespalib::string &consumer, ExpositionFormat format) override; + vespalib::string getTotalMetrics(const vespalib::string &consumer, ExpositionFormat format) override; }; } diff --git a/vespalib/src/vespa/vespalib/net/http/http_server.cpp b/vespalib/src/vespa/vespalib/net/http/http_server.cpp index a307a4eca4f..15443276389 100644 --- a/vespalib/src/vespa/vespalib/net/http/http_server.cpp +++ b/vespalib/src/vespa/vespalib/net/http/http_server.cpp @@ -13,7 +13,7 @@ HttpServer::get(Portal::GetRequest req) if (response.failed()) { req.respond_with_error(response.status_code(), response.status_message()); } else { - req.respond_with_content("application/json", response.payload()); + 
req.respond_with_content(response.content_type(), response.payload()); } } diff --git a/vespalib/src/vespa/vespalib/net/http/json_get_handler.cpp b/vespalib/src/vespa/vespalib/net/http/json_get_handler.cpp index c9d7859b5b4..7f04235f781 100644 --- a/vespalib/src/vespa/vespalib/net/http/json_get_handler.cpp +++ b/vespalib/src/vespa/vespalib/net/http/json_get_handler.cpp @@ -4,14 +4,18 @@ namespace vespalib { -JsonGetHandler::Response::Response(int status_code, vespalib::string status_or_payload) +JsonGetHandler::Response::Response(int status_code, + vespalib::string status_or_payload, + vespalib::string content_type_override) : _status_code(status_code), - _status_or_payload(std::move(status_or_payload)) + _status_or_payload(std::move(status_or_payload)), + _content_type_override(std::move(content_type_override)) {} JsonGetHandler::Response::Response() : _status_code(500), - _status_or_payload("Internal Server Error") + _status_or_payload("Internal Server Error"), + _content_type_override() {} JsonGetHandler::Response::~Response() = default; @@ -24,19 +28,25 @@ JsonGetHandler::Response& JsonGetHandler::Response::operator=(Response&&) noexce JsonGetHandler::Response JsonGetHandler::Response::make_ok_with_json(vespalib::string json) { - return {200, std::move(json)}; + return {200, std::move(json), {}}; +} + +JsonGetHandler::Response +JsonGetHandler::Response::make_ok_with_content_type(vespalib::string payload, vespalib::string content_type) +{ + return {200, std::move(payload), std::move(content_type)}; } JsonGetHandler::Response JsonGetHandler::Response::make_failure(int status_code, vespalib::string status_message) { - return {status_code, std::move(status_message)}; + return {status_code, std::move(status_message), {}}; } JsonGetHandler::Response JsonGetHandler::Response::make_not_found() { - return {404, "Not Found"}; + return {404, "Not Found", {}}; } } diff --git a/vespalib/src/vespa/vespalib/net/http/json_get_handler.h 
b/vespalib/src/vespa/vespalib/net/http/json_get_handler.h index b7786ddd119..43793dbf1d8 100644 --- a/vespalib/src/vespa/vespalib/net/http/json_get_handler.h +++ b/vespalib/src/vespa/vespalib/net/http/json_get_handler.h @@ -13,8 +13,11 @@ struct JsonGetHandler { class Response { int _status_code; vespalib::string _status_or_payload; + vespalib::string _content_type_override; - Response(int status_code, vespalib::string status_or_payload); + Response(int status_code, + vespalib::string status_or_payload, + vespalib::string content_type_override); public: Response(); // By default, 500 Internal Server Error ~Response(); @@ -40,8 +43,16 @@ struct JsonGetHandler { return {}; } } + [[nodiscard]] vespalib::stringref content_type() const noexcept { + if (_content_type_override.empty()) { + return "application/json"; + } else { + return _content_type_override; + } + } [[nodiscard]] static Response make_ok_with_json(vespalib::string json); + [[nodiscard]] static Response make_ok_with_content_type(vespalib::string payload, vespalib::string content_type); [[nodiscard]] static Response make_failure(int status_code, vespalib::string status_message); [[nodiscard]] static Response make_not_found(); }; diff --git a/vespalib/src/vespa/vespalib/net/http/metrics_producer.h b/vespalib/src/vespa/vespalib/net/http/metrics_producer.h index 18e61ff05e3..0ffb1773456 100644 --- a/vespalib/src/vespa/vespalib/net/http/metrics_producer.h +++ b/vespalib/src/vespa/vespalib/net/http/metrics_producer.h @@ -7,8 +7,13 @@ namespace vespalib { struct MetricsProducer { - virtual vespalib::string getMetrics(const vespalib::string &consumer) = 0; - virtual vespalib::string getTotalMetrics(const vespalib::string &consumer) = 0; + enum class ExpositionFormat { + JSON, + Prometheus + }; + + virtual vespalib::string getMetrics(const vespalib::string &consumer, ExpositionFormat format) = 0; + virtual vespalib::string getTotalMetrics(const vespalib::string &consumer, ExpositionFormat format) = 0; virtual 
~MetricsProducer() = default; }; diff --git a/vespalib/src/vespa/vespalib/net/http/simple_metrics_producer.cpp b/vespalib/src/vespa/vespalib/net/http/simple_metrics_producer.cpp index 5fbddd8d1b2..52836589ce3 100644 --- a/vespalib/src/vespa/vespalib/net/http/simple_metrics_producer.cpp +++ b/vespalib/src/vespa/vespalib/net/http/simple_metrics_producer.cpp @@ -7,38 +7,38 @@ namespace vespalib { SimpleMetricsProducer::SimpleMetricsProducer() : _lock(), _metrics(), - _totalMetrics() + _total_metrics() { } SimpleMetricsProducer::~SimpleMetricsProducer() = default; void -SimpleMetricsProducer::setMetrics(const vespalib::string &metrics) +SimpleMetricsProducer::setMetrics(const vespalib::string &metrics, ExpositionFormat format) { std::lock_guard guard(_lock); - _metrics = metrics; + _metrics[format] = metrics; } vespalib::string -SimpleMetricsProducer::getMetrics(const vespalib::string &) +SimpleMetricsProducer::getMetrics(const vespalib::string &, ExpositionFormat format) { std::lock_guard guard(_lock); - return _metrics; + return _metrics[format]; // May implicitly create entry, but that's fine here. 
} void -SimpleMetricsProducer::setTotalMetrics(const vespalib::string &metrics) +SimpleMetricsProducer::setTotalMetrics(const vespalib::string &metrics, ExpositionFormat format) { std::lock_guard guard(_lock); - _totalMetrics = metrics; + _total_metrics[format] = metrics; } vespalib::string -SimpleMetricsProducer::getTotalMetrics(const vespalib::string &) +SimpleMetricsProducer::getTotalMetrics(const vespalib::string &, ExpositionFormat format) { std::lock_guard guard(_lock); - return _totalMetrics; + return _total_metrics[format]; } } // namespace vespalib diff --git a/vespalib/src/vespa/vespalib/net/http/simple_metrics_producer.h b/vespalib/src/vespa/vespalib/net/http/simple_metrics_producer.h index bebf357492c..670e8d494c2 100644 --- a/vespalib/src/vespa/vespalib/net/http/simple_metrics_producer.h +++ b/vespalib/src/vespa/vespalib/net/http/simple_metrics_producer.h @@ -3,6 +3,7 @@ #pragma once #include "metrics_producer.h" +#include <map> #include <mutex> namespace vespalib { @@ -11,16 +12,16 @@ class SimpleMetricsProducer : public MetricsProducer { private: std::mutex _lock; - vespalib::string _metrics; - vespalib::string _totalMetrics; + std::map<ExpositionFormat, vespalib::string> _metrics; + std::map<ExpositionFormat, vespalib::string> _total_metrics; public: SimpleMetricsProducer(); ~SimpleMetricsProducer() override; - void setMetrics(const vespalib::string &metrics); - vespalib::string getMetrics(const vespalib::string &consumer) override; - void setTotalMetrics(const vespalib::string &metrics); - vespalib::string getTotalMetrics(const vespalib::string &consumer) override; + void setMetrics(const vespalib::string &metrics, ExpositionFormat format); + vespalib::string getMetrics(const vespalib::string &consumer, ExpositionFormat format) override; + void setTotalMetrics(const vespalib::string &metrics, ExpositionFormat format); + vespalib::string getTotalMetrics(const vespalib::string &consumer, ExpositionFormat format) override; }; } // namespace vespalib 
diff --git a/vespalib/src/vespa/vespalib/net/http/state_api.cpp b/vespalib/src/vespa/vespalib/net/http/state_api.cpp index 1b233e4cdbc..31d0010d72d 100644 --- a/vespalib/src/vespa/vespalib/net/http/state_api.cpp +++ b/vespalib/src/vespa/vespalib/net/http/state_api.cpp @@ -58,14 +58,15 @@ void build_health_status(JSONStringer &json, const HealthProducer &healthProduce json.endObject(); } -vespalib::string get_consumer(const std::map<vespalib::string,vespalib::string> &params, - vespalib::stringref default_consumer) +vespalib::string get_param(const std::map<vespalib::string,vespalib::string> &params, + vespalib::stringref param_name, + vespalib::stringref default_value) { - auto consumer_lookup = params.find("consumer"); - if (consumer_lookup == params.end()) { - return default_consumer; + auto maybe_value = params.find(param_name); + if (maybe_value == params.end()) { + return default_value; } - return consumer_lookup->second; + return maybe_value->second; } void render_link(JSONStringer &json, const vespalib::string &host, const vespalib::string &path) { @@ -99,15 +100,15 @@ vespalib::string respond_health(const HealthProducer &healthProducer) { return json.toString(); } -vespalib::string respond_metrics(const vespalib::string &consumer, - const HealthProducer &healthProducer, - MetricsProducer &metricsProducer) +vespalib::string respond_json_metrics(const vespalib::string &consumer, + const HealthProducer &healthProducer, + MetricsProducer &metricsProducer) { JSONStringer json; json.beginObject(); build_health_status(json, healthProducer); { // metrics - vespalib::string metrics = metricsProducer.getMetrics(consumer); + vespalib::string metrics = metricsProducer.getMetrics(consumer, MetricsProducer::ExpositionFormat::JSON); if (!metrics.empty()) { json.appendKey("metrics"); json.appendJSON(metrics); @@ -117,6 +118,22 @@ vespalib::string respond_metrics(const vespalib::string &consumer, return json.toString(); } +JsonGetHandler::Response cap_check_and_respond_metrics( + 
const net::ConnectionAuthContext &auth_ctx, + const std::map<vespalib::string,vespalib::string> &params, + const vespalib::string& default_consumer, + std::function<JsonGetHandler::Response(const vespalib::string&, MetricsProducer::ExpositionFormat)> response_fn) +{ + if (!auth_ctx.capabilities().contains(Capability::content_metrics_api())) { + return JsonGetHandler::Response::make_failure(403, "Forbidden"); + } + auto consumer = get_param(params, "consumer", default_consumer); + auto format_str = get_param(params, "format", "json"); + auto format = (format_str == "prometheus" ? MetricsProducer::ExpositionFormat::Prometheus + : MetricsProducer::ExpositionFormat::JSON); + return response_fn(consumer, format); +} + vespalib::string respond_config(ComponentConfigProducer &componentConfigProducer) { JSONStringer json; json.beginObject(); @@ -154,6 +171,10 @@ JsonGetHandler::Response cap_checked(const net::ConnectionAuthContext &auth_ctx, return cap_checked(auth_ctx, CapabilitySet::of({required_cap}), std::move(fn)); } +constexpr const char* prometheus_content_type() noexcept { + return "text/plain; version=0.0.4"; +} + } // namespace vespalib::<unnamed> JsonGetHandler::Response @@ -172,17 +193,30 @@ StateApi::get(const vespalib::string &host, }); } else if (path == "/state/v1/metrics") { // Using a 'statereporter' consumer by default removes many uninteresting per-thread - // metrics but retains their aggregates. - return cap_checked(auth_ctx, Capability::content_metrics_api(), [&] { - return respond_metrics(get_consumer(params, "statereporter"), _healthProducer, _metricsProducer); + // metrics but retains their aggregates (side note: per-thread metrics are NOT included + // in Prometheus metrics regardless of the specified consumer). 
+ return cap_check_and_respond_metrics(auth_ctx, params, "statereporter", [&](auto& consumer, auto format) { + if (format == MetricsProducer::ExpositionFormat::Prometheus) { + auto metrics_text = _metricsProducer.getMetrics(consumer, MetricsProducer::ExpositionFormat::Prometheus); + return JsonGetHandler::Response::make_ok_with_content_type(std::move(metrics_text), prometheus_content_type()); + } else { + auto json = respond_json_metrics(consumer, _healthProducer, _metricsProducer); + return JsonGetHandler::Response::make_ok_with_json(std::move(json)); + } }); } else if (path == "/state/v1/config") { return cap_checked(auth_ctx, Capability::content_state_api(), [&] { return respond_config(_componentConfigProducer); }); } else if (path == "/metrics/total") { - return cap_checked(auth_ctx, Capability::content_metrics_api(), [&] { - return _metricsProducer.getTotalMetrics(get_consumer(params, "")); + return cap_check_and_respond_metrics(auth_ctx, params, "", [&](auto& consumer, auto format) { + if (format == MetricsProducer::ExpositionFormat::Prometheus) { + auto metrics_text = _metricsProducer.getTotalMetrics(consumer, MetricsProducer::ExpositionFormat::Prometheus); + return JsonGetHandler::Response::make_ok_with_content_type(std::move(metrics_text), prometheus_content_type()); + } else { + auto json = _metricsProducer.getTotalMetrics(consumer, vespalib::MetricsProducer::ExpositionFormat::JSON); + return JsonGetHandler::Response::make_ok_with_json(std::move(json)); + } }); } else { // Assume this is for the nested state v1 stuff; may delegate capability check to handler later if desired. |