From 8089a9a696cafede455a25c3e37432bf24845790 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Tue, 29 Aug 2023 12:22:40 +0000 Subject: Do not return State V1 metrics until at least one metric snapshot has been created This avoids a 60 second time window just after process startup where metrics may be reported from an empty initial snapshot. Depending on the timing of State V1 sampling and metric manager snapshotting, these values risk erroneously appearing as all zero, causing much confusion and gnashing of teeth. --- metrics/src/tests/metricmanagertest.cpp | 13 +++++++++++++ metrics/src/vespa/metrics/metricmanager.cpp | 7 +++++++ metrics/src/vespa/metrics/metricmanager.h | 2 ++ metrics/src/vespa/metrics/metricsnapshot.cpp | 5 ++++- metrics/src/vespa/metrics/metricsnapshot.h | 9 +++++++++ metrics/src/vespa/metrics/state_api_adapter.cpp | 4 ++-- 6 files changed, 37 insertions(+), 3 deletions(-) (limited to 'metrics/src') diff --git a/metrics/src/tests/metricmanagertest.cpp b/metrics/src/tests/metricmanagertest.cpp index 9e6b0f40be3..f655d6c210e 100644 --- a/metrics/src/tests/metricmanagertest.cpp +++ b/metrics/src/tests/metricmanagertest.cpp @@ -474,6 +474,7 @@ TEST_F(MetricManagerTest, test_snapshots) MetricNameVisitor visitor; { MetricLockGuard lockGuard(mm.getMetricLock()); + EXPECT_FALSE(mm.any_snapshots_taken(lockGuard)); // No snapshots yet mm.visit(lockGuard, mm.getActiveMetrics(lockGuard), visitor, "snapper"); const MetricManager::ConsumerSpec * consumerSpec = mm.getConsumerSpec(lockGuard, "snapper"); EXPECT_EQ(std::string("\n" @@ -506,6 +507,10 @@ TEST_F(MetricManagerTest, test_snapshots) ASSERT_VALUES(mm, 5 * 60s, "2,4,4,1,7,9,1,1,8,2,10"); ASSERT_VALUES(mm, 60 * 60s, ""); ASSERT_VALUES(mm, 0 * 60s, "2,4,4,1,7,9,1,1,8,2,10"); + { + auto guard = mm.getMetricLock(); + EXPECT_TRUE(mm.any_snapshots_taken(guard)); // At least one snapshot has been taken + } // Adding metrics done in second five minute period. Total should // be updated to account for both @@ -567,6 +572,14 @@ TEST_F(MetricManagerTest, test_json_output) "consumer[0].tags[1]\n" "consumer[0].tags[0] snaptest\n")); + { + // No snapshots have been taken yet, so the non-total getMetrics call should return + // the empty string (i.e. no metrics produced). + metrics::StateApiAdapter adapter(mm); + auto json_str = adapter.getMetrics("snapper"); + EXPECT_EQ(json_str, ""); + } + takeSnapshots(mm, 1000); // Adding metrics to have some values in them diff --git a/metrics/src/vespa/metrics/metricmanager.cpp b/metrics/src/vespa/metrics/metricmanager.cpp index e399a23b0b9..f3635f72c08 100644 --- a/metrics/src/vespa/metrics/metricmanager.cpp +++ b/metrics/src/vespa/metrics/metricmanager.cpp @@ -168,6 +168,12 @@ MetricManager::isInitialized() const { return static_cast(_configHandle); } +bool +MetricManager::any_snapshots_taken(const MetricLockGuard&) const noexcept { + assert(!_snapshots.empty()); + return _snapshots[0]->current_is_assigned(); +} + void MetricManager::init(const config::ConfigUri & uri, bool startThread) { @@ -795,6 +801,7 @@ MetricManager::takeSnapshots(const MetricLockGuard & guard, system_time timeToPr _activeMetrics.addToSnapshot(firstTarget, false, timeToProcess); _activeMetrics.addToSnapshot(*_totalMetrics, false, timeToProcess); _activeMetrics.reset(timeToProcess); + _snapshots[0]->tag_current_as_assigned(); LOG(debug, "After snapshotting, active metrics goes from %s to %s, and 5 minute metrics goes from %s to %s.", to_string(_activeMetrics.getFromTime()).c_str(), to_string(_activeMetrics.getToTime()).c_str(), to_string(firstTarget.getFromTime()).c_str(), to_string(firstTarget.getToTime()).c_str()); diff --git a/metrics/src/vespa/metrics/metricmanager.h b/metrics/src/vespa/metrics/metricmanager.h index 81d0c581370..cfb3ab2137a 100644 --- a/metrics/src/vespa/metrics/metricmanager.h +++ b/metrics/src/vespa/metrics/metricmanager.h @@ -267,6 +267,8 @@ public: bool isInitialized() const; + [[nodiscard]] bool any_snapshots_taken(const MetricLockGuard&) const noexcept; + private: void takeSnapshots(const MetricLockGuard &, system_time timeToProcess); diff --git a/metrics/src/vespa/metrics/metricsnapshot.cpp b/metrics/src/vespa/metrics/metricsnapshot.cpp index 08ab1be3033..104ad858e43 100644 --- a/metrics/src/vespa/metrics/metricsnapshot.cpp +++ b/metrics/src/vespa/metrics/metricsnapshot.cpp @@ -78,12 +78,15 @@ MetricSnapshotSet::MetricSnapshotSet(const Metric::String& name, system_time::du : _count(count), _builderCount(0), _current(std::make_unique(name, period, source, snapshotUnsetMetrics)), - _building(count == 1 ? nullptr : new MetricSnapshot(name, period, source, snapshotUnsetMetrics)) + _building(count == 1 ? nullptr : new MetricSnapshot(name, period, source, snapshotUnsetMetrics)), + _current_is_assigned(false) { _current->reset(); if (_building) _building->reset(); } +MetricSnapshotSet::~MetricSnapshotSet() = default; + MetricSnapshot& MetricSnapshotSet::getNextTarget() { diff --git a/metrics/src/vespa/metrics/metricsnapshot.h b/metrics/src/vespa/metrics/metricsnapshot.h index e2cedf018c9..a6a68b43015 100644 --- a/metrics/src/vespa/metrics/metricsnapshot.h +++ b/metrics/src/vespa/metrics/metricsnapshot.h @@ -77,9 +77,11 @@ class MetricSnapshotSet { // building instance. std::unique_ptr _current; // The last full period std::unique_ptr _building; // The building period + bool _current_is_assigned; public: MetricSnapshotSet(const Metric::String& name, system_time::duration period, uint32_t count, const MetricSet& source, bool snapshotUnsetMetrics); + ~MetricSnapshotSet(); const Metric::String& getName() const { return _current->getName(); } system_time::duration getPeriod() const { return _current->getPeriod(); } @@ -109,6 +111,13 @@ public: void recreateSnapshot(const MetricSet& metrics, bool copyUnset); void addMemoryUsage(MemoryConsumption&) const; void setFromTime(system_time fromTime); + + [[nodiscard]] bool current_is_assigned() const noexcept { + return _current_is_assigned; + } + void tag_current_as_assigned() noexcept { + _current_is_assigned = true; + } }; } // metrics diff --git a/metrics/src/vespa/metrics/state_api_adapter.cpp b/metrics/src/vespa/metrics/state_api_adapter.cpp index 136ccf6e06a..6c0cb9a6013 100644 --- a/metrics/src/vespa/metrics/state_api_adapter.cpp +++ b/metrics/src/vespa/metrics/state_api_adapter.cpp @@ -11,8 +11,8 @@ StateApiAdapter::getMetrics(const vespalib::string &consumer) { MetricLockGuard guard(_manager.getMetricLock()); auto periods = _manager.getSnapshotPeriods(guard); - if (periods.empty()) { - return ""; // no configuration yet + if (periods.empty() || !_manager.any_snapshots_taken(guard)) { + return ""; // no configuration or snapshots yet } const MetricSnapshot &snapshot(_manager.getMetricSnapshot(guard, periods[0])); vespalib::asciistream json; -- cgit v1.2.3 From 0ee50412c84796b2991902545d3841ea6007ec2c Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Tue, 29 Aug 2023 13:22:01 +0000 Subject: Let `any_snapshots_taken()` be well-defined prior to metric manager init --- metrics/src/tests/metricmanagertest.cpp | 1 + metrics/src/vespa/metrics/metricmanager.cpp | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'metrics/src') diff --git a/metrics/src/tests/metricmanagertest.cpp b/metrics/src/tests/metricmanagertest.cpp index f655d6c210e..a6dd141576f 100644 --- a/metrics/src/tests/metricmanagertest.cpp +++ b/metrics/src/tests/metricmanagertest.cpp @@ -462,6 +462,7 @@ TEST_F(MetricManagerTest, test_snapshots) { MetricLockGuard lockGuard(mm.getMetricLock()); mm.registerMetric(lockGuard, mySet.set); + EXPECT_FALSE(mm.any_snapshots_taken(lockGuard)); // well-defined prior to init() } mm.init(ConfigUri("raw:" "consumer[2]\n" diff --git a/metrics/src/vespa/metrics/metricmanager.cpp b/metrics/src/vespa/metrics/metricmanager.cpp index f3635f72c08..fa18ddc383b 100644 --- a/metrics/src/vespa/metrics/metricmanager.cpp +++ b/metrics/src/vespa/metrics/metricmanager.cpp @@ -170,8 +170,7 @@ MetricManager::isInitialized() const { bool MetricManager::any_snapshots_taken(const MetricLockGuard&) const noexcept { - assert(!_snapshots.empty()); - return _snapshots[0]->current_is_assigned(); + return (!_snapshots.empty() && _snapshots[0]->current_is_assigned()); } void -- cgit v1.2.3