summaryrefslogtreecommitdiffstats
path: root/metrics
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahooinc.com>2023-08-29 12:22:40 +0000
committerTor Brede Vekterli <vekterli@yahooinc.com>2023-08-29 13:04:35 +0000
commit8089a9a696cafede455a25c3e37432bf24845790 (patch)
treee29430a3b0c0879f7a01b8e58aca3f9e3b41a578 /metrics
parenta42035a3325890d63ac8dd1526d012f093f1fc90 (diff)
Do not return State V1 metrics until at least one metric snapshot has been created
This avoids a 60 second time window just after process startup where metrics may be reported from an empty initial snapshot. Depending on the timing of State V1 sampling and metric manager snapshotting, these values risk erroneously appearing as all zero, causing much confusion and gnashing of teeth.
Diffstat (limited to 'metrics')
-rw-r--r--metrics/src/tests/metricmanagertest.cpp13
-rw-r--r--metrics/src/vespa/metrics/metricmanager.cpp7
-rw-r--r--metrics/src/vespa/metrics/metricmanager.h2
-rw-r--r--metrics/src/vespa/metrics/metricsnapshot.cpp5
-rw-r--r--metrics/src/vespa/metrics/metricsnapshot.h9
-rw-r--r--metrics/src/vespa/metrics/state_api_adapter.cpp4
6 files changed, 37 insertions, 3 deletions
diff --git a/metrics/src/tests/metricmanagertest.cpp b/metrics/src/tests/metricmanagertest.cpp
index 9e6b0f40be3..f655d6c210e 100644
--- a/metrics/src/tests/metricmanagertest.cpp
+++ b/metrics/src/tests/metricmanagertest.cpp
@@ -474,6 +474,7 @@ TEST_F(MetricManagerTest, test_snapshots)
MetricNameVisitor visitor;
{
MetricLockGuard lockGuard(mm.getMetricLock());
+ EXPECT_FALSE(mm.any_snapshots_taken(lockGuard)); // No snapshots yet
mm.visit(lockGuard, mm.getActiveMetrics(lockGuard), visitor, "snapper");
const MetricManager::ConsumerSpec * consumerSpec = mm.getConsumerSpec(lockGuard, "snapper");
EXPECT_EQ(std::string("\n"
@@ -506,6 +507,10 @@ TEST_F(MetricManagerTest, test_snapshots)
ASSERT_VALUES(mm, 5 * 60s, "2,4,4,1,7,9,1,1,8,2,10");
ASSERT_VALUES(mm, 60 * 60s, "");
ASSERT_VALUES(mm, 0 * 60s, "2,4,4,1,7,9,1,1,8,2,10");
+ {
+ auto guard = mm.getMetricLock();
+ EXPECT_TRUE(mm.any_snapshots_taken(guard)); // At least one snapshot has been taken
+ }
// Adding metrics done in second five minute period. Total should
// be updated to account for both
@@ -567,6 +572,14 @@ TEST_F(MetricManagerTest, test_json_output)
"consumer[0].tags[1]\n"
"consumer[0].tags[0] snaptest\n"));
+ {
+ // No snapshots have been taken yet, so the non-total getMetrics call should return
+ // the empty string (i.e. no metrics produced).
+ metrics::StateApiAdapter adapter(mm);
+ auto json_str = adapter.getMetrics("snapper");
+ EXPECT_EQ(json_str, "");
+ }
+
takeSnapshots(mm, 1000);
// Adding metrics to have some values in them
diff --git a/metrics/src/vespa/metrics/metricmanager.cpp b/metrics/src/vespa/metrics/metricmanager.cpp
index e399a23b0b9..f3635f72c08 100644
--- a/metrics/src/vespa/metrics/metricmanager.cpp
+++ b/metrics/src/vespa/metrics/metricmanager.cpp
@@ -168,6 +168,12 @@ MetricManager::isInitialized() const {
return static_cast<bool>(_configHandle);
}
+bool
+MetricManager::any_snapshots_taken(const MetricLockGuard&) const noexcept {
+ assert(!_snapshots.empty());
+ return _snapshots[0]->current_is_assigned();
+}
+
void
MetricManager::init(const config::ConfigUri & uri, bool startThread)
{
@@ -795,6 +801,7 @@ MetricManager::takeSnapshots(const MetricLockGuard & guard, system_time timeToPr
_activeMetrics.addToSnapshot(firstTarget, false, timeToProcess);
_activeMetrics.addToSnapshot(*_totalMetrics, false, timeToProcess);
_activeMetrics.reset(timeToProcess);
+ _snapshots[0]->tag_current_as_assigned();
LOG(debug, "After snapshotting, active metrics goes from %s to %s, and 5 minute metrics goes from %s to %s.",
to_string(_activeMetrics.getFromTime()).c_str(), to_string(_activeMetrics.getToTime()).c_str(),
to_string(firstTarget.getFromTime()).c_str(), to_string(firstTarget.getToTime()).c_str());
diff --git a/metrics/src/vespa/metrics/metricmanager.h b/metrics/src/vespa/metrics/metricmanager.h
index 81d0c581370..cfb3ab2137a 100644
--- a/metrics/src/vespa/metrics/metricmanager.h
+++ b/metrics/src/vespa/metrics/metricmanager.h
@@ -267,6 +267,8 @@ public:
bool isInitialized() const;
+ [[nodiscard]] bool any_snapshots_taken(const MetricLockGuard&) const noexcept;
+
private:
void takeSnapshots(const MetricLockGuard &, system_time timeToProcess);
diff --git a/metrics/src/vespa/metrics/metricsnapshot.cpp b/metrics/src/vespa/metrics/metricsnapshot.cpp
index 08ab1be3033..104ad858e43 100644
--- a/metrics/src/vespa/metrics/metricsnapshot.cpp
+++ b/metrics/src/vespa/metrics/metricsnapshot.cpp
@@ -78,12 +78,15 @@ MetricSnapshotSet::MetricSnapshotSet(const Metric::String& name, system_time::du
: _count(count),
_builderCount(0),
_current(std::make_unique<MetricSnapshot>(name, period, source, snapshotUnsetMetrics)),
- _building(count == 1 ? nullptr : new MetricSnapshot(name, period, source, snapshotUnsetMetrics))
+ _building(count == 1 ? nullptr : new MetricSnapshot(name, period, source, snapshotUnsetMetrics)),
+ _current_is_assigned(false)
{
_current->reset();
if (_building) _building->reset();
}
+MetricSnapshotSet::~MetricSnapshotSet() = default;
+
MetricSnapshot&
MetricSnapshotSet::getNextTarget()
{
diff --git a/metrics/src/vespa/metrics/metricsnapshot.h b/metrics/src/vespa/metrics/metricsnapshot.h
index e2cedf018c9..a6a68b43015 100644
--- a/metrics/src/vespa/metrics/metricsnapshot.h
+++ b/metrics/src/vespa/metrics/metricsnapshot.h
@@ -77,9 +77,11 @@ class MetricSnapshotSet {
// building instance.
std::unique_ptr<MetricSnapshot> _current; // The last full period
std::unique_ptr<MetricSnapshot> _building; // The building period
+ bool _current_is_assigned;
public:
MetricSnapshotSet(const Metric::String& name, system_time::duration period, uint32_t count,
const MetricSet& source, bool snapshotUnsetMetrics);
+ ~MetricSnapshotSet();
const Metric::String& getName() const { return _current->getName(); }
system_time::duration getPeriod() const { return _current->getPeriod(); }
@@ -109,6 +111,13 @@ public:
void recreateSnapshot(const MetricSet& metrics, bool copyUnset);
void addMemoryUsage(MemoryConsumption&) const;
void setFromTime(system_time fromTime);
+
+ [[nodiscard]] bool current_is_assigned() const noexcept {
+ return _current_is_assigned;
+ }
+ void tag_current_as_assigned() noexcept {
+ _current_is_assigned = true;
+ }
};
} // metrics
diff --git a/metrics/src/vespa/metrics/state_api_adapter.cpp b/metrics/src/vespa/metrics/state_api_adapter.cpp
index 136ccf6e06a..6c0cb9a6013 100644
--- a/metrics/src/vespa/metrics/state_api_adapter.cpp
+++ b/metrics/src/vespa/metrics/state_api_adapter.cpp
@@ -11,8 +11,8 @@ StateApiAdapter::getMetrics(const vespalib::string &consumer)
{
MetricLockGuard guard(_manager.getMetricLock());
auto periods = _manager.getSnapshotPeriods(guard);
- if (periods.empty()) {
- return ""; // no configuration yet
+ if (periods.empty() || !_manager.any_snapshots_taken(guard)) {
+ return ""; // no configuration or snapshots yet
}
const MetricSnapshot &snapshot(_manager.getMetricSnapshot(guard, periods[0]));
vespalib::asciistream json;