about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@online.no> 2023-07-19 16:33:37 +0200
committer Tor Egge <Tor.Egge@online.no> 2023-07-19 16:33:37 +0200
commit 0f6bdb99acea472c7f109ef3426a5388ef49c6e6 (patch)
tree 861f331dfc0735d3684f7829e6bce18b5d25f1e7
parent 9819e2c447f2ff54dc458bf33b8d0ea84d1017d6 (diff)
Warn on missing health ping.
-rw-r--r-- storage/src/vespa/storage/storageserver/statemanager.cpp 30
-rw-r--r-- storage/src/vespa/storage/storageserver/statemanager.h 5
2 files changed, 35 insertions, 0 deletions
diff --git a/storage/src/vespa/storage/storageserver/statemanager.cpp b/storage/src/vespa/storage/storageserver/statemanager.cpp
index 654fe0e1f5d..cb3bfcf6400 100644
--- a/storage/src/vespa/storage/storageserver/statemanager.cpp
+++ b/storage/src/vespa/storage/storageserver/statemanager.cpp
@@ -17,6 +17,7 @@
#include <vespa/vespalib/util/exceptions.h>
#include <vespa/vespalib/util/string_escape.h>
#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/util/time.h>
#include <fstream>
#include <vespa/log/log.h>
@@ -68,6 +69,10 @@ StateManager::StateManager(StorageComponentRegister& compReg,
_threadLock(),
_systemStateHistory(),
_systemStateHistorySize(50),
+ _start_time(vespalib::steady_clock::now()),
+ _health_ping_time(),
+ _health_ping_warn_interval(5min),
+ _health_ping_warn_time(_start_time + _health_ping_warn_interval),
_hostInfo(std::move(hostInfo)),
_controllers_observed_explicit_node_state(),
_noThreadTestMode(testMode),
@@ -391,6 +396,8 @@ StateManager::onGetNodeState(const api::GetNodeStateCommand::SP& cmd)
std::shared_ptr<api::GetNodeStateReply> reply;
{
std::unique_lock guard(_stateLock);
+ _health_ping_time = vespalib::steady_clock::now();
+ _health_ping_warn_time = _health_ping_time.value() + _health_ping_warn_interval;
const bool is_up_to_date = (_controllers_observed_explicit_node_state.find(cmd->getSourceIndex())
!= _controllers_observed_explicit_node_state.end());
if ((cmd->getExpectedState() != nullptr)
@@ -479,6 +486,28 @@ StateManager::run(framework::ThreadHandle& thread)
}
void
+StateManager::warn_on_missing_health_ping() // Logs a warning once the current time is past _health_ping_warn_time; invoked from tick().
+{
+ vespalib::steady_time now(vespalib::steady_clock::now());
+ std::optional<vespalib::steady_time> health_ping_time; // stays empty if onGetNodeState() has not yet recorded a ping
+ {
+ std::lock_guard lock(_stateLock); // same mutex onGetNodeState() holds while it updates the ping time and warn deadline
+ if (now <= _health_ping_warn_time) {
+ return; // warn deadline not passed yet: nothing to report
+ }
+ health_ping_time = _health_ping_time; // snapshot under the lock so the logging below runs without holding it
+ _health_ping_warn_time = now + _health_ping_warn_interval; // move the deadline forward so the next warning is at least one interval away
+ }
+ if (health_ping_time.has_value()) {
+ vespalib::duration duration = now - health_ping_time.value(); // time elapsed since the last recorded ping
+ LOG(warning, "Last health ping was %1.1f seconds ago", vespalib::to_s(duration));
+ } else {
+ vespalib::duration duration = now - _start_time; // no ping recorded: measure from _start_time, which is set in the constructor
+ LOG(warning, "No health pings since startup %1.1f seconds ago", vespalib::to_s(duration));
+ }
+}
+
+void
StateManager::tick() {
bool almost_immediate_replies = _requested_almost_immediate_node_state_replies.load(std::memory_order_relaxed);
if (almost_immediate_replies) {
@@ -487,6 +516,7 @@ StateManager::tick() {
} else {
sendGetNodeStateReplies(_component.getClock().getMonotonicTime());
}
+ warn_on_missing_health_ping();
}
bool
diff --git a/storage/src/vespa/storage/storageserver/statemanager.h b/storage/src/vespa/storage/storageserver/statemanager.h
index 0b9a47c2515..3b1291b1c3f 100644
--- a/storage/src/vespa/storage/storageserver/statemanager.h
+++ b/storage/src/vespa/storage/storageserver/statemanager.h
@@ -65,6 +65,10 @@ class StateManager : public NodeStateUpdater,
std::condition_variable _threadCond;
std::deque<TimeSysStatePair> _systemStateHistory;
uint32_t _systemStateHistorySize;
+ const vespalib::steady_time _start_time;
+ std::optional<vespalib::steady_time> _health_ping_time;
+ vespalib::duration _health_ping_warn_interval;
+ vespalib::steady_time _health_ping_warn_time;
std::unique_ptr<HostInfo> _hostInfo;
std::unique_ptr<framework::Thread> _thread;
// Controllers that have observed a GetNodeState response sent _after_
@@ -84,6 +88,7 @@ public:
void onClose() override;
void tick();
+ void warn_on_missing_health_ping();
void print(std::ostream& out, bool verbose, const std::string& indent) const override;
void reportHtmlStatus(std::ostream&, const framework::HttpUrlPath&) const override;