diff options
author | Tor Egge <Tor.Egge@online.no> | 2023-07-19 16:33:37 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2023-07-19 16:33:37 +0200 |
commit | 0f6bdb99acea472c7f109ef3426a5388ef49c6e6 (patch) | |
tree | 861f331dfc0735d3684f7829e6bce18b5d25f1e7 | |
parent | 9819e2c447f2ff54dc458bf33b8d0ea84d1017d6 (diff) |
Warn on missing health ping.
-rw-r--r-- | storage/src/vespa/storage/storageserver/statemanager.cpp | 30 | ||||
-rw-r--r-- | storage/src/vespa/storage/storageserver/statemanager.h | 5 |
2 files changed, 35 insertions, 0 deletions
diff --git a/storage/src/vespa/storage/storageserver/statemanager.cpp b/storage/src/vespa/storage/storageserver/statemanager.cpp index 654fe0e1f5d..cb3bfcf6400 100644 --- a/storage/src/vespa/storage/storageserver/statemanager.cpp +++ b/storage/src/vespa/storage/storageserver/statemanager.cpp @@ -17,6 +17,7 @@ #include <vespa/vespalib/util/exceptions.h> #include <vespa/vespalib/util/string_escape.h> #include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/util/time.h> #include <fstream> #include <vespa/log/log.h> @@ -68,6 +69,10 @@ StateManager::StateManager(StorageComponentRegister& compReg, _threadLock(), _systemStateHistory(), _systemStateHistorySize(50), + _start_time(vespalib::steady_clock::now()), + _health_ping_time(), + _health_ping_warn_interval(5min), + _health_ping_warn_time(_start_time + _health_ping_warn_interval), _hostInfo(std::move(hostInfo)), _controllers_observed_explicit_node_state(), _noThreadTestMode(testMode), @@ -391,6 +396,8 @@ StateManager::onGetNodeState(const api::GetNodeStateCommand::SP& cmd) std::shared_ptr<api::GetNodeStateReply> reply; { std::unique_lock guard(_stateLock); + _health_ping_time = vespalib::steady_clock::now(); + _health_ping_warn_time = _health_ping_time.value() + _health_ping_warn_interval; const bool is_up_to_date = (_controllers_observed_explicit_node_state.find(cmd->getSourceIndex()) != _controllers_observed_explicit_node_state.end()); if ((cmd->getExpectedState() != nullptr) @@ -479,6 +486,28 @@ StateManager::run(framework::ThreadHandle& thread) } void +StateManager::warn_on_missing_health_ping() +{ + vespalib::steady_time now(vespalib::steady_clock::now()); + std::optional<vespalib::steady_time> health_ping_time; + { + std::lock_guard lock(_stateLock); + if (now <= _health_ping_warn_time) { + return; + } + health_ping_time = _health_ping_time; + _health_ping_warn_time = now + _health_ping_warn_interval; + } + if (health_ping_time.has_value()) { + vespalib::duration duration = now - health_ping_time.value(); + LOG(warning, "Last health ping was %1.1f seconds ago", vespalib::to_s(duration)); + } else { + vespalib::duration duration = now - _start_time; + LOG(warning, "No health pings since startup %1.1f seconds ago", vespalib::to_s(duration)); + } +} + +void StateManager::tick() { bool almost_immediate_replies = _requested_almost_immediate_node_state_replies.load(std::memory_order_relaxed); if (almost_immediate_replies) { @@ -487,6 +516,7 @@ StateManager::tick() { } else { sendGetNodeStateReplies(_component.getClock().getMonotonicTime()); } + warn_on_missing_health_ping(); } bool diff --git a/storage/src/vespa/storage/storageserver/statemanager.h b/storage/src/vespa/storage/storageserver/statemanager.h index 0b9a47c2515..3b1291b1c3f 100644 --- a/storage/src/vespa/storage/storageserver/statemanager.h +++ b/storage/src/vespa/storage/storageserver/statemanager.h @@ -65,6 +65,10 @@ class StateManager : public NodeStateUpdater, std::condition_variable _threadCond; std::deque<TimeSysStatePair> _systemStateHistory; uint32_t _systemStateHistorySize; + const vespalib::steady_time _start_time; + std::optional<vespalib::steady_time> _health_ping_time; + vespalib::duration _health_ping_warn_interval; + vespalib::steady_time _health_ping_warn_time; std::unique_ptr<HostInfo> _hostInfo; std::unique_ptr<framework::Thread> _thread; // Controllers that have observed a GetNodeState response sent _after_ @@ -84,6 +88,7 @@ public: void onClose() override; void tick(); + void warn_on_missing_health_ping(); void print(std::ostream& out, bool verbose, const std::string& indent) const override; void reportHtmlStatus(std::ostream&, const framework::HttpUrlPath&) const override; |