diff options
author | Arne Juul <arnej@verizonmedia.com> | 2021-06-01 14:42:21 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2021-06-02 07:38:26 +0000 |
commit | 63eadd1f540bfbfc21c4515e95d9fa533589eced (patch) | |
tree | 5da9d45cad12eb4acc038ca42be8ae73eec6232c /configd | |
parent | c3d3fc8e7aedba5fb5330692ae866bbb2906ecdc (diff) |
Perform outward connectivity check
* Does not abort startup yet when a peer is unreachable; it only logs the results of the check.
* Startup does, however, now require that the sentinel can connect to itself (self-connectivity).
Diffstat (limited to 'configd')
-rw-r--r-- | configd/src/apps/sentinel/CMakeLists.txt | 1 | ||||
-rw-r--r-- | configd/src/apps/sentinel/env.cpp | 91 | ||||
-rw-r--r-- | configd/src/apps/sentinel/env.h | 3 | ||||
-rw-r--r-- | configd/src/apps/sentinel/outward-check.cpp | 10 | ||||
-rw-r--r-- | configd/src/apps/sentinel/outward-check.h | 31 | ||||
-rw-r--r-- | configd/src/apps/sentinel/peer-check.cpp | 2 | ||||
-rw-r--r-- | configd/src/apps/sentinel/peer-check.h | 1 | ||||
-rw-r--r-- | configd/src/apps/sentinel/rpcserver.h | 1 |
8 files changed, 118 insertions, 22 deletions
diff --git a/configd/src/apps/sentinel/CMakeLists.txt b/configd/src/apps/sentinel/CMakeLists.txt index d67a41f2a75..e77abc19077 100644 --- a/configd/src/apps/sentinel/CMakeLists.txt +++ b/configd/src/apps/sentinel/CMakeLists.txt @@ -9,6 +9,7 @@ vespa_add_executable(configd_config-sentinel_app manager.cpp metrics.cpp output-connection.cpp + outward-check.cpp peer-check.cpp rpchooks.cpp rpcserver.cpp diff --git a/configd/src/apps/sentinel/env.cpp b/configd/src/apps/sentinel/env.cpp index 45eea3c6417..9f76df9c05a 100644 --- a/configd/src/apps/sentinel/env.cpp +++ b/configd/src/apps/sentinel/env.cpp @@ -2,6 +2,8 @@ #include "env.h" #include "check-completion-handler.h" +#include "outward-check.h" +#include <vespa/defaults.h> #include <vespa/log/log.h> #include <vespa/config/common/exceptions.h> #include <vespa/vespalib/util/exceptions.h> @@ -43,21 +45,7 @@ void Env::boot(const std::string &configId) { rpcPort(cfg.port.rpc); statePort(cfg.port.telnet); if (auto up = ConfigOwner::fetchModelConfig(MODEL_TIMEOUT_MS)) { - const ModelConfig &model = *up; - for (const auto & h : model.hosts) { - LOG(info, "- Model for host %s with %zd services", h.name.c_str(), h.services.size()); - for (const auto & s : h.services) { - if (s.name == "config-sentinel") { - LOG(info, " - Model for service %s type %s configid %s with %zd ports", - s.name.c_str(), s.type.c_str(), s.configid.c_str(), s.ports.size()); - for (const auto & p : s.ports) { - if (p.tags.find("rpc") != p.tags.npos) { - LOG(info, " - Model for port %d has tags %s", p.number, p.tags.c_str()); - } - } - } - } - } + waitForConnectivity(*up); } } @@ -79,12 +67,12 @@ void Env::statePort(int port) { throw vespalib::FatalException("Bad port " + std::to_string(port) + ", expected range [1, 65535]", VESPA_STRLOC); } if (port == 0) { - port = 19098; + port = 19098; // default in config } if (_stateServer && port == _statePort) { return; // ok already } - LOG(debug, "Config-sentinel accepts connections on port %d", port); + 
LOG(debug, "Config-sentinel accepts state connections on port %d", port); _stateServer = std::make_unique<vespalib::StateServer>( port, _stateApi.myHealth, _startMetrics.producer, _stateApi.myComponents); _statePort = port; @@ -96,8 +84,73 @@ void Env::notifyConfigUpdated() { } -void Env::handleCmd(Cmd::UP cmd) { - cmd->retError("still booting, not ready for all RPC commands"); +void Env::respondAsEmpty() { + auto commands = _rpcCommandQueue.drain(); + for (Cmd::UP &cmd : commands) { + cmd->retError("still booting, not ready for all RPC commands"); + } +} + +void Env::waitForConnectivity(const ModelConfig &model) { + std::map<std::string, OutwardCheck> connectivityMap; + for (const auto & h : model.hosts) { + bool foundSentinelPort = false; + for (const auto & s : h.services) { + if (s.name == "config-sentinel") { + for (const auto & p : s.ports) { + if (p.tags.find("rpc") != p.tags.npos) { + connectivityMap.try_emplace(h.name, h.name, p.number, _rpcServer->orb()); + foundSentinelPort = true; + } + } + } + } + if (! 
foundSentinelPort) { + LOG(warning, "Did not find 'config-sentinel' RPC port in model for host %s [%zd services]", + h.name.c_str(), h.services.size()); + } + } + size_t cntOk = 0; + size_t cntBad = 0; + for (int retry = 1; retry <= 100; ++retry) { + cntOk = 0; + cntBad = 0; + for (const auto & [hostname, check] : connectivityMap) { + if (check.ok()) { + ++cntOk; + } else if (check.bad()) { + ++cntBad; + } + } + if (cntOk + cntBad == connectivityMap.size()) break; + respondAsEmpty(); + std::this_thread::sleep_for(15ms); + if ((retry % 20) == 0) { + LOG(warning, "still waiting for connectivity checks after %d retries", retry); + } + } + for (const auto & [hostname, check] : connectivityMap) { + const char *s = "unknown"; + if (check.ok()) { s = "ok"; } + if (check.bad()) { s = "bad"; } + LOG(info, "outward check status for host %s is: %s", + hostname.c_str(), s); + } + LOG_ASSERT(cntOk + cntBad == connectivityMap.size()); + const char *myName = vespa::Defaults::vespaHostname(); + int myPort = _rpcServer->getPort(); + OutwardCheck selfCheck(myName, myPort, _rpcServer->orb()); + for (int retry = 0; retry < 1000; ++retry) { + if (selfCheck.bad()) { + LOG(error, "Could not connect to '%s' (myself) at port %d", myName, myPort); + throw InvalidConfigException("failed to self-connect"); + } + if (selfCheck.ok()) { + break; + } + std::this_thread::sleep_for(5ms); + } + LOG_ASSERT(selfCheck.ok()); } } diff --git a/configd/src/apps/sentinel/env.h b/configd/src/apps/sentinel/env.h index 0213fd09460..f117854f006 100644 --- a/configd/src/apps/sentinel/env.h +++ b/configd/src/apps/sentinel/env.h @@ -30,8 +30,9 @@ public: void statePort(int portnum); void notifyConfigUpdated(); - void handleCmd(Cmd::UP cmd); private: + void respondAsEmpty(); + void waitForConnectivity(const ModelConfig &model); ConfigOwner _cfgOwner; CommandQueue _rpcCommandQueue; std::unique_ptr<RpcServer> _rpcServer; diff --git a/configd/src/apps/sentinel/outward-check.cpp 
b/configd/src/apps/sentinel/outward-check.cpp new file mode 100644 index 00000000000..e497f03fe71 --- /dev/null +++ b/configd/src/apps/sentinel/outward-check.cpp @@ -0,0 +1,10 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "outward-check.h" +#include <thread> + +namespace config::sentinel { + +OutwardCheck::~OutwardCheck() = default; + +} diff --git a/configd/src/apps/sentinel/outward-check.h b/configd/src/apps/sentinel/outward-check.h new file mode 100644 index 00000000000..563151af47b --- /dev/null +++ b/configd/src/apps/sentinel/outward-check.h @@ -0,0 +1,31 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "status-callback.h" +#include "peer-check.h" +#include <vespa/fnet/frt/supervisor.h> + +namespace config::sentinel { + +class OutwardCheck : public StatusCallback { + bool _wasOk = false; + bool _wasBad = false; + PeerCheck _check; +public: + OutwardCheck(const std::string &hostname, int portnumber, FRT_Supervisor &orb) + : _check(*this, hostname, portnumber, orb) + {} + virtual ~OutwardCheck(); + bool ok() const { return _wasOk; } + bool bad() const { return _wasBad; } + void returnStatus(bool ok) override { + if (ok) { + _wasBad = false; + _wasOk = true; + } else { + _wasOk = false; + _wasBad = true; + } + } +}; + +} diff --git a/configd/src/apps/sentinel/peer-check.cpp b/configd/src/apps/sentinel/peer-check.cpp index 024f928c994..841d32c9631 100644 --- a/configd/src/apps/sentinel/peer-check.cpp +++ b/configd/src/apps/sentinel/peer-check.cpp @@ -36,7 +36,7 @@ void PeerCheck::RequestDone(FRT_RPCRequest *req) { LOG(warning, "error on ping to %s [port %d]: %s (%d)", _hostname.c_str(), _portnum, req->GetErrorMessage(), req->GetErrorCode()); } else { - LOG(info, "OK ping to %s [port %d]", _hostname.c_str(), _portnum); + LOG(debug, "OK ping to %s [port %d]", _hostname.c_str(), _portnum); statusOk = true; 
} _req->SubRef(); diff --git a/configd/src/apps/sentinel/peer-check.h b/configd/src/apps/sentinel/peer-check.h index 658375a8d7b..145552a9ab1 100644 --- a/configd/src/apps/sentinel/peer-check.h +++ b/configd/src/apps/sentinel/peer-check.h @@ -25,7 +25,6 @@ public: /** from FRT_IRequestWait **/ void RequestDone(FRT_RPCRequest *req) override; - private: StatusCallback &_callback; std::string _hostname; diff --git a/configd/src/apps/sentinel/rpcserver.h b/configd/src/apps/sentinel/rpcserver.h index ef4b394fdca..4c6dea00ddf 100644 --- a/configd/src/apps/sentinel/rpcserver.h +++ b/configd/src/apps/sentinel/rpcserver.h @@ -22,6 +22,7 @@ public: ~RpcServer(); int getPort() const { return _port; } + FRT_Supervisor &orb() { return _server.supervisor(); } }; } // namespace config::sentinel |