summary refs log tree commit diff stats
path: root/configd
diff options
context:
space:
mode:
author Arne Juul <arnej@verizonmedia.com> 2021-06-01 14:42:21 +0000
committer Arne Juul <arnej@verizonmedia.com> 2021-06-02 07:38:26 +0000
commit 63eadd1f540bfbfc21c4515e95d9fa533589eced (patch)
tree 5da9d45cad12eb4acc038ca42be8ae73eec6232c /configd
parent c3d3fc8e7aedba5fb5330692ae866bbb2906ecdc (diff)
perform outward connectivity check
* does not abort startup yet, just logs results from check
* but requires self-connectivity now
Diffstat (limited to 'configd')
-rw-r--r-- configd/src/apps/sentinel/CMakeLists.txt | 1
-rw-r--r-- configd/src/apps/sentinel/env.cpp | 91
-rw-r--r-- configd/src/apps/sentinel/env.h | 3
-rw-r--r-- configd/src/apps/sentinel/outward-check.cpp | 10
-rw-r--r-- configd/src/apps/sentinel/outward-check.h | 31
-rw-r--r-- configd/src/apps/sentinel/peer-check.cpp | 2
-rw-r--r-- configd/src/apps/sentinel/peer-check.h | 1
-rw-r--r-- configd/src/apps/sentinel/rpcserver.h | 1
8 files changed, 118 insertions, 22 deletions
diff --git a/configd/src/apps/sentinel/CMakeLists.txt b/configd/src/apps/sentinel/CMakeLists.txt
index d67a41f2a75..e77abc19077 100644
--- a/configd/src/apps/sentinel/CMakeLists.txt
+++ b/configd/src/apps/sentinel/CMakeLists.txt
@@ -9,6 +9,7 @@ vespa_add_executable(configd_config-sentinel_app
manager.cpp
metrics.cpp
output-connection.cpp
+ outward-check.cpp
peer-check.cpp
rpchooks.cpp
rpcserver.cpp
diff --git a/configd/src/apps/sentinel/env.cpp b/configd/src/apps/sentinel/env.cpp
index 45eea3c6417..9f76df9c05a 100644
--- a/configd/src/apps/sentinel/env.cpp
+++ b/configd/src/apps/sentinel/env.cpp
@@ -2,6 +2,8 @@
#include "env.h"
#include "check-completion-handler.h"
+#include "outward-check.h"
+#include <vespa/defaults.h>
#include <vespa/log/log.h>
#include <vespa/config/common/exceptions.h>
#include <vespa/vespalib/util/exceptions.h>
@@ -43,21 +45,7 @@ void Env::boot(const std::string &configId) {
rpcPort(cfg.port.rpc);
statePort(cfg.port.telnet);
if (auto up = ConfigOwner::fetchModelConfig(MODEL_TIMEOUT_MS)) {
- const ModelConfig &model = *up;
- for (const auto & h : model.hosts) {
- LOG(info, "- Model for host %s with %zd services", h.name.c_str(), h.services.size());
- for (const auto & s : h.services) {
- if (s.name == "config-sentinel") {
- LOG(info, " - Model for service %s type %s configid %s with %zd ports",
- s.name.c_str(), s.type.c_str(), s.configid.c_str(), s.ports.size());
- for (const auto & p : s.ports) {
- if (p.tags.find("rpc") != p.tags.npos) {
- LOG(info, " - Model for port %d has tags %s", p.number, p.tags.c_str());
- }
- }
- }
- }
- }
+ waitForConnectivity(*up);
}
}
@@ -79,12 +67,12 @@ void Env::statePort(int port) {
throw vespalib::FatalException("Bad port " + std::to_string(port) + ", expected range [1, 65535]", VESPA_STRLOC);
}
if (port == 0) {
- port = 19098;
+ port = 19098; // default in config
}
if (_stateServer && port == _statePort) {
return; // ok already
}
- LOG(debug, "Config-sentinel accepts connections on port %d", port);
+ LOG(debug, "Config-sentinel accepts state connections on port %d", port);
_stateServer = std::make_unique<vespalib::StateServer>(
port, _stateApi.myHealth, _startMetrics.producer, _stateApi.myComponents);
_statePort = port;
@@ -96,8 +84,73 @@ void Env::notifyConfigUpdated() {
}
-void Env::handleCmd(Cmd::UP cmd) {
- cmd->retError("still booting, not ready for all RPC commands");
+/**
+ * Drain every RPC command currently waiting in the command queue and
+ * answer each of them with an error saying that boot is still in progress.
+ **/
+void Env::respondAsEmpty() {
+    for (Cmd::UP &pending : _rpcCommandQueue.drain()) {
+        pending->retError("still booting, not ready for all RPC commands");
+    }
+}
+
+/**
+ * Start an outward connectivity check towards the config-sentinel RPC
+ * port of every host in the model, poll until the checks have reported
+ * a result, log the per-host status, and finally check that this
+ * sentinel can connect to its own RPC port.
+ *
+ * Queued RPC commands are answered through respondAsEmpty() while the
+ * host checks are being polled.
+ *
+ * @param model model config whose hosts, services and ports are scanned
+ * @throws InvalidConfigException when the self-check reports failure
+ **/
+void Env::waitForConnectivity(const ModelConfig &model) {
+    // Keyed by host name; try_emplace keeps the first RPC port seen for a host.
+    std::map<std::string, OutwardCheck> connectivityMap;
+    for (const auto & h : model.hosts) {
+        bool foundSentinelPort = false;
+        for (const auto & s : h.services) {
+            if (s.name == "config-sentinel") {
+                for (const auto & p : s.ports) {
+                    if (p.tags.find("rpc") != p.tags.npos) {
+                        connectivityMap.try_emplace(h.name, h.name, p.number, _rpcServer->orb());
+                        foundSentinelPort = true;
+                    }
+                }
+            }
+        }
+        if (! foundSentinelPort) {
+            // size() is unsigned (size_t), so the matching conversion is %zu
+            LOG(warning, "Did not find 'config-sentinel' RPC port in model for host %s [%zu services]",
+                h.name.c_str(), h.services.size());
+        }
+    }
+    size_t cntOk = 0;
+    size_t cntBad = 0;
+    // Poll at most 100 times, sleeping 15ms between rounds.
+    for (int retry = 1; retry <= 100; ++retry) {
+        cntOk = 0;
+        cntBad = 0;
+        for (const auto & [hostname, check] : connectivityMap) {
+            if (check.ok()) {
+                ++cntOk;
+            } else if (check.bad()) {
+                ++cntBad;
+            }
+        }
+        // every check has reported either ok or bad
+        if (cntOk + cntBad == connectivityMap.size()) break;
+        respondAsEmpty();
+        std::this_thread::sleep_for(15ms);
+        if ((retry % 20) == 0) {
+            LOG(warning, "still waiting for connectivity checks after %d retries", retry);
+        }
+    }
+    for (const auto & [hostname, check] : connectivityMap) {
+        const char *s = "unknown";
+        if (check.ok()) { s = "ok"; }
+        if (check.bad()) { s = "bad"; }
+        LOG(info, "outward check status for host %s is: %s",
+            hostname.c_str(), s);
+    }
+    // NOTE(review): this fails if any check is still undecided after the
+    // polling loop above - confirm that this is the intended outcome.
+    LOG_ASSERT(cntOk + cntBad == connectivityMap.size());
+    const char *myName = vespa::Defaults::vespaHostname();
+    int myPort = _rpcServer->getPort();
+    OutwardCheck selfCheck(myName, myPort, _rpcServer->orb());
+    // Poll the self-check at most 1000 times, sleeping 5ms between rounds.
+    for (int retry = 0; retry < 1000; ++retry) {
+        if (selfCheck.bad()) {
+            LOG(error, "Could not connect to '%s' (myself) at port %d", myName, myPort);
+            throw InvalidConfigException("failed to self-connect");
+        }
+        if (selfCheck.ok()) {
+            break;
+        }
+        std::this_thread::sleep_for(5ms);
+    }
+    // NOTE(review): reached without ok() only if the self-check never
+    // reported a result within the polling loop - TODO confirm handling.
+    LOG_ASSERT(selfCheck.ok());
+}
}
diff --git a/configd/src/apps/sentinel/env.h b/configd/src/apps/sentinel/env.h
index 0213fd09460..f117854f006 100644
--- a/configd/src/apps/sentinel/env.h
+++ b/configd/src/apps/sentinel/env.h
@@ -30,8 +30,9 @@ public:
void statePort(int portnum);
void notifyConfigUpdated();
- void handleCmd(Cmd::UP cmd);
private:
+ void respondAsEmpty();
+ void waitForConnectivity(const ModelConfig &model);
ConfigOwner _cfgOwner;
CommandQueue _rpcCommandQueue;
std::unique_ptr<RpcServer> _rpcServer;
diff --git a/configd/src/apps/sentinel/outward-check.cpp b/configd/src/apps/sentinel/outward-check.cpp
new file mode 100644
index 00000000000..e497f03fe71
--- /dev/null
+++ b/configd/src/apps/sentinel/outward-check.cpp
@@ -0,0 +1,10 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "outward-check.h"
+#include <thread>
+
+namespace config::sentinel {
+
+// Out-of-line definition of the virtual destructor declared in outward-check.h.
+OutwardCheck::~OutwardCheck() = default;
+
+}
diff --git a/configd/src/apps/sentinel/outward-check.h b/configd/src/apps/sentinel/outward-check.h
new file mode 100644
index 00000000000..563151af47b
--- /dev/null
+++ b/configd/src/apps/sentinel/outward-check.h
@@ -0,0 +1,31 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "status-callback.h"
+#include "peer-check.h"
+#include <vespa/fnet/frt/supervisor.h>
+#include <atomic>
+
+namespace config::sentinel {
+
+/**
+ * Holds the outcome of one connectivity check towards a host/port.
+ * Constructing the object creates the PeerCheck, which reports back
+ * through returnStatus(); callers poll ok() / bad() for the result.
+ * Both are false until a status has been reported.
+ **/
+class OutwardCheck : public StatusCallback {
+    // Written by returnStatus() and polled through ok()/bad(). Atomic because
+    // the status is presumably delivered on a different thread than the one
+    // polling; plain bools would then be a data race.
+    // Declared before _check so they are initialized before the check exists.
+    std::atomic<bool> _wasOk{false};
+    std::atomic<bool> _wasBad{false};
+    PeerCheck _check;
+public:
+    /**
+     * @param hostname   host to check connectivity to
+     * @param portnumber port on that host
+     * @param orb        supervisor handed on to the PeerCheck
+     **/
+    OutwardCheck(const std::string &hostname, int portnumber, FRT_Supervisor &orb)
+      : _check(*this, hostname, portnumber, orb)
+    {}
+    virtual ~OutwardCheck();
+    /** @return true once a successful status has been reported */
+    bool ok() const { return _wasOk.load(); }
+    /** @return true once a failed status has been reported */
+    bool bad() const { return _wasBad.load(); }
+    /** StatusCallback hook: records the latest reported status. */
+    void returnStatus(bool ok) override {
+        if (ok) {
+            // clear the opposite flag first so both are never true together
+            _wasBad.store(false);
+            _wasOk.store(true);
+        } else {
+            _wasOk.store(false);
+            _wasBad.store(true);
+        }
+    }
+};
+
+}
diff --git a/configd/src/apps/sentinel/peer-check.cpp b/configd/src/apps/sentinel/peer-check.cpp
index 024f928c994..841d32c9631 100644
--- a/configd/src/apps/sentinel/peer-check.cpp
+++ b/configd/src/apps/sentinel/peer-check.cpp
@@ -36,7 +36,7 @@ void PeerCheck::RequestDone(FRT_RPCRequest *req) {
LOG(warning, "error on ping to %s [port %d]: %s (%d)", _hostname.c_str(), _portnum,
req->GetErrorMessage(), req->GetErrorCode());
} else {
- LOG(info, "OK ping to %s [port %d]", _hostname.c_str(), _portnum);
+ LOG(debug, "OK ping to %s [port %d]", _hostname.c_str(), _portnum);
statusOk = true;
}
_req->SubRef();
diff --git a/configd/src/apps/sentinel/peer-check.h b/configd/src/apps/sentinel/peer-check.h
index 658375a8d7b..145552a9ab1 100644
--- a/configd/src/apps/sentinel/peer-check.h
+++ b/configd/src/apps/sentinel/peer-check.h
@@ -25,7 +25,6 @@ public:
/** from FRT_IRequestWait **/
void RequestDone(FRT_RPCRequest *req) override;
-
private:
StatusCallback &_callback;
std::string _hostname;
diff --git a/configd/src/apps/sentinel/rpcserver.h b/configd/src/apps/sentinel/rpcserver.h
index ef4b394fdca..4c6dea00ddf 100644
--- a/configd/src/apps/sentinel/rpcserver.h
+++ b/configd/src/apps/sentinel/rpcserver.h
@@ -22,6 +22,7 @@ public:
~RpcServer();
int getPort() const { return _port; }
+ /** Gives access to the supervisor of the contained server object. **/
+ FRT_Supervisor &orb() { return _server.supervisor(); }
};
} // namespace config::sentinel