summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArne H Juul <arnej27959@users.noreply.github.com>2021-06-02 16:16:50 +0200
committerGitHub <noreply@github.com>2021-06-02 16:16:50 +0200
commitf17496e440389c12f1f8028a5937ef0c93a5c7d8 (patch)
tree1d6147a9bd26290217450ee7d4406153bd336d67
parent41cf24e114e7c0f3aa9e8e882725c52b77470c8e (diff)
parente8d528c3c1357b7c009734db01f63155bfb15613 (diff)
Merge pull request #18082 from vespa-engine/arnej/outward-conn-check
Arnej/outward conn check
-rw-r--r--configd/src/apps/sentinel/CMakeLists.txt1
-rw-r--r--configd/src/apps/sentinel/env.cpp87
-rw-r--r--configd/src/apps/sentinel/env.h3
-rw-r--r--configd/src/apps/sentinel/manager.cpp2
-rw-r--r--configd/src/apps/sentinel/manager.h2
-rw-r--r--configd/src/apps/sentinel/outward-check.cpp55
-rw-r--r--configd/src/apps/sentinel/outward-check.h46
-rw-r--r--configd/src/apps/sentinel/peer-check.cpp6
-rw-r--r--configd/src/apps/sentinel/peer-check.h4
-rw-r--r--configd/src/apps/sentinel/rpchooks.cpp10
-rw-r--r--configd/src/apps/sentinel/rpcserver.h1
-rw-r--r--configd/src/apps/sentinel/sentinel.cpp4
12 files changed, 187 insertions, 34 deletions
diff --git a/configd/src/apps/sentinel/CMakeLists.txt b/configd/src/apps/sentinel/CMakeLists.txt
index d67a41f2a75..e77abc19077 100644
--- a/configd/src/apps/sentinel/CMakeLists.txt
+++ b/configd/src/apps/sentinel/CMakeLists.txt
@@ -9,6 +9,7 @@ vespa_add_executable(configd_config-sentinel_app
manager.cpp
metrics.cpp
output-connection.cpp
+ outward-check.cpp
peer-check.cpp
rpchooks.cpp
rpcserver.cpp
diff --git a/configd/src/apps/sentinel/env.cpp b/configd/src/apps/sentinel/env.cpp
index 45eea3c6417..e4174ee450d 100644
--- a/configd/src/apps/sentinel/env.cpp
+++ b/configd/src/apps/sentinel/env.cpp
@@ -2,14 +2,18 @@
#include "env.h"
#include "check-completion-handler.h"
+#include "outward-check.h"
+#include <vespa/defaults.h>
#include <vespa/log/log.h>
#include <vespa/config/common/exceptions.h>
#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/util/stringfmt.h>
#include <thread>
#include <chrono>
LOG_SETUP(".env");
+using vespalib::make_string_short::fmt;
using namespace std::chrono_literals;
namespace config::sentinel {
@@ -43,21 +47,7 @@ void Env::boot(const std::string &configId) {
rpcPort(cfg.port.rpc);
statePort(cfg.port.telnet);
if (auto up = ConfigOwner::fetchModelConfig(MODEL_TIMEOUT_MS)) {
- const ModelConfig &model = *up;
- for (const auto & h : model.hosts) {
- LOG(info, "- Model for host %s with %zd services", h.name.c_str(), h.services.size());
- for (const auto & s : h.services) {
- if (s.name == "config-sentinel") {
- LOG(info, " - Model for service %s type %s configid %s with %zd ports",
- s.name.c_str(), s.type.c_str(), s.configid.c_str(), s.ports.size());
- for (const auto & p : s.ports) {
- if (p.tags.find("rpc") != p.tags.npos) {
- LOG(info, " - Model for port %d has tags %s", p.number, p.tags.c_str());
- }
- }
- }
- }
- }
+ waitForConnectivity(*up);
}
}
@@ -79,12 +69,12 @@ void Env::statePort(int port) {
throw vespalib::FatalException("Bad port " + std::to_string(port) + ", expected range [1, 65535]", VESPA_STRLOC);
}
if (port == 0) {
- port = 19098;
+ port = 19098; // default in config
}
if (_stateServer && port == _statePort) {
return; // ok already
}
- LOG(debug, "Config-sentinel accepts connections on port %d", port);
+ LOG(debug, "Config-sentinel accepts state connections on port %d", port);
_stateServer = std::make_unique<vespalib::StateServer>(
port, _stateApi.myHealth, _startMetrics.producer, _stateApi.myComponents);
_statePort = port;
@@ -96,8 +86,67 @@ void Env::notifyConfigUpdated() {
}
-void Env::handleCmd(Cmd::UP cmd) {
- cmd->retError("still booting, not ready for all RPC commands");
+void Env::respondAsEmpty() {
+ auto commands = _rpcCommandQueue.drain();
+ for (Cmd::UP &cmd : commands) {
+ cmd->retError("still booting, not ready for all RPC commands");
+ }
+}
+
+namespace {
+
+const char *toString(CcResult value) {
+ switch (value) {
+ case CcResult::UNKNOWN: return "unknown";
+ case CcResult::CONN_FAIL: return "failed to connect";
+ case CcResult::REVERSE_FAIL: return "connect OK, but reverse check FAILED";
+ case CcResult::REVERSE_UNAVAIL: return "connect OK, but reverse check unavailable";
+ case CcResult::ALL_OK: return "both ways connectivity OK";
+ }
+ LOG(error, "Unknown CcResult enum value: %d", (int)value);
+ LOG_ABORT("Unknown CcResult enum value");
+}
+
+std::map<std::string, std::string> specsFrom(const ModelConfig &model) {
+ std::map<std::string, std::string> checkSpecs;
+ for (const auto & h : model.hosts) {
+ bool foundSentinelPort = false;
+ for (const auto & s : h.services) {
+ if (s.name == "config-sentinel") {
+ for (const auto & p : s.ports) {
+ if (p.tags.find("rpc") != p.tags.npos) {
+ auto spec = fmt("tcp/%s:%d", h.name.c_str(), p.number);
+ checkSpecs[h.name] = spec;
+ foundSentinelPort = true;
+ }
+ }
+ }
+ }
+ if (! foundSentinelPort) {
+ LOG(warning, "Did not find 'config-sentinel' RPC port in model for host %s [%zd services]",
+ h.name.c_str(), h.services.size());
+ }
+ }
+ return checkSpecs;
+}
+
+}
+
+void Env::waitForConnectivity(const ModelConfig &model) {
+ auto checkSpecs = specsFrom(model);
+ OutwardCheckContext checkContext(checkSpecs.size(),
+ vespa::Defaults::vespaHostname(),
+ _rpcServer->getPort(),
+ _rpcServer->orb());
+ std::map<std::string, OutwardCheck> connectivityMap;
+ for (const auto & [ hn, spec ] : checkSpecs) {
+ connectivityMap.try_emplace(hn, spec, checkContext);
+ }
+ checkContext.latch.await();
+ for (const auto & [hostname, check] : connectivityMap) {
+ LOG(info, "outward check status for host %s is: %s",
+ hostname.c_str(), toString(check.result()));
+ }
}
}
diff --git a/configd/src/apps/sentinel/env.h b/configd/src/apps/sentinel/env.h
index 0213fd09460..f117854f006 100644
--- a/configd/src/apps/sentinel/env.h
+++ b/configd/src/apps/sentinel/env.h
@@ -30,8 +30,9 @@ public:
void statePort(int portnum);
void notifyConfigUpdated();
- void handleCmd(Cmd::UP cmd);
private:
+ void respondAsEmpty();
+ void waitForConnectivity(const ModelConfig &model);
ConfigOwner _cfgOwner;
CommandQueue _rpcCommandQueue;
std::unique_ptr<RpcServer> _rpcServer;
diff --git a/configd/src/apps/sentinel/manager.cpp b/configd/src/apps/sentinel/manager.cpp
index 80dd2a3fda8..6e0ed78211c 100644
--- a/configd/src/apps/sentinel/manager.cpp
+++ b/configd/src/apps/sentinel/manager.cpp
@@ -109,7 +109,7 @@ Manager::doConfigure()
}
-int
+bool
Manager::doWork()
{
// Return true if there are any running services, false if not.
diff --git a/configd/src/apps/sentinel/manager.h b/configd/src/apps/sentinel/manager.h
index 9d3d14c6aeb..24bd67cbc49 100644
--- a/configd/src/apps/sentinel/manager.h
+++ b/configd/src/apps/sentinel/manager.h
@@ -55,7 +55,7 @@ public:
Manager(Env &env);
virtual ~Manager();
bool terminate();
- int doWork();
+ bool doWork();
void updateActiveFdset(fd_set *fds, int *maxNum);
};
diff --git a/configd/src/apps/sentinel/outward-check.cpp b/configd/src/apps/sentinel/outward-check.cpp
new file mode 100644
index 00000000000..5fed69d0b6e
--- /dev/null
+++ b/configd/src/apps/sentinel/outward-check.cpp
@@ -0,0 +1,55 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "outward-check.h"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".outward-check");
+
+namespace config::sentinel {
+
+OutwardCheck::OutwardCheck(const std::string &spec, OutwardCheckContext &context)
+ : _spec(spec),
+ _context(context)
+{
+ _target = context.orb.GetTarget(spec.c_str());
+ _req = context.orb.AllocRPCRequest();
+ _req->SetMethodName("sentinel.check.connectivity");
+ _req->GetParams()->AddString(context.myHostname);
+ _req->GetParams()->AddInt32(context.myPortnum);
+ _req->GetParams()->AddInt32(500);
+ _target->InvokeAsync(_req, 1.500, this);
+}
+
+OutwardCheck::~OutwardCheck() = default;
+
+void OutwardCheck::RequestDone(FRT_RPCRequest *req) {
+ LOG_ASSERT(req == _req);
+ if (req->CheckReturnTypes("s")) {
+ std::string answer = _req->GetReturn()->GetValue(0)._string._str;
+ if (answer == "ok") {
+ LOG(debug, "ping to %s with reverse connectivity OK", _spec.c_str());
+ _result = CcResult::ALL_OK;
+ } else {
+ LOG(debug, "connected to %s, but reverse connectivity fails: %s",
+ _spec.c_str(), answer.c_str());
+ _result = CcResult::REVERSE_FAIL;
+ }
+ } else if (req->GetErrorCode() == FRTE_RPC_NO_SUCH_METHOD ||
+ req->GetErrorCode() == FRTE_RPC_WRONG_PARAMS ||
+ req->GetErrorCode() == FRTE_RPC_WRONG_RETURN)
+ {
+ LOG(debug, "Connected OK to %s but no reverse connectivity check available", _spec.c_str());
+ _result = CcResult::REVERSE_UNAVAIL;
+ } else {
+ LOG(debug, "error on request to %s : %s (%d)", _spec.c_str(),
+ req->GetErrorMessage(), req->GetErrorCode());
+ _result = CcResult::CONN_FAIL;
+ }
+ _req->SubRef();
+ _req = nullptr;
+ _target->SubRef();
+ _target = nullptr;
+ _context.latch.countDown();
+}
+
+}
diff --git a/configd/src/apps/sentinel/outward-check.h b/configd/src/apps/sentinel/outward-check.h
new file mode 100644
index 00000000000..01a298aee18
--- /dev/null
+++ b/configd/src/apps/sentinel/outward-check.h
@@ -0,0 +1,46 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <string>
+#include <vespa/vespalib/util/count_down_latch.h>
+#include <vespa/fnet/frt/supervisor.h>
+#include <vespa/fnet/frt/invoker.h>
+#include <vespa/fnet/frt/rpcrequest.h>
+#include <vespa/fnet/frt/supervisor.h>
+#include <vespa/fnet/frt/target.h>
+
+namespace config::sentinel {
+
+struct OutwardCheckContext {
+ vespalib::CountDownLatch latch;
+ const char * myHostname;
+ int myPortnum;
+ FRT_Supervisor &orb;
+ OutwardCheckContext(size_t count,
+ const char * hostname,
+ int portnumber,
+ FRT_Supervisor &supervisor)
+ : latch(count),
+ myHostname(hostname),
+ myPortnum(portnumber),
+ orb(supervisor)
+ {}
+};
+
+enum class CcResult { UNKNOWN, CONN_FAIL, REVERSE_FAIL, REVERSE_UNAVAIL, ALL_OK };
+
+class OutwardCheck : public FRT_IRequestWait {
+private:
+ CcResult _result = CcResult::UNKNOWN;
+ FRT_Target *_target = nullptr;
+ FRT_RPCRequest *_req = nullptr;
+ std::string _spec;
+ OutwardCheckContext &_context;
+public:
+ OutwardCheck(const std::string &spec, OutwardCheckContext &context);
+ virtual ~OutwardCheck();
+ void RequestDone(FRT_RPCRequest *req) override;
+ bool ok() const { return _result == CcResult::ALL_OK; }
+ CcResult result() const { return _result; }
+};
+
+}
diff --git a/configd/src/apps/sentinel/peer-check.cpp b/configd/src/apps/sentinel/peer-check.cpp
index 024f928c994..60c3d9c96c9 100644
--- a/configd/src/apps/sentinel/peer-check.cpp
+++ b/configd/src/apps/sentinel/peer-check.cpp
@@ -10,7 +10,7 @@ using vespalib::make_string_short::fmt;
namespace config::sentinel {
-PeerCheck::PeerCheck(StatusCallback &callback, const std::string &host, int port, FRT_Supervisor &orb)
+PeerCheck::PeerCheck(StatusCallback &callback, const std::string &host, int port, FRT_Supervisor &orb, int timeout_ms)
: _callback(callback),
_hostname(host),
_portnum(port),
@@ -21,7 +21,7 @@ PeerCheck::PeerCheck(StatusCallback &callback, const std::string &host, int port
_target = orb.GetTarget(spec.c_str());
_req = orb.AllocRPCRequest();
_req->SetMethodName("frt.rpc.ping");
- _target->InvokeAsync(_req, 0.500, this);
+ _target->InvokeAsync(_req, timeout_ms * 0.001, this);
}
PeerCheck::~PeerCheck() {
@@ -36,7 +36,7 @@ void PeerCheck::RequestDone(FRT_RPCRequest *req) {
LOG(warning, "error on ping to %s [port %d]: %s (%d)", _hostname.c_str(), _portnum,
req->GetErrorMessage(), req->GetErrorCode());
} else {
- LOG(info, "OK ping to %s [port %d]", _hostname.c_str(), _portnum);
+ LOG(debug, "OK ping to %s [port %d]", _hostname.c_str(), _portnum);
statusOk = true;
}
_req->SubRef();
diff --git a/configd/src/apps/sentinel/peer-check.h b/configd/src/apps/sentinel/peer-check.h
index 658375a8d7b..096f304467b 100644
--- a/configd/src/apps/sentinel/peer-check.h
+++ b/configd/src/apps/sentinel/peer-check.h
@@ -4,7 +4,6 @@
#include "status-callback.h"
#include <string>
-#include <vespa/fnet/task.h>
#include <vespa/fnet/frt/invoker.h>
#include <vespa/fnet/frt/rpcrequest.h>
#include <vespa/fnet/frt/supervisor.h>
@@ -15,7 +14,7 @@ namespace config::sentinel {
class PeerCheck : public FRT_IRequestWait
{
public:
- PeerCheck(StatusCallback &callback, const std::string &host, int portnum, FRT_Supervisor &orb);
+ PeerCheck(StatusCallback &callback, const std::string &host, int portnum, FRT_Supervisor &orb, int timeout_ms);
~PeerCheck();
PeerCheck(const PeerCheck &) = delete;
@@ -25,7 +24,6 @@ public:
/** from FRT_IRequestWait **/
void RequestDone(FRT_RPCRequest *req) override;
-
private:
StatusCallback &_callback;
std::string _hostname;
diff --git a/configd/src/apps/sentinel/rpchooks.cpp b/configd/src/apps/sentinel/rpchooks.cpp
index d364e74154c..24e3cd53509 100644
--- a/configd/src/apps/sentinel/rpchooks.cpp
+++ b/configd/src/apps/sentinel/rpchooks.cpp
@@ -45,11 +45,12 @@ RPCHooks::initRPC(FRT_Supervisor *supervisor)
FRT_METHOD(RPCHooks::rpc_startService), this);
rb.MethodDesc("start a service");
//-------------------------------------------------------------------------
- rb.DefineMethod("sentinel.check.connectivity", "si", "s",
+ rb.DefineMethod("sentinel.check.connectivity", "sii", "s",
FRT_METHOD(RPCHooks::rpc_checkConnectivity), this);
rb.MethodDesc("check connectivity for peer sentinel");
rb.ParamDesc("name", "Hostname of peer sentinel");
rb.ParamDesc("port", "Port number of peer sentinel");
+ rb.ParamDesc("timeout", "Timeout for check in milliseconds");
rb.ReturnDesc("status", "Status (ok, bad, or unknown) for peer");
//-------------------------------------------------------------------------
}
@@ -97,11 +98,12 @@ RPCHooks::rpc_checkConnectivity(FRT_RPCRequest *req)
{
FRT_Values &args = *req->GetParams();
const char *hostname = args[0]._string._str;
- uint32_t portnum = args[1]._intval32;
- LOG(debug, "got checkConnectivity %s [port %d]", hostname, portnum);
+ int portnum = args[1]._intval32;
+ int timeout = args[2]._intval32;
+ LOG(debug, "got checkConnectivity %s [port %d] timeout %d", hostname, portnum, timeout);
req->Detach();
auto & completionHandler = req->getStash().create<CheckCompletionHandler>(req);
- req->getStash().create<PeerCheck>(completionHandler, hostname, portnum, _orb);
+ req->getStash().create<PeerCheck>(completionHandler, hostname, portnum, _orb, timeout);
}
} // namespace slobrok
diff --git a/configd/src/apps/sentinel/rpcserver.h b/configd/src/apps/sentinel/rpcserver.h
index ef4b394fdca..4c6dea00ddf 100644
--- a/configd/src/apps/sentinel/rpcserver.h
+++ b/configd/src/apps/sentinel/rpcserver.h
@@ -22,6 +22,7 @@ public:
~RpcServer();
int getPort() const { return _port; }
+ FRT_Supervisor &orb() { return _server.supervisor(); }
};
} // namespace config::sentinel
diff --git a/configd/src/apps/sentinel/sentinel.cpp b/configd/src/apps/sentinel/sentinel.cpp
index ed0e6c1cfe5..18d4dc28f8a 100644
--- a/configd/src/apps/sentinel/sentinel.cpp
+++ b/configd/src/apps/sentinel/sentinel.cpp
@@ -118,6 +118,6 @@ main(int argc, char **argv)
}
EV_STOPPING("config-sentinel", "normal exit");
- int rv = manager.terminate();
- return rv;
+ bool rv = manager.terminate();
+ return rv ? EXIT_SUCCESS : EXIT_FAILURE;
}