aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArne H Juul <arnej27959@users.noreply.github.com>2021-08-14 07:25:37 +0200
committerGitHub <noreply@github.com>2021-08-14 07:25:37 +0200
commita97c478777f7c254d772df406b2afa0b6735261c (patch)
tree6140f6e503be843409e37723e020cde815635021
parent53aab177fa20491d6be000e6fac1a86a52cf684b (diff)
parentedc9a9eba2ec607fc154b75c50a2723a9e35e5f8 (diff)
Merge pull request #18740 from vespa-engine/revert-18739-arnej/remote-slobrok-self-checks
Revert "let RemoteSlobrok do its own initial health check"
-rw-r--r--slobrok/src/vespa/slobrok/server/remote_slobrok.cpp116
-rw-r--r--slobrok/src/vespa/slobrok/server/remote_slobrok.h18
2 files changed, 61 insertions, 73 deletions
diff --git a/slobrok/src/vespa/slobrok/server/remote_slobrok.cpp b/slobrok/src/vespa/slobrok/server/remote_slobrok.cpp
index 7b24885a4f1..78ca25d4723 100644
--- a/slobrok/src/vespa/slobrok/server/remote_slobrok.cpp
+++ b/slobrok/src/vespa/slobrok/server/remote_slobrok.cpp
@@ -15,13 +15,12 @@ namespace slobrok {
//-----------------------------------------------------------------------------
-RemoteSlobrok::RemoteSlobrok(const vespalib::string &name, const vespalib::string &spec,
+RemoteSlobrok::RemoteSlobrok(const std::string &name, const std::string &spec,
ExchangeManager &manager)
- : _name(name),
- _spec(spec),
- _exchanger(manager),
+ : _exchanger(manager),
_rpcsrvmanager(manager._rpcsrvmanager),
_remote(nullptr),
+ _rpcserver(name, spec, *this),
_reconnecter(getSupervisor()->GetScheduler(), *this),
_failCnt(0),
_remAddPeerReq(nullptr),
@@ -29,14 +28,13 @@ RemoteSlobrok::RemoteSlobrok(const vespalib::string &name, const vespalib::strin
_remAddReq(nullptr),
_remRemReq(nullptr),
_remFetchReq(nullptr),
- _checkServerReq(nullptr),
_pending()
{
- tryConnect();
+ _rpcserver.healthCheck();
}
void RemoteSlobrok::shutdown() {
- _reconnecter.Kill();
+ _reconnecter.disable();
_pending.clear();
@@ -45,9 +43,6 @@ void RemoteSlobrok::shutdown() {
_remote = nullptr;
}
- if (_checkServerReq != nullptr) {
- _checkServerReq->Abort();
- }
if (_remFetchReq != nullptr) {
_remFetchReq->Abort();
}
@@ -68,6 +63,7 @@ void RemoteSlobrok::shutdown() {
RemoteSlobrok::~RemoteSlobrok() {
shutdown();
+ // _rpcserver destructor called automatically
}
void
@@ -187,10 +183,6 @@ RemoteSlobrok::pushMine()
void
RemoteSlobrok::RequestDone(FRT_RPCRequest *req)
{
- if (req == _checkServerReq) {
- handleCheckServerResult();
- return;
- }
if (req == _remFetchReq) {
handleFetchResult();
return;
@@ -300,9 +292,23 @@ RemoteSlobrok::RequestDone(FRT_RPCRequest *req)
void
+RemoteSlobrok::notifyFailedRpcSrv(ManagedRpcServer *rpcsrv, std::string errmsg)
+{
+ if (++_failCnt > 10) {
+ LOG(warning, "remote location broker at %s failed: %s",
+ rpcsrv->getSpec().c_str(), errmsg.c_str());
+ } else {
+ LOG(debug, "remote location broker at %s failed: %s",
+ rpcsrv->getSpec().c_str(), errmsg.c_str());
+ }
+ LOG_ASSERT(rpcsrv == &_rpcserver);
+ fail();
+}
+
+
+void
RemoteSlobrok::fail()
{
- LOG_ASSERT(_checkServerReq == nullptr);
// disconnect
if (_remote != nullptr) {
_remote->SubRef();
@@ -330,60 +336,42 @@ RemoteSlobrok::maybePushMine()
}
}
+
void
-RemoteSlobrok::tryConnect()
+RemoteSlobrok::notifyOkRpcSrv(ManagedRpcServer *rpcsrv)
{
- if (_remote == nullptr) {
- _remote = getSupervisor()->GetTarget(getSpec().c_str());
- LOG_ASSERT(_checkServerReq == nullptr);
- _checkServerReq = getSupervisor()->AllocRPCRequest();
- _checkServerReq->SetMethodName("slobrok.callback.listNamesServed");
- _remote->InvokeAsync(_checkServerReq, 25.0, this);
- }
-}
+ LOG_ASSERT(rpcsrv == &_rpcserver);
+ (void) rpcsrv;
-void RemoteSlobrok::handleCheckServerResult() {
- LOG_ASSERT(_checkServerReq != nullptr);
- auto & req = *_checkServerReq;
- _checkServerReq = nullptr;
- if (req.GetErrorCode() == FRTE_RPC_ABORT) {
- LOG(debug, "slobrok[%s].check aborted", _name.c_str());
- req.SubRef();
- return;
- }
- bool isOk = false;
- if (req.CheckReturnTypes("S")) {
- const FRT_Values &answer = *req.GetReturn();
- const FRT_StringValue *strings = answer[0]._string_array._pt;
- for (uint32_t i = 0; i < answer[0]._string_array._len; ++i) {
- if (strcmp(strings[i]._str, _name.c_str()) == 0) {
- isOk = true;
- }
- }
- }
- req.SubRef();
- if (isOk) {
- LOG_ASSERT(_remote != nullptr);
- // connection was OK, so disable any pending reconnect
- _reconnecter.disable();
+ // connection was OK, so disable any pending reconnect
+ _reconnecter.disable();
+
+ if (_remote != nullptr) {
maybeStartFetch();
- // at this point, we will do (in sequence):
- // ask peer to connect to us too;
- // ask peer for its list of managed rpcservers, adding to our database
- // add our managed rpcserver on peer
- // any failure will cause disconnect and retry.
- _remAddPeerReq = getSupervisor()->AllocRPCRequest();
- _remAddPeerReq->SetMethodName("slobrok.admin.addPeer");
- _remAddPeerReq->GetParams()->AddString(_exchanger._env.mySpec().c_str());
- _remAddPeerReq->GetParams()->AddString(_exchanger._env.mySpec().c_str());
- _remote->InvokeAsync(_remAddPeerReq, 3.0, this);
- // when _remAddPeerReq is returned, our managed list is added via doAdd()
- } else {
- if (++_failCnt > 10) {
- LOG(warning, "remote location broker at %s failed", _spec.c_str());
- }
- fail();
+ // the rest here should only be done on first notifyOk
+ return;
}
+ _remote = getSupervisor()->GetTarget(getSpec().c_str());
+ maybeStartFetch();
+
+ // at this point, we will do (in sequence):
+ // ask peer to connect to us too;
+ // ask peer for its list of managed rpcservers, adding to our database
+ // add our managed rpcserver on peer
+ // any failure will cause disconnect and retry.
+
+ _remAddPeerReq = getSupervisor()->AllocRPCRequest();
+ _remAddPeerReq->SetMethodName("slobrok.admin.addPeer");
+ _remAddPeerReq->GetParams()->AddString(_exchanger._env.mySpec().c_str());
+ _remAddPeerReq->GetParams()->AddString(_exchanger._env.mySpec().c_str());
+ _remote->InvokeAsync(_remAddPeerReq, 3.0, this);
+ // when _remAddPeerReq is returned, our managed list is added via doAdd()
+}
+
+void
+RemoteSlobrok::tryConnect()
+{
+ _rpcserver.healthCheck();
}
FRT_Supervisor *
diff --git a/slobrok/src/vespa/slobrok/server/remote_slobrok.h b/slobrok/src/vespa/slobrok/server/remote_slobrok.h
index 401a2b4ab39..e463ac6be21 100644
--- a/slobrok/src/vespa/slobrok/server/remote_slobrok.h
+++ b/slobrok/src/vespa/slobrok/server/remote_slobrok.h
@@ -23,7 +23,8 @@ class ExchangeManager;
*
* Handles one single partner slobrok
**/
-class RemoteSlobrok: public FRT_IRequestWait
+class RemoteSlobrok: public IRpcServerManager,
+ public FRT_IRequestWait
{
private:
class Reconnecter : public FNET_Task
@@ -41,12 +42,11 @@ private:
void PerformTask() override;
};
- vespalib::string _name;
- vespalib::string _spec;
ExchangeManager &_exchanger;
RpcServerManager &_rpcsrvmanager;
FRT_Target *_remote;
ServiceMapMirror _serviceMapMirror;
+ ManagedRpcServer _rpcserver;
Reconnecter _reconnecter;
int _failCnt;
@@ -55,18 +55,16 @@ private:
FRT_RPCRequest *_remAddReq;
FRT_RPCRequest *_remRemReq;
FRT_RPCRequest *_remFetchReq;
- FRT_RPCRequest *_checkServerReq;
std::deque<std::unique_ptr<NamedService>> _pending;
void pushMine();
void doPending();
- void handleCheckServerResult();
void handleFetchResult();
public:
RemoteSlobrok(const RemoteSlobrok&) = delete;
RemoteSlobrok& operator= (const RemoteSlobrok&) = delete;
- RemoteSlobrok(const vespalib::string &name, const vespalib::string &spec, ExchangeManager &manager);
+ RemoteSlobrok(const std::string &name, const std::string &spec, ExchangeManager &manager);
~RemoteSlobrok() override;
void fail();
@@ -75,14 +73,16 @@ public:
void maybePushMine();
void maybeStartFetch();
void invokeAsync(FRT_RPCRequest *req, double timeout, FRT_IRequestWait *rwaiter);
- const vespalib::string & getName() const { return _name; }
- const vespalib::string & getSpec() const { return _spec; }
+ const std::string & getName() const { return _rpcserver.getName(); }
+ const std::string & getSpec() const { return _rpcserver.getSpec(); }
ServiceMapMirror &remoteMap() { return _serviceMapMirror; }
void shutdown();
- FRT_Supervisor *getSupervisor();
// interfaces implemented:
+ void notifyFailedRpcSrv(ManagedRpcServer *rpcsrv, std::string errmsg) override;
+ void notifyOkRpcSrv(ManagedRpcServer *rpcsrv) override;
void RequestDone(FRT_RPCRequest *req) override;
+ FRT_Supervisor *getSupervisor() override;
};
//-----------------------------------------------------------------------------