diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2021-08-14 07:25:37 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-08-14 07:25:37 +0200 |
commit | a97c478777f7c254d772df406b2afa0b6735261c (patch) | |
tree | 6140f6e503be843409e37723e020cde815635021 | |
parent | 53aab177fa20491d6be000e6fac1a86a52cf684b (diff) | |
parent | edc9a9eba2ec607fc154b75c50a2723a9e35e5f8 (diff) |
Merge pull request #18740 from vespa-engine/revert-18739-arnej/remote-slobrok-self-checks
Revert "let RemoteSlobrok do its own initial health check"
-rw-r--r-- | slobrok/src/vespa/slobrok/server/remote_slobrok.cpp | 116 | ||||
-rw-r--r-- | slobrok/src/vespa/slobrok/server/remote_slobrok.h | 18 |
2 files changed, 61 insertions, 73 deletions
diff --git a/slobrok/src/vespa/slobrok/server/remote_slobrok.cpp b/slobrok/src/vespa/slobrok/server/remote_slobrok.cpp index 7b24885a4f1..78ca25d4723 100644 --- a/slobrok/src/vespa/slobrok/server/remote_slobrok.cpp +++ b/slobrok/src/vespa/slobrok/server/remote_slobrok.cpp @@ -15,13 +15,12 @@ namespace slobrok { //----------------------------------------------------------------------------- -RemoteSlobrok::RemoteSlobrok(const vespalib::string &name, const vespalib::string &spec, +RemoteSlobrok::RemoteSlobrok(const std::string &name, const std::string &spec, ExchangeManager &manager) - : _name(name), - _spec(spec), - _exchanger(manager), + : _exchanger(manager), _rpcsrvmanager(manager._rpcsrvmanager), _remote(nullptr), + _rpcserver(name, spec, *this), _reconnecter(getSupervisor()->GetScheduler(), *this), _failCnt(0), _remAddPeerReq(nullptr), @@ -29,14 +28,13 @@ RemoteSlobrok::RemoteSlobrok(const vespalib::string &name, const vespalib::strin _remAddReq(nullptr), _remRemReq(nullptr), _remFetchReq(nullptr), - _checkServerReq(nullptr), _pending() { - tryConnect(); + _rpcserver.healthCheck(); } void RemoteSlobrok::shutdown() { - _reconnecter.Kill(); + _reconnecter.disable(); _pending.clear(); @@ -45,9 +43,6 @@ void RemoteSlobrok::shutdown() { _remote = nullptr; } - if (_checkServerReq != nullptr) { - _checkServerReq->Abort(); - } if (_remFetchReq != nullptr) { _remFetchReq->Abort(); } @@ -68,6 +63,7 @@ void RemoteSlobrok::shutdown() { RemoteSlobrok::~RemoteSlobrok() { shutdown(); + // _rpcserver destructor called automatically } void @@ -187,10 +183,6 @@ RemoteSlobrok::pushMine() void RemoteSlobrok::RequestDone(FRT_RPCRequest *req) { - if (req == _checkServerReq) { - handleCheckServerResult(); - return; - } if (req == _remFetchReq) { handleFetchResult(); return; @@ -300,9 +292,23 @@ RemoteSlobrok::RequestDone(FRT_RPCRequest *req) void +RemoteSlobrok::notifyFailedRpcSrv(ManagedRpcServer *rpcsrv, std::string errmsg) +{ + if (++_failCnt > 10) { + LOG(warning, "remote location broker at %s failed: %s", + rpcsrv->getSpec().c_str(), errmsg.c_str()); + } else { + LOG(debug, "remote location broker at %s failed: %s", + rpcsrv->getSpec().c_str(), errmsg.c_str()); + } + LOG_ASSERT(rpcsrv == &_rpcserver); + fail(); +} + + +void RemoteSlobrok::fail() { - LOG_ASSERT(_checkServerReq == nullptr); // disconnect if (_remote != nullptr) { _remote->SubRef(); @@ -330,60 +336,42 @@ RemoteSlobrok::maybePushMine() } } + void -RemoteSlobrok::tryConnect() +RemoteSlobrok::notifyOkRpcSrv(ManagedRpcServer *rpcsrv) { - if (_remote == nullptr) { - _remote = getSupervisor()->GetTarget(getSpec().c_str()); - LOG_ASSERT(_checkServerReq == nullptr); - _checkServerReq = getSupervisor()->AllocRPCRequest(); - _checkServerReq->SetMethodName("slobrok.callback.listNamesServed"); - _remote->InvokeAsync(_checkServerReq, 25.0, this); - } -} + LOG_ASSERT(rpcsrv == &_rpcserver); + (void) rpcsrv; -void RemoteSlobrok::handleCheckServerResult() { - LOG_ASSERT(_checkServerReq != nullptr); - auto & req = *_checkServerReq; - _checkServerReq = nullptr; - if (req.GetErrorCode() == FRTE_RPC_ABORT) { - LOG(debug, "slobrok[%s].check aborted", _name.c_str()); - req.SubRef(); - return; - } - bool isOk = false; - if (req.CheckReturnTypes("S")) { - const FRT_Values &answer = *req.GetReturn(); - const FRT_StringValue *strings = answer[0]._string_array._pt; - for (uint32_t i = 0; i < answer[0]._string_array._len; ++i) { - if (strcmp(strings[i]._str, _name.c_str()) == 0) { - isOk = true; - } - } - } - req.SubRef(); - if (isOk) { - LOG_ASSERT(_remote != nullptr); - // connection was OK, so disable any pending reconnect - _reconnecter.disable(); + // connection was OK, so disable any pending reconnect + _reconnecter.disable(); + + if (_remote != nullptr) { maybeStartFetch(); - // at this point, we will do (in sequence): - // ask peer to connect to us too; - // ask peer for its list of managed rpcservers, adding to our database - // add our managed rpcserver on peer - // any failure will cause disconnect and retry. - _remAddPeerReq = getSupervisor()->AllocRPCRequest(); - _remAddPeerReq->SetMethodName("slobrok.admin.addPeer"); - _remAddPeerReq->GetParams()->AddString(_exchanger._env.mySpec().c_str()); - _remAddPeerReq->GetParams()->AddString(_exchanger._env.mySpec().c_str()); - _remote->InvokeAsync(_remAddPeerReq, 3.0, this); - // when _remAddPeerReq is returned, our managed list is added via doAdd() - } else { - if (++_failCnt > 10) { - LOG(warning, "remote location broker at %s failed", _spec.c_str()); - } - fail(); + // the rest here should only be done on first notifyOk + return; } + _remote = getSupervisor()->GetTarget(getSpec().c_str()); + maybeStartFetch(); + + // at this point, we will do (in sequence): + // ask peer to connect to us too; + // ask peer for its list of managed rpcservers, adding to our database + // add our managed rpcserver on peer + // any failure will cause disconnect and retry. + + _remAddPeerReq = getSupervisor()->AllocRPCRequest(); + _remAddPeerReq->SetMethodName("slobrok.admin.addPeer"); + _remAddPeerReq->GetParams()->AddString(_exchanger._env.mySpec().c_str()); + _remAddPeerReq->GetParams()->AddString(_exchanger._env.mySpec().c_str()); + _remote->InvokeAsync(_remAddPeerReq, 3.0, this); + // when _remAddPeerReq is returned, our managed list is added via doAdd() +} + +void +RemoteSlobrok::tryConnect() +{ + _rpcserver.healthCheck(); } FRT_Supervisor * diff --git a/slobrok/src/vespa/slobrok/server/remote_slobrok.h b/slobrok/src/vespa/slobrok/server/remote_slobrok.h index 401a2b4ab39..e463ac6be21 100644 --- a/slobrok/src/vespa/slobrok/server/remote_slobrok.h +++ b/slobrok/src/vespa/slobrok/server/remote_slobrok.h @@ -23,7 +23,8 @@ class ExchangeManager; * * Handles one single partner slobrok **/ -class RemoteSlobrok: public FRT_IRequestWait +class RemoteSlobrok: public IRpcServerManager, + public FRT_IRequestWait { private: class Reconnecter : public FNET_Task @@ -41,12 +42,11 @@ private: void PerformTask() override; }; - vespalib::string _name; - vespalib::string _spec; ExchangeManager &_exchanger; RpcServerManager &_rpcsrvmanager; FRT_Target *_remote; ServiceMapMirror _serviceMapMirror; + ManagedRpcServer _rpcserver; Reconnecter _reconnecter; int _failCnt; @@ -55,18 +55,16 @@ private: FRT_RPCRequest *_remAddReq; FRT_RPCRequest *_remRemReq; FRT_RPCRequest *_remFetchReq; - FRT_RPCRequest *_checkServerReq; std::deque<std::unique_ptr<NamedService>> _pending; void pushMine(); void doPending(); - void handleCheckServerResult(); void handleFetchResult(); public: RemoteSlobrok(const RemoteSlobrok&) = delete; RemoteSlobrok& operator= (const RemoteSlobrok&) = delete; - RemoteSlobrok(const vespalib::string &name, const vespalib::string &spec, ExchangeManager &manager); + RemoteSlobrok(const std::string &name, const std::string &spec, ExchangeManager &manager); ~RemoteSlobrok() override; void fail(); @@ -75,14 +73,16 @@ public: void maybePushMine(); void maybeStartFetch(); void invokeAsync(FRT_RPCRequest *req, double timeout, FRT_IRequestWait *rwaiter); - const vespalib::string & getName() const { return _name; } - const vespalib::string & getSpec() const { return _spec; } + const std::string & getName() const { return _rpcserver.getName(); } + const std::string & getSpec() const { return _rpcserver.getSpec(); } ServiceMapMirror &remoteMap() { return _serviceMapMirror; } void shutdown(); - FRT_Supervisor *getSupervisor(); // interfaces implemented: + void notifyFailedRpcSrv(ManagedRpcServer *rpcsrv, std::string errmsg) override; + void notifyOkRpcSrv(ManagedRpcServer *rpcsrv) override; void RequestDone(FRT_RPCRequest *req) override; + FRT_Supervisor *getSupervisor() override; }; //----------------------------------------------------------------------------- |