summary | refs | log | tree | commit | diff | stats
path: root/slobrok
diff options
context:
space:
mode:
author Arne Juul <arnej@verizonmedia.com> 2021-06-16 10:31:39 +0000
committer Arne Juul <arnej@verizonmedia.com> 2021-06-16 10:31:39 +0000
commit f373071a994860a3b9cbb0d494bf0bd6dd917c1e (patch)
tree bae36fd6e8246fe807b8dccbe6b3b91c8c3e94ef /slobrok
parent 0f935dcc618eabdd29fa2509050de5e3719ea03b (diff)
avoid starting all RPC connections simultaneously
Diffstat (limited to 'slobrok')
-rw-r--r-- slobrok/src/vespa/slobrok/server/rpc_server_manager.cpp 2
-rw-r--r-- slobrok/src/vespa/slobrok/server/selfcheck.cpp 18
-rw-r--r-- slobrok/src/vespa/slobrok/server/selfcheck.h 2
3 files changed, 12 insertions, 10 deletions
diff --git a/slobrok/src/vespa/slobrok/server/rpc_server_manager.cpp b/slobrok/src/vespa/slobrok/server/rpc_server_manager.cpp
index 2b1e71a0d36..1b373284782 100644
--- a/slobrok/src/vespa/slobrok/server/rpc_server_manager.cpp
+++ b/slobrok/src/vespa/slobrok/server/rpc_server_manager.cpp
@@ -173,9 +173,7 @@ RpcServerManager::addRemote(const std::string & name, const std::string &spec)
}
_rpcsrvmap.removeReservation(name);
auto rpcsrv = std::make_unique<ManagedRpcServer>(name, spec, *this);
- ManagedRpcServer & rpcServer = *rpcsrv;
_rpcsrvmap.addNew(std::move(rpcsrv));
- rpcServer.healthCheck();
return OkState(0, "done");
}
diff --git a/slobrok/src/vespa/slobrok/server/selfcheck.cpp b/slobrok/src/vespa/slobrok/server/selfcheck.cpp
index 7fc639ec028..075db42e0d6 100644
--- a/slobrok/src/vespa/slobrok/server/selfcheck.cpp
+++ b/slobrok/src/vespa/slobrok/server/selfcheck.cpp
@@ -18,7 +18,8 @@ SelfCheck::SelfCheck(FNET_Scheduler *sched,
RpcServerMap& rpcsrvmap,
RpcServerManager& rpcsrvman)
: FNET_Task(sched),
- _rpcsrvmap(rpcsrvmap), _rpcsrvmanager(rpcsrvman)
+ _rpcsrvmap(rpcsrvmap), _rpcsrvmanager(rpcsrvman),
+ _checkIndex(0)
{
// start within 1 second
double seconds = randomIn(0.123, 1.000);
@@ -37,17 +38,18 @@ void
SelfCheck::PerformTask()
{
std::vector<const NamedService *> mrpcsrvlist = _rpcsrvmap.allManaged();
-
- for (size_t i = 0; i < mrpcsrvlist.size(); ++i) {
- const NamedService *r = mrpcsrvlist[i];
+ if (_checkIndex < mrpcsrvlist.size()) {
+ const NamedService *r = mrpcsrvlist[_checkIndex++];
ManagedRpcServer *m = _rpcsrvmap.lookupManaged(r->getName());
- LOG_ASSERT(r == m);
LOG(debug, "managed: %s -> %s", m->getName().c_str(), m->getSpec().c_str());
+ LOG_ASSERT(r == m);
m->healthCheck();
+ } else {
+ _checkIndex = 0;
}
- // reschedule in 1-2 seconds:
- double seconds = randomIn(0.987, 2.000);
- LOG(debug, "selfcheck AGAIN in %g seconds", seconds);
+ // reschedule more often with more services, on average 1s per loop:
+ double seconds = randomIn(0.5, 1.5) / (1 + mrpcsrvlist.size());
+ LOG(debug, "next selfcheck in %g seconds", seconds);
Schedule(seconds);
}
diff --git a/slobrok/src/vespa/slobrok/server/selfcheck.h b/slobrok/src/vespa/slobrok/server/selfcheck.h
index 52eced4700b..9bb96808666 100644
--- a/slobrok/src/vespa/slobrok/server/selfcheck.h
+++ b/slobrok/src/vespa/slobrok/server/selfcheck.h
@@ -2,6 +2,7 @@
#pragma once
#include <vespa/fnet/task.h>
+#include <vector>
namespace slobrok {
@@ -22,6 +23,7 @@ class SelfCheck : public FNET_Task
private:
RpcServerMap &_rpcsrvmap;
RpcServerManager &_rpcsrvmanager;
+ size_t _checkIndex;
SelfCheck(const SelfCheck &); // Not used
SelfCheck &operator=(const SelfCheck &); // Not used