summaryrefslogtreecommitdiffstats
path: root/messagebus
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahooinc.com>2022-02-18 10:36:19 +0000
committerTor Brede Vekterli <vekterli@yahooinc.com>2022-02-18 15:38:33 +0000
commitab9703f0f3ef1101fb5556aba4fb4704b0ac2753 (patch)
tree1d6577c571a2d4fd08e837783105dac957b5621b /messagebus
parent3cb027ecb236355a78429e8ed99d62aa0889d7f4 (diff)
Avoid potential MessageBus<->FNET mutex order inversion
Destruct targets outside lock to prevent the following potential deadlock: 1. flushTargets (pool lock) -> FNET transport thread event (transport thread lock) 2. FNET CheckTasks (transport thread lock) -> periodic flushTargets task run -> flushTargets (pool lock) Also explicitly unschedule any target pool task on network shutdown
Diffstat (limited to 'messagebus')
-rw-r--r--messagebus/src/vespa/messagebus/network/rpcnetwork.cpp2
-rw-r--r--messagebus/src/vespa/messagebus/network/rpctargetpool.cpp21
2 files changed, 16 insertions, 7 deletions
diff --git a/messagebus/src/vespa/messagebus/network/rpcnetwork.cpp b/messagebus/src/vespa/messagebus/network/rpcnetwork.cpp
index ed2ce3d638e..21dd3969462 100644
--- a/messagebus/src/vespa/messagebus/network/rpcnetwork.cpp
+++ b/messagebus/src/vespa/messagebus/network/rpcnetwork.cpp
@@ -423,6 +423,8 @@ RPCNetwork::sync()
void
RPCNetwork::shutdown()
{
+ // Unschedule any pending target pool flush task that may race with shutdown target flushing
+ _scheduler.Unschedule(_targetPoolTask.get());
_transport->ShutDown(true);
_threadPool->Close();
_executor->shutdown().sync();
diff --git a/messagebus/src/vespa/messagebus/network/rpctargetpool.cpp b/messagebus/src/vespa/messagebus/network/rpctargetpool.cpp
index 44e6890415a..1c7dd4dd453 100644
--- a/messagebus/src/vespa/messagebus/network/rpctargetpool.cpp
+++ b/messagebus/src/vespa/messagebus/network/rpctargetpool.cpp
@@ -54,14 +54,21 @@ void
RPCTargetPool::flushTargets(bool force)
{
uint64_t currentTime = _timer->getMilliTime();
+ // Erase RPC targets outside our lock to prevent the following mutex order inversion potential:
+ // flushTargets (pool lock) -> FNET transport thread post event (transport thread lock)
+ // FNET CheckTasks (transport thread lock) -> periodic flushTargets task run -> flushTargets (pool lock)
+ std::vector<Entry> to_erase_on_scope_exit;
LockGuard guard(_lock);
- TargetMap::iterator it = _targets.begin();
- while (it != _targets.end()) {
- const Entry &entry = it->second;
- if ( ! entry.inUse(guard) && (force || ((entry.lastUse() + _expireMillis) < currentTime))) {
- _targets.erase(it++); // postfix increment to move the iterator
- } else {
- ++it;
+ {
+ auto it = _targets.begin();
+ while (it != _targets.end()) {
+ const Entry& entry = it->second;
+ if (!entry.inUse(guard) && (force || ((entry.lastUse() + _expireMillis) < currentTime))) {
+ to_erase_on_scope_exit.emplace_back(std::move(it->second));
+ _targets.erase(it++); // postfix increment to move the iterator
+ } else {
+ ++it;
+ }
}
}
}