diff options
author | Tor Brede Vekterli <vekterli@yahooinc.com> | 2022-02-18 10:36:19 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@yahooinc.com> | 2022-02-18 15:38:33 +0000 |
commit | ab9703f0f3ef1101fb5556aba4fb4704b0ac2753 (patch) | |
tree | 1d6577c571a2d4fd08e837783105dac957b5621b /messagebus/src | |
parent | 3cb027ecb236355a78429e8ed99d62aa0889d7f4 (diff) |
Avoid potential MessageBus<->FNET mutex order inversion
Destruct targets outside lock to prevent the following potential deadlock:
1. flushTargets (pool lock) -> FNET transport thread event (transport thread lock)
2. FNET CheckTasks (transport thread lock) -> periodic flushTargets task run -> flushTargets (pool lock)
Also explicitly unschedule any target pool task on network shutdown
Diffstat (limited to 'messagebus/src')
-rw-r--r-- | messagebus/src/vespa/messagebus/network/rpcnetwork.cpp | 2 | ||||
-rw-r--r-- | messagebus/src/vespa/messagebus/network/rpctargetpool.cpp | 21 |
2 files changed, 16 insertions, 7 deletions
diff --git a/messagebus/src/vespa/messagebus/network/rpcnetwork.cpp b/messagebus/src/vespa/messagebus/network/rpcnetwork.cpp index ed2ce3d638e..21dd3969462 100644 --- a/messagebus/src/vespa/messagebus/network/rpcnetwork.cpp +++ b/messagebus/src/vespa/messagebus/network/rpcnetwork.cpp @@ -423,6 +423,8 @@ RPCNetwork::sync() void RPCNetwork::shutdown() { + // Unschedule any pending target pool flush task that may race with shutdown target flushing + _scheduler.Unschedule(_targetPoolTask.get()); _transport->ShutDown(true); _threadPool->Close(); _executor->shutdown().sync(); diff --git a/messagebus/src/vespa/messagebus/network/rpctargetpool.cpp b/messagebus/src/vespa/messagebus/network/rpctargetpool.cpp index 44e6890415a..1c7dd4dd453 100644 --- a/messagebus/src/vespa/messagebus/network/rpctargetpool.cpp +++ b/messagebus/src/vespa/messagebus/network/rpctargetpool.cpp @@ -54,14 +54,21 @@ void RPCTargetPool::flushTargets(bool force) { uint64_t currentTime = _timer->getMilliTime(); + // Erase RPC targets outside our lock to prevent the following mutex order inversion potential: + // flushTargets (pool lock) -> FNET transport thread post event (transport thread lock) + // FNET CheckTasks (transport thread lock) -> periodic flushTargets task run -> flushTargets (pool lock) + std::vector<Entry> to_erase_on_scope_exit; LockGuard guard(_lock); - TargetMap::iterator it = _targets.begin(); - while (it != _targets.end()) { - const Entry &entry = it->second; - if ( ! entry.inUse(guard) && (force || ((entry.lastUse() + _expireMillis) < currentTime))) { - _targets.erase(it++); // postfix increment to move the iterator - } else { - ++it; + { + auto it = _targets.begin(); + while (it != _targets.end()) { + const Entry& entry = it->second; + if (!entry.inUse(guard) && (force || ((entry.lastUse() + _expireMillis) < currentTime))) { + to_erase_on_scope_exit.emplace_back(std::move(it->second)); + _targets.erase(it++); // postfix increment to move the iterator + } else { + ++it; + } } } } |