From ab9703f0f3ef1101fb5556aba4fb4704b0ac2753 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Fri, 18 Feb 2022 10:36:19 +0000 Subject: Avoid potential MessageBus<->FNET mutex order inversion Destruct targets outside lock to prevent the following potential deadlock: 1. flushTargets (pool lock) -> FNET transport thread event (transport thread lock) 2. FNET CheckTasks (transport thread lock) -> periodic flushTargets task run -> flushTargets (pool lock) Also explicitly unschedule any target pool task on network shutdown --- .../src/vespa/messagebus/network/rpcnetwork.cpp | 2 ++ .../src/vespa/messagebus/network/rpctargetpool.cpp | 21 ++++++++++++++------- 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'messagebus/src') diff --git a/messagebus/src/vespa/messagebus/network/rpcnetwork.cpp b/messagebus/src/vespa/messagebus/network/rpcnetwork.cpp index ed2ce3d638e..21dd3969462 100644 --- a/messagebus/src/vespa/messagebus/network/rpcnetwork.cpp +++ b/messagebus/src/vespa/messagebus/network/rpcnetwork.cpp @@ -423,6 +423,8 @@ RPCNetwork::sync() void RPCNetwork::shutdown() { + // Unschedule any pending target pool flush task that may race with shutdown target flushing + _scheduler.Unschedule(_targetPoolTask.get()); _transport->ShutDown(true); _threadPool->Close(); _executor->shutdown().sync(); diff --git a/messagebus/src/vespa/messagebus/network/rpctargetpool.cpp b/messagebus/src/vespa/messagebus/network/rpctargetpool.cpp index 44e6890415a..1c7dd4dd453 100644 --- a/messagebus/src/vespa/messagebus/network/rpctargetpool.cpp +++ b/messagebus/src/vespa/messagebus/network/rpctargetpool.cpp @@ -54,14 +54,21 @@ void RPCTargetPool::flushTargets(bool force) { uint64_t currentTime = _timer->getMilliTime(); + // Erase RPC targets outside our lock to prevent the following mutex order inversion potential: + // flushTargets (pool lock) -> FNET transport thread post event (transport thread lock) + // FNET CheckTasks (transport thread lock) -> periodic flushTargets task run -> flushTargets (pool lock) + std::vector to_erase_on_scope_exit; LockGuard guard(_lock); - TargetMap::iterator it = _targets.begin(); - while (it != _targets.end()) { - const Entry &entry = it->second; - if ( ! entry.inUse(guard) && (force || ((entry.lastUse() + _expireMillis) < currentTime))) { - _targets.erase(it++); // postfix increment to move the iterator - } else { - ++it; + { + auto it = _targets.begin(); + while (it != _targets.end()) { + const Entry& entry = it->second; + if (!entry.inUse(guard) && (force || ((entry.lastUse() + _expireMillis) < currentTime))) { + to_erase_on_scope_exit.emplace_back(std::move(it->second)); + _targets.erase(it++); // postfix increment to move the iterator + } else { + ++it; + } } } } -- cgit v1.2.3