diff options
author | HÃ¥kon Hallingstad <hakon@verizonmedia.com> | 2020-03-07 17:00:19 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-03-07 17:00:19 +0100 |
commit | 3eb6aa9161ca016d8db3914970054d5519052c45 (patch) | |
tree | 49ffc47b8092dc81508da8b40ec7ff88ac64b275 /service-monitor | |
parent | 71cf26a0abe0b62b9d3038c86351e69a3dee640e (diff) | |
parent | 92a2c10c684ee128b25e2128433451db438fc14a (diff) |
Merge pull request #12496 from vespa-engine/hakonhall/throw-if-accessing-duper-model-while-holding-status-service-application-lock
Throw if accessing duper model while holding status service application lock
Diffstat (limited to 'service-monitor')
5 files changed, 176 insertions, 31 deletions
diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/CriticalRegionChecker.java b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/CriticalRegionChecker.java new file mode 100644 index 00000000000..238b70a7310 --- /dev/null +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/CriticalRegionChecker.java @@ -0,0 +1,63 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.service.duper; + +import com.yahoo.vespa.service.monitor.CriticalRegion; + +import java.util.ArrayList; +import java.util.List; + +/** + * To detect and throw an {@link IllegalStateException} if the execution of the current thread has + * reached code point P, some time after the start of a critical region R and before the end of it: + * + * <ol> + * <li>Declare a static final instance of {@link CriticalRegionChecker}.</li> + * <li>Invoke {@link #startCriticalRegion(String)} when entering region R, and close + * the returned {@link CriticalRegion} when leaving it.</li> + * <li>Invoke {@link #assertOutsideCriticalRegions(String)} at code point P.</li> + * </ol> + * + * @author hakonhall + */ +public class CriticalRegionChecker { + + private final ThreadLocalDescriptions threadLocalDescriptions = new ThreadLocalDescriptions(); + private final String name; + + public CriticalRegionChecker(String name) { + this.name = name; + } + + /** Start of the critical region, within which {@link #assertOutsideCriticalRegions(String)} will throw. */ + public CriticalRegion startCriticalRegion(String regionDescription) { + List<String> regionDescriptions = threadLocalDescriptions.get(); + regionDescriptions.add(regionDescription); + Thread threadAtStart = Thread.currentThread(); + + return () -> { + regionDescriptions.remove(regionDescription); + + Thread treadAtClose = Thread.currentThread(); + if (threadAtStart != treadAtClose) { + throw new IllegalStateException(name + ": A critical region cannot cross threads: " + + regionDescription); + } + }; + } + + /** @throws IllegalStateException if within one or more critical regions. */ + public void assertOutsideCriticalRegions(String codePointDescription) throws IllegalStateException { + List<String> regionDescriptions = threadLocalDescriptions.get(); + if (regionDescriptions.size() > 0) { + throw new IllegalStateException(name + ": Code point " + codePointDescription + + " is within these critical regions: " + regionDescriptions); + } + } + + private static class ThreadLocalDescriptions extends ThreadLocal<List<String>> { + @Override + protected List<String> initialValue() { + return new ArrayList<>(); + } + } +} diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/DuperModelManager.java b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/DuperModelManager.java index baada59dc65..cb1c6d2d95f 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/DuperModelManager.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/DuperModelManager.java @@ -12,6 +12,7 @@ import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.SystemName; import com.yahoo.log.LogLevel; import com.yahoo.vespa.flags.FlagSource; +import com.yahoo.vespa.service.monitor.CriticalRegion; import com.yahoo.vespa.service.monitor.DuperModelInfraApi; import com.yahoo.vespa.service.monitor.DuperModelListener; import com.yahoo.vespa.service.monitor.DuperModelProvider; @@ -23,8 +24,10 @@ import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.locks.ReentrantLock; import java.util.function.Function; import java.util.logging.Logger; +import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -46,7 +49,10 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi private final Map<ApplicationId, InfraApplication> supportedInfraApplications; - private final Object monitor = new Object(); + private static CriticalRegionChecker disallowedDuperModeLockAcquisitionRegions = + new CriticalRegionChecker("duper model deadlock detection"); + + private final ReentrantLock lock = new ReentrantLock(true); private final DuperModel duperModel; // The set of active infrastructure ApplicationInfo. Not all are necessarily in the DuperModel for historical reasons. private final Set<ApplicationId> activeInfraInfos = new HashSet<>(10); @@ -83,27 +89,23 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi superModelProvider.registerListener(new SuperModelListener() { @Override public void applicationActivated(SuperModel superModel, ApplicationInfo application) { - synchronized (monitor) { - duperModel.add(application); - } + lockedRunnable(() -> duperModel.add(application)); } @Override public void applicationRemoved(SuperModel superModel, ApplicationId applicationId) { - synchronized (monitor) { - duperModel.remove(applicationId); - } + lockedRunnable(() -> duperModel.remove(applicationId)); } @Override public void notifyOfCompleteness(SuperModel superModel) { - synchronized (monitor) { + lockedRunnable(() -> { if (!superModelIsComplete) { superModelIsComplete = true; logger.log(LogLevel.INFO, "All bootstrap tenant applications have been activated"); maybeSetDuperModelAsComplete(); } - } + }); } }); } @@ -114,9 +116,7 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi */ @Override public void registerListener(DuperModelListener listener) { - synchronized (monitor) { - duperModel.registerListener(listener); - } + lockedRunnable(() -> duperModel.registerListener(listener)); } @Override @@ -138,9 +138,7 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi @Override public boolean infraApplicationIsActive(ApplicationId applicationId) { - synchronized (monitor) { - return activeInfraInfos.contains(applicationId); - } + return lockedSupplier(() -> activeInfraInfos.contains(applicationId)); } @Override @@ -150,10 +148,10 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi throw new IllegalArgumentException("There is no infrastructure application with ID '" + applicationId + "'"); } - synchronized (monitor) { + lockedRunnable(() -> { activeInfraInfos.add(applicationId); duperModel.add(application.makeApplicationInfo(hostnames)); - } + }); } @Override @@ -162,39 +160,41 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi throw new IllegalArgumentException("There is no infrastructure application with ID '" + applicationId + "'"); } - synchronized (monitor) { + lockedRunnable(() -> { activeInfraInfos.remove(applicationId); duperModel.remove(applicationId); - } + }); } @Override public void infraApplicationsIsNowComplete() { - synchronized (monitor) { + lockedRunnable(() -> { if (!infraApplicationsIsComplete) { infraApplicationsIsComplete = true; logger.log(LogLevel.INFO, "All infrastructure applications have been activated"); maybeSetDuperModelAsComplete(); } - } + }); } public Optional<ApplicationInfo> getApplicationInfo(ApplicationId applicationId) { - synchronized (monitor) { - return duperModel.getApplicationInfo(applicationId); - } + return lockedSupplier(() -> duperModel.getApplicationInfo(applicationId)); } public Optional<ApplicationInfo> getApplicationInfo(HostName hostname) { - synchronized (monitor) { - return duperModel.getApplicationInfo(hostname); - } + return lockedSupplier(() -> duperModel.getApplicationInfo(hostname)); } public List<ApplicationInfo> getApplicationInfos() { - synchronized (monitor) { - return duperModel.getApplicationInfos(); - } + return lockedSupplier(() -> duperModel.getApplicationInfos()); + } + + /** + * Within the region, trying to accesss the duper model (without already having the lock) + * will cause an IllegalStateException to be thrown. + */ + public CriticalRegion disallowDuperModelLockAcquisition(String regionDescription) { + return disallowedDuperModeLockAcquisitionRegions.startCriticalRegion(regionDescription); } private void maybeSetDuperModelAsComplete() { @@ -202,4 +202,36 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi duperModel.setComplete(); } } + + private <T> T lockedSupplier(Supplier<T> supplier) { + if (lock.isHeldByCurrentThread()) { + return supplier.get(); + } + + disallowedDuperModeLockAcquisitionRegions + .assertOutsideCriticalRegions("acquiring duper model lock"); + + lock.lock(); + try { + return supplier.get(); + } finally { + lock.unlock(); + } + } + + private void lockedRunnable(Runnable runnable) { + if (lock.isHeldByCurrentThread()) { + runnable.run(); + } + + disallowedDuperModeLockAcquisitionRegions + .assertOutsideCriticalRegions("acquiring duper model lock"); + + lock.lock(); + try { + runnable.run(); + } finally { + lock.unlock(); + } + } } diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/model/ServiceMonitorImpl.java b/service-monitor/src/main/java/com/yahoo/vespa/service/model/ServiceMonitorImpl.java index 7c0f291e121..678b0d3d5ba 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/model/ServiceMonitorImpl.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/model/ServiceMonitorImpl.java @@ -14,6 +14,8 @@ import com.yahoo.vespa.applicationmodel.ServiceInstance; import com.yahoo.vespa.service.duper.DuperModelManager; import com.yahoo.vespa.service.manager.MonitorManager; import com.yahoo.vespa.service.manager.UnionMonitorManager; +import com.yahoo.vespa.service.monitor.AntiServiceMonitor; +import com.yahoo.vespa.service.monitor.CriticalRegion; import com.yahoo.vespa.service.monitor.ServiceHostListener; import com.yahoo.vespa.service.monitor.ServiceModel; import com.yahoo.vespa.service.monitor.ServiceMonitor; @@ -24,7 +26,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; -public class ServiceMonitorImpl implements ServiceMonitor { +public class ServiceMonitorImpl implements ServiceMonitor, AntiServiceMonitor { private final ServiceMonitorMetrics metrics; private final DuperModelManager duperModelManager; @@ -105,6 +107,11 @@ public class ServiceMonitorImpl implements ServiceMonitor { duperModelManager.registerListener(duperModelListener); } + @Override + public CriticalRegion disallowDuperModelLockAcquisition(String regionDescription) { + return duperModelManager.disallowDuperModelLockAcquisition(regionDescription); + } + private Optional<ApplicationInfo> getApplicationInfo(ApplicationInstanceReference reference) { ApplicationId applicationId = ApplicationInstanceGenerator.toApplicationId(reference); return duperModelManager.getApplicationInfo(applicationId); diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/AntiServiceMonitor.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/AntiServiceMonitor.java new file mode 100644 index 00000000000..1c0460c3797 --- /dev/null +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/AntiServiceMonitor.java @@ -0,0 +1,25 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.service.monitor; + +import com.yahoo.vespa.service.duper.DuperModelManager; + +/** + * Interface that allows a thread to declare it must NOT access certain parts of the ServiceMonitor. + * + * @author hakonhall + */ +public interface AntiServiceMonitor { + /** + * Disallow the current thread to acquire the "duper model lock" (see {@link DuperModelManager}), + * necessarily acquired by most of the {@link ServiceMonitor} methods, starting from now and + * up until the returned region is closed. + * + * <p>For instance, if an application is activated the duper model is notified: The duper model + * will acquire a lock to update the model atomically, and while having that lock notify + * the status service. The status service will typically acquire an application lock and prune + * hosts no longer part of the application. If a thread were to try to acquire these locks + * in the reverse order, it might become deadlocked. This method allows one to detect this + * and throw an exception, causing it to be caught earlier or at least easier to debug.</p> + */ + CriticalRegion disallowDuperModelLockAcquisition(String regionDescription); +} diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/CriticalRegion.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/CriticalRegion.java new file mode 100644 index 00000000000..ec674f133a1 --- /dev/null +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/CriticalRegion.java @@ -0,0 +1,18 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.service.monitor; + +/** + * Represents a region of execution, e.g. the block of a try-with-resources. + * + * @author hakonhall + */ +@FunctionalInterface +public interface CriticalRegion extends AutoCloseable { + /** + * Ends the critical region. + * + * @throws IllegalStateException if the current thread is different from the one that started + * the critical region. + */ + @Override void close() throws IllegalStateException; +} |