summaryrefslogtreecommitdiffstats
path: root/service-monitor
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@verizonmedia.com>2020-03-07 14:58:35 +0100
committerHåkon Hallingstad <hakon@verizonmedia.com>2020-03-07 14:58:35 +0100
commit92a2c10c684ee128b25e2128433451db438fc14a (patch)
tree2e154f542cea745afca461898d98af626d1dca68 /service-monitor
parentb241838f43794aa6a84df55a0f6e9e6877060021 (diff)
Throw if accessing duper model while holding status service application lock
When the duper model is updated, ZooKeeper is atomically updated to e.g. remove extraneous hosts. This is done by acquiring the duper model lock first, then the relevant application lock in the status service. Acquiring these two locks in the reverse order may lead to a deadlock. This PR throws an IllegalStateException when detecting the current thread is about to acquire the duper model lock when the current thread has acquired the application lock.
Diffstat (limited to 'service-monitor')
-rw-r--r--service-monitor/src/main/java/com/yahoo/vespa/service/duper/CriticalRegionChecker.java63
-rw-r--r--service-monitor/src/main/java/com/yahoo/vespa/service/duper/DuperModelManager.java92
-rw-r--r--service-monitor/src/main/java/com/yahoo/vespa/service/model/ServiceMonitorImpl.java9
-rw-r--r--service-monitor/src/main/java/com/yahoo/vespa/service/monitor/AntiServiceMonitor.java25
-rw-r--r--service-monitor/src/main/java/com/yahoo/vespa/service/monitor/CriticalRegion.java18
5 files changed, 176 insertions, 31 deletions
diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/CriticalRegionChecker.java b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/CriticalRegionChecker.java
new file mode 100644
index 00000000000..238b70a7310
--- /dev/null
+++ b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/CriticalRegionChecker.java
@@ -0,0 +1,63 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.service.duper;
+
+import com.yahoo.vespa.service.monitor.CriticalRegion;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * To detect and throw an {@link IllegalStateException} if the execution of the current thread has
+ * reached code point P, some time after the start of a critical region R and before the end of it:
+ *
+ * <ol>
+ * <li>Declare a static final instance of {@link CriticalRegionChecker}.</li>
+ * <li>Invoke {@link #startCriticalRegion(String)} when entering region R, and close
+ * the returned {@link CriticalRegion} when leaving it.</li>
+ * <li>Invoke {@link #assertOutsideCriticalRegions(String)} at code point P.</li>
+ * </ol>
+ *
+ * @author hakonhall
+ */
+public class CriticalRegionChecker {
+
+ private final ThreadLocalDescriptions threadLocalDescriptions = new ThreadLocalDescriptions();
+ private final String name;
+
+ public CriticalRegionChecker(String name) {
+ this.name = name;
+ }
+
+ /** Start of the critical region, within which {@link #assertOutsideCriticalRegions(String)} will throw. */
+ public CriticalRegion startCriticalRegion(String regionDescription) {
+ List<String> regionDescriptions = threadLocalDescriptions.get();
+ regionDescriptions.add(regionDescription);
+ Thread threadAtStart = Thread.currentThread();
+
+ return () -> {
+ regionDescriptions.remove(regionDescription);
+
+ Thread treadAtClose = Thread.currentThread();
+ if (threadAtStart != treadAtClose) {
+ throw new IllegalStateException(name + ": A critical region cannot cross threads: " +
+ regionDescription);
+ }
+ };
+ }
+
+ /** @throws IllegalStateException if within one or more critical regions. */
+ public void assertOutsideCriticalRegions(String codePointDescription) throws IllegalStateException {
+ List<String> regionDescriptions = threadLocalDescriptions.get();
+ if (regionDescriptions.size() > 0) {
+ throw new IllegalStateException(name + ": Code point " + codePointDescription +
+ " is within these critical regions: " + regionDescriptions);
+ }
+ }
+
+ private static class ThreadLocalDescriptions extends ThreadLocal<List<String>> {
+ @Override
+ protected List<String> initialValue() {
+ return new ArrayList<>();
+ }
+ }
+}
diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/DuperModelManager.java b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/DuperModelManager.java
index baada59dc65..cb1c6d2d95f 100644
--- a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/DuperModelManager.java
+++ b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/DuperModelManager.java
@@ -12,6 +12,7 @@ import com.yahoo.config.provision.HostName;
import com.yahoo.config.provision.SystemName;
import com.yahoo.log.LogLevel;
import com.yahoo.vespa.flags.FlagSource;
+import com.yahoo.vespa.service.monitor.CriticalRegion;
import com.yahoo.vespa.service.monitor.DuperModelInfraApi;
import com.yahoo.vespa.service.monitor.DuperModelListener;
import com.yahoo.vespa.service.monitor.DuperModelProvider;
@@ -23,8 +24,10 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
+import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Function;
import java.util.logging.Logger;
+import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -46,7 +49,10 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi
private final Map<ApplicationId, InfraApplication> supportedInfraApplications;
- private final Object monitor = new Object();
+ private static CriticalRegionChecker disallowedDuperModeLockAcquisitionRegions =
+ new CriticalRegionChecker("duper model deadlock detection");
+
+ private final ReentrantLock lock = new ReentrantLock(true);
private final DuperModel duperModel;
// The set of active infrastructure ApplicationInfo. Not all are necessarily in the DuperModel for historical reasons.
private final Set<ApplicationId> activeInfraInfos = new HashSet<>(10);
@@ -83,27 +89,23 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi
superModelProvider.registerListener(new SuperModelListener() {
@Override
public void applicationActivated(SuperModel superModel, ApplicationInfo application) {
- synchronized (monitor) {
- duperModel.add(application);
- }
+ lockedRunnable(() -> duperModel.add(application));
}
@Override
public void applicationRemoved(SuperModel superModel, ApplicationId applicationId) {
- synchronized (monitor) {
- duperModel.remove(applicationId);
- }
+ lockedRunnable(() -> duperModel.remove(applicationId));
}
@Override
public void notifyOfCompleteness(SuperModel superModel) {
- synchronized (monitor) {
+ lockedRunnable(() -> {
if (!superModelIsComplete) {
superModelIsComplete = true;
logger.log(LogLevel.INFO, "All bootstrap tenant applications have been activated");
maybeSetDuperModelAsComplete();
}
- }
+ });
}
});
}
@@ -114,9 +116,7 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi
*/
@Override
public void registerListener(DuperModelListener listener) {
- synchronized (monitor) {
- duperModel.registerListener(listener);
- }
+ lockedRunnable(() -> duperModel.registerListener(listener));
}
@Override
@@ -138,9 +138,7 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi
@Override
public boolean infraApplicationIsActive(ApplicationId applicationId) {
- synchronized (monitor) {
- return activeInfraInfos.contains(applicationId);
- }
+ return lockedSupplier(() -> activeInfraInfos.contains(applicationId));
}
@Override
@@ -150,10 +148,10 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi
throw new IllegalArgumentException("There is no infrastructure application with ID '" + applicationId + "'");
}
- synchronized (monitor) {
+ lockedRunnable(() -> {
activeInfraInfos.add(applicationId);
duperModel.add(application.makeApplicationInfo(hostnames));
- }
+ });
}
@Override
@@ -162,39 +160,41 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi
throw new IllegalArgumentException("There is no infrastructure application with ID '" + applicationId + "'");
}
- synchronized (monitor) {
+ lockedRunnable(() -> {
activeInfraInfos.remove(applicationId);
duperModel.remove(applicationId);
- }
+ });
}
@Override
public void infraApplicationsIsNowComplete() {
- synchronized (monitor) {
+ lockedRunnable(() -> {
if (!infraApplicationsIsComplete) {
infraApplicationsIsComplete = true;
logger.log(LogLevel.INFO, "All infrastructure applications have been activated");
maybeSetDuperModelAsComplete();
}
- }
+ });
}
public Optional<ApplicationInfo> getApplicationInfo(ApplicationId applicationId) {
- synchronized (monitor) {
- return duperModel.getApplicationInfo(applicationId);
- }
+ return lockedSupplier(() -> duperModel.getApplicationInfo(applicationId));
}
public Optional<ApplicationInfo> getApplicationInfo(HostName hostname) {
- synchronized (monitor) {
- return duperModel.getApplicationInfo(hostname);
- }
+ return lockedSupplier(() -> duperModel.getApplicationInfo(hostname));
}
public List<ApplicationInfo> getApplicationInfos() {
- synchronized (monitor) {
- return duperModel.getApplicationInfos();
- }
+ return lockedSupplier(() -> duperModel.getApplicationInfos());
+ }
+
+ /**
+ * Within the region, trying to accesss the duper model (without already having the lock)
+ * will cause an IllegalStateException to be thrown.
+ */
+ public CriticalRegion disallowDuperModelLockAcquisition(String regionDescription) {
+ return disallowedDuperModeLockAcquisitionRegions.startCriticalRegion(regionDescription);
}
private void maybeSetDuperModelAsComplete() {
@@ -202,4 +202,36 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi
duperModel.setComplete();
}
}
+
+ private <T> T lockedSupplier(Supplier<T> supplier) {
+ if (lock.isHeldByCurrentThread()) {
+ return supplier.get();
+ }
+
+ disallowedDuperModeLockAcquisitionRegions
+ .assertOutsideCriticalRegions("acquiring duper model lock");
+
+ lock.lock();
+ try {
+ return supplier.get();
+ } finally {
+ lock.unlock();
+ }
+ }
+
+ private void lockedRunnable(Runnable runnable) {
+ if (lock.isHeldByCurrentThread()) {
+ runnable.run();
+ }
+
+ disallowedDuperModeLockAcquisitionRegions
+ .assertOutsideCriticalRegions("acquiring duper model lock");
+
+ lock.lock();
+ try {
+ runnable.run();
+ } finally {
+ lock.unlock();
+ }
+ }
}
diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/model/ServiceMonitorImpl.java b/service-monitor/src/main/java/com/yahoo/vespa/service/model/ServiceMonitorImpl.java
index 7c0f291e121..678b0d3d5ba 100644
--- a/service-monitor/src/main/java/com/yahoo/vespa/service/model/ServiceMonitorImpl.java
+++ b/service-monitor/src/main/java/com/yahoo/vespa/service/model/ServiceMonitorImpl.java
@@ -14,6 +14,8 @@ import com.yahoo.vespa.applicationmodel.ServiceInstance;
import com.yahoo.vespa.service.duper.DuperModelManager;
import com.yahoo.vespa.service.manager.MonitorManager;
import com.yahoo.vespa.service.manager.UnionMonitorManager;
+import com.yahoo.vespa.service.monitor.AntiServiceMonitor;
+import com.yahoo.vespa.service.monitor.CriticalRegion;
import com.yahoo.vespa.service.monitor.ServiceHostListener;
import com.yahoo.vespa.service.monitor.ServiceModel;
import com.yahoo.vespa.service.monitor.ServiceMonitor;
@@ -24,7 +26,7 @@ import java.util.Map;
import java.util.Optional;
import java.util.Set;
-public class ServiceMonitorImpl implements ServiceMonitor {
+public class ServiceMonitorImpl implements ServiceMonitor, AntiServiceMonitor {
private final ServiceMonitorMetrics metrics;
private final DuperModelManager duperModelManager;
@@ -105,6 +107,11 @@ public class ServiceMonitorImpl implements ServiceMonitor {
duperModelManager.registerListener(duperModelListener);
}
+ @Override
+ public CriticalRegion disallowDuperModelLockAcquisition(String regionDescription) {
+ return duperModelManager.disallowDuperModelLockAcquisition(regionDescription);
+ }
+
private Optional<ApplicationInfo> getApplicationInfo(ApplicationInstanceReference reference) {
ApplicationId applicationId = ApplicationInstanceGenerator.toApplicationId(reference);
return duperModelManager.getApplicationInfo(applicationId);
diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/AntiServiceMonitor.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/AntiServiceMonitor.java
new file mode 100644
index 00000000000..1c0460c3797
--- /dev/null
+++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/AntiServiceMonitor.java
@@ -0,0 +1,25 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.service.monitor;
+
+import com.yahoo.vespa.service.duper.DuperModelManager;
+
+/**
+ * Interface that allows a thread to declare it must NOT access certain parts of the ServiceMonitor.
+ *
+ * @author hakonhall
+ */
+public interface AntiServiceMonitor {
+ /**
+ * Disallow the current thread to acquire the "duper model lock" (see {@link DuperModelManager}),
+ * necessarily acquired by most of the {@link ServiceMonitor} methods, starting from now and
+ * up until the returned region is closed.
+ *
+ * <p>For instance, if an application is activated the duper model is notified: The duper model
+ * will acquire a lock to update the model atomically, and while having that lock notify
+ * the status service. The status service will typically acquire an application lock and prune
+ * hosts no longer part of the application. If a thread were to try to acquire these locks
+ * in the reverse order, it might become deadlocked. This method allows one to detect this
+ * and throw an exception, causing it to be caught earlier or at least easier to debug.</p>
+ */
+ CriticalRegion disallowDuperModelLockAcquisition(String regionDescription);
+}
diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/CriticalRegion.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/CriticalRegion.java
new file mode 100644
index 00000000000..ec674f133a1
--- /dev/null
+++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/CriticalRegion.java
@@ -0,0 +1,18 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.service.monitor;
+
+/**
+ * Represents a region of execution, e.g. the block of a try-with-resources.
+ *
+ * @author hakonhall
+ */
+@FunctionalInterface
+public interface CriticalRegion extends AutoCloseable {
+ /**
+ * Ends the critical region.
+ *
+ * @throws IllegalStateException if the current thread is different from the one that started
+ * the critical region.
+ */
+ @Override void close() throws IllegalStateException;
+}