summaryrefslogtreecommitdiffstats
path: root/service-monitor
diff options
context:
space:
mode:
authorHÃ¥kon Hallingstad <hakon@verizonmedia.com>2020-03-07 17:00:19 +0100
committerGitHub <noreply@github.com>2020-03-07 17:00:19 +0100
commit3eb6aa9161ca016d8db3914970054d5519052c45 (patch)
tree49ffc47b8092dc81508da8b40ec7ff88ac64b275 /service-monitor
parent71cf26a0abe0b62b9d3038c86351e69a3dee640e (diff)
parent92a2c10c684ee128b25e2128433451db438fc14a (diff)
Merge pull request #12496 from vespa-engine/hakonhall/throw-if-accessing-duper-model-while-holding-status-service-application-lock
Throw if accessing duper model while holding status service application lock
Diffstat (limited to 'service-monitor')
-rw-r--r--service-monitor/src/main/java/com/yahoo/vespa/service/duper/CriticalRegionChecker.java63
-rw-r--r--service-monitor/src/main/java/com/yahoo/vespa/service/duper/DuperModelManager.java92
-rw-r--r--service-monitor/src/main/java/com/yahoo/vespa/service/model/ServiceMonitorImpl.java9
-rw-r--r--service-monitor/src/main/java/com/yahoo/vespa/service/monitor/AntiServiceMonitor.java25
-rw-r--r--service-monitor/src/main/java/com/yahoo/vespa/service/monitor/CriticalRegion.java18
5 files changed, 176 insertions, 31 deletions
diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/CriticalRegionChecker.java b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/CriticalRegionChecker.java
new file mode 100644
index 00000000000..238b70a7310
--- /dev/null
+++ b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/CriticalRegionChecker.java
@@ -0,0 +1,63 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.service.duper;
+
+import com.yahoo.vespa.service.monitor.CriticalRegion;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * To detect and throw an {@link IllegalStateException} if the execution of the current thread has
+ * reached code point P, some time after the start of a critical region R and before the end of it:
+ *
+ * <ol>
+ * <li>Declare a static final instance of {@link CriticalRegionChecker}.</li>
+ * <li>Invoke {@link #startCriticalRegion(String)} when entering region R, and close
+ * the returned {@link CriticalRegion} when leaving it.</li>
+ * <li>Invoke {@link #assertOutsideCriticalRegions(String)} at code point P.</li>
+ * </ol>
+ *
+ * @author hakonhall
+ */
+public class CriticalRegionChecker {
+
+ private final ThreadLocalDescriptions threadLocalDescriptions = new ThreadLocalDescriptions();
+ private final String name;
+
+ public CriticalRegionChecker(String name) {
+ this.name = name;
+ }
+
+ /** Start of the critical region, within which {@link #assertOutsideCriticalRegions(String)} will throw. */
+ public CriticalRegion startCriticalRegion(String regionDescription) {
+ List<String> regionDescriptions = threadLocalDescriptions.get();
+ regionDescriptions.add(regionDescription);
+ Thread threadAtStart = Thread.currentThread();
+
+ return () -> {
+ regionDescriptions.remove(regionDescription);
+
+ Thread treadAtClose = Thread.currentThread();
+ if (threadAtStart != treadAtClose) {
+ throw new IllegalStateException(name + ": A critical region cannot cross threads: " +
+ regionDescription);
+ }
+ };
+ }
+
+ /** @throws IllegalStateException if within one or more critical regions. */
+ public void assertOutsideCriticalRegions(String codePointDescription) throws IllegalStateException {
+ List<String> regionDescriptions = threadLocalDescriptions.get();
+ if (regionDescriptions.size() > 0) {
+ throw new IllegalStateException(name + ": Code point " + codePointDescription +
+ " is within these critical regions: " + regionDescriptions);
+ }
+ }
+
+ private static class ThreadLocalDescriptions extends ThreadLocal<List<String>> {
+ @Override
+ protected List<String> initialValue() {
+ return new ArrayList<>();
+ }
+ }
+}
diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/DuperModelManager.java b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/DuperModelManager.java
index baada59dc65..cb1c6d2d95f 100644
--- a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/DuperModelManager.java
+++ b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/DuperModelManager.java
@@ -12,6 +12,7 @@ import com.yahoo.config.provision.HostName;
import com.yahoo.config.provision.SystemName;
import com.yahoo.log.LogLevel;
import com.yahoo.vespa.flags.FlagSource;
+import com.yahoo.vespa.service.monitor.CriticalRegion;
import com.yahoo.vespa.service.monitor.DuperModelInfraApi;
import com.yahoo.vespa.service.monitor.DuperModelListener;
import com.yahoo.vespa.service.monitor.DuperModelProvider;
@@ -23,8 +24,10 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
+import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Function;
import java.util.logging.Logger;
+import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -46,7 +49,10 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi
private final Map<ApplicationId, InfraApplication> supportedInfraApplications;
- private final Object monitor = new Object();
+ private static CriticalRegionChecker disallowedDuperModeLockAcquisitionRegions =
+ new CriticalRegionChecker("duper model deadlock detection");
+
+ private final ReentrantLock lock = new ReentrantLock(true);
private final DuperModel duperModel;
// The set of active infrastructure ApplicationInfo. Not all are necessarily in the DuperModel for historical reasons.
private final Set<ApplicationId> activeInfraInfos = new HashSet<>(10);
@@ -83,27 +89,23 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi
superModelProvider.registerListener(new SuperModelListener() {
@Override
public void applicationActivated(SuperModel superModel, ApplicationInfo application) {
- synchronized (monitor) {
- duperModel.add(application);
- }
+ lockedRunnable(() -> duperModel.add(application));
}
@Override
public void applicationRemoved(SuperModel superModel, ApplicationId applicationId) {
- synchronized (monitor) {
- duperModel.remove(applicationId);
- }
+ lockedRunnable(() -> duperModel.remove(applicationId));
}
@Override
public void notifyOfCompleteness(SuperModel superModel) {
- synchronized (monitor) {
+ lockedRunnable(() -> {
if (!superModelIsComplete) {
superModelIsComplete = true;
logger.log(LogLevel.INFO, "All bootstrap tenant applications have been activated");
maybeSetDuperModelAsComplete();
}
- }
+ });
}
});
}
@@ -114,9 +116,7 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi
*/
@Override
public void registerListener(DuperModelListener listener) {
- synchronized (monitor) {
- duperModel.registerListener(listener);
- }
+ lockedRunnable(() -> duperModel.registerListener(listener));
}
@Override
@@ -138,9 +138,7 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi
@Override
public boolean infraApplicationIsActive(ApplicationId applicationId) {
- synchronized (monitor) {
- return activeInfraInfos.contains(applicationId);
- }
+ return lockedSupplier(() -> activeInfraInfos.contains(applicationId));
}
@Override
@@ -150,10 +148,10 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi
throw new IllegalArgumentException("There is no infrastructure application with ID '" + applicationId + "'");
}
- synchronized (monitor) {
+ lockedRunnable(() -> {
activeInfraInfos.add(applicationId);
duperModel.add(application.makeApplicationInfo(hostnames));
- }
+ });
}
@Override
@@ -162,39 +160,41 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi
throw new IllegalArgumentException("There is no infrastructure application with ID '" + applicationId + "'");
}
- synchronized (monitor) {
+ lockedRunnable(() -> {
activeInfraInfos.remove(applicationId);
duperModel.remove(applicationId);
- }
+ });
}
@Override
public void infraApplicationsIsNowComplete() {
- synchronized (monitor) {
+ lockedRunnable(() -> {
if (!infraApplicationsIsComplete) {
infraApplicationsIsComplete = true;
logger.log(LogLevel.INFO, "All infrastructure applications have been activated");
maybeSetDuperModelAsComplete();
}
- }
+ });
}
public Optional<ApplicationInfo> getApplicationInfo(ApplicationId applicationId) {
- synchronized (monitor) {
- return duperModel.getApplicationInfo(applicationId);
- }
+ return lockedSupplier(() -> duperModel.getApplicationInfo(applicationId));
}
public Optional<ApplicationInfo> getApplicationInfo(HostName hostname) {
- synchronized (monitor) {
- return duperModel.getApplicationInfo(hostname);
- }
+ return lockedSupplier(() -> duperModel.getApplicationInfo(hostname));
}
public List<ApplicationInfo> getApplicationInfos() {
- synchronized (monitor) {
- return duperModel.getApplicationInfos();
- }
+ return lockedSupplier(() -> duperModel.getApplicationInfos());
+ }
+
+ /**
+ * Within the region, trying to accesss the duper model (without already having the lock)
+ * will cause an IllegalStateException to be thrown.
+ */
+ public CriticalRegion disallowDuperModelLockAcquisition(String regionDescription) {
+ return disallowedDuperModeLockAcquisitionRegions.startCriticalRegion(regionDescription);
}
private void maybeSetDuperModelAsComplete() {
@@ -202,4 +202,36 @@ public class DuperModelManager implements DuperModelProvider, DuperModelInfraApi
duperModel.setComplete();
}
}
+
+ private <T> T lockedSupplier(Supplier<T> supplier) {
+ if (lock.isHeldByCurrentThread()) {
+ return supplier.get();
+ }
+
+ disallowedDuperModeLockAcquisitionRegions
+ .assertOutsideCriticalRegions("acquiring duper model lock");
+
+ lock.lock();
+ try {
+ return supplier.get();
+ } finally {
+ lock.unlock();
+ }
+ }
+
+ private void lockedRunnable(Runnable runnable) {
+ if (lock.isHeldByCurrentThread()) {
+ runnable.run();
+ }
+
+ disallowedDuperModeLockAcquisitionRegions
+ .assertOutsideCriticalRegions("acquiring duper model lock");
+
+ lock.lock();
+ try {
+ runnable.run();
+ } finally {
+ lock.unlock();
+ }
+ }
}
diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/model/ServiceMonitorImpl.java b/service-monitor/src/main/java/com/yahoo/vespa/service/model/ServiceMonitorImpl.java
index 7c0f291e121..678b0d3d5ba 100644
--- a/service-monitor/src/main/java/com/yahoo/vespa/service/model/ServiceMonitorImpl.java
+++ b/service-monitor/src/main/java/com/yahoo/vespa/service/model/ServiceMonitorImpl.java
@@ -14,6 +14,8 @@ import com.yahoo.vespa.applicationmodel.ServiceInstance;
import com.yahoo.vespa.service.duper.DuperModelManager;
import com.yahoo.vespa.service.manager.MonitorManager;
import com.yahoo.vespa.service.manager.UnionMonitorManager;
+import com.yahoo.vespa.service.monitor.AntiServiceMonitor;
+import com.yahoo.vespa.service.monitor.CriticalRegion;
import com.yahoo.vespa.service.monitor.ServiceHostListener;
import com.yahoo.vespa.service.monitor.ServiceModel;
import com.yahoo.vespa.service.monitor.ServiceMonitor;
@@ -24,7 +26,7 @@ import java.util.Map;
import java.util.Optional;
import java.util.Set;
-public class ServiceMonitorImpl implements ServiceMonitor {
+public class ServiceMonitorImpl implements ServiceMonitor, AntiServiceMonitor {
private final ServiceMonitorMetrics metrics;
private final DuperModelManager duperModelManager;
@@ -105,6 +107,11 @@ public class ServiceMonitorImpl implements ServiceMonitor {
duperModelManager.registerListener(duperModelListener);
}
+ @Override
+ public CriticalRegion disallowDuperModelLockAcquisition(String regionDescription) {
+ return duperModelManager.disallowDuperModelLockAcquisition(regionDescription);
+ }
+
private Optional<ApplicationInfo> getApplicationInfo(ApplicationInstanceReference reference) {
ApplicationId applicationId = ApplicationInstanceGenerator.toApplicationId(reference);
return duperModelManager.getApplicationInfo(applicationId);
diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/AntiServiceMonitor.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/AntiServiceMonitor.java
new file mode 100644
index 00000000000..1c0460c3797
--- /dev/null
+++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/AntiServiceMonitor.java
@@ -0,0 +1,25 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.service.monitor;
+
+import com.yahoo.vespa.service.duper.DuperModelManager;
+
+/**
+ * Interface that allows a thread to declare it must NOT access certain parts of the ServiceMonitor.
+ *
+ * @author hakonhall
+ */
+public interface AntiServiceMonitor {
+ /**
+ * Disallow the current thread to acquire the "duper model lock" (see {@link DuperModelManager}),
+ * necessarily acquired by most of the {@link ServiceMonitor} methods, starting from now and
+ * up until the returned region is closed.
+ *
+ * <p>For instance, if an application is activated the duper model is notified: The duper model
+ * will acquire a lock to update the model atomically, and while having that lock notify
+ * the status service. The status service will typically acquire an application lock and prune
+ * hosts no longer part of the application. If a thread were to try to acquire these locks
+ * in the reverse order, it might become deadlocked. This method allows one to detect this
+ * and throw an exception, causing it to be caught earlier or at least easier to debug.</p>
+ */
+ CriticalRegion disallowDuperModelLockAcquisition(String regionDescription);
+}
diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/CriticalRegion.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/CriticalRegion.java
new file mode 100644
index 00000000000..ec674f133a1
--- /dev/null
+++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/CriticalRegion.java
@@ -0,0 +1,18 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.service.monitor;
+
+/**
+ * Represents a region of execution, e.g. the block of a try-with-resources.
+ *
+ * @author hakonhall
+ */
+@FunctionalInterface
+public interface CriticalRegion extends AutoCloseable {
+ /**
+ * Ends the critical region.
+ *
+ * @throws IllegalStateException if the current thread is different from the one that started
+ * the critical region.
+ */
+ @Override void close() throws IllegalStateException;
+}