aboutsummaryrefslogtreecommitdiffstats
path: root/orchestrator
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@oath.com>2017-09-15 15:13:05 +0200
committerHåkon Hallingstad <hakon@oath.com>2017-09-15 15:13:05 +0200
commitb7425438dbc2de06ee834d14cf9d59d54c899ea2 (patch)
tree4f359becbdac8794df79be57472ee874303e7e62 /orchestrator
parent664dc8e147928cd05a16954cbfdd6efb34dd144a (diff)
Remove current-thread restriction on Orchestrator lock
Diffstat (limited to 'orchestrator')
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java133
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java37
2 files changed, 9 insertions, 161 deletions
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java
index a9392711d7d..1f6e8f6ce16 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java
@@ -8,20 +8,14 @@ import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.curator.Curator;
import com.yahoo.vespa.curator.Lock;
import com.yahoo.vespa.orchestrator.OrchestratorUtil;
-import org.apache.curator.SessionFailRetryLoop;
-import org.apache.curator.SessionFailRetryLoop.Mode;
-import org.apache.curator.SessionFailRetryLoop.SessionFailedException;
import org.apache.curator.framework.recipes.locks.InterProcessSemaphoreMutex;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.KeeperException.NodeExistsException;
import org.apache.zookeeper.data.Stat;
-import javax.annotation.concurrent.GuardedBy;
import javax.inject.Inject;
import java.time.Duration;
-import java.util.HashMap;
import java.util.HashSet;
-import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
@@ -37,12 +31,6 @@ public class ZookeeperStatusService implements StatusService {
private static final Logger log = Logger.getLogger(ZookeeperStatusService.class.getName());
- //For debug purposes only: Used to check that operations depending on a lock is done from a single thread,
- //and that a threads doing operations actually owns a corresponding lock,
- //and that a single thread only owns a single lock (across all ZookeeperStatusServices)
- @GuardedBy("threadsHoldingLock")
- private static final Map<Thread, ApplicationInstanceReference> threadsHoldingLock = new HashMap<>();
-
final static String HOST_STATUS_BASE_PATH = "/vespa/host-status-service";
final static String APPLICATION_STATUS_BASE_PATH = "/vespa/application-status-service";
@@ -108,55 +96,16 @@ public class ZookeeperStatusService implements StatusService {
MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(
ApplicationInstanceReference applicationInstanceReference,
long timeoutSeconds) {
- Thread currentThread = Thread.currentThread();
-
- //Due to limitations in SessionFailRetryLoop.
- assertThreadDoesNotHoldLock(currentThread,"Can't lock " + applicationInstanceReference);
+ String lock2Path = applicationInstanceLock2Path(applicationInstanceReference);
+ Lock lock = new Lock(lock2Path, curator);
+ lock.acquire(Duration.ofSeconds(timeoutSeconds));
try {
- SessionFailRetryLoop sessionFailRetryLoop =
- curator.framework().getZookeeperClient().newSessionFailRetryLoop(Mode.FAIL);
- sessionFailRetryLoop.start();
- try {
- String lockPath = applicationInstanceLockPath(applicationInstanceReference);
- InterProcessSemaphoreMutex mutex = acquireMutexOrThrow(timeoutSeconds, TimeUnit.SECONDS, lockPath);
-
- // TODO: Once rolled out, make this the only lock mechanism
- Lock lock2;
- try {
- String lock2Path = applicationInstanceLock2Path(applicationInstanceReference);
- lock2 = new Lock(lock2Path, curator);
- lock2.acquire(Duration.ofSeconds(timeoutSeconds));
- } catch (Throwable t) {
- mutex.release();
- throw t;
- }
-
- synchronized (threadsHoldingLock) {
- threadsHoldingLock.put(currentThread, applicationInstanceReference);
- }
-
- return new ZkMutableStatusRegistry(
- lock2,
- mutex,
- sessionFailRetryLoop,
- applicationInstanceReference,
- currentThread);
- } catch (Throwable t) {
- sessionFailRetryLoop.close();
- throw t;
- }
- } catch (Exception e) {
- //TODO: IOException with explanation
- throw new RuntimeException(e);
- }
- }
-
- private void assertThreadDoesNotHoldLock(Thread currentThread, String message) {
- synchronized (threadsHoldingLock) {
- if (threadsHoldingLock.containsKey(currentThread)) {
- throw new AssertionError(message + ", already have a lock on " + threadsHoldingLock.get(currentThread));
- }
+ return new ZkMutableStatusRegistry(lock, applicationInstanceReference);
+ } catch (Throwable t) {
+ // In case the constructor throws an exception.
+ lock.close();
+ throw t;
}
}
@@ -176,8 +125,6 @@ public class ZookeeperStatusService implements StatusService {
private void setHostStatus(ApplicationInstanceReference applicationInstanceReference,
HostName hostName,
HostStatus status) {
- assertThreadHoldsLock(applicationInstanceReference);
-
String path = hostAllowedDownPath(applicationInstanceReference, hostName);
try {
@@ -195,24 +142,6 @@ public class ZookeeperStatusService implements StatusService {
}
}
- private static void assertThreadHoldsLock(ApplicationInstanceReference applicationInstanceReference) {
- synchronized (threadsHoldingLock) {
- ApplicationInstanceReference lockedApplicationInstanceReference =
- threadsHoldingLock.get(Thread.currentThread());
-
- if (lockedApplicationInstanceReference == null) {
- throw new AssertionError("The current thread does not own any status service locks. " +
- "Application Instance = " + applicationInstanceReference);
- }
-
- if (!lockedApplicationInstanceReference.equals(applicationInstanceReference)) {
- throw new AssertionError("The current thread does not have a lock on " +
- "application instance " + applicationInstanceReference +
- ", but instead have a lock on " + lockedApplicationInstanceReference);
- }
- }
- }
-
private void deleteNode_ignoreNoNodeException(String path, String debugLogMessageIfNotExists) throws Exception {
try {
curator.framework().delete().forPath(path);
@@ -259,7 +188,6 @@ public class ZookeeperStatusService implements StatusService {
private HostStatus getHostStatusWithLock(
final ApplicationInstanceReference applicationInstanceReference,
final HostName hostName) {
- assertThreadHoldsLock(applicationInstanceReference);
return getInternalHostStatus(applicationInstanceReference, hostName);
}
@@ -290,23 +218,13 @@ public class ZookeeperStatusService implements StatusService {
private class ZkMutableStatusRegistry implements MutableStatusRegistry {
private final Lock lock;
- private final InterProcessSemaphoreMutex mutex;
- private final SessionFailRetryLoop sessionFailRetryLoop;
private final ApplicationInstanceReference applicationInstanceReference;
- private final Thread lockingThread;
public ZkMutableStatusRegistry(
Lock lock,
- InterProcessSemaphoreMutex mutex,
- SessionFailRetryLoop sessionFailRetryLoop,
- ApplicationInstanceReference applicationInstanceReference,
- Thread lockingThread) {
-
- this.mutex = mutex;
+ ApplicationInstanceReference applicationInstanceReference) {
this.lock = lock;
- this.sessionFailRetryLoop = sessionFailRetryLoop;
this.applicationInstanceReference = applicationInstanceReference;
- this.lockingThread = lockingThread;
}
@Override
@@ -316,8 +234,6 @@ public class ZookeeperStatusService implements StatusService {
@Override
public void setApplicationInstanceStatus(ApplicationInstanceStatus applicationInstanceStatus) {
- assertThreadHoldsLock(applicationInstanceReference);
-
String path = applicationInstanceSuspendedPath(applicationInstanceReference);
try {
@@ -349,10 +265,6 @@ public class ZookeeperStatusService implements StatusService {
@Override
@NoThrow
public void close() {
- synchronized (threadsHoldingLock) {
- threadsHoldingLock.remove(lockingThread, applicationInstanceReference);
- }
-
try {
lock.close();
} catch (RuntimeException e) {
@@ -360,33 +272,6 @@ public class ZookeeperStatusService implements StatusService {
log.log(LogLevel.WARNING, "Failed to close application lock for " +
ZookeeperStatusService.class.getSimpleName() + ", will ignore and continue", e);
}
-
- try {
- mutex.release();
- } catch (Exception e) {
- if (e.getCause() instanceof SessionFailedException) {
- log.log(LogLevel.DEBUG, "Session expired, mutex should be freed automatically", e);
- } else {
- //Failing to unlock the mutex should not fail the request,
- //since the status database has already been updated at this point.
- log.log(LogLevel.WARNING, "Failed unlocking application instance " + applicationInstanceReference, e);
- }
- }
-
- //Similar precondition checked in sessionFailRetryLoop.close,
- //but this has more useful debug output.
- if (lockingThread != Thread.currentThread()) {
- throw new AssertionError("LockHandle should only be used from a single thread. "
- + "Application instance = " + applicationInstanceReference
- + " Locking thread = " + lockingThread
- + " Current thread = " + Thread.currentThread());
- }
-
- try {
- sessionFailRetryLoop.close();
- } catch (Exception e) {
- log.log(LogLevel.ERROR, "Failed closing SessionRetryLoop", e);
- }
}
}
}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java
index e997743fcf0..1e742b5940a 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java
@@ -128,26 +128,6 @@ public class ZookeeperStatusServiceTest {
}
@Test
- public void session_expiry_when_holding_lock_causes_operations_to_fail() throws Exception {
- try (MutableStatusRegistry statusRegistry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(
- TestIds.APPLICATION_INSTANCE_REFERENCE)) {
-
- KillSession.kill(curator.framework().getZookeeperClient().getZooKeeper(), testingServer.getConnectString());
-
- assertSessionFailed(() ->
- statusRegistry.setHostState(
- TestIds.HOST_NAME1,
- HostStatus.ALLOWED_TO_BE_DOWN));
-
-
- assertSessionFailed(() ->
- statusRegistry.getHostStatus(
- TestIds.HOST_NAME1));
-
- }
- }
-
- @Test
public void failing_to_get_lock_closes_SessionFailRetryLoop() throws Exception {
try (Curator curator = createConnectedCuratorFramework(testingServer)) {
ZookeeperStatusService zookeeperStatusService2 = new ZookeeperStatusService(curator);
@@ -221,23 +201,6 @@ public class ZookeeperStatusServiceTest {
}
}
- /**
- * This requirement is due to limitations in SessionFailRetryLoop
- */
- @Test(expected = AssertionError.class)
- public void multiple_locks_in_a_single_thread_gives_error() throws InterruptedException {
- try (Curator curator = createConnectedCuratorFramework(testingServer)) {
- ZookeeperStatusService zookeeperStatusService2 = new ZookeeperStatusService(curator);
-
- try (MutableStatusRegistry statusRegistry1 = zookeeperStatusService
- .lockApplicationInstance_forCurrentThreadOnly(TestIds.APPLICATION_INSTANCE_REFERENCE);
- MutableStatusRegistry statusRegistry2 = zookeeperStatusService2
- .lockApplicationInstance_forCurrentThreadOnly(TestIds.APPLICATION_INSTANCE_REFERENCE2))
- {
- }
- }
- }
-
@Test
public void suspend_and_resume_application_works_and_is_symmetric() {