summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--flags/src/main/java/com/yahoo/vespa/flags/Flags.java8
-rw-r--r--orchestrator/pom.xml12
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java82
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java36
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java74
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorContextTest.java59
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java83
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java5
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResourceTest.java1
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java21
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService2Test.java127
11 files changed, 455 insertions, 53 deletions
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
index 5bda3559ccf..0a2ba901c8f 100644
--- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
+++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
@@ -114,6 +114,14 @@ public class Flags {
"scheduled evenly distributed in the 1x-2x range (and naturally guaranteed at the 2x boundary).",
"Takes effect on next run of NodeRebooter");
+ public static final UnboundBooleanFlag ENABLE_LARGE_ORCHESTRATOR_LOCKS = defineFeatureFlag(
+ "enable-large-orchestrator-locks", false,
+ "If enabled, the orchestrator will accumulate application locks during probe in batch suspension, " +
+ "and release them in reverse order only after the non-probe is complete. Can be set depending on " +
+ "parent hostname.",
+ "Takes immediate effect for new batch suspensions.",
+ HOSTNAME);
+
public static final UnboundBooleanFlag ENABLE_DYNAMIC_PROVISIONING = defineFeatureFlag(
"enable-dynamic-provisioning", false,
"Provision a new docker host when we otherwise can't allocate a docker node",
diff --git a/orchestrator/pom.xml b/orchestrator/pom.xml
index 3ebc87010fb..5dd4e7ea87d 100644
--- a/orchestrator/pom.xml
+++ b/orchestrator/pom.xml
@@ -42,6 +42,12 @@
</dependency>
<dependency>
<groupId>com.yahoo.vespa</groupId>
+ <artifactId>flags</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
<artifactId>vespajlib</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
@@ -59,6 +65,12 @@
<scope>test</scope>
</dependency>
<dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>zookeeper-server-common</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java
index d3bdaa6dc64..17dfb924973 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java
@@ -1,13 +1,17 @@
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.orchestrator;
+import com.yahoo.log.LogLevel;
import com.yahoo.time.TimeBudget;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientTimeouts;
import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
+import java.util.HashMap;
import java.util.Optional;
+import java.util.logging.Logger;
/**
* Context for an operation (or suboperation) of the Orchestrator that needs to pass through to the backend,
@@ -15,29 +19,43 @@ import java.util.Optional;
*
* @author hakonhall
*/
-public class OrchestratorContext {
+public class OrchestratorContext implements AutoCloseable {
+ private static final Logger logger = Logger.getLogger(OrchestratorContext.class.getName());
private static final Duration DEFAULT_TIMEOUT_FOR_SINGLE_OP = Duration.ofSeconds(10);
private static final Duration DEFAULT_TIMEOUT_FOR_BATCH_OP = Duration.ofSeconds(60);
private static final Duration TIMEOUT_OVERHEAD = Duration.ofMillis(500);
+ private final Optional<OrchestratorContext> parent;
private final Clock clock;
private final TimeBudget timeBudget;
private final boolean probe;
+ private final boolean largeLocks;
+
+ // The key set is the set of applications locked by this context tree: Only the
+ // root context has a non-empty set. The value is an unlock callback to be called
+ // when the root context is closed.
+ private final HashMap<ApplicationInstanceReference, Runnable> locks = new HashMap<>();
/** Create an OrchestratorContext for operations on multiple applications. */
- public static OrchestratorContext createContextForMultiAppOp(Clock clock) {
- return new OrchestratorContext(clock, TimeBudget.fromNow(clock, DEFAULT_TIMEOUT_FOR_BATCH_OP), false);
+ public static OrchestratorContext createContextForMultiAppOp(Clock clock, boolean largeLocks) {
+ return new OrchestratorContext(null, clock, TimeBudget.fromNow(clock, DEFAULT_TIMEOUT_FOR_BATCH_OP), false, largeLocks);
}
/** Create an OrchestratorContext for an operation on a single application. */
public static OrchestratorContext createContextForSingleAppOp(Clock clock) {
- return new OrchestratorContext(clock, TimeBudget.fromNow(clock, DEFAULT_TIMEOUT_FOR_SINGLE_OP), false);
+ return new OrchestratorContext(null, clock, TimeBudget.fromNow(clock, DEFAULT_TIMEOUT_FOR_SINGLE_OP), false, false);
}
- private OrchestratorContext(Clock clock, TimeBudget timeBudget, boolean probe) {
+ private OrchestratorContext(OrchestratorContext parentOrNull,
+ Clock clock,
+ TimeBudget timeBudget,
+ boolean probe,
+ boolean largeLocks) {
+ this.parent = Optional.ofNullable(parentOrNull);
this.clock = clock;
this.timeBudget = timeBudget;
this.probe = probe;
+ this.largeLocks = largeLocks;
}
public Duration getTimeLeft() {
@@ -53,12 +71,47 @@ public class OrchestratorContext {
return probe;
}
+ /** Whether application locks acquired during probing of a batch suspend should be closed after the non-probe is done. */
+ public boolean largeLocks() { return largeLocks; }
+
+ /**
+ * Returns true if 1. large locks is enabled, and 2.
+ * {@link #registerLockAcquisition(ApplicationInstanceReference, Runnable) registerLockAcquisition}
+ * has been invoked on any context below the root context that returned true.
+ */
+ public boolean hasLock(ApplicationInstanceReference application) {
+ return parent.map(p -> p.hasLock(application)).orElseGet(() -> locks.containsKey(application));
+ }
+
+ /**
+ * Returns true if large locks is enabled in the root context, and in case the unlock callback
+ * will be invoked when the root context is closed.
+ */
+ public boolean registerLockAcquisition(ApplicationInstanceReference application, Runnable unlock) {
+ if (parent.isPresent()) {
+ return parent.get().registerLockAcquisition(application, unlock);
+ }
+
+ if (!largeLocks) {
+ return false;
+ }
+
+ if (locks.containsKey(application)) {
+ unlock.run();
+ throw new IllegalStateException("Application " + application + " was already associated with a lock");
+ }
+
+ locks.put(application, unlock);
+
+ return true;
+ }
+
/** Create OrchestratorContext to use within an application lock. */
public OrchestratorContext createSubcontextWithinLock() {
// Move deadline towards past by a fixed amount to ensure there's time to process exceptions and
// access ZooKeeper before the lock times out.
TimeBudget subTimeBudget = timeBudget.withDeadline(timeBudget.deadline().get().minus(TIMEOUT_OVERHEAD));
- return new OrchestratorContext(clock, subTimeBudget, probe);
+ return new OrchestratorContext(this, clock, subTimeBudget, probe, largeLocks);
}
/** Create an OrchestratorContext for an operation on a single application, but limited to current timeout. */
@@ -70,9 +123,18 @@ public class OrchestratorContext {
deadline = maxDeadline;
}
- return new OrchestratorContext(
- clock,
- TimeBudget.from(clock, now, Optional.of(Duration.between(now, deadline))),
- probe);
+ TimeBudget timeBudget = TimeBudget.from(clock, now, Optional.of(Duration.between(now, deadline)));
+ return new OrchestratorContext(this, clock, timeBudget, probe, largeLocks);
+ }
+
+ @Override
+ public void close() {
+ locks.forEach((application, unlock) -> {
+ try {
+ unlock.run();
+ } catch (RuntimeException e) {
+ logger.log(LogLevel.ERROR, "Failed run on close : " + e.getMessage());
+ }
+ });
}
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
index 05268232119..244fd4d9b5d 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
@@ -12,6 +12,10 @@ import com.yahoo.vespa.applicationmodel.ClusterId;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.applicationmodel.ServiceCluster;
import com.yahoo.vespa.applicationmodel.ServiceInstance;
+import com.yahoo.vespa.flags.BooleanFlag;
+import com.yahoo.vespa.flags.FetchVector;
+import com.yahoo.vespa.flags.FlagSource;
+import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.orchestrator.config.OrchestratorConfig;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerClient;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory;
@@ -60,13 +64,15 @@ public class OrchestratorImpl implements Orchestrator {
private final ClusterControllerClientFactory clusterControllerClientFactory;
private final Clock clock;
private final ApplicationApiFactory applicationApiFactory;
+ private final BooleanFlag enableLargeOrchestratorLocks;
@Inject
public OrchestratorImpl(ClusterControllerClientFactory clusterControllerClientFactory,
StatusService statusService,
OrchestratorConfig orchestratorConfig,
InstanceLookupService instanceLookupService,
- ConfigserverConfig configServerConfig)
+ ConfigserverConfig configServerConfig,
+ FlagSource flagSource)
{
this(new HostedVespaPolicy(new HostedVespaClusterPolicy(), clusterControllerClientFactory, new ApplicationApiFactory(configServerConfig.zookeeperserver().size())),
clusterControllerClientFactory,
@@ -74,7 +80,8 @@ public class OrchestratorImpl implements Orchestrator {
instanceLookupService,
orchestratorConfig.serviceMonitorConvergenceLatencySeconds(),
Clock.systemUTC(),
- new ApplicationApiFactory(configServerConfig.zookeeperserver().size()));
+ new ApplicationApiFactory(configServerConfig.zookeeperserver().size()),
+ flagSource);
}
public OrchestratorImpl(Policy policy,
@@ -83,7 +90,8 @@ public class OrchestratorImpl implements Orchestrator {
InstanceLookupService instanceLookupService,
int serviceMonitorConvergenceLatencySeconds,
Clock clock,
- ApplicationApiFactory applicationApiFactory)
+ ApplicationApiFactory applicationApiFactory,
+ FlagSource flagSource)
{
this.policy = policy;
this.clusterControllerClientFactory = clusterControllerClientFactory;
@@ -92,6 +100,7 @@ public class OrchestratorImpl implements Orchestrator {
this.instanceLookupService = instanceLookupService;
this.clock = clock;
this.applicationApiFactory = applicationApiFactory;
+ this.enableLargeOrchestratorLocks = Flags.ENABLE_LARGE_ORCHESTRATOR_LOCKS.bindTo(flagSource);
}
@Override
@@ -231,17 +240,20 @@ public class OrchestratorImpl implements Orchestrator {
@Override
public void suspendAll(HostName parentHostname, List<HostName> hostNames)
throws BatchHostStateChangeDeniedException, BatchHostNameNotFoundException, BatchInternalErrorException {
- OrchestratorContext context = OrchestratorContext.createContextForMultiAppOp(clock);
+ boolean largeLocks = enableLargeOrchestratorLocks
+ .with(FetchVector.Dimension.HOSTNAME, parentHostname.s())
+ .value();
+ try (OrchestratorContext context = OrchestratorContext.createContextForMultiAppOp(clock, largeLocks)) {
+ List<NodeGroup> nodeGroupsOrderedByApplication;
+ try {
+ nodeGroupsOrderedByApplication = nodeGroupsOrderedForSuspend(hostNames);
+ } catch (HostNameNotFoundException e) {
+ throw new BatchHostNameNotFoundException(parentHostname, hostNames, e);
+ }
- List<NodeGroup> nodeGroupsOrderedByApplication;
- try {
- nodeGroupsOrderedByApplication = nodeGroupsOrderedForSuspend(hostNames);
- } catch (HostNameNotFoundException e) {
- throw new BatchHostNameNotFoundException(parentHostname, hostNames, e);
+ suspendAllNodeGroups(context, parentHostname, nodeGroupsOrderedByApplication, true);
+ suspendAllNodeGroups(context, parentHostname, nodeGroupsOrderedByApplication, false);
}
-
- suspendAllNodeGroups(context, parentHostname, nodeGroupsOrderedByApplication, true);
- suspendAllNodeGroups(context, parentHostname, nodeGroupsOrderedByApplication, false);
}
private void suspendAllNodeGroups(OrchestratorContext context,
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java
index e192f25d68c..68862859615 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java
@@ -75,6 +75,14 @@ public class ZookeeperStatusService implements StatusService {
});
}
+ /** Non-private for testing only. */
+ ZookeeperStatusService(Curator curator, Metric metric, Timer timer, HostInfosCache hostInfosCache) {
+ this.curator = curator;
+ this.metric = metric;
+ this.timer = timer;
+ this.hostInfosCache = hostInfosCache;
+ }
+
@Override
public Set<ApplicationInstanceReference> getAllSuspendedApplications() {
try {
@@ -123,6 +131,35 @@ public class ZookeeperStatusService implements StatusService {
public MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly(
OrchestratorContext context,
ApplicationInstanceReference applicationInstanceReference) throws UncheckedTimeoutException {
+ Runnable onRegistryClose;
+
+ // A multi-application operation, aka batch suspension, will first issue a probe
+ // then a non-probe. With "large locks", the lock is not release in between -
+ // no lock is taken on the non-probe. Instead, the release is done on the multi-application
+ // context close.
+ if (context.hasLock(applicationInstanceReference)) {
+ onRegistryClose = () -> {};
+ } else {
+ Runnable unlock = acquireLock(context, applicationInstanceReference);
+ if (context.registerLockAcquisition(applicationInstanceReference, unlock)) {
+ onRegistryClose = () -> {};
+ } else {
+ onRegistryClose = unlock;
+ }
+ }
+
+ try {
+ return new ZkMutableStatusRegistry(onRegistryClose, applicationInstanceReference, context.isProbe());
+ } catch (Throwable t) {
+ // In case the constructor throws an exception.
+ onRegistryClose.run();
+ throw t;
+ }
+ }
+
+ private Runnable acquireLock(OrchestratorContext context,
+ ApplicationInstanceReference applicationInstanceReference)
+ throws UncheckedTimeoutException {
ApplicationId applicationId = OrchestratorUtil.toApplicationId(applicationInstanceReference);
String app = applicationId.application().value() + "." + applicationId.instance().value();
Map<String, String> dimensions = Map.of(
@@ -152,20 +189,21 @@ public class ZookeeperStatusService implements StatusService {
metric.add(acquireResultMetricName, 1, metricContext);
}
- Runnable updateLockHoldMetric = () -> {
+ return () -> {
+ try {
+ lock.close();
+ } catch (RuntimeException e) {
+ // We may want to avoid logging some exceptions that may be expected, like when session expires.
+ log.log(LogLevel.WARNING,
+ "Failed to close application lock for " +
+ ZookeeperStatusService.class.getSimpleName() + ", will ignore and continue",
+ e);
+ }
+
Instant lockReleasedTime = timer.currentTime();
double seconds = durationInSeconds(acquireEndTime, lockReleasedTime);
metric.set("orchestrator.lock.hold-latency", seconds, metricContext);
};
-
- try {
- return new ZkMutableStatusRegistry(lock, applicationInstanceReference, context.isProbe(), updateLockHoldMetric);
- } catch (Throwable t) {
- // In case the constructor throws an exception.
- updateLockHoldMetric.run();
- lock.close();
- throw t;
- }
}
private double durationInSeconds(Instant startInstant, Instant endInstant) {
@@ -337,19 +375,16 @@ public class ZookeeperStatusService implements StatusService {
private class ZkMutableStatusRegistry implements MutableStatusRegistry {
- private final Lock lock;
+ private final Runnable onClose;
private final ApplicationInstanceReference applicationInstanceReference;
private final boolean probe;
- private final Runnable onLockRelease;
- public ZkMutableStatusRegistry(Lock lock,
+ public ZkMutableStatusRegistry(Runnable onClose,
ApplicationInstanceReference applicationInstanceReference,
- boolean probe,
- Runnable onLockRelease) {
- this.lock = lock;
+ boolean probe) {
+ this.onClose = onClose;
this.applicationInstanceReference = applicationInstanceReference;
this.probe = probe;
- this.onLockRelease = onLockRelease;
}
@Override
@@ -399,13 +434,12 @@ public class ZookeeperStatusService implements StatusService {
@Override
public void close() {
- onLockRelease.run();
try {
- lock.close();
+ onClose.run();
} catch (RuntimeException e) {
// We may want to avoid logging some exceptions that may be expected, like when session expires.
log.log(LogLevel.WARNING,
- "Failed to close application lock for " +
+ "Failed close application lock in " +
ZookeeperStatusService.class.getSimpleName() + ", will ignore and continue",
e);
}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorContextTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorContextTest.java
new file mode 100644
index 00000000000..607894ee104
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorContextTest.java
@@ -0,0 +1,59 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator;
+
+import com.yahoo.test.ManualClock;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.TenantId;
+import org.junit.Test;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author hakonhall
+ */
+public class OrchestratorContextTest {
+ private final ApplicationInstanceReference application = new ApplicationInstanceReference(
+ new TenantId("tenant"),
+ new ApplicationInstanceId("app:dev:us-east-1:default"));
+
+ @Test
+ public void testLargeLocks() {
+ var mutable = new Object() { boolean locked = true; };
+ Runnable unlock = () -> mutable.locked = false;
+
+ try (OrchestratorContext rootContext = OrchestratorContext.createContextForMultiAppOp(new ManualClock(), true)) {
+ try (OrchestratorContext probeContext = rootContext.createSubcontextForSingleAppOp(true)) {
+ assertFalse(probeContext.hasLock(application));
+ assertTrue(probeContext.registerLockAcquisition(application, unlock));
+
+ assertTrue(probeContext.hasLock(application));
+ assertTrue(mutable.locked);
+ }
+
+ try (OrchestratorContext nonProbeContext = rootContext.createSubcontextForSingleAppOp(false)) {
+ assertTrue(nonProbeContext.hasLock(application));
+ assertTrue(mutable.locked);
+ }
+
+ assertTrue(mutable.locked);
+ }
+ assertFalse(mutable.locked);
+ }
+
+ @Test
+ public void testLargeLocksDisabled() {
+ var mutable = new Object() { boolean locked = true; };
+ Runnable unlock = () -> mutable.locked = false;
+
+ try (OrchestratorContext rootContext = OrchestratorContext.createContextForMultiAppOp(new ManualClock(), false)) {
+ try (OrchestratorContext probeContext = rootContext.createSubcontextForSingleAppOp(true)) {
+ assertFalse(probeContext.hasLock(application));
+ assertFalse(probeContext.registerLockAcquisition(application, unlock));
+ }
+ }
+
+ assertTrue(mutable.locked);
+ }
+} \ No newline at end of file
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
index 45d4c531898..a2c99b86ae2 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
@@ -17,6 +17,8 @@ import com.yahoo.vespa.applicationmodel.ServiceStatus;
import com.yahoo.vespa.applicationmodel.ServiceType;
import com.yahoo.vespa.applicationmodel.TenantId;
import com.yahoo.vespa.curator.mock.MockCurator;
+import com.yahoo.vespa.flags.Flags;
+import com.yahoo.vespa.flags.InMemoryFlagSource;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactoryMock;
import com.yahoo.vespa.orchestrator.model.ApplicationApiFactory;
@@ -26,6 +28,7 @@ import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
import com.yahoo.vespa.orchestrator.policy.HostedVespaClusterPolicy;
import com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy;
import com.yahoo.vespa.orchestrator.status.HostStatus;
+import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry;
import com.yahoo.vespa.orchestrator.status.StatusService;
import com.yahoo.vespa.orchestrator.status.ZookeeperStatusService;
import com.yahoo.vespa.service.monitor.ServiceModel;
@@ -36,7 +39,9 @@ import org.mockito.InOrder;
import java.util.Arrays;
import java.util.Iterator;
+import java.util.List;
import java.util.Map;
+import java.util.Optional;
import java.util.Set;
import static com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN;
@@ -55,6 +60,11 @@ import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.inOrder;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.when;
+import static org.mockito.internal.verification.VerificationModeFactory.atLeastOnce;
/**
* Test Orchestrator with a mock backend (the MockCurator)
@@ -64,6 +74,7 @@ import static org.mockito.Mockito.spy;
public class OrchestratorImplTest {
private final ApplicationApiFactory applicationApiFactory = new ApplicationApiFactory(3);
+ private final InMemoryFlagSource flagSource = new InMemoryFlagSource();
private ApplicationId app1;
private ApplicationId app2;
@@ -88,7 +99,8 @@ public class OrchestratorImplTest {
new DummyInstanceLookupService(),
0,
new ManualClock(),
- applicationApiFactory);
+ applicationApiFactory,
+ flagSource);
clustercontroller.setAllDummyNodesAsUp();
}
@@ -312,6 +324,72 @@ public class OrchestratorImplTest {
}
@Test
+ public void testLargeLocks() throws Exception {
+ flagSource.withBooleanFlag(Flags.ENABLE_LARGE_ORCHESTRATOR_LOCKS.id(), true);
+
+ var tenantId = new TenantId("tenant");
+ var applicationInstanceId = new ApplicationInstanceId("app:dev:us-east-1:default");
+ var applicationInstanceReference = new ApplicationInstanceReference(tenantId, applicationInstanceId);
+
+ var policy = mock(HostedVespaPolicy.class);
+ var zookeeperStatusService = mock(ZookeeperStatusService.class);
+ var instanceLookupService = mock(InstanceLookupService.class);
+ var applicationInstance = mock(ApplicationInstance.class);
+ var clusterControllerClientFactory = mock(ClusterControllerClientFactory.class);
+ var clock = new ManualClock();
+ var applicationApiFactory = mock(ApplicationApiFactory.class);
+ var hostStatusRegistry = mock(MutableStatusRegistry.class);
+
+ when(instanceLookupService.findInstanceByHost(any())).thenReturn(Optional.of(applicationInstance));
+ when(applicationInstance.reference()).thenReturn(applicationInstanceReference);
+ when(zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(any(), any()))
+ .thenReturn(hostStatusRegistry);
+ when(hostStatusRegistry.getStatus()).thenReturn(NO_REMARKS);
+
+ var orchestrator = new OrchestratorImpl(
+ policy,
+ clusterControllerClientFactory,
+ zookeeperStatusService,
+ instanceLookupService,
+ 20,
+ clock,
+ applicationApiFactory,
+ flagSource);
+
+ HostName parentHostname = new HostName("parent.vespa.ai");
+
+ orchestrator.suspendAll(parentHostname, List.of(parentHostname));
+
+ ArgumentCaptor<OrchestratorContext> contextCaptor = ArgumentCaptor.forClass(OrchestratorContext.class);
+ verify(zookeeperStatusService, times(2)).lockApplicationInstance_forCurrentThreadOnly(contextCaptor.capture(), any());
+ List<OrchestratorContext> contexts = contextCaptor.getAllValues();
+
+ // First invocation is probe, second is not.
+ assertEquals(2, contexts.size());
+ assertTrue(contexts.get(0).isProbe());
+ assertTrue(contexts.get(0).largeLocks());
+ assertFalse(contexts.get(1).isProbe());
+ assertTrue(contexts.get(1).largeLocks());
+
+ verify(applicationApiFactory, times(2)).create(any(), any(), any());
+ verify(policy, times(2)).grantSuspensionRequest(any(), any());
+ verify(instanceLookupService, atLeastOnce()).findInstanceByHost(any());
+ verify(hostStatusRegistry, times(2)).getStatus();
+
+ // Each zookeeperStatusService that is created, is closed.
+ verify(zookeeperStatusService, times(2)).lockApplicationInstance_forCurrentThreadOnly(any(), any());
+ verify(hostStatusRegistry, times(2)).close();
+
+ verifyNoMoreInteractions(
+ policy,
+ clusterControllerClientFactory,
+ zookeeperStatusService,
+ hostStatusRegistry,
+ instanceLookupService,
+ applicationApiFactory);
+ }
+
+ @Test
public void testGetHost() throws Exception {
ClusterControllerClientFactory clusterControllerClientFactory = new ClusterControllerClientFactoryMock();
StatusService statusService = new ZookeeperStatusService(new MockCurator(), mock(Metric.class), new TestTimer());
@@ -349,7 +427,8 @@ public class OrchestratorImplTest {
lookupService,
0,
new ManualClock(),
- applicationApiFactory);
+ applicationApiFactory,
+ flagSource);
orchestrator.setNodeStatus(hostName, HostStatus.ALLOWED_TO_BE_DOWN);
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java
index 87e5f226c42..eff222bc074 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java
@@ -16,6 +16,7 @@ import com.yahoo.vespa.applicationmodel.ServiceStatus;
import com.yahoo.vespa.applicationmodel.ServiceType;
import com.yahoo.vespa.applicationmodel.TenantId;
import com.yahoo.vespa.curator.mock.MockCurator;
+import com.yahoo.vespa.flags.InMemoryFlagSource;
import com.yahoo.vespa.orchestrator.OrchestrationException;
import com.yahoo.vespa.orchestrator.Orchestrator;
import com.yahoo.vespa.orchestrator.OrchestratorContext;
@@ -52,6 +53,7 @@ class ModelTestUtils {
public static final int NUMBER_OF_CONFIG_SERVERS = 3;
+ private final InMemoryFlagSource flagSource = new InMemoryFlagSource();
private final Map<ApplicationInstanceReference, ApplicationInstance> applications = new HashMap<>();
private final ClusterControllerClientFactory clusterControllerClientFactory = new ClusterControllerClientFactoryMock();
private final Map<HostName, HostStatus> hostStatusMap = new HashMap<>();
@@ -62,7 +64,8 @@ class ModelTestUtils {
new ServiceMonitorInstanceLookupService(() -> new ServiceModel(applications)),
0,
new ManualClock(),
- applicationApiFactory());
+ applicationApiFactory(),
+ flagSource);
ApplicationApiFactory applicationApiFactory() {
return new ApplicationApiFactory(NUMBER_OF_CONFIG_SERVERS);
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResourceTest.java
index 89f421e9125..2996220561c 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResourceTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResourceTest.java
@@ -158,6 +158,7 @@ public class ApplicationSuspensionResourceTest {
" <config name=\"container.handler.threadpool\">\n" +
" <maxthreads>10</maxthreads>\n" +
" </config>\n" +
+ " <component id=\"com.yahoo.vespa.flags.InMemoryFlagSource\" bundle=\"flags\" />\n" +
" <component id=\"com.yahoo.vespa.curator.mock.MockCurator\" bundle=\"zkfacade\" />\n" +
" <component id=\"com.yahoo.vespa.orchestrator.status.ZookeeperStatusService\" bundle=\"orchestrator\" />\n" +
" <component id=\"com.yahoo.vespa.orchestrator.DummyInstanceLookupService\" bundle=\"orchestrator\" />\n" +
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java
index fec1554396d..b19f96a5867 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java
@@ -16,6 +16,7 @@ import com.yahoo.vespa.applicationmodel.ServiceStatus;
import com.yahoo.vespa.applicationmodel.ServiceType;
import com.yahoo.vespa.applicationmodel.TenantId;
import com.yahoo.vespa.curator.mock.MockCurator;
+import com.yahoo.vespa.flags.InMemoryFlagSource;
import com.yahoo.vespa.orchestrator.BatchHostNameNotFoundException;
import com.yahoo.vespa.orchestrator.BatchInternalErrorException;
import com.yahoo.vespa.orchestrator.Host;
@@ -90,6 +91,8 @@ public class HostResourceTest {
makeServiceClusterSet())));
}
+ private final InMemoryFlagSource flagSource = new InMemoryFlagSource();
+
private static final InstanceLookupService alwaysEmptyInstanceLookUpService = new InstanceLookupService() {
@Override
public Optional<ApplicationInstance> findInstanceById(
@@ -129,23 +132,23 @@ public class HostResourceTest {
}
}
- private static final OrchestratorImpl alwaysAllowOrchestrator = new OrchestratorImpl(
+ private final OrchestratorImpl alwaysAllowOrchestrator = new OrchestratorImpl(
new AlwaysAllowPolicy(),
new ClusterControllerClientFactoryMock(),
EVERY_HOST_IS_UP_HOST_STATUS_SERVICE, mockInstanceLookupService,
SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS,
clock,
- applicationApiFactory
- );
+ applicationApiFactory,
+ flagSource);
- private static final OrchestratorImpl hostNotFoundOrchestrator = new OrchestratorImpl(
+ private final OrchestratorImpl hostNotFoundOrchestrator = new OrchestratorImpl(
new AlwaysAllowPolicy(),
new ClusterControllerClientFactoryMock(),
EVERY_HOST_IS_UP_HOST_STATUS_SERVICE, alwaysEmptyInstanceLookUpService,
SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS,
clock,
- applicationApiFactory
- );
+ applicationApiFactory,
+ flagSource);
private final UriInfo uriInfo = mock(UriInfo.class);
@@ -247,7 +250,8 @@ public class HostResourceTest {
EVERY_HOST_IS_UP_HOST_STATUS_SERVICE,mockInstanceLookupService,
SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS,
clock,
- applicationApiFactory);
+ applicationApiFactory,
+ flagSource);
try {
HostResource hostResource = new HostResource(alwaysRejectResolver, uriInfo);
@@ -267,7 +271,8 @@ public class HostResourceTest {
mockInstanceLookupService,
SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS,
clock,
- applicationApiFactory);
+ applicationApiFactory,
+ flagSource);
try {
HostSuspensionResource hostSuspensionResource = new HostSuspensionResource(alwaysRejectResolver);
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService2Test.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService2Test.java
new file mode 100644
index 00000000000..8f530f4abf3
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService2Test.java
@@ -0,0 +1,127 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.status;
+
+import com.yahoo.jdisc.Metric;
+import com.yahoo.jdisc.Timer;
+import com.yahoo.jdisc.test.TestTimer;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.TenantId;
+import com.yahoo.vespa.curator.Curator;
+import com.yahoo.vespa.orchestrator.OrchestratorContext;
+import org.apache.curator.framework.recipes.locks.InterProcessMutex;
+import org.junit.Test;
+import org.mockito.ArgumentCaptor;
+
+import java.time.Duration;
+
+import static org.junit.Assert.assertEquals;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyLong;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.when;
+
+/**
+ * @author hakonhall
+ */
+public class ZookeeperStatusService2Test {
+ private final Curator curator = mock(Curator.class);
+ private final Timer timer = new TestTimer();
+ private final Metric metric = mock(Metric.class);
+ private final HostInfosCache cache = mock(HostInfosCache.class);
+ private final ZookeeperStatusService zookeeperStatusService = new ZookeeperStatusService(curator, metric, timer, cache);
+
+ private final OrchestratorContext context = mock(OrchestratorContext.class);
+ private final InterProcessMutex mutex = mock(InterProcessMutex.class);
+ private final ApplicationInstanceReference reference = new ApplicationInstanceReference(
+ new TenantId("tenant"), new ApplicationInstanceId("app:dev:us-east-1:default"));
+
+ @Test
+ public void verifyLocks() throws Exception {
+ when(context.isProbe()).thenReturn(true);
+ when(context.hasLock(any())).thenReturn(false);
+ when(context.registerLockAcquisition(any(), any())).thenReturn(false);
+
+ when(curator.createMutex(any())).thenReturn(mutex);
+ when(mutex.acquire(anyLong(), any())).thenReturn(true);
+
+ when(context.getTimeLeft()).thenReturn(Duration.ofSeconds(12));
+
+ try (MutableStatusRegistry registry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(context, reference)) {
+ // nothing
+ }
+
+ verify(curator, times(1)).createMutex(any());
+ verify(mutex, times(1)).acquire(anyLong(), any());
+ verify(mutex, times(1)).release();
+ verify(context, times(1)).hasLock(any());
+ verify(context, times(1)).registerLockAcquisition(any(), any());
+ verifyNoMoreInteractions(mutex);
+
+ // Now the non-probe suspension
+
+ when(context.isProbe()).thenReturn(false);
+
+ try (MutableStatusRegistry registry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(context, reference)) {
+ // nothing
+ }
+
+ verify(mutex, times(2)).acquire(anyLong(), any());
+ verify(mutex, times(2)).release();
+ ArgumentCaptor<Runnable> runnableCaptor = ArgumentCaptor.forClass(Runnable.class);
+ verify(context, times(2)).hasLock(any());
+ verify(context, times(2)).registerLockAcquisition(any(), any());
+ verifyNoMoreInteractions(mutex);
+ }
+
+ @Test
+ public void verifyLargeLocks() throws Exception {
+ when(context.isProbe()).thenReturn(true);
+ when(context.hasLock(any())).thenReturn(false);
+ when(context.registerLockAcquisition(any(), any())).thenReturn(true);
+
+ when(curator.createMutex(any())).thenReturn(mutex);
+ when(mutex.acquire(anyLong(), any())).thenReturn(true);
+
+ when(context.getTimeLeft()).thenReturn(Duration.ofSeconds(12));
+
+ try (MutableStatusRegistry registry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(context, reference)) {
+ // nothing
+ }
+
+ verify(curator, times(1)).createMutex(any());
+ verify(mutex, times(1)).acquire(anyLong(), any());
+ verify(mutex, times(0)).release();
+ verify(context, times(1)).hasLock(any());
+ verify(context, times(1)).registerLockAcquisition(any(), any());
+ verifyNoMoreInteractions(mutex);
+
+ // Now the non-probe suspension
+
+ when(context.isProbe()).thenReturn(false);
+ when(context.hasLock(any())).thenReturn(true);
+ when(context.registerLockAcquisition(any(), any())).thenReturn(false);
+
+ try (MutableStatusRegistry registry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(context, reference)) {
+ // nothing
+ }
+
+ // No (additional) acquire, and no releases.
+ verify(mutex, times(1)).acquire(anyLong(), any());
+ verify(mutex, times(0)).release();
+ verify(context, times(2)).hasLock(any());
+ verify(context, times(1)).registerLockAcquisition(any(), any());
+ verifyNoMoreInteractions(mutex);
+
+ // Verify the context runnable releases the mutex
+ ArgumentCaptor<Runnable> runnableCaptor = ArgumentCaptor.forClass(Runnable.class);
+ verify(context, times(1)).registerLockAcquisition(any(), runnableCaptor.capture());
+ assertEquals(1, runnableCaptor.getAllValues().size());
+ runnableCaptor.getAllValues().forEach(Runnable::run);
+ verify(mutex, times(1)).acquire(anyLong(), any());
+ verify(mutex, times(1)).release();
+ }
+} \ No newline at end of file