From c5394b67824ff1838645b92b802c1c4889f57237 Mon Sep 17 00:00:00 2001 From: HÃ¥kon Hallingstad Date: Mon, 3 Feb 2020 18:52:05 +0100 Subject: Support large orchestrator lock --- .../src/main/java/com/yahoo/vespa/flags/Flags.java | 8 ++ orchestrator/pom.xml | 12 ++ .../vespa/orchestrator/OrchestratorContext.java | 45 ++++++-- .../yahoo/vespa/orchestrator/OrchestratorImpl.java | 36 ++++-- .../status/ZookeeperStatusService.java | 75 +++++++++---- .../vespa/orchestrator/OrchestratorImplTest.java | 87 ++++++++++++++- .../vespa/orchestrator/model/ModelTestUtils.java | 5 +- .../ApplicationSuspensionResourceTest.java | 1 + .../orchestrator/resources/HostResourceTest.java | 21 ++-- .../status/ZookeeperStatusService2Test.java | 121 +++++++++++++++++++++ .../status/ZookeeperStatusServiceTest.java | 10 ++ 11 files changed, 371 insertions(+), 50 deletions(-) create mode 100644 orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService2Test.java diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 5bda3559ccf..0a2ba901c8f 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -114,6 +114,14 @@ public class Flags { "scheduled evenly distributed in the 1x-2x range (and naturally guaranteed at the 2x boundary).", "Takes effect on next run of NodeRebooter"); + public static final UnboundBooleanFlag ENABLE_LARGE_ORCHESTRATOR_LOCKS = defineFeatureFlag( + "enable-large-orchestrator-locks", false, + "If enabled, the orchestrator will accumulate application locks during probe in batch suspension, " + + "and release them in reverse order only after the non-probe is complete. Can be set depending on " + + "parent hostname.", + "Takes immediate effect for new batch suspensions.", + HOSTNAME); + public static final UnboundBooleanFlag ENABLE_DYNAMIC_PROVISIONING = defineFeatureFlag( "enable-dynamic-provisioning", false, "Provision a new docker host when we otherwise can't allocate a docker node", diff --git a/orchestrator/pom.xml b/orchestrator/pom.xml index 3ebc87010fb..5dd4e7ea87d 100644 --- a/orchestrator/pom.xml +++ b/orchestrator/pom.xml @@ -40,6 +40,12 @@ ${project.version} provided + + com.yahoo.vespa + flags + ${project.version} + provided + com.yahoo.vespa vespajlib @@ -58,6 +64,12 @@ ${project.version} test + + com.yahoo.vespa + zookeeper-server-common + ${project.version} + test + junit junit diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java index d3bdaa6dc64..356d645fca4 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java @@ -1,13 +1,17 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.orchestrator; +import com.yahoo.log.LogLevel; import com.yahoo.time.TimeBudget; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientTimeouts; import java.time.Clock; import java.time.Duration; import java.time.Instant; +import java.util.ArrayList; +import java.util.List; import java.util.Optional; +import java.util.logging.Logger; /** * Context for an operation (or suboperation) of the Orchestrator that needs to pass through to the backend, @@ -15,31 +19,39 @@ import java.util.Optional; * * @author hakonhall */ -public class OrchestratorContext { +public class OrchestratorContext implements AutoCloseable { + private static final Logger logger = Logger.getLogger(OrchestratorContext.class.getName()); private static final Duration DEFAULT_TIMEOUT_FOR_SINGLE_OP = Duration.ofSeconds(10); private static final Duration DEFAULT_TIMEOUT_FOR_BATCH_OP = Duration.ofSeconds(60); private static final Duration TIMEOUT_OVERHEAD = Duration.ofMillis(500); private final Clock clock; + private final boolean partOfMultiAppOp; private final TimeBudget timeBudget; private final boolean probe; + private final boolean largeLocks; + private final List onClose = new ArrayList<>(); /** Create an OrchestratorContext for operations on multiple applications. */ - public static OrchestratorContext createContextForMultiAppOp(Clock clock) { - return new OrchestratorContext(clock, TimeBudget.fromNow(clock, DEFAULT_TIMEOUT_FOR_BATCH_OP), false); + public static OrchestratorContext createContextForMultiAppOp(Clock clock, boolean largeLocks) { + return new OrchestratorContext(clock, true, TimeBudget.fromNow(clock, DEFAULT_TIMEOUT_FOR_BATCH_OP), false, largeLocks); } /** Create an OrchestratorContext for an operation on a single application. */ public static OrchestratorContext createContextForSingleAppOp(Clock clock) { - return new OrchestratorContext(clock, TimeBudget.fromNow(clock, DEFAULT_TIMEOUT_FOR_SINGLE_OP), false); + return new OrchestratorContext(clock, false, TimeBudget.fromNow(clock, DEFAULT_TIMEOUT_FOR_SINGLE_OP), false, false); } - private OrchestratorContext(Clock clock, TimeBudget timeBudget, boolean probe) { + private OrchestratorContext(Clock clock, boolean partOfMultiAppOp, TimeBudget timeBudget, boolean probe, boolean largeLocks) { this.clock = clock; + this.partOfMultiAppOp = partOfMultiAppOp; this.timeBudget = timeBudget; this.probe = probe; + this.largeLocks = largeLocks; } + public boolean partOfMultiAppOp() { return partOfMultiAppOp; } + public Duration getTimeLeft() { return timeBudget.timeLeftOrThrow().get(); } @@ -53,12 +65,15 @@ public class OrchestratorContext { return probe; } + /** Whether application locks acquired during probing of a batch suspend should be closed after the non-probe is done. */ + public boolean largeLocks() { return largeLocks; } + /** Create OrchestratorContext to use within an application lock. */ public OrchestratorContext createSubcontextWithinLock() { // Move deadline towards past by a fixed amount to ensure there's time to process exceptions and // access ZooKeeper before the lock times out. TimeBudget subTimeBudget = timeBudget.withDeadline(timeBudget.deadline().get().minus(TIMEOUT_OVERHEAD)); - return new OrchestratorContext(clock, subTimeBudget, probe); + return new OrchestratorContext(clock, partOfMultiAppOp, subTimeBudget, probe, false); } /** Create an OrchestratorContext for an operation on a single application, but limited to current timeout. */ @@ -72,7 +87,23 @@ public class OrchestratorContext { return new OrchestratorContext( clock, + partOfMultiAppOp, TimeBudget.from(clock, now, Optional.of(Duration.between(now, deadline))), - probe); + probe, + false); + } + + public void runOnClose(Runnable runnable) { onClose.add(runnable); } + + @Override + public void close() { + int i = onClose.size(); + while (i --> 0) { + try { + onClose.get(i).run(); + } catch (RuntimeException e) { + logger.log(LogLevel.ERROR, "Failed run on close : " + e.getMessage()); + } + } } } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java index 05268232119..244fd4d9b5d 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java @@ -12,6 +12,10 @@ import com.yahoo.vespa.applicationmodel.ClusterId; import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.applicationmodel.ServiceCluster; import com.yahoo.vespa.applicationmodel.ServiceInstance; +import com.yahoo.vespa.flags.BooleanFlag; +import com.yahoo.vespa.flags.FetchVector; +import com.yahoo.vespa.flags.FlagSource; +import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.orchestrator.config.OrchestratorConfig; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClient; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory; @@ -60,13 +64,15 @@ public class OrchestratorImpl implements Orchestrator { private final ClusterControllerClientFactory clusterControllerClientFactory; private final Clock clock; private final ApplicationApiFactory applicationApiFactory; + private final BooleanFlag enableLargeOrchestratorLocks; @Inject public OrchestratorImpl(ClusterControllerClientFactory clusterControllerClientFactory, StatusService statusService, OrchestratorConfig orchestratorConfig, InstanceLookupService instanceLookupService, - ConfigserverConfig configServerConfig) + ConfigserverConfig configServerConfig, + FlagSource flagSource) { this(new HostedVespaPolicy(new HostedVespaClusterPolicy(), clusterControllerClientFactory, new ApplicationApiFactory(configServerConfig.zookeeperserver().size())), clusterControllerClientFactory, @@ -74,7 +80,8 @@ public class OrchestratorImpl implements Orchestrator { instanceLookupService, orchestratorConfig.serviceMonitorConvergenceLatencySeconds(), Clock.systemUTC(), - new ApplicationApiFactory(configServerConfig.zookeeperserver().size())); + new ApplicationApiFactory(configServerConfig.zookeeperserver().size()), + flagSource); } public OrchestratorImpl(Policy policy, @@ -83,7 +90,8 @@ public class OrchestratorImpl implements Orchestrator { InstanceLookupService instanceLookupService, int serviceMonitorConvergenceLatencySeconds, Clock clock, - ApplicationApiFactory applicationApiFactory) + ApplicationApiFactory applicationApiFactory, + FlagSource flagSource) { this.policy = policy; this.clusterControllerClientFactory = clusterControllerClientFactory; @@ -92,6 +100,7 @@ public class OrchestratorImpl implements Orchestrator { this.instanceLookupService = instanceLookupService; this.clock = clock; this.applicationApiFactory = applicationApiFactory; + this.enableLargeOrchestratorLocks = Flags.ENABLE_LARGE_ORCHESTRATOR_LOCKS.bindTo(flagSource); } @Override @@ -231,17 +240,20 @@ public class OrchestratorImpl implements Orchestrator { @Override public void suspendAll(HostName parentHostname, List hostNames) throws BatchHostStateChangeDeniedException, BatchHostNameNotFoundException, BatchInternalErrorException { - OrchestratorContext context = OrchestratorContext.createContextForMultiAppOp(clock); + boolean largeLocks = enableLargeOrchestratorLocks + .with(FetchVector.Dimension.HOSTNAME, parentHostname.s()) + .value(); + try (OrchestratorContext context = OrchestratorContext.createContextForMultiAppOp(clock, largeLocks)) { + List nodeGroupsOrderedByApplication; + try { + nodeGroupsOrderedByApplication = nodeGroupsOrderedForSuspend(hostNames); + } catch (HostNameNotFoundException e) { + throw new BatchHostNameNotFoundException(parentHostname, hostNames, e); + } - List nodeGroupsOrderedByApplication; - try { - nodeGroupsOrderedByApplication = nodeGroupsOrderedForSuspend(hostNames); - } catch (HostNameNotFoundException e) { - throw new BatchHostNameNotFoundException(parentHostname, hostNames, e); + suspendAllNodeGroups(context, parentHostname, nodeGroupsOrderedByApplication, true); + suspendAllNodeGroups(context, parentHostname, nodeGroupsOrderedByApplication, false); } - - suspendAllNodeGroups(context, parentHostname, nodeGroupsOrderedByApplication, true); - suspendAllNodeGroups(context, parentHostname, nodeGroupsOrderedByApplication, false); } private void suspendAllNodeGroups(OrchestratorContext context, diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java index e192f25d68c..4bfd2b3867e 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java @@ -75,6 +75,14 @@ public class ZookeeperStatusService implements StatusService { }); } + /** Non-private for testing only. */ + ZookeeperStatusService(Curator curator, Metric metric, Timer timer, HostInfosCache hostInfosCache) { + this.curator = curator; + this.metric = metric; + this.timer = timer; + this.hostInfosCache = hostInfosCache; + } + @Override public Set getAllSuspendedApplications() { try { @@ -123,6 +131,36 @@ public class ZookeeperStatusService implements StatusService { public MutableStatusRegistry lockApplicationInstance_forCurrentThreadOnly( OrchestratorContext context, ApplicationInstanceReference applicationInstanceReference) throws UncheckedTimeoutException { + Runnable onRegistryClose; + + // A multi-application operation, aka batch suspension, will first issue a probe + // then a non-probe. With "large locks", the lock is not release in between - + // no lock is taken on the non-probe. Instead, the release is done on the multi-application + // context close. + if (!context.largeLocks() || !context.partOfMultiAppOp() || context.isProbe()) { + Runnable unlock = acquireLock(context, applicationInstanceReference); + if (context.largeLocks() && context.isProbe()) { + context.runOnClose(unlock); + onRegistryClose = () -> {}; + } else { + onRegistryClose = unlock; + } + } else { + onRegistryClose = () -> {}; + } + + try { + return new ZkMutableStatusRegistry(onRegistryClose, applicationInstanceReference, context.isProbe()); + } catch (Throwable t) { + // In case the constructor throws an exception. + onRegistryClose.run(); + throw t; + } + } + + private Runnable acquireLock(OrchestratorContext context, + ApplicationInstanceReference applicationInstanceReference) + throws UncheckedTimeoutException { ApplicationId applicationId = OrchestratorUtil.toApplicationId(applicationInstanceReference); String app = applicationId.application().value() + "." + applicationId.instance().value(); Map dimensions = Map.of( @@ -152,20 +190,21 @@ public class ZookeeperStatusService implements StatusService { metric.add(acquireResultMetricName, 1, metricContext); } - Runnable updateLockHoldMetric = () -> { + return () -> { + try { + lock.close(); + } catch (RuntimeException e) { + // We may want to avoid logging some exceptions that may be expected, like when session expires. + log.log(LogLevel.WARNING, + "Failed to close application lock for " + + ZookeeperStatusService.class.getSimpleName() + ", will ignore and continue", + e); + } + Instant lockReleasedTime = timer.currentTime(); double seconds = durationInSeconds(acquireEndTime, lockReleasedTime); metric.set("orchestrator.lock.hold-latency", seconds, metricContext); }; - - try { - return new ZkMutableStatusRegistry(lock, applicationInstanceReference, context.isProbe(), updateLockHoldMetric); - } catch (Throwable t) { - // In case the constructor throws an exception. - updateLockHoldMetric.run(); - lock.close(); - throw t; - } } private double durationInSeconds(Instant startInstant, Instant endInstant) { @@ -337,19 +376,16 @@ public class ZookeeperStatusService implements StatusService { private class ZkMutableStatusRegistry implements MutableStatusRegistry { - private final Lock lock; + private final Runnable onClose; private final ApplicationInstanceReference applicationInstanceReference; private final boolean probe; - private final Runnable onLockRelease; - public ZkMutableStatusRegistry(Lock lock, + public ZkMutableStatusRegistry(Runnable onClose, ApplicationInstanceReference applicationInstanceReference, - boolean probe, - Runnable onLockRelease) { - this.lock = lock; + boolean probe) { + this.onClose = onClose; this.applicationInstanceReference = applicationInstanceReference; this.probe = probe; - this.onLockRelease = onLockRelease; } @Override @@ -399,13 +435,12 @@ public class ZookeeperStatusService implements StatusService { @Override public void close() { - onLockRelease.run(); try { - lock.close(); + onClose.run(); } catch (RuntimeException e) { // We may want to avoid logging some exceptions that may be expected, like when session expires. log.log(LogLevel.WARNING, - "Failed to close application lock for " + + "Failed close application lock in " + ZookeeperStatusService.class.getSimpleName() + ", will ignore and continue", e); } diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java index 45d4c531898..73e479d290b 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java @@ -16,7 +16,9 @@ import com.yahoo.vespa.applicationmodel.ServiceInstance; import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.applicationmodel.TenantId; +import com.yahoo.vespa.curator.Curator; import com.yahoo.vespa.curator.mock.MockCurator; +import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactoryMock; import com.yahoo.vespa.orchestrator.model.ApplicationApiFactory; @@ -25,18 +27,26 @@ import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException; import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; import com.yahoo.vespa.orchestrator.policy.HostedVespaClusterPolicy; import com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy; +import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus; import com.yahoo.vespa.orchestrator.status.HostStatus; +import com.yahoo.vespa.orchestrator.status.MutableStatusRegistry; import com.yahoo.vespa.orchestrator.status.StatusService; import com.yahoo.vespa.orchestrator.status.ZookeeperStatusService; import com.yahoo.vespa.service.monitor.ServiceModel; +import org.apache.curator.framework.recipes.atomic.AtomicValue; +import org.apache.curator.framework.recipes.atomic.DistributedAtomicLong; +import org.apache.curator.framework.recipes.locks.InterProcessLock; import org.junit.Before; import org.junit.Test; import org.mockito.ArgumentCaptor; import org.mockito.InOrder; +import java.time.Clock; import java.util.Arrays; import java.util.Iterator; +import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import static com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN; @@ -54,7 +64,13 @@ import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.inOrder; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; +import static org.mockito.internal.verification.VerificationModeFactory.atLeastOnce; /** * Test Orchestrator with a mock backend (the MockCurator) @@ -64,6 +80,7 @@ import static org.mockito.Mockito.spy; public class OrchestratorImplTest { private final ApplicationApiFactory applicationApiFactory = new ApplicationApiFactory(3); + private final InMemoryFlagSource flagSource = new InMemoryFlagSource(); private ApplicationId app1; private ApplicationId app2; @@ -88,7 +105,8 @@ public class OrchestratorImplTest { new DummyInstanceLookupService(), 0, new ManualClock(), - applicationApiFactory); + applicationApiFactory, + flagSource); clustercontroller.setAllDummyNodesAsUp(); } @@ -311,6 +329,70 @@ public class OrchestratorImplTest { order.verifyNoMoreInteractions(); } + @Test + public void testLargeLocks() throws Exception { + var tenantId = new TenantId("tenant"); + var applicationInstanceId = new ApplicationInstanceId("app:dev:us-east-1:default"); + var applicationInstanceReference = new ApplicationInstanceReference(tenantId, applicationInstanceId); + + var policy = mock(HostedVespaPolicy.class); + var zookeeperStatusService = mock(ZookeeperStatusService.class); + var instanceLookupService = mock(InstanceLookupService.class); + var applicationInstance = mock(ApplicationInstance.class); + var clusterControllerClientFactory = mock(ClusterControllerClientFactory.class); + var clock = new ManualClock(); + var applicationApiFactory = mock(ApplicationApiFactory.class); + var hostStatusRegistry = mock(MutableStatusRegistry.class); + + when(instanceLookupService.findInstanceByHost(any())).thenReturn(Optional.of(applicationInstance)); + when(applicationInstance.reference()).thenReturn(applicationInstanceReference); + when(zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(any(), any())) + .thenReturn(hostStatusRegistry); + when(hostStatusRegistry.getStatus()).thenReturn(NO_REMARKS); + + var orchestrator = new OrchestratorImpl( + policy, + clusterControllerClientFactory, + zookeeperStatusService, + instanceLookupService, + 20, + clock, + applicationApiFactory, + flagSource); + + HostName parentHostname = new HostName("parent.vespa.ai"); + + orchestrator.suspendAll(parentHostname, List.of(parentHostname)); + + ArgumentCaptor contextCaptor = ArgumentCaptor.forClass(OrchestratorContext.class); + verify(zookeeperStatusService, times(2)).lockApplicationInstance_forCurrentThreadOnly(contextCaptor.capture(), any()); + List contexts = contextCaptor.getAllValues(); + + // First invocation is probe, second is not. + assertEquals(2, contexts.size()); + assertTrue(contexts.get(0).isProbe()); + assertTrue(contexts.get(0).partOfMultiAppOp()); + assertFalse(contexts.get(1).isProbe()); + assertTrue(contexts.get(1).partOfMultiAppOp()); + + verify(applicationApiFactory, times(2)).create(any(), any(), any()); + verify(policy, times(2)).grantSuspensionRequest(any(), any()); + verify(instanceLookupService, atLeastOnce()).findInstanceByHost(any()); + verify(hostStatusRegistry, times(2)).getStatus(); + + // Each zookeeperStatusService that is created, is closed. + verify(zookeeperStatusService, times(2)).lockApplicationInstance_forCurrentThreadOnly(any(), any()); + verify(hostStatusRegistry, times(2)).close(); + + verifyNoMoreInteractions( + policy, + clusterControllerClientFactory, + zookeeperStatusService, + hostStatusRegistry, + instanceLookupService, + applicationApiFactory); + } + @Test public void testGetHost() throws Exception { ClusterControllerClientFactory clusterControllerClientFactory = new ClusterControllerClientFactoryMock(); @@ -349,7 +431,8 @@ public class OrchestratorImplTest { lookupService, 0, new ManualClock(), - applicationApiFactory); + applicationApiFactory, + flagSource); orchestrator.setNodeStatus(hostName, HostStatus.ALLOWED_TO_BE_DOWN); diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java index 87e5f226c42..eff222bc074 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java @@ -16,6 +16,7 @@ import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.applicationmodel.TenantId; import com.yahoo.vespa.curator.mock.MockCurator; +import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.orchestrator.OrchestrationException; import com.yahoo.vespa.orchestrator.Orchestrator; import com.yahoo.vespa.orchestrator.OrchestratorContext; @@ -52,6 +53,7 @@ class ModelTestUtils { public static final int NUMBER_OF_CONFIG_SERVERS = 3; + private final InMemoryFlagSource flagSource = new InMemoryFlagSource(); private final Map applications = new HashMap<>(); private final ClusterControllerClientFactory clusterControllerClientFactory = new ClusterControllerClientFactoryMock(); private final Map hostStatusMap = new HashMap<>(); @@ -62,7 +64,8 @@ class ModelTestUtils { new ServiceMonitorInstanceLookupService(() -> new ServiceModel(applications)), 0, new ManualClock(), - applicationApiFactory()); + applicationApiFactory(), + flagSource); ApplicationApiFactory applicationApiFactory() { return new ApplicationApiFactory(NUMBER_OF_CONFIG_SERVERS); diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResourceTest.java index 89f421e9125..2996220561c 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResourceTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/ApplicationSuspensionResourceTest.java @@ -158,6 +158,7 @@ public class ApplicationSuspensionResourceTest { " \n" + " 10\n" + " \n" + + " \n" + " \n" + " \n" + " \n" + diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java index fec1554396d..b19f96a5867 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java @@ -16,6 +16,7 @@ import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.applicationmodel.TenantId; import com.yahoo.vespa.curator.mock.MockCurator; +import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.orchestrator.BatchHostNameNotFoundException; import com.yahoo.vespa.orchestrator.BatchInternalErrorException; import com.yahoo.vespa.orchestrator.Host; @@ -90,6 +91,8 @@ public class HostResourceTest { makeServiceClusterSet()))); } + private final InMemoryFlagSource flagSource = new InMemoryFlagSource(); + private static final InstanceLookupService alwaysEmptyInstanceLookUpService = new InstanceLookupService() { @Override public Optional findInstanceById( @@ -129,23 +132,23 @@ public class HostResourceTest { } } - private static final OrchestratorImpl alwaysAllowOrchestrator = new OrchestratorImpl( + private final OrchestratorImpl alwaysAllowOrchestrator = new OrchestratorImpl( new AlwaysAllowPolicy(), new ClusterControllerClientFactoryMock(), EVERY_HOST_IS_UP_HOST_STATUS_SERVICE, mockInstanceLookupService, SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS, clock, - applicationApiFactory - ); + applicationApiFactory, + flagSource); - private static final OrchestratorImpl hostNotFoundOrchestrator = new OrchestratorImpl( + private final OrchestratorImpl hostNotFoundOrchestrator = new OrchestratorImpl( new AlwaysAllowPolicy(), new ClusterControllerClientFactoryMock(), EVERY_HOST_IS_UP_HOST_STATUS_SERVICE, alwaysEmptyInstanceLookUpService, SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS, clock, - applicationApiFactory - ); + applicationApiFactory, + flagSource); private final UriInfo uriInfo = mock(UriInfo.class); @@ -247,7 +250,8 @@ public class HostResourceTest { EVERY_HOST_IS_UP_HOST_STATUS_SERVICE,mockInstanceLookupService, SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS, clock, - applicationApiFactory); + applicationApiFactory, + flagSource); try { HostResource hostResource = new HostResource(alwaysRejectResolver, uriInfo); @@ -267,7 +271,8 @@ public class HostResourceTest { mockInstanceLookupService, SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS, clock, - applicationApiFactory); + applicationApiFactory, + flagSource); try { HostSuspensionResource hostSuspensionResource = new HostSuspensionResource(alwaysRejectResolver); diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService2Test.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService2Test.java new file mode 100644 index 00000000000..fc644970fa5 --- /dev/null +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService2Test.java @@ -0,0 +1,121 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.status; + +import com.yahoo.jdisc.Metric; +import com.yahoo.jdisc.Timer; +import com.yahoo.jdisc.test.TestTimer; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceId; +import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; +import com.yahoo.vespa.applicationmodel.TenantId; +import com.yahoo.vespa.curator.Curator; +import com.yahoo.vespa.orchestrator.OrchestratorContext; +import org.apache.curator.framework.recipes.locks.InterProcessMutex; +import org.junit.Test; +import org.mockito.ArgumentCaptor; + +import java.time.Duration; + +import static org.junit.Assert.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; + +/** + * @author hakonhall + */ +public class ZookeeperStatusService2Test { + private final Curator curator = mock(Curator.class); + private final Timer timer = new TestTimer(); + private final Metric metric = mock(Metric.class); + private final HostInfosCache cache = mock(HostInfosCache.class); + private final ZookeeperStatusService zookeeperStatusService = new ZookeeperStatusService(curator, metric, timer, cache); + + private final OrchestratorContext context = mock(OrchestratorContext.class); + private final InterProcessMutex mutex = mock(InterProcessMutex.class); + private final ApplicationInstanceReference reference = new ApplicationInstanceReference( + new TenantId("tenant"), new ApplicationInstanceId("app:dev:us-east-1:default")); + + @Test + public void verifyLocks() throws Exception { + when(context.isProbe()).thenReturn(true); + when(context.largeLocks()).thenReturn(false); + when(context.partOfMultiAppOp()).thenReturn(true); + + when(curator.createMutex(any())).thenReturn(mutex); + when(mutex.acquire(anyLong(), any())).thenReturn(true); + + when(context.getTimeLeft()).thenReturn(Duration.ofSeconds(12)); + + try (MutableStatusRegistry registry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(context, reference)) { + // nothing + } + + verify(curator, times(1)).createMutex(any()); + verify(mutex, times(1)).acquire(anyLong(), any()); + verify(mutex, times(1)).release(); + verify(context, times(0)).runOnClose(any()); + verifyNoMoreInteractions(mutex); + + // Now the non-probe suspension + + when(context.isProbe()).thenReturn(false); + + try (MutableStatusRegistry registry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(context, reference)) { + // nothing + } + + verify(mutex, times(2)).acquire(anyLong(), any()); + verify(mutex, times(2)).release(); + ArgumentCaptor runnableCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(context, times(0)).runOnClose(runnableCaptor.capture()); + verifyNoMoreInteractions(mutex); + } + + @Test + public void verifyLargeLocks() throws Exception { + when(context.isProbe()).thenReturn(true); + when(context.largeLocks()).thenReturn(true); + when(context.partOfMultiAppOp()).thenReturn(true); + + when(curator.createMutex(any())).thenReturn(mutex); + when(mutex.acquire(anyLong(), any())).thenReturn(true); + + when(context.getTimeLeft()).thenReturn(Duration.ofSeconds(12)); + + try (MutableStatusRegistry registry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(context, reference)) { + // nothing + } + + verify(curator, times(1)).createMutex(any()); + verify(mutex, times(1)).acquire(anyLong(), any()); + verify(mutex, times(0)).release(); + verify(context, times(1)).runOnClose(any()); + verifyNoMoreInteractions(mutex); + + // Now the non-probe suspension + + when(context.isProbe()).thenReturn(false); + + try (MutableStatusRegistry registry = zookeeperStatusService.lockApplicationInstance_forCurrentThreadOnly(context, reference)) { + // nothing + } + + // No (additional) acquire, and no releases. + verify(mutex, times(1)).acquire(anyLong(), any()); + verify(mutex, times(0)).release(); + ArgumentCaptor runnableCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(context, times(1)).runOnClose(runnableCaptor.capture()); + verifyNoMoreInteractions(mutex); + + // Verify the context runnable releases the mutex + assertEquals(1, runnableCaptor.getAllValues().size()); + runnableCaptor.getAllValues().forEach(Runnable::run); + verify(mutex, times(1)).acquire(anyLong(), any()); + verify(mutex, times(1)).release(); + + } +} \ No newline at end of file diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java index 12622f22837..df2bf572a71 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java @@ -288,6 +288,16 @@ public class ZookeeperStatusServiceTest { assertThat(suspendedApps, hasItem(TestIds.APPLICATION_INSTANCE_REFERENCE2)); } + @Test + public void large_orchestrator_locks() { + when(context.isProbe()).thenReturn(true); + + try (MutableStatusRegistry statusRegistry = zookeeperStatusService + .lockApplicationInstance_forCurrentThreadOnly(context, TestIds.APPLICATION_INSTANCE_REFERENCE)) { + statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN); + } + } + //TODO: move to vespajlib @SafeVarargs private static List shuffledList(T... values) { -- cgit v1.2.3