summary refs log tree commit diff stats
path: root/node-repository
diff options
context:
space:
mode:
author Håkon Hallingstad <hakon@oath.com> 2018-02-26 10:00:32 +0100
committer Håkon Hallingstad <hakon@oath.com> 2018-02-26 10:00:32 +0100
commit 1a35f346a1f2553fb642c6ea2f0e3e0531a7a36d (patch)
tree 6879430520814f3feaecaea2f05d50a6db17f26d /node-repository
parent 22b977be625e2bf35d67565a9e527fa8cea007eb (diff)
Skip config request requirement for standalone node admin
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java 12
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java 14
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java 19
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java 48
4 files changed, 82 insertions, 11 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index a21bd3ff1a1..a2a7aa2545a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
+import com.yahoo.cloud.config.ConfigserverConfig;
import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.Deployment;
import com.yahoo.config.provision.HostLivenessTracker;
@@ -57,12 +58,14 @@ public class NodeFailer extends Maintainer {
private final Instant constructionTime;
private final ThrottlePolicy throttlePolicy;
private final Metric metric;
+ private final ConfigserverConfig configserverConfig;
public NodeFailer(Deployer deployer, HostLivenessTracker hostLivenessTracker,
ServiceMonitor serviceMonitor, NodeRepository nodeRepository,
Duration downTimeLimit, Clock clock, Orchestrator orchestrator,
ThrottlePolicy throttlePolicy, Metric metric,
- JobControl jobControl) {
+ JobControl jobControl,
+ ConfigserverConfig configserverConfig) {
// check ping status every five minutes, but at least twice as often as the down time limit
super(nodeRepository, min(downTimeLimit.dividedBy(2), Duration.ofMinutes(5)), jobControl);
this.deployer = deployer;
@@ -74,6 +77,7 @@ public class NodeFailer extends Maintainer {
this.constructionTime = clock.instant();
this.throttlePolicy = throttlePolicy;
this.metric = metric;
+ this.configserverConfig = configserverConfig;
}
@Override
@@ -126,7 +130,7 @@ public class NodeFailer extends Maintainer {
Map<Node, String> nodesByFailureReason = new HashMap<>();
for (Node node : nodeRepository().getNodes(Node.State.ready)) {
- if (! hasNodeRequestedConfigAfter(node, oldestAcceptableRequestTime)) {
+ if (expectConfigRequests(node) && ! hasNodeRequestedConfigAfter(node, oldestAcceptableRequestTime)) {
nodesByFailureReason.put(node, "Not receiving config requests from node");
} else if (node.status().hardwareFailureDescription().isPresent()) {
nodesByFailureReason.put(node, "Node has hardware failure");
@@ -137,6 +141,10 @@ public class NodeFailer extends Maintainer {
return nodesByFailureReason;
}
+ private boolean expectConfigRequests(Node node) {
+ return !node.type().isDockerHost() || configserverConfig.nodeAdminInContainer();
+ }
+
private boolean hasNodeRequestedConfigAfter(Node node, Instant instant) {
return !wasMadeReadyBefore(node, instant) || hasRecordedRequestAfter(node, instant);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index be792630445..7b0606b809b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -2,6 +2,7 @@
package com.yahoo.vespa.hosted.provision.maintenance;
import com.google.inject.Inject;
+import com.yahoo.cloud.config.ConfigserverConfig;
import com.yahoo.component.AbstractComponent;
import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.Environment;
@@ -53,17 +54,20 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
@Inject
public NodeRepositoryMaintenance(NodeRepository nodeRepository, Deployer deployer, Curator curator,
HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor,
- Zone zone, Orchestrator orchestrator, Metric metric) {
- this(nodeRepository, deployer, curator, hostLivenessTracker, serviceMonitor, zone, Clock.systemUTC(), orchestrator, metric);
+ Zone zone, Orchestrator orchestrator, Metric metric,
+ ConfigserverConfig configserverConfig) {
+ this(nodeRepository, deployer, curator, hostLivenessTracker, serviceMonitor, zone, Clock.systemUTC(),
+ orchestrator, metric, configserverConfig);
}
public NodeRepositoryMaintenance(NodeRepository nodeRepository, Deployer deployer, Curator curator,
- HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor,
- Zone zone, Clock clock, Orchestrator orchestrator, Metric metric) {
+ HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor,
+ Zone zone, Clock clock, Orchestrator orchestrator, Metric metric,
+ ConfigserverConfig configserverConfig) {
DefaultTimes defaults = new DefaultTimes(zone.environment());
jobControl = new JobControl(nodeRepository.database());
- nodeFailer = new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, durationFromEnv("fail_grace").orElse(defaults.failGrace), clock, orchestrator, throttlePolicyFromEnv("throttle_policy").orElse(defaults.throttlePolicy), metric, jobControl);
+ nodeFailer = new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, durationFromEnv("fail_grace").orElse(defaults.failGrace), clock, orchestrator, throttlePolicyFromEnv("throttle_policy").orElse(defaults.throttlePolicy), metric, jobControl, configserverConfig);
periodicApplicationMaintainer = new PeriodicApplicationMaintainer(deployer, nodeRepository, durationFromEnv("periodic_redeploy_interval").orElse(defaults.periodicRedeployInterval), jobControl);
operatorChangeApplicationMaintainer = new OperatorChangeApplicationMaintainer(deployer, nodeRepository, clock, durationFromEnv("operator_change_redeploy_interval").orElse(defaults.operatorChangeRedeployInterval), jobControl);
zooKeeperAccessMaintainer = new ZooKeeperAccessMaintainer(nodeRepository, curator, durationFromEnv("zookeeper_access_maintenance_interval").orElse(defaults.zooKeeperAccessMaintenanceInterval), jobControl);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java
index 5534c28cc1a..a03b06fda13 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
+import com.yahoo.cloud.config.ConfigserverConfig;
import com.yahoo.component.Version;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ApplicationName;
@@ -70,19 +71,29 @@ public class NodeFailTester {
private final Orchestrator orchestrator;
private final NodeRepositoryProvisioner provisioner;
private final Curator curator;
+ private final ConfigserverConfig configserverConfig;
private NodeFailTester() {
+ this(new ConfigserverConfig(new ConfigserverConfig.Builder()));
+ }
+
+ private NodeFailTester(ConfigserverConfig configserverConfig) {
clock = new ManualClock();
curator = new MockCurator();
nodeRepository = new NodeRepository(nodeFlavors, curator, clock, zone, new MockNameResolver().mockAnyLookup(),
- new DockerImage("docker-registry.domain.tld:8080/dist/vespa"));
+ new DockerImage("docker-registry.domain.tld:8080/dist/vespa"));
provisioner = new NodeRepositoryProvisioner(nodeRepository, nodeFlavors, zone);
hostLivenessTracker = new TestHostLivenessTracker(clock);
orchestrator = new OrchestratorMock();
+ this.configserverConfig = configserverConfig;
}
-
+
public static NodeFailTester withTwoApplications() {
- NodeFailTester tester = new NodeFailTester();
+ return withTwoApplications(new ConfigserverConfig(new ConfigserverConfig.Builder()));
+ }
+
+ public static NodeFailTester withTwoApplications(ConfigserverConfig configserverConfig) {
+ NodeFailTester tester = new NodeFailTester(configserverConfig);
tester.createReadyNodes(16);
tester.createHostNodes(3);
@@ -184,7 +195,7 @@ public class NodeFailTester {
}
public NodeFailer createFailer() {
- return new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, downtimeLimitOneHour, clock, orchestrator, NodeFailer.ThrottlePolicy.hosted, metric, new JobControl(nodeRepository.database()));
+ return new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, downtimeLimitOneHour, clock, orchestrator, NodeFailer.ThrottlePolicy.hosted, metric, new JobControl(nodeRepository.database()), configserverConfig);
}
public void allNodesMakeAConfigRequestExcept(Node ... deadNodeArray) {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
index 6d41cfa08e5..63bc04ac671 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
+import com.yahoo.cloud.config.ConfigserverConfig;
import com.yahoo.config.provision.Flavor;
import com.yahoo.config.provision.NodeType;
import com.yahoo.vespa.applicationmodel.ServiceInstance;
@@ -194,6 +195,53 @@ public class NodeFailerTest {
}
@Test
+ public void docker_host_failed_without_config_requests() {
+ NodeFailTester tester = NodeFailTester.withTwoApplications();
+
+ // For a day all nodes work so nothing happens
+ for (int minutes = 0, interval = 30; minutes < 24 * 60; minutes += interval) {
+ tester.clock.advance(Duration.ofMinutes(interval));
+ tester.allNodesMakeAConfigRequestExcept();
+ tester.failer.run();
+ assertEquals( 3, tester.nodeRepository.getNodes(NodeType.host, Node.State.ready).size());
+ assertEquals( 0, tester.nodeRepository.getNodes(NodeType.host, Node.State.failed).size());
+ }
+
+
+ // One ready docker host stops making config requests, and is failed out
+ tester.clock.advance(Duration.ofMinutes(180));
+ Node dockerHost = tester.nodeRepository.getNodes(NodeType.host, Node.State.ready).iterator().next();
+ tester.allNodesMakeAConfigRequestExcept(dockerHost);
+ tester.failer.run();
+ assertEquals( 2, tester.nodeRepository.getNodes(NodeType.host, Node.State.ready).size());
+ assertEquals( 1, tester.nodeRepository.getNodes(NodeType.host, Node.State.failed).size());
+ }
+
+ @Test
+ public void not_failed_without_config_requests_if_node_admin_on_host() {
+ NodeFailTester tester = NodeFailTester.withTwoApplications(
+ new ConfigserverConfig(new ConfigserverConfig.Builder().nodeAdminInContainer(false)));
+
+ // For a day all nodes work so nothing happens
+ for (int minutes = 0, interval = 30; minutes < 24 * 60; minutes += interval) {
+ tester.clock.advance(Duration.ofMinutes(interval));
+ tester.allNodesMakeAConfigRequestExcept();
+ tester.failer.run();
+ assertEquals( 3, tester.nodeRepository.getNodes(NodeType.host, Node.State.ready).size());
+ assertEquals( 0, tester.nodeRepository.getNodes(NodeType.host, Node.State.failed).size());
+ }
+
+
+ // One ready docker host stops making config requests, but is not failed out since node admin does not run in a container
+ tester.clock.advance(Duration.ofMinutes(180));
+ Node dockerHost = tester.nodeRepository.getNodes(NodeType.host, Node.State.ready).iterator().next();
+ tester.allNodesMakeAConfigRequestExcept(dockerHost);
+ tester.failer.run();
+ assertEquals( 3, tester.nodeRepository.getNodes(NodeType.host, Node.State.ready).size());
+ assertEquals( 0, tester.nodeRepository.getNodes(NodeType.host, Node.State.failed).size());
+ }
+
+ @Test
public void failing_docker_hosts() {
NodeFailTester tester = NodeFailTester.withTwoApplicationsOnDocker(7);