diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2020-10-13 11:36:30 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-13 11:36:30 +0200 |
commit | 6f31b8fe84694406d4e42ca0ca9f1b62ab5c8ee6 (patch) | |
tree | 561174d085f890b311e827d3a464775974b87f70 /container-core | |
parent | 78930c993b17f4e43c298c8a8e0d14a53863776a (diff) | |
parent | b722d66b6347bc42254d9014f320413b187f7ae6 (diff) |
Merge pull request #14805 from vespa-engine/hakonhall/take-statemonitor-snapshot-every-60s
Take StateMonitor snapshot every 60s
Diffstat (limited to 'container-core')
7 files changed, 66 insertions, 104 deletions
diff --git a/container-core/src/main/java/com/yahoo/container/handler/VipStatus.java b/container-core/src/main/java/com/yahoo/container/handler/VipStatus.java index e1b5b769906..876d05b7be9 100644 --- a/container-core/src/main/java/com/yahoo/container/handler/VipStatus.java +++ b/container-core/src/main/java/com/yahoo/container/handler/VipStatus.java @@ -50,7 +50,7 @@ public class VipStatus { /** For testing */ public VipStatus(QrSearchersConfig dispatchers, ClustersStatus clustersStatus) { - this(dispatchers, new VipStatusConfig.Builder().build(), clustersStatus, new StateMonitor()); + this(dispatchers, new VipStatusConfig.Builder().build(), clustersStatus, StateMonitor.createForTesting()); } @Inject diff --git a/container-core/src/main/java/com/yahoo/container/jdisc/state/StateMonitor.java b/container-core/src/main/java/com/yahoo/container/jdisc/state/StateMonitor.java index 0018dd22dd9..ccd0864b3ab 100644 --- a/container-core/src/main/java/com/yahoo/container/jdisc/state/StateMonitor.java +++ b/container-core/src/main/java/com/yahoo/container/jdisc/state/StateMonitor.java @@ -7,14 +7,16 @@ import com.yahoo.container.jdisc.config.HealthMonitorConfig; import com.yahoo.jdisc.Timer; import com.yahoo.jdisc.application.MetricConsumer; import com.yahoo.jdisc.core.SystemTimer; -import java.util.logging.Level; import java.util.Map; +import java.util.Optional; import java.util.TreeSet; import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; +import java.util.logging.Level; import java.util.logging.Logger; /** @@ -31,45 +33,36 @@ public class StateMonitor extends AbstractComponent { public enum Status {up, down, initializing} private final CopyOnWriteArrayList<StateMetricConsumer> consumers = new CopyOnWriteArrayList<>(); - private final Thread thread; + private final Optional<ScheduledExecutorService> executor; private final Timer timer; private final long snapshotIntervalMs; private volatile long lastSnapshotTimeMs; private volatile MetricSnapshot snapshot; private volatile Status status; private final TreeSet<String> valueNames = new TreeSet<>(); - private final AtomicBoolean stopped = new AtomicBoolean(false); - - /** For testing */ - public StateMonitor() { - this(new HealthMonitorConfig.Builder().build(), new SystemTimer()); - } @Inject public StateMonitor(HealthMonitorConfig config, Timer timer) { - this(config, - timer, - runnable -> { - Thread thread = new Thread(runnable, "StateMonitor"); - thread.setDaemon(true); - return thread; - }); + this(config, timer, runnable -> { + Thread thread = new Thread(runnable, "StateMonitor"); + thread.setDaemon(true); + return thread; + }); } - StateMonitor(HealthMonitorConfig config, Timer timer, ThreadFactory threadFactory) { - this((long)(config.snapshot_interval() * TimeUnit.SECONDS.toMillis(1)), - Status.valueOf(config.initialStatus()), - timer, threadFactory); + public static StateMonitor createForTesting() { + return new StateMonitor(new HealthMonitorConfig.Builder().build(), new SystemTimer()); } - /* Public for testing only */ - public StateMonitor(long snapshotIntervalMS, Status status, Timer timer, ThreadFactory threadFactory) { + /** Non-private only for unit testing this class. */ + StateMonitor(HealthMonitorConfig config, Timer timer, ThreadFactory threadFactory) { this.timer = timer; - this.snapshotIntervalMs = snapshotIntervalMS; + this.snapshotIntervalMs = (long)(config.snapshot_interval() * TimeUnit.SECONDS.toMillis(1)); this.lastSnapshotTimeMs = timer.currentTimeMillis(); - this.status = status; - thread = threadFactory.newThread(this::run); - thread.start(); + this.status = Status.valueOf(config.initialStatus()); + this.executor = Optional.ofNullable(threadFactory).map(Executors::newSingleThreadScheduledExecutor); + this.executor.ifPresent(exec -> exec.scheduleAtFixedRate(this::updateSnapshot, snapshotIntervalMs, + snapshotIntervalMs, TimeUnit.MILLISECONDS)); } /** Returns a metric consumer for jDisc which will write metrics back to this */ @@ -96,28 +89,11 @@ public class StateMonitor extends AbstractComponent { /** Returns the interval between each metrics snapshot used by this */ public long getSnapshotIntervalMillis() { return snapshotIntervalMs; } - /** NOTE: For unit testing only. May lead to undefined behaviour if StateMonitor thread is running simultaneously **/ - boolean checkTime() { + /** NOTE: Non-private for unit testing only. **/ + void updateSnapshot() { long now = timer.currentTimeMillis(); - if (now < lastSnapshotTimeMs + snapshotIntervalMs) { - return false; - } snapshot = createSnapshot(lastSnapshotTimeMs, now); lastSnapshotTimeMs = now; - return true; - } - - private void run() { - log.finest("StateMonitor started."); - try { - synchronized (stopped) { - while (!stopped.get()) { - checkTime(); - stopped.wait((lastSnapshotTimeMs + snapshotIntervalMs) - timer.currentTimeMillis()); - } - } - } catch (InterruptedException e) { } - log.finest("StateMonitor stopped."); } private MetricSnapshot createSnapshot(long fromMillis, long toMillis) { @@ -148,15 +124,16 @@ public class StateMonitor extends AbstractComponent { @Override public void deconstruct() { - synchronized (stopped) { - stopped.set(true); - stopped.notifyAll(); - } - try { - thread.join(5000); - } catch (InterruptedException e) { } - if (thread.isAlive()) { - log.warning("StateMonitor failed to terminate within 5 seconds of interrupt signal. Ignoring."); - } + executor.ifPresent(exec -> { + exec.shutdown(); + try { + exec.awaitTermination(5, TimeUnit.SECONDS); + } catch (InterruptedException e) { + } + + if (!exec.isTerminated()) { + log.warning("StateMonitor failed to terminate within 5 seconds of interrupt signal. Ignoring."); + } + }); } } diff --git a/container-core/src/test/java/com/yahoo/container/handler/VipStatusTestCase.java b/container-core/src/test/java/com/yahoo/container/handler/VipStatusTestCase.java index e7a9a1442f3..3f33efa1993 100644 --- a/container-core/src/test/java/com/yahoo/container/handler/VipStatusTestCase.java +++ b/container-core/src/test/java/com/yahoo/container/handler/VipStatusTestCase.java @@ -1,14 +1,15 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.container.handler; -import static org.junit.Assert.*; - import com.yahoo.container.QrSearchersConfig; import com.yahoo.container.core.VipStatusConfig; import com.yahoo.container.jdisc.state.StateMonitor; -import com.yahoo.jdisc.core.SystemTimer; import org.junit.Test; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + /** * @author bratseth */ @@ -138,11 +139,9 @@ public class VipStatusTestCase { } private static StateMonitor createStateMonitor(StateMonitor.Status startState) { - return new StateMonitor(1000, startState, new SystemTimer(), runnable -> { - Thread thread = new Thread(runnable, "StateMonitor"); - thread.setDaemon(true); - return thread; - }); + StateMonitor stateMonitor = StateMonitor.createForTesting(); + stateMonitor.status(startState); + return stateMonitor; } private static void removeFromRotation(String[] clusters, VipStatus v) { diff --git a/container-core/src/test/java/com/yahoo/container/jdisc/state/MetricsPacketsHandlerTest.java b/container-core/src/test/java/com/yahoo/container/jdisc/state/MetricsPacketsHandlerTest.java index 98dfb2e281b..ef700597537 100644 --- a/container-core/src/test/java/com/yahoo/container/jdisc/state/MetricsPacketsHandlerTest.java +++ b/container-core/src/test/java/com/yahoo/container/jdisc/state/MetricsPacketsHandlerTest.java @@ -17,7 +17,6 @@ import static com.yahoo.container.jdisc.state.MetricsPacketsHandler.PACKET_SEPAR import static com.yahoo.container.jdisc.state.MetricsPacketsHandler.STATUS_CODE_KEY; import static com.yahoo.container.jdisc.state.MetricsPacketsHandler.STATUS_MSG_KEY; import static com.yahoo.container.jdisc.state.MetricsPacketsHandler.TIMESTAMP_KEY; -import static com.yahoo.container.jdisc.state.StateHandlerTestBase.SNAPSHOT_INTERVAL; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -123,7 +122,7 @@ public class MetricsPacketsHandlerTest extends StateHandlerTestBase { } private List<JsonNode> incrementTimeAndGetJsonPackets() throws Exception { - incrementCurrentTimeAndAssertSnapshot(SNAPSHOT_INTERVAL); + advanceToNextSnapshot(); String response = requestAsString("http://localhost/metrics-packets"); return toJsonPackets(response); diff --git a/container-core/src/test/java/com/yahoo/container/jdisc/state/StateHandlerTest.java b/container-core/src/test/java/com/yahoo/container/jdisc/state/StateHandlerTest.java index d53b189f932..26a2f817acc 100644 --- a/container-core/src/test/java/com/yahoo/container/jdisc/state/StateHandlerTest.java +++ b/container-core/src/test/java/com/yahoo/container/jdisc/state/StateHandlerTest.java @@ -34,13 +34,13 @@ public class StateHandlerTest extends StateHandlerTestBase { public void testReportIncludesMetricsAfterSnapshot() throws Exception { metric.add("foo", 1, null); metric.set("bar", 4, null); - incrementCurrentTimeAndAssertSnapshot(SNAPSHOT_INTERVAL); + advanceToNextSnapshot(); JsonNode json1 = requestAsJson("http://localhost/state/v1/metrics"); assertEquals(json1.toString(), "up", json1.get("status").get("code").asText()); assertEquals(json1.toString(), 2, json1.get("metrics").get("values").size()); metric.add("fuz", 1, metric.createContext(new HashMap<>(0))); - incrementCurrentTimeAndAssertSnapshot(SNAPSHOT_INTERVAL); + advanceToNextSnapshot(); JsonNode json2 = requestAsJson("http://localhost/state/v1/metrics"); assertEquals(json2.toString(), "up", json2.get("status").get("code").asText()); assertEquals(json2.toString(), 3, json2.get("metrics").get("values").size()); @@ -61,7 +61,7 @@ public class StateHandlerTest extends StateHandlerTestBase { metric.add(metricName, 2, metricContext); // Change it to a gauge metric metric.set(metricName, 9, metricContext); - incrementCurrentTimeAndAssertSnapshot(SNAPSHOT_INTERVAL); + advanceToNextSnapshot(); MetricValue resultingMetric = monitor.snapshot().iterator().next().getValue().get(metricName); assertEquals(GaugeMetric.class, resultingMetric.getClass()); assertEquals("Value was reset and produces the last gauge value", @@ -74,7 +74,7 @@ public class StateHandlerTest extends StateHandlerTestBase { // Change it to a count metric metric.add(metricName, 1, metricContext); metric.add(metricName, 2, metricContext); - incrementCurrentTimeAndAssertSnapshot(SNAPSHOT_INTERVAL); + advanceToNextSnapshot(); MetricValue resultingMetric = monitor.snapshot().iterator().next().getValue().get(metricName); assertEquals(CountMetric.class, resultingMetric.getClass()); assertEquals("Value was reset, and changed to add semantics giving 1+2", @@ -88,7 +88,7 @@ public class StateHandlerTest extends StateHandlerTestBase { metric.set("bar", 5, null); metric.set("bar", 7, null); metric.set("bar", 2, null); - incrementCurrentTimeAndAssertSnapshot(SNAPSHOT_INTERVAL); + advanceToNextSnapshot(); JsonNode json = requestAsJson("http://localhost/state/v1/all"); assertEquals(json.toString(), "up", json.get("status").get("code").asText()); assertEquals(json.toString(), 1, json.get("metrics").get("values").size()); @@ -102,7 +102,7 @@ public class StateHandlerTest extends StateHandlerTestBase { metric.add("foo", 1, null); metric.add("foo", 2, null); metric.add("foo", 1, null); - incrementCurrentTimeAndAssertSnapshot(SNAPSHOT_INTERVAL); + advanceToNextSnapshot(); JsonNode json = requestAsJson("http://localhost/state/v1/all"); assertEquals(json.toString(), "up", json.get("status").get("code").asText()); assertEquals(json.toString(), 1, json.get("metrics").get("values").size()); @@ -113,7 +113,7 @@ public class StateHandlerTest extends StateHandlerTestBase { @Test public void testReadabilityOfJsonReport() throws Exception { metric.add("foo", 1, null); - incrementCurrentTimeAndAssertSnapshot(SNAPSHOT_INTERVAL); + advanceToNextSnapshot(); assertEquals("{\n" + " \"metrics\": {\n" + " \"snapshot\": {\n" + @@ -138,7 +138,7 @@ public class StateHandlerTest extends StateHandlerTestBase { metric.set("bar", 3, ctx); metric.set("bar", 4, ctx); metric.set("bar", 5, ctx); - incrementCurrentTimeAndAssertSnapshot(SNAPSHOT_INTERVAL); + advanceToNextSnapshot(); assertEquals("{\n" + " \"metrics\": {\n" + " \"snapshot\": {\n" + @@ -178,10 +178,10 @@ public class StateHandlerTest extends StateHandlerTestBase { public void testNotAggregatingCountsBeyondSnapshots() throws Exception { metric.add("foo", 1, null); metric.add("foo", 1, null); - incrementCurrentTimeAndAssertSnapshot(SNAPSHOT_INTERVAL); + advanceToNextSnapshot(); metric.add("foo", 2, null); metric.add("foo", 1, null); - incrementCurrentTimeAndAssertSnapshot(SNAPSHOT_INTERVAL); + advanceToNextSnapshot(); JsonNode json = requestAsJson("http://localhost/state/v1/all"); assertEquals(json.toString(), "up", json.get("status").get("code").asText()); assertEquals(json.toString(), 1, json.get("metrics").get("values").size()); @@ -194,21 +194,19 @@ public class StateHandlerTest extends StateHandlerTestBase { metric.add("foo", 1, null); metric.set("bar", 3, null); // At this time we should not have done any snapshotting - incrementCurrentTimeAndAssertNoSnapshot(SNAPSHOT_INTERVAL - 1); { JsonNode json = requestAsJson("http://localhost/state/v1/all"); assertFalse(json.toString(), json.get("metrics").has("snapshot")); } // At this time first snapshot should have been generated - incrementCurrentTimeAndAssertSnapshot(1); + advanceToNextSnapshot(); { JsonNode json = requestAsJson("http://localhost/state/v1/all"); assertTrue(json.toString(), json.get("metrics").has("snapshot")); assertEquals(0.0, json.get("metrics").get("snapshot").get("from").asDouble(), 0.00001); assertEquals(300.0, json.get("metrics").get("snapshot").get("to").asDouble(), 0.00001); } - // No new snapshot at this time - incrementCurrentTimeAndAssertNoSnapshot(SNAPSHOT_INTERVAL - 1); + // No new snapshot { JsonNode json = requestAsJson("http://localhost/state/v1/all"); assertTrue(json.toString(), json.get("metrics").has("snapshot")); @@ -216,7 +214,7 @@ public class StateHandlerTest extends StateHandlerTestBase { assertEquals(300.0, json.get("metrics").get("snapshot").get("to").asDouble(), 0.00001); } // A new snapshot - incrementCurrentTimeAndAssertSnapshot(1); + advanceToNextSnapshot(); { JsonNode json = requestAsJson("http://localhost/state/v1/all"); assertTrue(json.toString(), json.get("metrics").has("snapshot")); @@ -229,10 +227,10 @@ public class StateHandlerTest extends StateHandlerTestBase { public void testFreshStartOfValuesBeyondSnapshot() throws Exception { metric.set("bar", 4, null); metric.set("bar", 5, null); - incrementCurrentTimeAndAssertSnapshot(SNAPSHOT_INTERVAL); + advanceToNextSnapshot(); metric.set("bar", 4, null); metric.set("bar", 2, null); - incrementCurrentTimeAndAssertSnapshot(SNAPSHOT_INTERVAL); + advanceToNextSnapshot(); JsonNode json = requestAsJson("http://localhost/state/v1/all"); assertEquals(json.toString(), "up", json.get("status").get("code").asText()); assertEquals(json.toString(), 1, json.get("metrics").get("values").size()); @@ -243,8 +241,8 @@ public class StateHandlerTest extends StateHandlerTestBase { @Test public void snapshotsPreserveLastGaugeValue() throws Exception { metric.set("bar", 4, null); - incrementCurrentTimeAndAssertSnapshot(SNAPSHOT_INTERVAL); - incrementCurrentTimeAndAssertSnapshot(SNAPSHOT_INTERVAL); + advanceToNextSnapshot(); + advanceToNextSnapshot(); JsonNode json = requestAsJson("http://localhost/state/v1/all"); JsonNode metricValues = getFirstMetricValueNode(json); assertEquals(json.toString(), 4, metricValues.get("last").asDouble(), 0.001); @@ -266,10 +264,10 @@ public class StateHandlerTest extends StateHandlerTestBase { @Test public void gaugeSnapshotsTracksCountMinMaxAvgPerPeriod() throws Exception { metric.set("bar", 10000, null); // Ensure any cross-snapshot noise is visible - incrementCurrentTimeAndAssertSnapshot(SNAPSHOT_INTERVAL); + advanceToNextSnapshot(); metric.set("bar", 20, null); metric.set("bar", 40, null); - incrementCurrentTimeAndAssertSnapshot(SNAPSHOT_INTERVAL); + advanceToNextSnapshot(); JsonNode json = requestAsJson("http://localhost/state/v1/all"); JsonNode metricValues = getFirstMetricValueNode(json); assertEquals(json.toString(), 40, metricValues.get("last").asDouble(), 0.001); @@ -294,7 +292,7 @@ public class StateHandlerTest extends StateHandlerTestBase { metric.set("serverTotalSuccessfulResponseLatency", 20, context1); metric.set("serverTotalSuccessfulResponseLatency", 40, context2); metric.add("random", 3, context1); - incrementCurrentTimeAndAssertSnapshot(SNAPSHOT_INTERVAL); + advanceToNextSnapshot(); JsonNode json = requestAsJson("http://localhost/state/v1/health"); assertEquals(json.toString(), "up", json.get("status").get("code").asText()); assertEquals(json.toString(), 2, json.get("metrics").get("values").size()); @@ -324,9 +322,4 @@ public class StateHandlerTest extends StateHandlerTestBase { JsonNode version = root.get("version"); assertEquals(Vtag.currentVersion.toString(), version.asText()); } - - private void incrementCurrentTimeAndAssertNoSnapshot(long val) { - currentTimeMillis.addAndGet(val); - assertFalse("Expected no snapshot", monitor.checkTime());; - } } diff --git a/container-core/src/test/java/com/yahoo/container/jdisc/state/StateHandlerTestBase.java b/container-core/src/test/java/com/yahoo/container/jdisc/state/StateHandlerTestBase.java index 8a1640e2c0e..57a9ba4abdb 100644 --- a/container-core/src/test/java/com/yahoo/container/jdisc/state/StateHandlerTestBase.java +++ b/container-core/src/test/java/com/yahoo/container/jdisc/state/StateHandlerTestBase.java @@ -22,14 +22,12 @@ import org.junit.Before; import java.io.InputStreamReader; import java.io.Reader; import java.nio.charset.StandardCharsets; -import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; /** * @author Simon Thoresen Hult @@ -59,8 +57,7 @@ public class StateHandlerTestBase { new HealthMonitorConfig.Builder() .snapshot_interval(TimeUnit.MILLISECONDS.toSeconds(SNAPSHOT_INTERVAL)) .initialStatus("up")); - ThreadFactory threadFactory = ignored -> mock(Thread.class); - this.monitor = new StateMonitor(healthMonitorConfig, timer, threadFactory); + this.monitor = new StateMonitor(healthMonitorConfig, timer, null); builder.guiceModules().install(new AbstractModule() { @Override @@ -110,9 +107,9 @@ public class StateHandlerTestBase { return mapper.readTree(mapper.getFactory().createParser(requestAsString(requestUri))); } - void incrementCurrentTimeAndAssertSnapshot(long val) { - currentTimeMillis.addAndGet(val); - assertTrue("Expected a new snapshot to be generated", monitor.checkTime()); + void advanceToNextSnapshot() { + currentTimeMillis.addAndGet(SNAPSHOT_INTERVAL); + monitor.updateSnapshot(); } } diff --git a/container-core/src/test/java/com/yahoo/container/jdisc/state/StateMonitorBenchmarkTest.java b/container-core/src/test/java/com/yahoo/container/jdisc/state/StateMonitorBenchmarkTest.java index 3892f81b8b5..22a2fc274c7 100644 --- a/container-core/src/test/java/com/yahoo/container/jdisc/state/StateMonitorBenchmarkTest.java +++ b/container-core/src/test/java/com/yahoo/container/jdisc/state/StateMonitorBenchmarkTest.java @@ -2,12 +2,10 @@ package com.yahoo.container.jdisc.state; import com.google.inject.Provider; -import com.yahoo.container.jdisc.config.HealthMonitorConfig; import com.yahoo.jdisc.Metric; import com.yahoo.jdisc.application.ContainerThread; import com.yahoo.jdisc.application.MetricConsumer; import com.yahoo.jdisc.application.MetricProvider; -import com.yahoo.jdisc.core.SystemTimer; import org.junit.Test; import java.util.ArrayList; @@ -32,8 +30,7 @@ public class StateMonitorBenchmarkTest { @Test public void requireThatHealthMonitorDoesNotBlockMetricThreads() throws Exception { - StateMonitor monitor = new StateMonitor(new HealthMonitorConfig(new HealthMonitorConfig.Builder()), - new SystemTimer()); + StateMonitor monitor = StateMonitor.createForTesting(); Provider<MetricConsumer> provider = MetricConsumerProviders.wrap(monitor); performUpdates(provider, 8); for (int i = 1; i <= NUM_THREADS; i *= 2) { |