summary refs log tree commit diff stats
path: root/metrics-proxy
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@verizonmedia.com> 2020-02-25 15:07:53 +0100
committer Jon Bratseth <bratseth@verizonmedia.com> 2020-02-25 15:07:53 +0100
commit 035651fade6aba9bdcfdcd43d52e01f9c42382a9 (patch)
tree 3ad1b48967797c372342874e860904535905060d /metrics-proxy
parent 58ccc5ac198042ac8b64647b3d2d9121d571860d (diff)
parent 1e8356694522c1eabbdad2e27ac1b6b77828f7ae (diff)
Merge branch 'master' into bratseth/node-metrics
Diffstat (limited to 'metrics-proxy')
-rw-r--r-- metrics-proxy/pom.xml 4
-rw-r--r-- metrics-proxy/src/main/java/ai/vespa/metricsproxy/telegraf/Telegraf.java 95
-rw-r--r-- metrics-proxy/src/main/java/ai/vespa/metricsproxy/telegraf/TelegrafRegistry.java 33
-rw-r--r-- metrics-proxy/src/main/resources/configdefinitions/telegraf.def 5
-rw-r--r-- metrics-proxy/src/main/resources/templates/telegraf.conf.vm 44
-rw-r--r-- metrics-proxy/src/test/java/ai/vespa/metricsproxy/telegraf/TelegrafTest.java 42
-rw-r--r-- metrics-proxy/src/test/resources/telegraf-config-with-two-cloudwatch-plugins.txt 46
7 files changed, 266 insertions, 3 deletions
diff --git a/metrics-proxy/pom.xml b/metrics-proxy/pom.xml
index f72ad75c6af..355f420c2a4 100644
--- a/metrics-proxy/pom.xml
+++ b/metrics-proxy/pom.xml
@@ -132,6 +132,10 @@
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.velocity</groupId>
+ <artifactId>velocity</artifactId>
+ </dependency>
<!-- test scope -->
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/telegraf/Telegraf.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/telegraf/Telegraf.java
new file mode 100644
index 00000000000..2afc0267434
--- /dev/null
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/telegraf/Telegraf.java
@@ -0,0 +1,95 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package ai.vespa.metricsproxy.telegraf;
+
+import com.google.inject.Inject;
+import com.yahoo.component.AbstractComponent;
+import com.yahoo.log.LogLevel;
+import com.yahoo.system.execution.ProcessExecutor;
+import com.yahoo.system.execution.ProcessResult;
+import com.yahoo.vespa.defaults.Defaults;
+import org.apache.velocity.VelocityContext;
+import org.apache.velocity.app.VelocityEngine;
+
+import java.io.FileWriter;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.Writer;
+import java.util.logging.Logger;
+
+import static com.yahoo.yolean.Exceptions.uncheck;
+
+/**
+ * @author olaa
+ */
+public class Telegraf extends AbstractComponent {
+
+ private static final String TELEGRAF_CONFIG_PATH = "/etc/telegraf/telegraf.conf";
+ private static final String TELEGRAF_CONFIG_TEMPLATE_PATH = "templates/telegraf.conf.vm";
+ private static final String TELEGRAF_LOG_FILE_PATH = Defaults.getDefaults().underVespaHome("logs/telegraf/telegraf.log");
+ private final TelegrafRegistry telegrafRegistry;
+
+ private static final Logger logger = Logger.getLogger(Telegraf.class.getName());
+
+ @Inject
+ public Telegraf(TelegrafRegistry telegrafRegistry, TelegrafConfig telegrafConfig) {
+ this.telegrafRegistry = telegrafRegistry;
+ telegrafRegistry.addInstance(this);
+ writeConfig(telegrafConfig, uncheck(() -> new FileWriter(TELEGRAF_CONFIG_PATH)));
+ restartTelegraf();
+ }
+
+ protected static void writeConfig(TelegrafConfig telegrafConfig, Writer writer) {
+ VelocityContext context = new VelocityContext();
+ context.put("logFilePath", TELEGRAF_LOG_FILE_PATH);
+ context.put("intervalSeconds", telegrafConfig.intervalSeconds());
+ context.put("cloudwatchPlugins", telegrafConfig.cloudWatch());
+ // TODO: Add node cert if hosted
+
+ VelocityEngine velocityEngine = new VelocityEngine();
+ velocityEngine.init();
+ velocityEngine.evaluate(context, writer, "TelegrafConfigWriter", getTemplateReader());
+ uncheck(writer::close);
+ }
+
+ private void restartTelegraf() {
+ executeCommand("service telegraf restart");
+ }
+
+ private void stopTelegraf() {
+ executeCommand("service telegraf stop");
+ }
+
+ private void executeCommand(String command) {
+ logger.info(String.format("Running command: %s", command));
+ ProcessExecutor processExecutor = new ProcessExecutor
+ .Builder(10)
+ .successExitCodes(0)
+ .build();
+ ProcessResult processResult = uncheck(() -> processExecutor.execute(command))
+ .orElseThrow(() -> new RuntimeException("Timed out running command: " + command));
+
+ logger.log(LogLevel.DEBUG, () -> String.format("Exit code: %d\nstdOut: %s\nstdErr: %s",
+ processResult.exitCode,
+ processResult.stdOut,
+ processResult.stdErr));
+
+ if (!processResult.stdErr.isBlank())
+ logger.warning(String.format("stdErr not empty: %s", processResult.stdErr));
+ }
+
+ @SuppressWarnings("ConstantConditions")
+ private static Reader getTemplateReader() {
+ return new InputStreamReader(Telegraf.class.getClassLoader()
+ .getResourceAsStream(TELEGRAF_CONFIG_TEMPLATE_PATH)
+ );
+
+ }
+
+ @Override
+ public void deconstruct() {
+ telegrafRegistry.removeInstance(this);
+ if (telegrafRegistry.isEmpty()) {
+ stopTelegraf();
+ }
+ }
+}
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/telegraf/TelegrafRegistry.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/telegraf/TelegrafRegistry.java
new file mode 100644
index 00000000000..23da51ea082
--- /dev/null
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/telegraf/TelegrafRegistry.java
@@ -0,0 +1,33 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package ai.vespa.metricsproxy.telegraf;
+
+import com.yahoo.log.LogLevel;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.logging.Logger;
+
+/**
+ * Tracks registered {@link Telegraf} instances in a static list that is shared by all registry objects.
+ * @author olaa
+ */
+public class TelegrafRegistry {
+    private static final Logger log = Logger.getLogger(TelegrafRegistry.class.getName());
+    private static final List<Telegraf> registered = Collections.synchronizedList(new ArrayList<>());
+
+    /** Adds the given instance to the shared list. */
+    public void addInstance(Telegraf telegraf) {
+        log.log(LogLevel.DEBUG, () -> "Adding Telegraf instance to registry: " + telegraf.hashCode());
+        registered.add(telegraf);
+    }
+
+    /** Removes the given instance from the shared list, if present. */
+    public void removeInstance(Telegraf telegraf) {
+        log.log(LogLevel.DEBUG, () -> "Removing Telegraf instance from registry: " + telegraf.hashCode());
+        registered.remove(telegraf);
+    }
+
+    /** Returns whether no instance is currently registered. */
+    public boolean isEmpty() { return registered.isEmpty(); }
+}
diff --git a/metrics-proxy/src/main/resources/configdefinitions/telegraf.def b/metrics-proxy/src/main/resources/configdefinitions/telegraf.def
index f3b5db35d52..6abbd7921b5 100644
--- a/metrics-proxy/src/main/resources/configdefinitions/telegraf.def
+++ b/metrics-proxy/src/main/resources/configdefinitions/telegraf.def
@@ -5,9 +5,8 @@ package=ai.vespa.metricsproxy.telegraf
intervalSeconds int default=60
-# The consumer to get metrics for
-vespa.consumer string default="default"
-
+# The Vespa metrics consumer to get metrics for
+cloudWatch[].consumer string
cloudWatch[].region string default="us-east-1"
cloudWatch[].namespace string
diff --git a/metrics-proxy/src/main/resources/templates/telegraf.conf.vm b/metrics-proxy/src/main/resources/templates/telegraf.conf.vm
new file mode 100644
index 00000000000..e99bab8b02d
--- /dev/null
+++ b/metrics-proxy/src/main/resources/templates/telegraf.conf.vm
@@ -0,0 +1,44 @@
+# Configuration for telegraf agent
+[agent]
+ interval = "${intervalSeconds}s"
+ round_interval = true
+ metric_batch_size = 1000
+ metric_buffer_limit = 10000
+ collection_jitter = "0s"
+ flush_interval = "${intervalSeconds}s"
+ flush_jitter = "0s"
+ precision = ""
+ logtarget = "file"
+ logfile = "$logFilePath"
+ logfile_rotation_interval = "1d"
+ logfile_rotation_max_size = "20MB"
+ logfile_rotation_max_archives = 5
+
+#foreach( $cloudwatch in $cloudwatchPlugins )
+# Configuration for AWS CloudWatch output.
+[[outputs.cloudwatch]]
+ region = "$cloudwatch.region()"
+ namespace = "$cloudwatch.namespace()"
+#if( $cloudwatch.accessKeyName() != "" )
+ access_key = "$cloudwatch.accessKeyName()"
+ secret_key = "$cloudwatch.secretKeyName()"
+#elseif( $cloudwatch.profile() != "" )
+ profile = "$cloudwatch.profile()"
+#end
+ tagexclude = ["vespa_consumer"]
+ [outputs.cloudwatch.tagpass]
+ vespa_consumer = ["$cloudwatch.consumer()"]
+
+# Configuration for Vespa input plugin
+[[inputs.vespa]]
+ url = "http://localhost:19092/metrics/v2/values?consumer=$cloudwatch.consumer()"
+ [inputs.vespa.tags]
+ vespa_consumer = "$cloudwatch.consumer()"
+#* TODO: Add node cert if hosted
+#if( $isHosted )
+ tls_cert = "${VESPA_CERTIFICATE_PATH}"
+ tls_key = "${VESPA_KEY_PATH}"
+ insecure_skip_verify = true
+#end
+*###
+#end
\ No newline at end of file
diff --git a/metrics-proxy/src/test/java/ai/vespa/metricsproxy/telegraf/TelegrafTest.java b/metrics-proxy/src/test/java/ai/vespa/metricsproxy/telegraf/TelegrafTest.java
new file mode 100644
index 00000000000..9ad31a0d9e8
--- /dev/null
+++ b/metrics-proxy/src/test/java/ai/vespa/metricsproxy/telegraf/TelegrafTest.java
@@ -0,0 +1,42 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package ai.vespa.metricsproxy.telegraf;
+
+import ai.vespa.metricsproxy.TestUtil;
+import org.junit.Test;
+
+import java.io.StringWriter;
+
+import static org.junit.Assert.*;
+
+/**
+ * Checks the config text that {@link Telegraf#writeConfig} renders against an expected resource file.
+ * @author olaa
+ */
+public class TelegrafTest {
+
+    @Test
+    public void test_writing_correct_telegraf_plugin_config() {
+        // The first plugin is set up with an access key pair, the second with a named profile
+        TelegrafConfig.CloudWatch.Builder keyPairPlugin = new TelegrafConfig.CloudWatch.Builder()
+                .accessKeyName("accessKey1")
+                .namespace("namespace1")
+                .secretKeyName("secretKey1")
+                .region("us-east-1")
+                .consumer("consumer1");
+        TelegrafConfig.CloudWatch.Builder profilePlugin = new TelegrafConfig.CloudWatch.Builder()
+                .namespace("namespace2")
+                .profile("awsprofile")
+                .region("us-east-2")
+                .consumer("consumer2");
+        TelegrafConfig telegrafConfig = new TelegrafConfig.Builder()
+                .cloudWatch(keyPairPlugin)
+                .cloudWatch(profilePlugin)
+                .intervalSeconds(300)
+                .build();
+
+        StringWriter rendered = new StringWriter();
+        Telegraf.writeConfig(telegrafConfig, rendered);
+        String expected = TestUtil.getFileContents( "telegraf-config-with-two-cloudwatch-plugins.txt");
+        assertEquals(expected, rendered.toString());
+    }
+}
diff --git a/metrics-proxy/src/test/resources/telegraf-config-with-two-cloudwatch-plugins.txt b/metrics-proxy/src/test/resources/telegraf-config-with-two-cloudwatch-plugins.txt
new file mode 100644
index 00000000000..accd2cc87eb
--- /dev/null
+++ b/metrics-proxy/src/test/resources/telegraf-config-with-two-cloudwatch-plugins.txt
@@ -0,0 +1,46 @@
+# Configuration for telegraf agent
+[agent]
+ interval = "300s"
+ round_interval = true
+ metric_batch_size = 1000
+ metric_buffer_limit = 10000
+ collection_jitter = "0s"
+ flush_interval = "300s"
+ flush_jitter = "0s"
+ precision = ""
+ logtarget = "file"
+ logfile = "/opt/vespa/logs/telegraf/telegraf.log"
+ logfile_rotation_interval = "1d"
+ logfile_rotation_max_size = "20MB"
+ logfile_rotation_max_archives = 5
+
+# Configuration for AWS CloudWatch output.
+[[outputs.cloudwatch]]
+ region = "us-east-1"
+ namespace = "namespace1"
+ access_key = "accessKey1"
+ secret_key = "secretKey1"
+ tagexclude = ["vespa_consumer"]
+ [outputs.cloudwatch.tagpass]
+ vespa_consumer = ["consumer1"]
+
+# Configuration for Vespa input plugin
+[[inputs.vespa]]
+ url = "http://localhost:19092/metrics/v2/values?consumer=consumer1"
+ [inputs.vespa.tags]
+ vespa_consumer = "consumer1"
+# Configuration for AWS CloudWatch output.
+[[outputs.cloudwatch]]
+ region = "us-east-2"
+ namespace = "namespace2"
+ profile = "awsprofile"
+ tagexclude = ["vespa_consumer"]
+ [outputs.cloudwatch.tagpass]
+ vespa_consumer = ["consumer2"]
+
+# Configuration for Vespa input plugin
+[[inputs.vespa]]
+ url = "http://localhost:19092/metrics/v2/values?consumer=consumer2"
+ [inputs.vespa.tags]
+ vespa_consumer = "consumer2"
+