diff options
Diffstat (limited to 'metrics-proxy/src')
6 files changed, 284 insertions, 3 deletions
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/telegraf/Telegraf.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/telegraf/Telegraf.java
new file mode 100644
index 00000000000..2afc0267434
--- /dev/null
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/telegraf/Telegraf.java
@@ -0,0 +1,113 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package ai.vespa.metricsproxy.telegraf;
+
+import com.google.inject.Inject;
+import com.yahoo.component.AbstractComponent;
+import com.yahoo.log.LogLevel;
+import com.yahoo.system.execution.ProcessExecutor;
+import com.yahoo.system.execution.ProcessResult;
+import com.yahoo.vespa.defaults.Defaults;
+import org.apache.velocity.VelocityContext;
+import org.apache.velocity.app.VelocityEngine;
+
+import java.io.FileWriter;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.util.Objects;
+import java.util.logging.Logger;
+
+import static com.yahoo.yolean.Exceptions.uncheck;
+
+/**
+ * Writes the Telegraf configuration file from a template and restarts the Telegraf service
+ * when constructed; stops the service on deconstruct once no instance remains registered.
+ *
+ * @author olaa
+ */
+public class Telegraf extends AbstractComponent {
+
+    private static final String TELEGRAF_CONFIG_PATH = "/etc/telegraf/telegraf.conf";
+    private static final String TELEGRAF_CONFIG_TEMPLATE_PATH = "templates/telegraf.conf.vm";
+    private static final String TELEGRAF_LOG_FILE_PATH = Defaults.getDefaults().underVespaHome("logs/telegraf/telegraf.log");
+    private final TelegrafRegistry telegrafRegistry;
+
+    private static final Logger logger = Logger.getLogger(Telegraf.class.getName());
+
+    @Inject
+    public Telegraf(TelegrafRegistry telegrafRegistry, TelegrafConfig telegrafConfig) {
+        this.telegrafRegistry = telegrafRegistry;
+        telegrafRegistry.addInstance(this);
+        try {
+            writeConfig(telegrafConfig, uncheck(() -> new FileWriter(TELEGRAF_CONFIG_PATH, StandardCharsets.UTF_8)));
+            restartTelegraf();
+        } catch (RuntimeException e) {
+            // deconstruct() is never called for an instance whose constructor failed,
+            // so unregister here to avoid a stale entry keeping the registry non-empty.
+            telegrafRegistry.removeInstance(this);
+            throw e;
+        }
+    }
+
+    /**
+     * Renders the Telegraf config template with values from the given config.
+     * The writer is always closed, also when rendering fails.
+     */
+    protected static void writeConfig(TelegrafConfig telegrafConfig, Writer writer) {
+        VelocityContext context = new VelocityContext();
+        context.put("logFilePath", TELEGRAF_LOG_FILE_PATH);
+        context.put("intervalSeconds", telegrafConfig.intervalSeconds());
+        context.put("cloudwatchPlugins", telegrafConfig.cloudWatch());
+        // TODO: Add node cert if hosted
+
+        VelocityEngine velocityEngine = new VelocityEngine();
+        velocityEngine.init();
+        uncheck(() -> {
+            try (writer; Reader templateReader = getTemplateReader()) {
+                velocityEngine.evaluate(context, writer, "TelegrafConfigWriter", templateReader);
+            }
+        });
+    }
+
+    private void restartTelegraf() {
+        executeCommand("service telegraf restart");
+    }
+
+    private void stopTelegraf() {
+        executeCommand("service telegraf stop");
+    }
+
+    private void executeCommand(String command) {
+        logger.info(String.format("Running command: %s", command));
+        ProcessExecutor processExecutor = new ProcessExecutor
+                .Builder(10)
+                .successExitCodes(0)
+                .build();
+        ProcessResult processResult = uncheck(() -> processExecutor.execute(command))
+                .orElseThrow(() -> new RuntimeException("Timed out running command: " + command));
+
+        logger.log(LogLevel.DEBUG, () -> String.format("Exit code: %d\nstdOut: %s\nstdErr: %s",
+                                                       processResult.exitCode,
+                                                       processResult.stdOut,
+                                                       processResult.stdErr));
+
+        if (!processResult.stdErr.isBlank())
+            logger.warning(String.format("stdErr not empty: %s", processResult.stdErr));
+    }
+
+    private static Reader getTemplateReader() {
+        return new InputStreamReader(
+                Objects.requireNonNull(Telegraf.class.getClassLoader().getResourceAsStream(TELEGRAF_CONFIG_TEMPLATE_PATH),
+                                       "Missing classpath resource: " + TELEGRAF_CONFIG_TEMPLATE_PATH),
+                StandardCharsets.UTF_8);
+    }
+
+    @Override
+    public void deconstruct() {
+        telegrafRegistry.removeInstance(this);
+        if (telegrafRegistry.isEmpty()) {
+            stopTelegraf();
+        }
+    }
+}
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/telegraf/TelegrafRegistry.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/telegraf/TelegrafRegistry.java
new file mode 100644
index 00000000000..23da51ea082
--- /dev/null
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/telegraf/TelegrafRegistry.java
@@ -0,0 +1,37 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package ai.vespa.metricsproxy.telegraf;
+
+import com.yahoo.log.LogLevel;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.logging.Logger;
+
+/**
+ * Keeps track of the registered {@link Telegraf} instances, so that the Telegraf service is
+ * stopped only when the last registered instance is deconstructed.
+ *
+ * @author olaa
+ */
+public class TelegrafRegistry {
+
+    // Static, so every registry object sees the same set of instances.
+    private static final List<Telegraf> telegrafInstances = Collections.synchronizedList(new ArrayList<>());
+
+    private static final Logger logger = Logger.getLogger(TelegrafRegistry.class.getName());
+
+    public void addInstance(Telegraf telegraf) {
+        logger.log(LogLevel.DEBUG, () -> "Adding Telegraf instance to registry: " + telegraf.hashCode());
+        telegrafInstances.add(telegraf);
+    }
+
+    public void removeInstance(Telegraf telegraf) {
+        logger.log(LogLevel.DEBUG, () -> "Removing Telegraf instance from registry: " + telegraf.hashCode());
+        telegrafInstances.remove(telegraf);
+    }
+
+    public boolean isEmpty() {
+        return telegrafInstances.isEmpty();
+    }
+}
diff --git a/metrics-proxy/src/main/resources/configdefinitions/telegraf.def b/metrics-proxy/src/main/resources/configdefinitions/telegraf.def
index f3b5db35d52..6abbd7921b5 100644
--- a/metrics-proxy/src/main/resources/configdefinitions/telegraf.def
+++ b/metrics-proxy/src/main/resources/configdefinitions/telegraf.def
@@ -5,9 +5,8 @@ package=ai.vespa.metricsproxy.telegraf
 
 intervalSeconds int default=60
 
-# The consumer to get metrics for
-vespa.consumer string default="default"
-
+# The Vespa metrics consumer to get metrics for
+cloudWatch[].consumer string
 cloudWatch[].region string default="us-east-1"
 cloudWatch[].namespace string
 
diff --git a/metrics-proxy/src/main/resources/templates/telegraf.conf.vm b/metrics-proxy/src/main/resources/templates/telegraf.conf.vm
new file mode 100644
index 00000000000..e99bab8b02d
--- /dev/null
+++ b/metrics-proxy/src/main/resources/templates/telegraf.conf.vm
@@ -0,0 +1,44 @@
+# Configuration for telegraf agent
+[agent]
+  interval = "${intervalSeconds}s"
+  round_interval = true
+  metric_batch_size = 1000
+  metric_buffer_limit = 10000
+  collection_jitter = "0s"
+  flush_interval = "${intervalSeconds}s"
+  flush_jitter = "0s"
+  precision = ""
+  logtarget = "file"
+  logfile = "$logFilePath"
+  logfile_rotation_interval = "1d"
+  logfile_rotation_max_size = "20MB"
+  logfile_rotation_max_archives = 5
+
+#foreach( $cloudwatch in $cloudwatchPlugins )
+# Configuration for AWS CloudWatch output.
+[[outputs.cloudwatch]]
+  region = "$cloudwatch.region()"
+  namespace = "$cloudwatch.namespace()"
+#if( $cloudwatch.accessKeyName() != "" )
+  access_key = "$cloudwatch.accessKeyName()"
+  secret_key = "$cloudwatch.secretKeyName()"
+#elseif( $cloudwatch.profile() != "" )
+  profile = "$cloudwatch.profile()"
+#end
+  tagexclude = ["vespa_consumer"]
+  [outputs.cloudwatch.tagpass]
+    vespa_consumer = ["$cloudwatch.consumer()"]
+
+# Configuration for Vespa input plugin
+[[inputs.vespa]]
+  url = "http://localhost:19092/metrics/v2/values?consumer=$cloudwatch.consumer()"
+  [inputs.vespa.tags]
+    vespa_consumer = "$cloudwatch.consumer()"
+#* TODO: Add node cert if hosted
+#if( $isHosted )
+  tls_cert = "${VESPA_CERTIFICATE_PATH}"
+  tls_key = "${VESPA_KEY_PATH}"
+  insecure_skip_verify = true
+#end
+*###
+#end
\ No newline at end of file
diff --git a/metrics-proxy/src/test/java/ai/vespa/metricsproxy/telegraf/TelegrafTest.java b/metrics-proxy/src/test/java/ai/vespa/metricsproxy/telegraf/TelegrafTest.java
new file mode 100644
index 00000000000..9ad31a0d9e8
--- /dev/null
+++ b/metrics-proxy/src/test/java/ai/vespa/metricsproxy/telegraf/TelegrafTest.java
@@ -0,0 +1,42 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package ai.vespa.metricsproxy.telegraf;
+
+import ai.vespa.metricsproxy.TestUtil;
+import org.junit.Test;
+
+import java.io.StringWriter;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * @author olaa
+ */
+public class TelegrafTest {
+
+    @Test
+    public void test_writing_correct_telegraf_plugin_config() {
+        TelegrafConfig config = new TelegrafConfig.Builder()
+                .cloudWatch(
+                        new TelegrafConfig.CloudWatch.Builder()
+                                .accessKeyName("accessKey1")
+                                .namespace("namespace1")
+                                .secretKeyName("secretKey1")
+                                .region("us-east-1")
+                                .consumer("consumer1")
+                )
+                .cloudWatch(
+                        new TelegrafConfig.CloudWatch.Builder()
+                                .namespace("namespace2")
+                                .profile("awsprofile")
+                                .region("us-east-2")
+                                .consumer("consumer2")
+                )
+                .intervalSeconds(300)
+                .build();
+        StringWriter rendered = new StringWriter();
+        Telegraf.writeConfig(config, rendered);
+        String expectedConfig = TestUtil.getFileContents("telegraf-config-with-two-cloudwatch-plugins.txt");
+        assertEquals(expectedConfig, rendered.toString());
+    }
+
+}
diff --git a/metrics-proxy/src/test/resources/telegraf-config-with-two-cloudwatch-plugins.txt b/metrics-proxy/src/test/resources/telegraf-config-with-two-cloudwatch-plugins.txt
new file mode 100644
index 00000000000..accd2cc87eb
--- /dev/null
+++ b/metrics-proxy/src/test/resources/telegraf-config-with-two-cloudwatch-plugins.txt
@@ -0,0 +1,46 @@
+# Configuration for telegraf agent
+[agent]
+  interval = "300s"
+  round_interval = true
+  metric_batch_size = 1000
+  metric_buffer_limit = 10000
+  collection_jitter = "0s"
+  flush_interval = "300s"
+  flush_jitter = "0s"
+  precision = ""
+  logtarget = "file"
+  logfile = "/opt/vespa/logs/telegraf/telegraf.log"
+  logfile_rotation_interval = "1d"
+  logfile_rotation_max_size = "20MB"
+  logfile_rotation_max_archives = 5
+
+# Configuration for AWS CloudWatch output.
+[[outputs.cloudwatch]]
+  region = "us-east-1"
+  namespace = "namespace1"
+  access_key = "accessKey1"
+  secret_key = "secretKey1"
+  tagexclude = ["vespa_consumer"]
+  [outputs.cloudwatch.tagpass]
+    vespa_consumer = ["consumer1"]
+
+# Configuration for Vespa input plugin
+[[inputs.vespa]]
+  url = "http://localhost:19092/metrics/v2/values?consumer=consumer1"
+  [inputs.vespa.tags]
+    vespa_consumer = "consumer1"
+# Configuration for AWS CloudWatch output.
+[[outputs.cloudwatch]]
+  region = "us-east-2"
+  namespace = "namespace2"
+  profile = "awsprofile"
+  tagexclude = ["vespa_consumer"]
+  [outputs.cloudwatch.tagpass]
+    vespa_consumer = ["consumer2"]
+
+# Configuration for Vespa input plugin
+[[inputs.vespa]]
+  url = "http://localhost:19092/metrics/v2/values?consumer=consumer2"
+  [inputs.vespa.tags]
+    vespa_consumer = "consumer2"
+