From 30fd1322c666d8e48fae340dc69ce5030069e30b Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Mon, 3 Sep 2018 08:00:12 +0000 Subject: enforce log retention policies * for access logs, save meta-data about the log file itself in a simple format. * implement a proof-of-concept shell script that removes log files after one month. * ensure retention enforcer is started when services start * note that retention enforcer will continue running even after services stop, but it has protection to ensure that it won't multiply endlessly. --- config-proxy/src/main/sh/vespa-config-ctl.sh | 1 + .../yahoo/container/logging/LogFileHandler.java | 6 +- .../resources/configdefinitions/access-log.def | 8 +- logd/CMakeLists.txt | 2 + logd/src/apps/retention/retention-enforcer.sh | 115 +++++++++++++++++++++ vespalog/pom.xml | 4 + .../src/main/java/com/yahoo/log/LogFileDb.java | 50 +++++++++ .../src/test/java/com/yahoo/log/LogFileDbTest.java | 29 ++++++ 8 files changed, 213 insertions(+), 2 deletions(-) create mode 100755 logd/src/apps/retention/retention-enforcer.sh create mode 100644 vespalog/src/main/java/com/yahoo/log/LogFileDb.java create mode 100644 vespalog/src/test/java/com/yahoo/log/LogFileDbTest.java diff --git a/config-proxy/src/main/sh/vespa-config-ctl.sh b/config-proxy/src/main/sh/vespa-config-ctl.sh index a670e69cdbf..649eef951c0 100755 --- a/config-proxy/src/main/sh/vespa-config-ctl.sh +++ b/config-proxy/src/main/sh/vespa-config-ctl.sh @@ -103,6 +103,7 @@ export LD_LIBRARY_PATH="$VESPA_HOME/lib64" case $1 in start) + nohup sbin/vespa-retention-enforcer > ${LOGDIR}/vre-start.log 2>&1 largestN) largestN=thisN; } - file.renameTo(new File(dir,file.getName() + "." + (largestN + 1))); + File newFn = new File(dir, file.getName() + "." 
+ (largestN + 1)); + LogFileDb.nowLoggingTo(newFn.getAbsolutePath()); + file.renameTo(newFn); } /** diff --git a/container-accesslogging/src/main/resources/configdefinitions/access-log.def b/container-accesslogging/src/main/resources/configdefinitions/access-log.def index 276128e0405..9df9299ae19 100644 --- a/container-accesslogging/src/main/resources/configdefinitions/access-log.def +++ b/container-accesslogging/src/main/resources/configdefinitions/access-log.def @@ -1,11 +1,16 @@ # Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. namespace=container.core - # File name patterns supporting the expected time variables, e.g. ".%Y%m%d%H%M%S" fileHandler.pattern string + +# When should rotation happen, in minutes after midnight +# Does this really need to be configurable? +# Could just configure "every N minutes" instead fileHandler.rotation string default="0 60 ..." +# TODO remove in Vespa 7, always use DATE +# # Defines how file rotation is done. 
There are two options:
 #
 # DATE:
@@ -27,4 +32,5 @@ fileHandler.rotateScheme enum {DATE, SEQUENCE} default=DATE
 fileHandler.symlink string default=""
 
 # compress the previous access log after rotation
+# TODO change to "true" for Vespa 7
 fileHandler.compressOnRotation bool default=false
diff --git a/logd/CMakeLists.txt b/logd/CMakeLists.txt
index 3eeeb7adb66..6a8296564a3 100644
--- a/logd/CMakeLists.txt
+++ b/logd/CMakeLists.txt
@@ -18,3 +18,5 @@ vespa_define_module(
     src/tests/info
     src/tests/rotate
 )
+
+vespa_install_script(src/apps/retention/retention-enforcer.sh vespa-retention-enforcer sbin)
diff --git a/logd/src/apps/retention/retention-enforcer.sh b/logd/src/apps/retention/retention-enforcer.sh
new file mode 100755
index 00000000000..8c8f56a7fea
--- /dev/null
+++ b/logd/src/apps/retention/retention-enforcer.sh
@@ -0,0 +1,150 @@
+#!/bin/sh
+
+# Proof-of-concept log retention enforcer.
+#
+# Once per hour, reads the meta-data files $DBDIR/logfiles.* (one
+# "<epoch-seconds> <logfile>" entry per line) and removes every listed
+# log file that was both registered and last modified more than
+# RETAIN_DAYS days ago.  A meta-data file that no longer lists any
+# existing log file is removed once it is about three retention
+# periods old.  All paths are relative to the current directory.
+
+DBGF=logs/vespa/debug.retention-enforcer
+DBDIR=var/db/vespa/logfiledb
+PIDF=$DBDIR/retention-enforcer.pid
+RETAIN_DAYS=31
+
+# Exit with an error unless directory $1 exists and is writable.
+prereq_dir() {
+    if [ -d "$1" ] && [ -w "$1" ]; then
+        :
+    else
+        echo "$0: missing directory '$1' in '`pwd`'" >&2
+        exit 1
+    fi
+}
+
+# Verify that the directories this script depends on are present.
+check_prereqs() {
+    prereq_dir var/db/vespa
+    prereq_dir logs/vespa
+}
+
+# Create directory $1 unless it already exists and is writable.
+ensure_dir () {
+    if [ -d "$1" ] && [ -w "$1" ]; then
+        return 0
+    fi
+    echo "Creating directory '$1' in '`pwd`'"
+    mkdir -p "$1" || exit 1
+}
+
+# Check prerequisites, send all further output to a per-process debug
+# log, and make sure the meta-data directory exists.
+prepare_stuff() {
+    check_prereqs
+    exec > $DBGF.$$.log 2>&1
+    ensure_dir $DBDIR
+}
+
+# Record our pid in the pid file (written to a temporary file first,
+# then renamed into place).
+mark_pid() {
+    echo $$ > $PIDF.$$.tmp
+    mv $PIDF.$$.tmp $PIDF || exit 1
+}
+
+# Keep enforcers from multiplying: if the pid file names a larger pid
+# that, 30 seconds later, is still recorded there and is a running
+# process whose ps output mentions "retention", exit in its favour.
+check_pidfile() {
+    read pid < $PIDF
+    [ "$pid" = $$ ] && return 0
+    if [ "$pid" ] && [ $pid -gt $$ ]; then
+        sleep 30
+        read pid_again < $PIDF
+        if [ "$pid_again" != "$pid" ]; then return 1; fi
+        ps -p $pid >/dev/null 2>&1 || return 1
+        proc=$(ps -p $pid 2>&1)
+        case $proc in *retention*) ;; *) return 1;; esac
+        echo "$0 [$$]: Yielding my place to pid '$pid'"
+        exit 1
+    fi
+}
+
+# Remove meta-data file $1 (named logfiles.<stamp>, where <stamp> is
+# epoch seconds / 100000) when both its stamp and its modification
+# time are older than about three retention periods.
+maybe_collect() {
+    now=$(date +%s)
+    # drop the last five digits: the current time in stamp units
+    chopnow=${now%?????}
+    # the stamp is whatever follows the last '.' in the file name
+    ts=${1##*/*.}
+    [ "$ts" ] || return 1
+    [ "$ts" -gt 0 ] || return 1
+    add=$((3 * $RETAIN_DAYS))
+    lim1=$(($ts + $add))
+    mod_time=$(get_mod_time "$1")
+    add=$((3 * 86400 * $RETAIN_DAYS))
+    lim2=$(($mod_time + $add))
+    if [ $lim1 -lt $chopnow ] && [ $lim2 -lt $now ]; then
+        echo "Collect meta-logfile '$1' ts '$ts' (lim $lim1, now $chopnow)"
+        rm -f "$1"
+    fi
+}
+
+# Print the modification time of file $1 in seconds since the epoch.
+# The name is handed to perl as an argument rather than pasted into
+# the perl source, so quotes or '$' in a file name cannot break it.
+get_mod_time() {
+    perl -e 'print (((stat($ARGV[0]))[9]) . "\n")' "$1"
+}
+
+# Handle meta-data file $1: remove each listed log file that still
+# exists but whose recorded timestamp and modification time are both
+# more than RETAIN_DAYS days old.  If none of the listed files exists,
+# the meta-data file itself becomes a candidate for removal.
+process_file() {
+    now=$(date +%s)
+    add=$((86400 * $RETAIN_DAYS))
+    found=0
+    # -r keeps backslashes in recorded file names intact
+    while read -r timestamp logfilename; do
+        if [ -f "$logfilename" ]; then
+            found=1
+            lim1=$(($timestamp + $add))
+            mod_time=$(get_mod_time "$logfilename")
+            lim2=$((mod_time + $add))
+            if [ $lim1 -lt $now ] && [ $lim2 -lt $now ]; then
+                echo "Collect logfile '$logfilename' timestamped $timestamp modified $mod_time"
+                rm -f "$logfilename"
+            fi
+        fi
+    done < "$1"
+    if [ $found = 0 ]; then
+        maybe_collect "$1"
+    fi
+}
+
+# Run process_file on every meta-data file found in $DBDIR.
+process_all() {
+    for dbf in $DBDIR/logfiles.* ; do
+        [ -f "$dbf" ] && process_file "$dbf"
+    done
+}
+
+# Hourly loop: claim the pid file, enforce retention, sleep, then
+# check whether another instance has taken over.
+mainloop() {
+    while true; do
+        mark_pid
+        process_all
+        sleep 3600
+        check_pidfile
+    done
+}
+
+prepare_stuff
+mainloop
+exit 0
diff --git a/vespalog/pom.xml b/vespalog/pom.xml
index 6443769afbe..7b167ee2c1c 100644
--- a/vespalog/pom.xml
+++ b/vespalog/pom.xml
@@ -50,6 +50,10 @@
 org.apache.maven.plugins
 maven-surefire-plugin
+ once
+
+ ${project.build.directory}
+
 ${test.hide}
diff --git a/vespalog/src/main/java/com/yahoo/log/LogFileDb.java b/vespalog/src/main/java/com/yahoo/log/LogFileDb.java
new file mode 100644
index 00000000000..d0fa64805bf
--- /dev/null
+++ b/vespalog/src/main/java/com/yahoo/log/LogFileDb.java
@@ -0,0 +1,50 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.log;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static java.nio.file.StandardOpenOption.*;
+
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import static com.yahoo.vespa.defaults.Defaults.getDefaults;
+
+
+/**
+ * This class takes care of saving meta-data about a log-file,
+ * ensuring that we can enact policies about log retention.
+ *
+ * <p>Each call to {@link #nowLoggingTo(String)} appends one line of the form
+ * {@code <epoch-seconds> <filename>} to a file named {@code logfiles.<stamp>}
+ * in {@link #DBDIR} under the Vespa home directory.</p>
+ *
+ * @author arnej
+ */
+public class LogFileDb {
+
+    /** Directory holding the meta-data files, relative to the Vespa home directory. */
+    static final String DBDIR = "var/db/vespa/logfiledb/";
+
+    /** Returns the current time in seconds since the epoch, divided by 100000. */
+    private static long dayStamp() {
+        long s = System.currentTimeMillis() / 1000;
+        return s / 100000;
+    }
+
+    /**
+     * Opens the current meta-data file for appending, creating the file
+     * (and the directory holding it) when missing.
+     *
+     * @return a stream that appends to the meta-data file; the caller must close it
+     * @throws java.io.IOException if the directory or file cannot be created or opened
+     */
+    private static OutputStream metaFile() throws java.io.IOException {
+        String fn = getDefaults().underVespaHome(DBDIR + "logfiles." + dayStamp());
+        Path path = Paths.get(fn);
+        Path dir = path.getParent();
+        // Without the directory every write would fail and the meta-data would be lost.
+        // Check first, so an existing directory (or a symlink to one) is left untouched.
+        if (dir != null && !Files.isDirectory(dir)) {
+            Files.createDirectories(dir);
+        }
+        return Files.newOutputStream(path, CREATE, APPEND);
+    }
+
+    /**
+     * Records that logging to the given file starts now.
+     * Failure to save the record is reported on standard error and otherwise
+     * ignored, so logging itself is never disturbed by it.
+     *
+     * @param filename name of the log file; must not contain a newline,
+     *                 since records are separated by newlines
+     * @throws IllegalArgumentException if {@code filename} contains a newline
+     */
+    public static void nowLoggingTo(String filename) {
+        if (filename.contains("\n")) {
+            throw new IllegalArgumentException("Cannot use filename with newline: "+filename);
+        }
+        long s = System.currentTimeMillis() / 1000;
+        String meta = "" + s + " " + filename + "\n";
+        byte[] data = meta.getBytes(UTF_8);
+        try (OutputStream out = metaFile()) {
+            out.write(data);
+        } catch (java.io.IOException e) {
+            System.err.println("Saving meta-data about logfile "+filename+" failed: "+e);
+            // ignore
+        }
+    }
+}
diff --git a/vespalog/src/test/java/com/yahoo/log/LogFileDbTest.java b/vespalog/src/test/java/com/yahoo/log/LogFileDbTest.java
new file mode 100644
index 00000000000..4dd7bd0978c
--- /dev/null
+++ b/vespalog/src/test/java/com/yahoo/log/LogFileDbTest.java
@@ -0,0 +1,29 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.log;
+
+import java.io.File;
+import static com.yahoo.vespa.defaults.Defaults.getDefaults;
+import org.junit.Test;
+
+/**
+ * Smoke test: registering log files in {@link LogFileDb} must not throw.
+ * @author arnej
+ */
+public class LogFileDbTest {
+
+    @Test
+    public void canSave() {
+        System.err.println("VH: "+System.getenv("VESPA_HOME"));
+        File dbDir = new File(getDefaults().underVespaHome(LogFileDb.DBDIR));
+        dbDir.mkdirs();
+        if (!dbDir.isDirectory()) {
+            System.err.println("cannot create directory: "+dbDir);
+            return;
+        }
+        System.err.println("using directory: "+dbDir);
+        new File(getDefaults().underVespaHome("logs/extra")).mkdirs();
+        for (String name : new String[] {"logs/extra/foo-bar.log", "logs/extra/stamped-1.log"}) {
+            LogFileDb.nowLoggingTo(getDefaults().underVespaHome(name));
+        }
+    }
+}
--
cgit v1.2.3