summaryrefslogtreecommitdiffstats
path: root/logd
diff options
context:
space:
mode:
authorArne Juul <arnej@yahoo-inc.com>2018-09-03 08:00:12 +0000
committerArne Juul <arnej@yahoo-inc.com>2018-09-06 13:00:14 +0000
commit30fd1322c666d8e48fae340dc69ce5030069e30b (patch)
treeee2b340ea309ff07d8bc867952dc65abed863f06 /logd
parentae594f2b7453ff1c5109fd8a9cec9339ec0a6366 (diff)
enforce log retention policies
* for access logs, save meta-data about the log file itself in a simple format.
* implement a proof-of-concept shell script that removes log files after one month.
* ensure the retention enforcer is started when services start.
* note that the retention enforcer will continue running even after services stop, but it has protection to ensure that it won't multiply endlessly.
Diffstat (limited to 'logd')
-rw-r--r--logd/CMakeLists.txt2
-rwxr-xr-xlogd/src/apps/retention/retention-enforcer.sh115
2 files changed, 117 insertions, 0 deletions
diff --git a/logd/CMakeLists.txt b/logd/CMakeLists.txt
index 3eeeb7adb66..6a8296564a3 100644
--- a/logd/CMakeLists.txt
+++ b/logd/CMakeLists.txt
@@ -18,3 +18,5 @@ vespa_define_module(
src/tests/info
src/tests/rotate
)
+
+# NOTE(review): presumably installs the retention enforcer script into sbin
+# under the name vespa-retention-enforcer -- confirm against the definition
+# of vespa_install_script.
+vespa_install_script(src/apps/retention/retention-enforcer.sh vespa-retention-enforcer sbin)
diff --git a/logd/src/apps/retention/retention-enforcer.sh b/logd/src/apps/retention/retention-enforcer.sh
new file mode 100755
index 00000000000..8c8f56a7fea
--- /dev/null
+++ b/logd/src/apps/retention/retention-enforcer.sh
@@ -0,0 +1,115 @@
+#!/bin/sh
+
+DBGF=logs/vespa/debug.retention-enforcer
+DBDIR=var/db/vespa/logfiledb
+PIDF=$DBDIR/retention-enforcer.pid
+RETAIN_DAYS=31
+
+prereq_dir() {
+ if [ -d $1 ] && [ -w $1 ]; then
+ :
+ else
+ echo "$0: missing directory '$1' in '`pwd`'" >&2
+ exit 1
+ fi
+}
+
+check_prereqs() {
+ prereq_dir var/db/vespa
+ prereq_dir logs/vespa
+}
+
+ensure_dir () {
+ if [ -d $1 ] && [ -w $1 ]; then
+ return 0
+ fi
+ echo "Creating directory '$1' in '`pwd`'"
+ mkdir -p $1 || exit 1
+}
+
+# One-time setup: verify prerequisites, redirect all further output to a
+# per-process debug log, then make sure the database directory exists.
+prepare_stuff() {
+    # Runs before the redirection, so its diagnostics still reach the
+    # caller's stderr.
+    check_prereqs
+    debug_log=$DBGF.$$.log
+    # From here on both stdout and stderr of this script go to the log.
+    exec >$debug_log 2>&1
+    ensure_dir $DBDIR
+}
+
+# Record this process id in the pid file; exits with status 1 if the
+# pid file cannot be moved into place.
+mark_pid() {
+    pid_tmp=$PIDF.$$.tmp
+    # Written to a temporary file first and then renamed, presumably so a
+    # reader never sees a partially written pid file.
+    echo $$ >$pid_tmp
+    mv $pid_tmp $PIDF || exit 1
+}
+
+# Decide whether this instance should keep running, based on the pid
+# currently recorded in the pid file.
+# Returns 0 when the pid file names this process, or names a pid that is
+# empty or not numerically greater than ours.
+# Returns 1 when a greater pid was recorded but, after a 30 second wait,
+# the pid file changed, that process is gone, or its ps listing does not
+# mention "retention".
+# Exits the script with status 1 when yielding to that other process.
+check_pidfile() {
+    read pid < $PIDF
+    if [ "$pid" = $$ ]; then
+        return 0
+    fi
+    if [ "$pid" ] && [ $pid -gt $$ ]; then
+        :
+    else
+        return 0
+    fi
+    # Give the other process time to settle, then re-read the pid file.
+    sleep 30
+    read pid_again < $PIDF
+    [ "$pid_again" = "$pid" ] || return 1
+    # The recorded process must still be alive ...
+    if ps -p $pid >/dev/null 2>&1; then
+        proc=$(ps -p $pid 2>&1)
+    else
+        return 1
+    fi
+    # ... and must look like a retention enforcer before we step aside.
+    case $proc in
+        *retention*)
+            echo "$0 [$$]: Yielding my place to pid '$pid'"
+            exit 1
+            ;;
+        *)
+            return 1
+            ;;
+    esac
+}
+
+maybe_collect() {
+ now=$(date +%s)
+ chopnow=${now%?????}
+ ts=${1##*/*.}
+ [ "$ts" ] || return 1
+ [ "$ts" -gt 0 ] || return 1
+ add=$((3 * $RETAIN_DAYS))
+ lim1=$(($ts + $add))
+ mod_time=$(get_mode_time "$1")
+ add=$((3 * 86400 * $RETAIN_DAYS))
+ lim2=$(($mod_time + $add))
+ if [ $lim1 -lt $chopnow ] && [ $lim2 -lt $now ]; then
+ echo "Collect meta-logfile '$1' ts '$ts' (lim $lim, now $chopnow)"
+ rm -f "$1"
+ fi
+}
+
+get_mod_time() {
+ perl -e 'print (((stat("'"$1"'"))[9]) . "\n")'
+}
+
+# Apply the retention policy to every log file listed in database file $1.
+#   $1 - path of a database file with one "<timestamp> <logfilename>"
+#        entry per line
+# A listed log file is removed when both its recorded timestamp and its
+# modification time are more than RETAIN_DAYS days in the past. If none
+# of the listed files exists any more, the database file itself is handed
+# to maybe_collect.
+process_file() {
+    now=$(date +%s)
+    add=$((86400 * RETAIN_DAYS))
+    found=0
+    # -r keeps backslashes in file names intact.
+    while read -r timestamp logfilename; do
+        [ -f "$logfilename" ] || continue
+        # Set before any removal, so the database file outlives this pass
+        # even if its last log file is deleted below.
+        found=1
+        # Skip malformed entries: a non-numeric timestamp would otherwise
+        # raise an arithmetic error, which some shells treat as fatal.
+        case $timestamp in
+            ''|*[!0-9]*) continue ;;
+        esac
+        mod_time=$(get_mod_time "$logfilename")
+        # Never delete on the timestamp alone when the modification time
+        # is unknown (an empty value would count as 0 in the arithmetic).
+        [ "$mod_time" ] || continue
+        lim1=$((timestamp + add))
+        lim2=$((mod_time + add))
+        if [ "$lim1" -lt "$now" ] && [ "$lim2" -lt "$now" ]; then
+            echo "Collect logfile '$logfilename' timestamped $timestamp modified $mod_time"
+            rm -f -- "$logfilename"
+        fi
+    done < "$1"
+    if [ "$found" = 0 ]; then
+        maybe_collect "$1"
+    fi
+}
+
+# Run process_file on every regular file matching logfiles.* in the
+# database directory.
+process_all() {
+    for db_file in $DBDIR/logfiles.*; do
+        # Also filters out the literal pattern left when nothing matches.
+        test -f "$db_file" && process_file "$db_file"
+    done
+}
+
+# Endless service loop: claim the pid file, enforce retention on all
+# database files, sleep for 3600 seconds, then re-check the pid file.
+# Only leaves the loop when mark_pid or check_pidfile exits the script.
+mainloop() {
+    while :; do
+        mark_pid
+        process_all
+        sleep 3600
+        check_pidfile
+    done
+}
+
+prepare_stuff
+mainloop
+exit 0