Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

File system maintenance service and its btrfs handle #256

Merged
merged 2 commits into from
Feb 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions extras/bin/btrfs-maint
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/bin/sh
# This is a handle of maintaining btrfs file system, scheduled by fs-service
#
set -e

echo_stderr(){
echo "$1" >&2
}

check_root(){
if [ "$(id -u)" != "0" ]; then
return 1
fi
}

is_load_allow(){
sys_cores=$(nproc)
load_1m=$(uptime|awk '{print $(NF-2)}'|sed 's/\..*//')
load_5m=$(uptime|awk '{print $(NF-2)}'|sed 's/\..*//')
avg_1m=$((load_1m/sys_cores))
avg_5m=$((load_5m/sys_cores))

if [ "$avg_1m" -gt 8 ] || [ "$avg_5m" -gt 5 ]; then
return 1
fi
}

is_io_allow(){
# FIXME: This iowait is not a good estimation
iowait=$(vmstat|tail -1|awk '{ print $(NF-1)}')

if [ "$iowait" -gt 20 ]; then
return 1
fi
}

fs_sync(){
btrfs filesystem sync "$1" >/dev/null 2>&1
}

fs_balance(){
m_total=$(btrfs filesystem df -b "$1"|grep Metadata|head -1|sed "s/.*total=\([0-9]*\).*/\1/")
m_used=$(btrfs filesystem df -b "$1"|grep Metadata|head -1|sed "s/.*used=\([0-9]*\).*/\1/")
m_ratio=$((100*m_used/m_total))
if [ "$m_ratio" -gt 50 ]; then
btrfs balance start -musage=50 -dusage=0 "$1" >/dev/null 2>&1
fi
}

fs_scrub(){
btrfs scrub start -B "$1" >/dev/null 2>&1
}

fs_check(){
btrfs check --readonly --qgroup-report --progress "$1"
}

fs_preset_hourly(){
for i in $(btrfs subvolume list -o "$SNAP_FS"|cut -d' ' -f9); do
fs_sync "$SNAP_FS/$i"
sleep 1
done
}

fs_preset_daily(){
for i in $(btrfs subvolume list -o "$SNAP_FS"|cut -d' ' -f9); do
while true; do
if is_load_allow && is_io_allow; then
fs_balance "$SNAP_FS/$i"
break
else
sleep 30
fi
done
sleep 10
done
}

fs_preset_weekly(){
for i in $(btrfs subvolume list -o "$SNAP_FS"|cut -d' ' -f9); do
while true; do
if is_load_allow && is_io_allow; then
fs_scrub "$SNAP_FS/$i"
break
else
sleep 30
fi
done
sleep 10
done
}

if ! check_root; then
echo_stderr "ERROR: This command must be run as root!"
exit 1
fi

if [ ! -z "$1" ] || [ ! -z "$2" ]; then
SNAP_FS=$1
LV=$2
else
echo_stderr "ERROR: Not enough argument."
exit 1
fi

case $LV in
hourly)
fs_preset_hourly
;;
daily)
fs_preset_daily
;;
weekly)
fs_preset_weekly
;;
*)
echo_stderr "ERROR: Wrong argument."
exit 1
;;
esac
244 changes: 244 additions & 0 deletions extras/bin/fs-service
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
#!/bin/sh
# This is a dumb service wrapper to get file system maintanence tasks done
# properly under snappy constrains where timers are not always available.
#

trap sig_HUP 1 2 3 6
trap sig_TERM 15

# Backend maintainence tool
# Args: $PATH_TO_CHECK {hourly|daily|weekly}
FS_MAINT=btrfs-maint

# Scheduling interval
SCHED_INTERVAL=30
# DO NOT CHANGE
SCHED_HOURLY=3600
SCHED_DAILY=86400
SCHED_WEEKLY=604800

PID_HOURLY=0
PID_DAYLY=0
PID_WEEKLY=0

echo_stderr(){
echo "$1" >&2
}

if [ "$SNAP_NAME" = "" ]; then
SNAP_NAME=$(for i in /snap/*; do echo "$i" | grep subutai; done| head -n1 | sed 's/\/snap\///')
fi
if [ "$SNAP_NAME" = "" ]; then
# Die hard when not possible to determine a proper snap environment
echo_stderr "FATAL: Cannot determine SNAP_NAME, quit."
exit 1
fi

SNAP="/snap/$SNAP_NAME/current/"
SNAP_DATA="/var/$SNAP"
PATH=$PATH:$SNAP/bin/
SNAP_FS=/var/snap/$SNAP_NAME/common/lxc

FS_PROP=$SNAP_DATA/fs-service
FS_TIMESTAMP=$FS_PROP/timestamp
FS_PIDLOCK=$FS_PROP/pid

check_root(){
if [ ! "$(id -u)" = "0" ]; then
return 1
fi
}

is_finished(){
if [ ! "$1" -eq 0 ] && kill -s 0 "$1" >/dev/null 2>&1; then
return 1;
fi
}

lock_aquire(){
PID=$$
if [ ! -f "$FS_PIDLOCK" ]; then
# Normally we aquire the lock quick
mkdir -p "$FS_PROP"
echo "$PID" > "$FS_PIDLOCK"
return 0
fi
PID_LOCK=$(cat "$FS_PIDLOCK")
if is_finished "$PID_LOCK"; then
# Lock owner is dead
echo "$PID" > "$FS_PIDLOCK"
return 0
elif [ "$PID" -eq "$PID_LOCK" ]; then
# We are the lock owner
return 0
fi
# Failed to aquire the lock
return 1
}

lock_release(){
PID=$$
PID_LOCK=0
if [ -f "$FS_PIDLOCK" ]; then
PID_LOCK=$(cat "$FS_PIDLOCK")
fi
if [ ! "$PID" -eq "$PID_LOCK" ] && ! is_finished "$PID_LOCK"; then
return 1
fi
rm -f "$FS_PIDLOCK"
}

sig_HUP(){
# Ignoring SIGHUP or likewise
true
}

sig_TERM(){
# Quit politely
shutdown_graceful
}

shutdown_graceful(){
echo_stderr "INFO: Graceful shutdown requsted, quitting."
export SCHED_HALT=1
wait
lock_release
exit 0
}

check_ecc(){
if ! which dmidecode >/dev/null 2>&1; then
return 0
fi
ecc_type=$(dmidecode --type 16 | grep "Error Correction Type" | sed 's/.*: //')
if [ "$ecc_type" = "None" ]; then
return 1
fi
}

check_mnt(){
# $1: full path to check
if ! grep btrfs /proc/mounts|grep " $1 " >/dev/null 2>&1; then
return 1
fi

mnt_opts=$(grep btrfs /proc/mounts|grep " $1 "|cut -d" " -f4)
if echo "$mnt_opts" | grep ro 1>/dev/null 2>&1; then
return 1
fi
if echo "$mnt_opts" | grep nobarrier 1>/dev/null 2>&1; then
return 1
fi
}

time_last(){
# $1: {hourly|daily|weekly}
if [ -z "$1" ]; then
return 1
fi
if [ ! -f "$FS_TIMESTAMP" ]; then
mkdir -p "$FS_PROP"
echo 0 && return 0
fi
case $1 in
hourly)
cut -d',' -f1 < "$FS_TIMESTAMP"
;;
daily)
cut -d',' -f2 < "$FS_TIMESTAMP"
;;
weekly)
cut -d',' -f2 < "$FS_TIMESTAMP"
;;
*)
echo 0 && return 1
;;
esac
}

time_update(){
# $1: {hourly|daily|weekly}
# $2: timestamp to write
if [ -z "$1" ] || [ -z "$2" ]; then
return 1
fi
if [ ! -f "$FS_TIMESTAMP" ]; then
mkdir -p "$FS_PROP"
echo "0,0,0" > "$FS_TIMESTAMP"
fi
LT_HOURLY=$(cut -d',' -f1 < "$FS_TIMESTAMP")
LT_DAILY=$(cut -d',' -f2 < "$FS_TIMESTAMP")
LT_WEEKLY=$(cut -d',' -f3 < "$FS_TIMESTAMP")
case $1 in
hourly)
echo "$2,$LT_DAILY,$LT_WEEKLY" > "$FS_TIMESTAMP"
;;
daily)
echo "$LT_HOURLY,$2,$LT_WEEKLY" > "$FS_TIMESTAMP"
;;
weekly)
echo "$LT_HOURLY,$LT_DAILY,$2" > "$FS_TIMESTAMP"
;;
*)
return 1
;;
esac
}

if ! check_root; then
echo_stderr "FATAL: This command must be run as root, quit."
exit 1
fi

if ! lock_aquire; then
echo_stderr "FATAL: Another process is running, quit."
exit 1
else
echo_stderr "INFO: File system maintenance service is started."
fi

if ! check_ecc; then
echo_stderr "WARN: System RAM does not support ECC, not suitable for production use."
fi

export SCHED_HALT=0
while [ "$SCHED_HALT" -lt 1 ]; do
TIME_LAST_HOURLY=$(time_last hourly)
TIME_LAST_DAILY=$(time_last daily)
TIME_LAST_WEEKYLY=$(time_last weekly)
TIME_CUR=$(date +%s)

if ! check_mnt "$SNAP_FS"; then
echo_stderr "ERROR: Target filesystem is not mounted properly! Will retry after $SCHED_INTERVAL seconds."
sleep "$SCHED_INTERVAL"
continue
fi

if test $((TIME_CUR)) -gt "$((TIME_LAST_HOURLY+SCHED_HOURLY))"; then
if test "$SCHED_HALT" -lt 1 && is_finished "$PID_HOURLY"; then
"$FS_MAINT" "$SNAP_FS" hourly &
PID_HOURLY=$!
time_update hourly "$TIME_CUR"
fi
fi

if test "$TIME_CUR" -gt "$((TIME_LAST_DAILY+SCHED_DAILY))"; then
if test "$SCHED_HALT" -lt 1 && is_finished "$PID_DAYLY"; then
"$FS_MAINT" "$SNAP_FS" daily &
PID_DAYLY=$!
time_update daily "$TIME_CUR"
fi
fi

if test "$TIME_CUR" -gt "$((TIME_LAST_WEEKYLY+SCHED_WEEKLY))"; then
if test "$SCHED_HALT" -lt 1 && is_finished "$PID_WEEKLY"; then
"$FS_MAINT" "$SNAP_FS" weekly &
PID_WEEKLY=$!
time_update weekly "$TIME_CUR"
fi
fi

if [ "$SCHED_HALT" -lt 1 ]; then
sleep "$SCHED_INTERVAL"
fi
done
3 changes: 3 additions & 0 deletions snapcraft.yaml.templ
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ apps:
command: bin/agent-shutdown
daemon: oneshot
restart-condition: never
fs-service:
command: bin/fs-service
daemon: simple
parts:
wrappers:
plugin: dump
Expand Down