nix/nixos/common/software/cli/scripts/btrfs-backup.sh

359 lines
12 KiB
Bash
Executable file

#!/usr/bin/env bash
# BTRFS Backup Script with Snapper Integration
# Author: The Assistant (Kagi AI)
# Created: 2024-02-18
# Modified: 2024-02-18
#
# Description: This script performs BTRFS snapshot backups using Snapper, with remote
# transfer capabilities and Gotify notifications. It handles both full and incremental
# backups, cleanup of old snapshots, and comprehensive error reporting.
#
# Dependencies:
# - snapper
# - btrfs-progs
# - pv
# - ssh (configured for remote access)
# - curl (for Gotify notifications)
#
# Usage: ./btrfs-backup.sh SNAPPER_CONFIG
# Example: ./btrfs-backup.sh root
# Abort on the first failing command; -E additionally lets shell functions,
# command substitutions and subshells inherit the ERR trap installed later.
set -eE

# Dependency check: stop before doing any work if one of the external tools
# this script shells out to is not on PATH.
for required_tool in snapper btrfs pv ssh curl; do
  if ! command -v "$required_tool" >/dev/null 2>&1; then
    echo "ERROR: Missing required dependencies. Please ensure snapper, btrfs-progs, pv, ssh, and curl are installed."
    exit 1
  fi
done
# Path of the lock file. The first positional argument (the snapper config
# name) is embedded in the file name.
LOCK_FILE="/var/run/btrfs-backup-$1.lock"

# cleanup_lock: remove the lock file through sudo.
# Globals: LOCK_FILE (read)
# rm -f keeps this silent and successful when the file is already gone.
cleanup_lock() {
  local lock_path=$LOCK_FILE
  sudo rm -f "$lock_path"
}
# create_lock: take the lock for this run, or abort if another run holds it.
# Globals:      LOCK_FILE (read)
# Outputs:      an error line on stdout when a live lock holder is found;
#               nothing otherwise
# Exits:        1 when the PID stored in the lock file belongs to a live process
# Side effects: writes this shell's PID into LOCK_FILE (through sudo) and
#               installs an EXIT trap that calls cleanup_lock
create_lock() {
  local holder_pid=""
  if [ -e "$LOCK_FILE" ]; then
    holder_pid=$(cat "$LOCK_FILE" 2>/dev/null) || holder_pid=""
    # Only a strictly positive integer is a usable PID. Anything else (empty,
    # garbage, 0, negative) must not reach kill, where 0 or a negative value
    # would address a whole process group and look "alive".
    if [[ "$holder_pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$holder_pid" 2>/dev/null; then
      echo "ERROR: Another backup process (PID: $holder_pid) is already running"
      exit 1
    fi
    # The recorded holder is gone or the content is unusable: stale lock.
    cleanup_lock
  fi
  # Only the write needs root, so sudo belongs on tee, not on echo. tee's copy
  # of the PID is discarded so it does not end up in the script's output.
  echo "$$" | sudo tee "$LOCK_FILE" >/dev/null
  # Ensure the lock file is removed when the script exits.
  trap cleanup_lock EXIT
}
# Take the lock only for a real backup run. Usage errors and -h/--help are
# handled by the argument check further down; they must neither create a lock
# file (it would be named after the flag, or after nothing) nor be refused
# because a backup happens to be running.
if [ $# -eq 1 ] && [ "$1" != "-h" ] && [ "$1" != "--help" ]; then
  create_lock
fi
# show_help: write the usage text to stdout, then terminate the script.
# The exit status is always 1, regardless of why the help was shown.
show_help() {
  printf '%s\n' \
    "Usage: $(basename "$0") SNAPPER_CONFIG" \
    "Performs BTRFS snapshot backups using Snapper with remote transfer capabilities." \
    "Arguments:" \
    "SNAPPER_CONFIG The name of the snapper configuration to backup" \
    "Environment:" \
    "GOTIFY_TOKEN Read from /run/secrets/btrfs-backups/gotify_token" \
    "REMOTE_HOST Default: root@synology" \
    "KEEP_SNAPSHOTS Default: 14" \
    "Example:" \
    "$(basename "$0") root" \
    "$(basename "$0") home"
  exit 1
}
# Argument gate: the script needs exactly one argument and that argument must
# not be a help flag. Every other combination prints the usage text
# (show_help terminates the script).
case "$#:${1-}" in
  1:-h | 1:--help) show_help ;;
  1:*) ;;
  *) show_help ;;
esac
# Tunables: a non-empty value already present in the environment wins; the
# default given here is used only when the variable is unset or empty.
REMOTE_HOST=${REMOTE_HOST:-root@synology}
KEEP_SNAPSHOTS=${KEEP_SNAPSHOTS:-14}
BACKUP_DESCRIPTION=${BACKUP_DESCRIPTION:-btrfs-backup}

# The snapper configuration to back up is the script's single argument.
SNAPPER_CONFIG=$1

# Gotify endpoint, and the secrets file the application token is read from.
GOTIFY_URL="https://gotify.sysctl.io"
gotify_token_file=/run/secrets/btrfs-backups/gotify_token

# The token file must exist and be readable by the invoking user.
if [ ! -r "$gotify_token_file" ]; then
  echo "ERROR: Gotify token file not found or not readable at $gotify_token_file"
  exit 1
fi

GOTIFY_TOKEN=$(<"$gotify_token_file")

# An empty token file is treated the same as an unreadable one.
if [ -z "$GOTIFY_TOKEN" ]; then
  echo "ERROR: Could not read Gotify token from $gotify_token_file"
  exit 1
fi
# _json_escape: print $1 escaped for embedding inside a JSON string literal.
# Handles backslash, double quote, newline, carriage return and tab; other
# control characters are passed through unchanged.
_json_escape() {
  local text=$1
  text=${text//\\/\\\\} # backslashes first, so later escapes are not doubled
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

# send_notification: POST a message to the Gotify server.
# Globals:   GOTIFY_URL, GOTIFY_TOKEN (read)
# Arguments: $1 - title
#            $2 - message body
#            $3 - priority (optional, default 5)
# Outputs:   whatever curl prints; a warning on stderr if delivery fails
# Returns:   always 0. Delivery is best effort: under `set -e` a failed curl
#            would otherwise abort the whole run, even after a backup that
#            completed successfully.
send_notification() {
  local title message priority payload
  # Titles and messages may carry quotes or backslashes (for instance the text
  # of a failed command), so they are escaped before going into the JSON body.
  title=$(_json_escape "$1")
  message=$(_json_escape "$2")
  priority="${3:-5}"
  payload="{\"title\": \"$title\", \"message\": \"$message\", \"priority\": $priority}"
  if ! curl -X POST \
    -H "Content-Type: application/json" \
    -H "X-Gotify-Key: $GOTIFY_TOKEN" \
    -d "$payload" \
    "$GOTIFY_URL/message"; then
    echo "WARNING: Failed to deliver Gotify notification '$1'" >&2
  fi
  return 0
}
# error_handler: ERR-trap handler. Reports the failing command, then exits.
# Arguments: $1 - line number of the failing command
#            $2 - its exit status
# Globals:   BASH_COMMAND, HOSTNME, HOSTNAME (read)
# Exits:     with the status passed in $2
error_handler() {
  local line_number=$1
  local error_code=$2
  local last_command="${BASH_COMMAND}"
  # HOSTNME is assigned part-way through the script; if the trap fires before
  # that, fall back to bash's own HOSTNAME so the notification names the host.
  local host="${HOSTNME:-$HOSTNAME}"
  local message="ERROR: Command '$last_command' failed with exit code $error_code on line $line_number"
  # log() is defined later in the file than the ERR trap is installed. When it
  # is not available yet (or itself fails), write to stderr instead of dying
  # on "command not found" before the notification goes out.
  if ! { declare -F log >/dev/null && log "$message"; }; then
    echo "$message" >&2
  fi
  send_notification "Backup Failed" "Error on $host: Command '$last_command' failed with exit code $error_code on line $line_number" 8
  exit "$error_code"
}
# cleanup: EXIT/INT/TERM handler. On a failed run it removes the snapshot
# created by this run, then releases the lock and exits.
# Globals: NEW_SNAPSHOT, SNAPSHOT_PATH, SNAPPER_CONFIG, STATE_FILE (read)
# Exits:   with the status that was current when the handler was entered
cleanup() {
  local exit_code=$? # must stay the first command, or $? is overwritten
  local recorded=""
  # Disarm the traps: the `exit` below would otherwise run this handler a
  # second time through the EXIT trap when it was entered via INT or TERM.
  trap - EXIT INT TERM
  if [ "$exit_code" -ne 0 ] && [ -n "${NEW_SNAPSHOT:-}" ]; then
    recorded=$(cat "${STATE_FILE:-}" 2>/dev/null) || recorded=""
    if [ "$recorded" = "$NEW_SNAPSHOT" ]; then
      # The state file already names this snapshot as the last successful
      # transfer, so the next incremental send needs it as parent. Keep it.
      :
    elif [ -d "$SNAPSHOT_PATH/$NEW_SNAPSHOT/snapshot" ]; then
      # The directory is tested directly instead of going through
      # verify_snapshot: that helper is defined later in the file and sends a
      # "Backup Failed" notification when the snapshot is already gone.
      if ! { declare -F log >/dev/null && log "Cleaning up failed snapshot $NEW_SNAPSHOT"; }; then
        echo "Cleaning up failed snapshot $NEW_SNAPSHOT" >&2
      fi
      sudo snapper -c "$SNAPPER_CONFIG" delete "$NEW_SNAPSHOT" || true
    fi
  fi
  # Remove lock file
  cleanup_lock
  exit "$exit_code"
}
# Trap wiring.
# ERR: hand the failing line number and exit status to error_handler. The
# single quotes delay expansion of LINENO and $? until the trap fires.
trap 'error_handler "${LINENO}" "$?"' ERR
# Normal exit, Ctrl-C and termination requests all go through cleanup.
trap cleanup EXIT
trap cleanup INT
trap cleanup TERM
# Resolve the host name once, before anything that reports it: every failure
# notification below embeds $HOSTNME.
HOSTNME=$(hostname)

# Per-config bookkeeping files. These depend only on the config name, so they
# are set before any step that can fail.
STATE_FILE="/var/lib/snapper-backup-${SNAPPER_CONFIG}.state"
LOG_FILE="/var/log/snapper-backup-${SNAPPER_CONFIG}.log"

# Verify the snapper config exists before creating anything with it.
if ! sudo snapper -c "$SNAPPER_CONFIG" list &>/dev/null; then
  send_notification "Backup Failed" "Snapper config '$SNAPPER_CONFIG' does not exist on $HOSTNME" 8
  echo "ERROR: Snapper config '$SNAPPER_CONFIG' does not exist"
  exit 1
fi

# Get the actual snapshot location from snapper config: take the SUBVOLUME row
# of `snapper get-config` and keep its third whitespace-separated field.
SOURCE_PATH=$(sudo snapper -c "$SNAPPER_CONFIG" get-config | grep '^SUBVOLUME' | cut -d'=' -f2 | tr -d '"' | awk '{print $3}')
echo "SOURCE_PATH: $SOURCE_PATH"
if [ -z "$SOURCE_PATH" ]; then
  send_notification "Backup Failed" "Could not determine snapshot path for config '$SNAPPER_CONFIG' on $HOSTNME" 8
  echo "ERROR: Could not determine snapshot path for config '$SNAPPER_CONFIG'"
  exit 1
fi

# Convert subvolume path to snapshot path
SNAPSHOT_PATH="$SOURCE_PATH/.snapshots"
if [ ! -d "$SNAPSHOT_PATH" ]; then
  send_notification "Backup Failed" "Snapshot directory '$SNAPSHOT_PATH' does not exist on $HOSTNME" 8
  echo "ERROR: Snapshot directory '$SNAPSHOT_PATH' does not exist"
  exit 1
fi

# Number of the last snapshot recorded as transferred; empty when there is no
# state file yet.
LAST_TRANSFERRED=$(cat "$STATE_FILE" 2>/dev/null || echo "")

# Create new snapshot with backup description
NEW_SNAPSHOT=$(sudo snapper -c "$SNAPPER_CONFIG" create --description "$BACKUP_DESCRIPTION" --print-number)
if [ -z "$NEW_SNAPSHOT" ]; then
  send_notification "Backup Failed" "Failed to create new snapshot on $HOSTNME" 8
  echo "ERROR: Failed to create new snapshot"
  exit 1
fi

# Remote layout: <base>/<host>/<config>/<snapshot number>
BASE_DEST_PATH="/volume1/BTRFS_Receives/${HOSTNME}/${SNAPPER_CONFIG}"
DEST_PATH="${BASE_DEST_PATH}/${NEW_SNAPSHOT}"
# log: prefix $1 with the current local time and append the line to LOG_FILE
# through sudo; tee also echoes the line on stdout.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
log() {
  printf '%s - %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" \
    | sudo tee -a "$LOG_FILE"
}
# Startup banner: record the parameters of this run in the log, one line each.
for banner_line in \
  "=== Starting backup script v1.0 ===" \
  "Host: $HOSTNME" \
  "Config: $SNAPPER_CONFIG" \
  "Remote: $REMOTE_HOST" \
  "Retention: $KEEP_SNAPSHOTS snapshots"; do
  log "$banner_line"
done
# verify_snapshot: check that a local snapshot directory is present.
# Globals:   SNAPSHOT_PATH, HOSTNME (read)
# Arguments: $1 - snapshot number
# Returns:   0 when $SNAPSHOT_PATH/<number>/snapshot is a directory; otherwise
#            sends a failure notification, logs an error and returns 1
verify_snapshot() {
  local id="$1"
  if [ -d "$SNAPSHOT_PATH/$id/snapshot" ]; then
    return 0
  fi
  send_notification "Backup Failed" "Snapshot $id does not exist on $HOSTNME" 8
  log "ERROR: Snapshot $id does not exist"
  return 1
}
# verify_remote: confirm the remote host accepts an SSH session.
# Globals: REMOTE_HOST, HOSTNME (read)
# Returns: 0 when `ssh -q <host> exit` succeeds
# Exits:   1 (after a notification and a log entry) when it does not
verify_remote() {
  if ssh -q "$REMOTE_HOST" "exit"; then
    return 0
  fi
  send_notification "Backup Failed" "Cannot connect to remote host from $HOSTNME" 8
  log "ERROR: Cannot connect to remote host"
  exit 1
}
# get_remote_snapshots: list the snapshot directories on the remote host, one
# full path per line, ordered by snapshot number.
# Globals: REMOTE_HOST, BASE_DEST_PATH (read)
# Outputs: the paths of the direct sub-directories of BASE_DEST_PATH whose
#          names start with a digit
# Returns: the status of ssh when the remote listing fails, 0 otherwise
get_remote_snapshots() {
  local listing
  listing=$(ssh "$REMOTE_HOST" "find '$BASE_DEST_PATH' -maxdepth 1 -type d -name '[0-9]*'") || return
  # Nothing found: print nothing rather than one empty line.
  [ -n "$listing" ] || return 0
  # `sort -n` on whole paths sees no leading number (each line starts with
  # "/") and falls back to byte order, which puts 10 before 9. Sort on the
  # last path component instead: prefix it as a key, sort, drop the key.
  printf '%s\n' "$listing" \
    | awk -F/ '{ print $NF "\t" $0 }' \
    | sort -n \
    | cut -f2-
}
# get_local_snapshots: print the numbers of the local snapshots that belong to
# this backup job, in ascending numeric order.
# Globals: SNAPPER_CONFIG, BACKUP_DESCRIPTION (read)
# A row of `snapper list` is selected when it matches BACKUP_DESCRIPTION
# (used as a grep pattern); its first whitespace-separated field is emitted.
get_local_snapshots() {
  sudo snapper -c "$SNAPPER_CONFIG" list \
    | grep "$BACKUP_DESCRIPTION" \
    | awk '{ print $1 }' \
    | sort -n
}
# cleanup_snapshots: enforce the retention limit. The oldest backup snapshots
# beyond KEEP_SNAPSHOTS are removed, first on the remote host, then locally.
# Globals: KEEP_SNAPSHOTS, REMOTE_HOST, BASE_DEST_PATH, SNAPPER_CONFIG,
#          HOSTNME, NEW_SNAPSHOT (read)
# Individual delete failures are reported as warnings (notification + log)
# and do not stop the loop.
cleanup_snapshots() {
  local -a candidates=()
  local id snapshot count to_delete i

  # Collect snapshot numbers line by line. Only purely numeric ids are kept:
  # each id ends up inside a remote `rm -rf` path, so anything else is skipped.
  while IFS= read -r id; do
    if [[ "$id" =~ ^[0-9]+$ ]]; then
      candidates+=("$id")
    elif [ -n "$id" ]; then
      log "WARNING: Ignoring unexpected snapshot id '$id'"
    fi
  done < <(get_local_snapshots)

  count=${#candidates[@]}
  # `[` (not arithmetic evaluation) on purpose: a non-numeric KEEP_SNAPSHOTS
  # makes the test fail, and then nothing is deleted.
  if [ "$count" -gt "$KEEP_SNAPSHOTS" ]; then
    to_delete=$((count - KEEP_SNAPSHOTS))
    log "Cleaning up $to_delete old snapshots both locally and remotely"
    for ((i = 0; i < to_delete; i++)); do
      snapshot="${candidates[i]}"
      # Never remove the snapshot created by this run: it is the parent the
      # next incremental send is based on.
      if [ "$snapshot" = "${NEW_SNAPSHOT:-}" ]; then
        log "Keeping snapshot $snapshot: needed as parent for the next incremental send"
        continue
      fi
      # Delete remote snapshot first
      if ssh "$REMOTE_HOST" "[ -d '$BASE_DEST_PATH/$snapshot' ]"; then
        log "Deleting remote snapshot: $snapshot"
        if ! ssh "$REMOTE_HOST" "btrfs subvolume delete '$BASE_DEST_PATH/$snapshot/snapshot'"; then
          send_notification "Backup Warning" "Failed to delete remote snapshot $snapshot on $HOSTNME" 6
          log "WARNING: Failed to delete remote snapshot $snapshot"
        fi
        if ! ssh "$REMOTE_HOST" "rm -rf '$BASE_DEST_PATH/$snapshot'"; then
          send_notification "Backup Warning" "Failed to cleanup remote snapshot directory $snapshot on $HOSTNME" 6
          log "WARNING: Failed to cleanup remote snapshot directory $snapshot"
        fi
      fi
      # Then delete local snapshot
      log "Deleting local snapshot: $snapshot"
      if ! sudo snapper -c "$SNAPPER_CONFIG" delete "$snapshot"; then
        send_notification "Backup Warning" "Failed to delete local snapshot $snapshot on $HOSTNME" 6
        log "WARNING: Failed to delete local snapshot $snapshot"
      fi
    done
  fi
}
# Start backup process
log "Starting backup for snapper config: $SNAPPER_CONFIG"
log "Using snapshot path: $SNAPSHOT_PATH"
log "Created new snapshot: $NEW_SNAPSHOT"

# Verify remote connectivity first
verify_remote

# Verify snapshots exist
verify_snapshot "$NEW_SNAPSHOT" || exit 1
if [ -n "$LAST_TRANSFERRED" ]; then
  verify_snapshot "$LAST_TRANSFERRED" || exit 1
fi

# Create destination directory if it doesn't exist
ssh "$REMOTE_HOST" "mkdir -p '$DEST_PATH'"

# Choose the send mode. Without a recorded previous transfer the whole
# snapshot is sent; otherwise only the difference to that parent (-p).
send_args=()
if [ -z "$LAST_TRANSFERRED" ]; then
  send_kind="Full"
  log "Performing full send of snapshot $NEW_SNAPSHOT"
else
  send_kind="Incremental"
  send_args+=(-p "$SNAPSHOT_PATH/$LAST_TRANSFERRED/snapshot")
  log "Performing incremental send from $LAST_TRANSFERRED to $NEW_SNAPSHOT"
fi
send_args+=("$SNAPSHOT_PATH/$NEW_SNAPSHOT/snapshot")

# Perform the transfer. pipefail is enabled inside a subshell so that a
# failure of `btrfs send` or `pv` (not only of the final ssh) counts as a
# failed transfer, without changing how the script's other pipelines behave.
# An explicit if/else replaces `A && {...} || {...}`, where a failure inside
# the success branch would also have run the failure branch.
if ! (
  set -o pipefail
  sudo btrfs send "${send_args[@]}" \
    | pv --bytes \
    | ssh "$REMOTE_HOST" "btrfs receive '$DEST_PATH'"
); then
  send_notification "Backup Failed" "$send_kind send failed for $SNAPPER_CONFIG on $HOSTNME" 8
  log "ERROR: $send_kind send failed"
  sudo snapper -c "$SNAPPER_CONFIG" delete "$NEW_SNAPSHOT" || true
  exit 1
fi

# Verify the received subvolume BEFORE recording the transfer: once the state
# file names this snapshot, the next run uses it as incremental parent.
if ! ssh "$REMOTE_HOST" "btrfs subvolume show '$DEST_PATH/snapshot'" &>/dev/null; then
  send_notification "Backup Failed" "Final verification failed for $SNAPPER_CONFIG on $HOSTNME" 8
  log "WARNING: Final verification failed"
  exit 1
fi

# Record the snapshot as the last successful transfer.
echo "$NEW_SNAPSHOT" | sudo tee "$STATE_FILE" >/dev/null
log "$send_kind send completed successfully"

# Cleanup old snapshots now that the transfer is confirmed
cleanup_snapshots

# List what exists on both sides
log "Current remote snapshots:"
get_remote_snapshots | sudo tee -a "$LOG_FILE"
log "Current local snapshots:"
get_local_snapshots | sudo tee -a "$LOG_FILE"

# Gather statistics
TOTAL_SNAPSHOTS=$(get_local_snapshots | wc -l)
REMOTE_SNAPSHOTS=$(get_remote_snapshots | wc -l)
log "Statistics:"
log "- Total local snapshots: $TOTAL_SNAPSHOTS"
log "- Total remote snapshots: $REMOTE_SNAPSHOTS"
log "- Latest snapshot: $NEW_SNAPSHOT"
if [ -n "$LAST_TRANSFERRED" ]; then
  log "- Previous snapshot: $LAST_TRANSFERRED"
fi

log "Backup completed successfully"
send_notification "Backup Successful" "BTRFS backup completed successfully for $SNAPPER_CONFIG on $HOSTNME" 5