392 lines
16 KiB
Python
392 lines
16 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
BTRFS Backup Script
|
|
==================
|
|
A comprehensive backup solution for BTRFS snapshots with incremental transfer support.
|
|
|
|
Description:
|
|
------------
|
|
This script performs automated backup of BTRFS snapshots from a local host to a remote
|
|
destination. It supports incremental transfers and cleanup of orphaned snapshots.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import subprocess
|
|
import logging
|
|
import time
|
|
from datetime import datetime, timedelta
|
|
from pathlib import Path
|
|
import shutil
|
|
import traceback
|
|
import signal
|
|
import json
|
|
|
|
# --- Configuration Constants ---
# Local base directory holding received snapshots, laid out as
#   <SOURCE_BASE>/<host>/<subvolume>/<snapshot-id>/snapshot
SOURCE_BASE = "/volume1/BTRFS_Receives/hosts"
# SSH target (user@host) of the remote backup destination.
DEST_HOST = "root@synology-backup"
# Remote base directory; mirrors the SOURCE_BASE layout.
DEST_BASE = "/volume1/BTRFS_Receives/hosts"
# Log file written in addition to console output.
LOG_FILE = "/var/log/btrfs_backup.log"

# Initialize logging with more detailed format (file + console handlers).
logging.basicConfig(
    level=logging.INFO,  # raise to logging.DEBUG for per-command detail
    format='%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d - %(funcName)s()] - %(message)s',
    handlers=[
        logging.FileHandler(LOG_FILE),
        logging.StreamHandler()
    ]
)
|
|
|
|
class BTRFSBackup:
    """Main class for handling BTRFS snapshot backups.

    Scans ``<SOURCE_BASE>/<host>/<subvol>/<id>/snapshot`` locally,
    compares against the same layout under ``DEST_BASE`` on ``DEST_HOST``
    (over SSH), transfers snapshots missing remotely — incrementally when
    a common parent snapshot exists — and deletes remote snapshots that
    no longer exist locally.
    """

    def __init__(self):
        # Run statistics reported at the end of run(); 'errors' counts
        # failed commands (previously initialized but never updated).
        self.stats = {
            'transferred': 0,
            'deleted': 0,
            'errors': 0,
            'start_time': None
        }
        # Exit with status 1 instead of an ugly traceback on Ctrl-C / TERM.
        signal.signal(signal.SIGINT, self._signal_handler)
        signal.signal(signal.SIGTERM, self._signal_handler)

    def _signal_handler(self, signum, frame):
        """Handle interrupt signals gracefully by logging and exiting."""
        logging.warning("Received interrupt signal. Cleaning up...")
        sys.exit(1)

    def run_command(self, command, shell=False, check=True):
        """Execute a system command and return its CompletedProcess.

        Args:
            command: Argument list, or a command string when shell=True.
            shell: Forwarded to subprocess.run.
            check: When True, a non-zero exit raises CalledProcessError.

        Returns:
            subprocess.CompletedProcess with captured text stdout/stderr.

        Raises:
            subprocess.CalledProcessError: when check=True and the command
                fails; the failure is logged and counted first.
        """
        try:
            cmd_str = command if isinstance(command, str) else ' '.join(command)
            logging.debug(f"Executing command: {cmd_str}")

            result = subprocess.run(
                command,
                shell=shell,
                check=check,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )

            logging.debug(f"Command output: {result.stdout}")
            if result.stderr:
                logging.debug(f"Command stderr: {result.stderr}")

            return result
        except subprocess.CalledProcessError as e:
            self.stats['errors'] += 1  # surface failures in final stats
            logging.error(f"Command failed: {e.cmd}")
            logging.error(f"Return code: {e.returncode}")
            logging.error(f"Error output: {e.stderr}")
            raise

    def get_local_snapshots(self, path):
        """Return the sorted list of local snapshot IDs under *path*.

        A snapshot is a numeric directory containing a ``snapshot``
        subdirectory. Non-numeric directory names are skipped — this
        mirrors the filtering in get_remote_snapshots and prevents the
        integer sort from raising ValueError on a stray directory.

        Returns:
            list[str]: snapshot IDs sorted by numeric value (may be empty).
        """
        try:
            logging.debug(f"Scanning for local snapshots in: {path}")
            snapshots = []
            path = Path(path)
            if path.exists():
                for item in path.glob("*/snapshot"):
                    if item.is_dir():
                        name = item.parent.name
                        try:
                            int(name)  # keep only numeric snapshot IDs
                        except ValueError:
                            logging.debug(f"Skipping non-numeric directory: {name}")
                            continue
                        snapshots.append(name)

            sorted_snapshots = sorted(snapshots, key=int)
            logging.debug(f"Found local snapshots: {json.dumps(sorted_snapshots)}")
            return sorted_snapshots
        except Exception as e:
            logging.error(f"Error getting local snapshots: {e}")
            logging.error(traceback.format_exc())
            raise

    def get_remote_snapshots(self, path):
        """Return the sorted list of remote snapshot IDs under *path*.

        Uses ``find -maxdepth 1`` over SSH to list candidate directories,
        keeps only numeric names, and verifies each has a ``snapshot``
        subdirectory. NOTE: one extra SSH round-trip per candidate —
        acceptable for the small snapshot counts this script handles.

        Returns:
            list[str]: snapshot IDs sorted numerically; empty if the
            remote path is missing or the find command fails.
        """
        try:
            logging.debug(f"Scanning for remote snapshots in: {path}")
            # Use find with maxdepth 1 to locate snapshot directories at the correct level
            cmd = f"ssh {DEST_HOST} 'find {path} -maxdepth 1 -type d'"
            result = self.run_command(cmd, shell=True, check=False)

            snapshots = []
            if result.returncode == 0:
                for line in result.stdout.splitlines():
                    if line.strip():
                        # Extract directory name from path
                        dirname = os.path.basename(line.strip())
                        try:
                            # Verify it's a number
                            int(dirname)
                            # Verify snapshot subdirectory exists
                            verify_cmd = f"ssh {DEST_HOST} '[ -d {line.strip()}/snapshot ] && echo exists'"
                            verify_result = self.run_command(verify_cmd, shell=True, check=False)
                            if verify_result.returncode == 0 and verify_result.stdout.strip() == 'exists':
                                snapshots.append(dirname)
                        except ValueError:
                            logging.debug(f"Skipping non-numeric directory: {dirname}")

                sorted_snapshots = sorted(snapshots, key=int)
                logging.debug(f"Found remote snapshots: {json.dumps(sorted_snapshots)}")
                return sorted_snapshots
            else:
                logging.debug(f"Find command failed with return code {result.returncode}")
                logging.debug(f"Error output: {result.stderr}")
                return []
        except Exception as e:
            logging.error(f"Error getting remote snapshots: {e}")
            logging.error(traceback.format_exc())
            raise

    def verify_btrfs_subvolume(self, path):
        """Return True if *path* is a valid BTRFS subvolume.

        Uses ``btrfs subvolume show``; any exception is treated as
        "not valid" rather than propagated, since callers only need a
        yes/no answer.
        """
        try:
            logging.debug(f"Verifying BTRFS subvolume: {path}")
            result = self.run_command(['btrfs', 'subvolume', 'show', path], check=False)
            is_valid = result.returncode == 0
            logging.debug(f"Subvolume verification result: {'valid' if is_valid else 'invalid'}")
            return is_valid
        except Exception as e:
            logging.error(f"Error verifying subvolume: {e}")
            return False

    def get_parent_snapshot(self, host, subvol, snapshot):
        """Find the most recent parent snapshot present locally AND remotely.

        A valid parent must be numerically older than *snapshot*, exist
        on both sides, and still be a real BTRFS subvolume locally
        (otherwise ``btrfs send -p`` would fail).

        Returns:
            str | None: the parent snapshot ID, or None to force a full send.
        """
        try:
            logging.debug(f"Finding parent snapshot for {host}/{subvol}/{snapshot}")

            local_path = f"{SOURCE_BASE}/{host}/{subvol}"
            remote_path = f"{DEST_BASE}/{host}/{subvol}"

            logging.debug(f"Scanning local path: {local_path}")
            logging.debug(f"Scanning remote path: {remote_path}")

            local_snapshots = set(self.get_local_snapshots(local_path))
            remote_snapshots = set(self.get_remote_snapshots(remote_path))

            logging.debug(f"Local snapshots: {json.dumps(list(local_snapshots))}")
            logging.debug(f"Remote snapshots: {json.dumps(list(remote_snapshots))}")

            # Find common snapshots that could be parents
            common_snapshots = local_snapshots & remote_snapshots
            logging.debug(f"Common snapshots: {json.dumps(list(common_snapshots))}")

            if not common_snapshots:
                logging.info("No common snapshots found - will perform full send")
                return None

            current = int(snapshot)
            logging.debug(f"Current snapshot number: {current}")

            # Get the list of potential parents (snapshots older than current)
            potential_parents = [int(s) for s in common_snapshots if int(s) < current]
            logging.debug(f"Potential parent snapshots: {json.dumps(potential_parents)}")

            if not potential_parents:
                logging.info("No valid parent snapshots found - will perform full send")
                return None

            # Get the most recent potential parent
            latest_parent = str(max(potential_parents))
            logging.debug(f"Selected latest potential parent: {latest_parent}")

            # Verify the parent snapshot exists and is valid
            parent_path = f"{SOURCE_BASE}/{host}/{subvol}/{latest_parent}/snapshot"
            logging.debug(f"Checking parent path: {parent_path}")

            if not os.path.exists(parent_path):
                logging.warning(f"Parent snapshot path {parent_path} does not exist")
                return None

            # Verify the parent snapshot is actually a BTRFS subvolume
            if not self.verify_btrfs_subvolume(parent_path):
                logging.warning(f"Parent snapshot {latest_parent} is not a valid BTRFS subvolume")
                return None

            logging.info(f"Found valid parent snapshot: {latest_parent}")
            return latest_parent

        except Exception as e:
            logging.error(f"Error finding parent snapshot: {e}")
            logging.error(traceback.format_exc())
            return None

    def transfer_snapshot(self, host, subvol, snapshot):
        """Transfer a single snapshot to the backup destination.

        Pipes local ``btrfs send`` into ``btrfs receive`` on DEST_HOST
        via SSH, using an incremental send (-p) when a parent is found.

        Raises:
            subprocess.CalledProcessError: if either side of the
                send/receive pipeline exits non-zero.
        """
        source_path = f"{SOURCE_BASE}/{host}/{subvol}/{snapshot}/snapshot"
        dest_path = f"{DEST_BASE}/{host}/{subvol}/{snapshot}"

        try:
            logging.debug(f"Starting transfer for snapshot: {host}/{subvol}/{snapshot}")
            logging.debug(f"Source path: {source_path}")
            logging.debug(f"Destination path: {dest_path}")

            # Create destination directory
            self.run_command(f"ssh {DEST_HOST} 'mkdir -p {dest_path}'", shell=True)

            # Find parent snapshot for incremental transfer
            parent = self.get_parent_snapshot(host, subvol, snapshot)
            logging.debug(f"Parent snapshot found: {parent}")

            # Build the send command as an argument list (the previous
            # string + .split() approach would mis-split any path
            # containing whitespace).
            if parent:
                parent_path = f"{SOURCE_BASE}/{host}/{subvol}/{parent}/snapshot"
                logging.info(f"Performing incremental transfer using parent snapshot: {parent}")
                logging.debug(f"Parent path: {parent_path}")
                send_cmd = ["btrfs", "send", "-p", parent_path, source_path]
            else:
                logging.info("Performing full transfer (no parent snapshot found)")
                send_cmd = ["btrfs", "send", source_path]

            receive_cmd = f"ssh {DEST_HOST} 'btrfs receive {dest_path}'"

            logging.debug(f"Send command: {' '.join(send_cmd)}")
            logging.debug(f"Receive command: {receive_cmd}")

            # Execute the transfer: send's stdout feeds receive's stdin.
            send_process = subprocess.Popen(
                send_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )

            receive_process = subprocess.Popen(
                receive_cmd,
                shell=True,
                stdin=send_process.stdout,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )

            # Close our copy of the pipe so send sees EPIPE if receive dies.
            send_process.stdout.close()

            # Drain the receive side FIRST: blocking on send's stderr while
            # receive's output pipes fill up could deadlock the pipeline.
            receive_stdout, receive_stderr = receive_process.communicate()
            send_stderr = send_process.stderr.read()

            send_retcode = send_process.wait()
            receive_retcode = receive_process.wait()

            logging.debug(f"Send process return code: {send_retcode}")
            logging.debug(f"Receive process return code: {receive_retcode}")

            if send_stderr:
                logging.debug(f"Send process stderr: {send_stderr}")
            if receive_stderr:
                logging.debug(f"Receive process stderr: {receive_stderr}")

            if send_retcode != 0 or receive_retcode != 0:
                self.stats['errors'] += 1
                raise subprocess.CalledProcessError(
                    send_retcode or receive_retcode,
                    send_cmd,
                    output=receive_stdout,
                    stderr=send_stderr + receive_stderr
                )

            self.stats['transferred'] += 1
            logging.info(f"Successfully transferred snapshot {snapshot}")

        except Exception as e:
            logging.error(f"Error transferring snapshot {snapshot}: {e}")
            logging.error(traceback.format_exc())
            raise

    def delete_remote_snapshot(self, host, subvol, snapshot):
        """Delete an orphaned snapshot from the backup destination.

        Removes the BTRFS subvolume first, then its wrapper directory.

        Raises:
            subprocess.CalledProcessError: if the remote deletion fails.
        """
        try:
            path = f"{DEST_BASE}/{host}/{subvol}/{snapshot}"
            logging.debug(f"Attempting to delete remote snapshot: {path}")

            cmd = f"ssh {DEST_HOST} 'btrfs subvolume delete {path}/snapshot && rm -rf {path}'"
            self.run_command(cmd, shell=True)
            self.stats['deleted'] += 1
            logging.info(f"Deleted orphaned snapshot: {path}")
        except Exception as e:
            logging.error(f"Error deleting remote snapshot: {e}")
            logging.error(traceback.format_exc())
            raise

    def show_inventory(self):
        """Print a per-host, per-subvolume table of local/remote snapshots."""
        print("\nCurrent Snapshot Inventory:")
        print("==========================")

        for host in os.listdir(SOURCE_BASE):
            host_path = os.path.join(SOURCE_BASE, host)
            if not os.path.isdir(host_path):
                continue

            print(f"\nHost: {host}")
            for subvol in os.listdir(host_path):
                subvol_path = os.path.join(host_path, subvol)
                if not os.path.isdir(subvol_path):
                    continue

                print(f"  Subvolume: {subvol}")
                local_snapshots = set(self.get_local_snapshots(subvol_path))
                remote_snapshots = set(self.get_remote_snapshots(f"{DEST_BASE}/{host}/{subvol}"))

                all_snapshots = sorted(local_snapshots | remote_snapshots, key=int)
                for snapshot in all_snapshots:
                    local_status = "✅" if snapshot in local_snapshots else "❌"
                    remote_status = "✅" if snapshot in remote_snapshots else "❌"
                    print(f"    {snapshot}: Local: [{local_status}] Remote: [{remote_status}]")

    def run(self):
        """Main execution method: inventory, transfer, cleanup, summary."""
        try:
            self.stats['start_time'] = time.time()
            logging.info("Starting BTRFS backup process")

            # Show inventory and get confirmation
            self.show_inventory()

            # Process all hosts
            for host in os.listdir(SOURCE_BASE):
                host_path = os.path.join(SOURCE_BASE, host)
                if not os.path.isdir(host_path):
                    continue

                logging.info(f"Processing host: {host}")
                # Process all subvolumes
                for subvol in os.listdir(host_path):
                    subvol_path = os.path.join(host_path, subvol)
                    if not os.path.isdir(subvol_path):
                        continue

                    logging.info(f"Processing subvolume: {subvol}")
                    # Get snapshot lists
                    local_snapshots = set(self.get_local_snapshots(subvol_path))
                    remote_snapshots = set(self.get_remote_snapshots(f"{DEST_BASE}/{host}/{subvol}"))

                    # Transfer missing snapshots, oldest first so each can
                    # serve as an incremental parent for the next.
                    missing_snapshots = sorted(local_snapshots - remote_snapshots, key=int)
                    if missing_snapshots:
                        logging.info(f"Found {len(missing_snapshots)} new snapshots to transfer")
                        for snapshot in missing_snapshots:
                            logging.info(f"Transferring snapshot: {host}/{subvol}/{snapshot}")
                            self.transfer_snapshot(host, subvol, snapshot)

                    # Delete orphaned snapshots (deterministic numeric order)
                    orphaned_snapshots = remote_snapshots - local_snapshots
                    if orphaned_snapshots:
                        logging.info(f"Found {len(orphaned_snapshots)} orphaned snapshots to delete")
                        for snapshot in sorted(orphaned_snapshots, key=int):
                            logging.info(f"Deleting orphaned {host}/{subvol}/{snapshot}")
                            self.delete_remote_snapshot(host, subvol, snapshot)

            # Show final statistics
            elapsed_time = time.time() - self.stats['start_time']
            print("\nBackup Complete!")
            print(f"Transferred: {self.stats['transferred']} snapshots")
            print(f"Deleted: {self.stats['deleted']} snapshots")
            print(f"Errors: {self.stats['errors']}")
            print(f"Total time: {str(timedelta(seconds=int(elapsed_time)))}")

        except Exception as e:
            logging.error(f"Backup failed: {e}")
            logging.error(traceback.format_exc())
            sys.exit(1)
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the backup manager and run one full pass.
    BTRFSBackup().run()