#!/usr/bin/env python3
"""
BTRFS Backup Script
===================

A comprehensive backup solution for BTRFS snapshots with incremental
transfer support.

Description:
------------
This script performs automated backup of BTRFS snapshots from a local
host to a remote destination. It supports incremental transfers and
cleanup of orphaned snapshots.
"""

import os
import sys
import subprocess
import logging
import time
from datetime import datetime, timedelta
from pathlib import Path
import shutil
import traceback
import signal
import json

# Configuration Constants
SOURCE_BASE = "/volume1/BTRFS_Receives/hosts"
DEST_HOST = "root@synology-backup"
DEST_BASE = "/volume1/BTRFS_Receives/hosts"
LOG_FILE = "/var/log/btrfs_backup.log"

# Initialize logging with a detailed format, to both the log file and the
# console. Level is INFO; raise to DEBUG for command-by-command tracing.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d - %(funcName)s()] - %(message)s',
    handlers=[
        logging.FileHandler(LOG_FILE),
        logging.StreamHandler()
    ]
)


class BTRFSBackup:
    """Main class for handling BTRFS snapshot backups.

    Mirrors the local snapshot tree under SOURCE_BASE to DEST_HOST:DEST_BASE,
    transferring snapshots missing on the remote (incrementally when a common
    parent exists) and deleting remote snapshots no longer present locally.

    Expected on-disk layout on both sides:
        <BASE>/<host>/<subvol>/<numeric-snapshot-id>/snapshot
    where ``snapshot`` is a BTRFS subvolume.
    """

    def __init__(self):
        # Running counters reported at the end of run().
        self.stats = {
            'transferred': 0,
            'deleted': 0,
            'errors': 0,
            'start_time': None
        }
        # Exit cleanly on Ctrl-C / kill instead of leaving half-finished
        # transfers without a log record.
        signal.signal(signal.SIGINT, self._signal_handler)
        signal.signal(signal.SIGTERM, self._signal_handler)

    def _signal_handler(self, signum, frame):
        """Handle interrupt signals gracefully."""
        logging.warning("Received interrupt signal. Cleaning up...")
        sys.exit(1)

    def run_command(self, command, shell=False, check=True):
        """Execute a system command and return the CompletedProcess.

        Args:
            command: Command as a list of args, or a string when shell=True.
            shell: Passed through to subprocess.run.
            check: When True, a non-zero exit raises CalledProcessError.

        Returns:
            subprocess.CompletedProcess with captured text stdout/stderr.

        Raises:
            subprocess.CalledProcessError: when check=True and the command fails.
        """
        try:
            cmd_str = command if isinstance(command, str) else ' '.join(command)
            logging.debug(f"Executing command: {cmd_str}")
            result = subprocess.run(
                command,
                shell=shell,
                check=check,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )
            logging.debug(f"Command output: {result.stdout}")
            if result.stderr:
                logging.debug(f"Command stderr: {result.stderr}")
            return result
        except subprocess.CalledProcessError as e:
            logging.error(f"Command failed: {e.cmd}")
            logging.error(f"Return code: {e.returncode}")
            logging.error(f"Error output: {e.stderr}")
            raise

    def get_local_snapshots(self, path):
        """Get the sorted list of local snapshot IDs under *path*.

        A snapshot is a directory ``<path>/<id>/snapshot``; the numeric
        ``<id>`` directory names are returned sorted ascending by value.
        """
        try:
            logging.debug(f"Scanning for local snapshots in: {path}")
            snapshots = []
            path = Path(path)
            if path.exists():
                for item in path.glob("*/snapshot"):
                    if item.is_dir():
                        snapshots.append(item.parent.name)
            # Numeric sort: snapshot directory names are integers.
            sorted_snapshots = sorted(snapshots, key=lambda x: int(x))
            logging.debug(f"Found local snapshots: {json.dumps(sorted_snapshots)}")
            return sorted_snapshots
        except Exception as e:
            logging.error(f"Error getting local snapshots: {e}")
            logging.error(traceback.format_exc())
            raise

    def get_remote_snapshots(self, path):
        """Get list of remote snapshots using find command with max depth 1.

        Returns the sorted list of numeric snapshot IDs under *path* on
        DEST_HOST whose ``<id>/snapshot`` subdirectory exists; an empty list
        when the remote path is missing or the find command fails.
        """
        try:
            logging.debug(f"Scanning for remote snapshots in: {path}")
            # Use find with maxdepth 1 to locate snapshot directories at the
            # correct level. NOTE(security): *path* is interpolated into a
            # remote shell command; it is built from local directory names,
            # which are trusted here, but would need quoting for arbitrary input.
            cmd = f"ssh {DEST_HOST} 'find {path} -maxdepth 1 -type d'"
            result = self.run_command(cmd, shell=True, check=False)
            snapshots = []
            if result.returncode == 0:
                for line in result.stdout.splitlines():
                    if line.strip():
                        entry = line.strip()
                        # Extract directory name from path
                        dirname = os.path.basename(entry)
                        try:
                            # Verify it's a number (skips the parent dir itself)
                            int(dirname)
                            # Verify snapshot subdirectory exists. One ssh
                            # round-trip per candidate; acceptable for the
                            # small snapshot counts expected here.
                            verify_cmd = f"ssh {DEST_HOST} '[ -d {entry}/snapshot ] && echo exists'"
                            verify_result = self.run_command(verify_cmd, shell=True, check=False)
                            if verify_result.returncode == 0 and verify_result.stdout.strip() == 'exists':
                                snapshots.append(dirname)
                        except ValueError:
                            logging.debug(f"Skipping non-numeric directory: {dirname}")
                sorted_snapshots = sorted(snapshots, key=lambda x: int(x))
                logging.debug(f"Found remote snapshots: {json.dumps(sorted_snapshots)}")
                return sorted_snapshots
            else:
                logging.debug(f"Find command failed with return code {result.returncode}")
                logging.debug(f"Error output: {result.stderr}")
                return []
        except Exception as e:
            logging.error(f"Error getting remote snapshots: {e}")
            logging.error(traceback.format_exc())
            raise

    def verify_btrfs_subvolume(self, path):
        """Verify if a path is a valid BTRFS subvolume.

        Returns True when ``btrfs subvolume show`` succeeds for *path*,
        False otherwise (including when the command itself cannot run).
        """
        try:
            logging.debug(f"Verifying BTRFS subvolume: {path}")
            result = self.run_command(['btrfs', 'subvolume', 'show', path], check=False)
            is_valid = result.returncode == 0
            logging.debug(f"Subvolume verification result: {'valid' if is_valid else 'invalid'}")
            return is_valid
        except Exception as e:
            logging.error(f"Error verifying subvolume: {e}")
            return False

    def get_parent_snapshot(self, host, subvol, snapshot):
        """Find the most recent parent snapshot that exists both locally and remotely.

        Returns the parent snapshot ID as a string, or None when no suitable
        parent exists (which triggers a full, non-incremental send).
        """
        try:
            logging.debug(f"Finding parent snapshot for {host}/{subvol}/{snapshot}")
            local_path = f"{SOURCE_BASE}/{host}/{subvol}"
            remote_path = f"{DEST_BASE}/{host}/{subvol}"
            logging.debug(f"Scanning local path: {local_path}")
            logging.debug(f"Scanning remote path: {remote_path}")
            local_snapshots = set(self.get_local_snapshots(local_path))
            remote_snapshots = set(self.get_remote_snapshots(remote_path))
            logging.debug(f"Local snapshots: {json.dumps(list(local_snapshots))}")
            logging.debug(f"Remote snapshots: {json.dumps(list(remote_snapshots))}")

            # Find common snapshots that could be parents: btrfs send -p
            # requires the parent to be present on both sides.
            common_snapshots = local_snapshots & remote_snapshots
            logging.debug(f"Common snapshots: {json.dumps(list(common_snapshots))}")
            if not common_snapshots:
                logging.info("No common snapshots found - will perform full send")
                return None

            current = int(snapshot)
            logging.debug(f"Current snapshot number: {current}")

            # Get the list of potential parents (snapshots older than current)
            potential_parents = [int(s) for s in common_snapshots if int(s) < current]
            logging.debug(f"Potential parent snapshots: {json.dumps(potential_parents)}")
            if not potential_parents:
                logging.info("No valid parent snapshots found - will perform full send")
                return None

            # Get the most recent potential parent
            latest_parent = str(max(potential_parents))
            logging.debug(f"Selected latest potential parent: {latest_parent}")

            # Verify the parent snapshot exists and is valid
            parent_path = f"{SOURCE_BASE}/{host}/{subvol}/{latest_parent}/snapshot"
            logging.debug(f"Checking parent path: {parent_path}")
            if not os.path.exists(parent_path):
                logging.warning(f"Parent snapshot path {parent_path} does not exist")
                return None

            # Verify the parent snapshot is actually a BTRFS subvolume
            if not self.verify_btrfs_subvolume(parent_path):
                logging.warning(f"Parent snapshot {latest_parent} is not a valid BTRFS subvolume")
                return None

            logging.info(f"Found valid parent snapshot: {latest_parent}")
            return latest_parent
        except Exception as e:
            logging.error(f"Error finding parent snapshot: {e}")
            logging.error(traceback.format_exc())
            return None

    def transfer_snapshot(self, host, subvol, snapshot):
        """Transfer a single snapshot to the backup destination.

        Pipes ``btrfs send`` (incremental when a parent is found) into
        ``btrfs receive`` over ssh. Raises on any failure of either side.
        """
        source_path = f"{SOURCE_BASE}/{host}/{subvol}/{snapshot}/snapshot"
        dest_path = f"{DEST_BASE}/{host}/{subvol}/{snapshot}"
        try:
            logging.debug(f"Starting transfer for snapshot: {host}/{subvol}/{snapshot}")
            logging.debug(f"Source path: {source_path}")
            logging.debug(f"Destination path: {dest_path}")

            # Create destination directory
            self.run_command(f"ssh {DEST_HOST} 'mkdir -p {dest_path}'", shell=True)

            # Find parent snapshot for incremental transfer
            parent = self.get_parent_snapshot(host, subvol, snapshot)
            logging.debug(f"Parent snapshot found: {parent}")

            # Prepare send command
            if parent:
                parent_path = f"{SOURCE_BASE}/{host}/{subvol}/{parent}/snapshot"
                logging.info(f"Performing incremental transfer using parent snapshot: {parent}")
                logging.debug(f"Parent path: {parent_path}")
                send_cmd = f"btrfs send -p {parent_path} {source_path}"
            else:
                logging.info("Performing full transfer (no parent snapshot found)")
                send_cmd = f"btrfs send {source_path}"
            receive_cmd = f"ssh {DEST_HOST} 'btrfs receive {dest_path}'"
            logging.debug(f"Send command: {send_cmd}")
            logging.debug(f"Receive command: {receive_cmd}")

            # Execute the transfer as a send | receive pipeline.
            send_process = subprocess.Popen(
                send_cmd.split(),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            receive_process = subprocess.Popen(
                receive_cmd,
                shell=True,
                stdin=send_process.stdout,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            # Close our copy of the pipe so the sender sees EPIPE/SIGPIPE
            # if the receiver dies.
            send_process.stdout.close()

            # Drain the receiver FIRST. Reading the sender's stderr before
            # the receiver's pipes are drained can deadlock: a blocked
            # receiver stops reading stdin, the sender blocks on write and
            # never closes stderr.
            receive_stdout, receive_stderr = receive_process.communicate()
            send_stderr = send_process.stderr.read()
            send_retcode = send_process.wait()
            receive_retcode = receive_process.wait()
            logging.debug(f"Send process return code: {send_retcode}")
            logging.debug(f"Receive process return code: {receive_retcode}")
            if send_stderr:
                logging.debug(f"Send process stderr: {send_stderr}")
            if receive_stderr:
                logging.debug(f"Receive process stderr: {receive_stderr}")

            if send_retcode != 0 or receive_retcode != 0:
                raise subprocess.CalledProcessError(
                    send_retcode or receive_retcode,
                    send_cmd,
                    output=receive_stdout,
                    stderr=send_stderr + receive_stderr
                )

            self.stats['transferred'] += 1
            logging.info(f"Successfully transferred snapshot {snapshot}")
        except Exception as e:
            logging.error(f"Error transferring snapshot {snapshot}: {e}")
            logging.error(traceback.format_exc())
            raise

    def delete_remote_snapshot(self, host, subvol, snapshot):
        """Delete orphaned snapshot from backup destination.

        Removes the remote BTRFS subvolume and then its numbered wrapper
        directory. Raises on failure so the caller's stats stay accurate.
        """
        try:
            path = f"{DEST_BASE}/{host}/{subvol}/{snapshot}"
            logging.debug(f"Attempting to delete remote snapshot: {path}")
            # The subvolume must be deleted with btrfs before the plain
            # directory around it can be removed.
            cmd = f"ssh {DEST_HOST} 'btrfs subvolume delete {path}/snapshot && rm -rf {path}'"
            self.run_command(cmd, shell=True)
            self.stats['deleted'] += 1
            logging.info(f"Deleted orphaned snapshot: {path}")
        except Exception as e:
            logging.error(f"Error deleting remote snapshot: {e}")
            logging.error(traceback.format_exc())
            raise

    def show_inventory(self):
        """Display current snapshot inventory.

        Prints, per host/subvolume, every snapshot ID known on either side
        with a local/remote presence marker.
        """
        print("\nCurrent Snapshot Inventory:")
        print("==========================")
        for host in os.listdir(SOURCE_BASE):
            host_path = os.path.join(SOURCE_BASE, host)
            if not os.path.isdir(host_path):
                continue
            print(f"\nHost: {host}")
            for subvol in os.listdir(host_path):
                subvol_path = os.path.join(host_path, subvol)
                if not os.path.isdir(subvol_path):
                    continue
                print(f"  Subvolume: {subvol}")
                local_snapshots = set(self.get_local_snapshots(subvol_path))
                remote_snapshots = set(self.get_remote_snapshots(f"{DEST_BASE}/{host}/{subvol}"))
                all_snapshots = sorted(local_snapshots | remote_snapshots, key=lambda x: int(x))
                for snapshot in all_snapshots:
                    local_status = "✅" if snapshot in local_snapshots else "❌"
                    remote_status = "✅" if snapshot in remote_snapshots else "❌"
                    print(f"    {snapshot}: Local: [{local_status}] Remote: [{remote_status}]")

    def run(self):
        """Main execution method.

        Walks every host/subvolume under SOURCE_BASE, transfers snapshots
        missing on the remote, deletes remote snapshots missing locally,
        then prints summary statistics. Exits with status 1 on failure.
        """
        try:
            self.stats['start_time'] = time.time()
            logging.info("Starting BTRFS backup process")

            # Show the current inventory before making any changes.
            self.show_inventory()

            # Process all hosts
            for host in os.listdir(SOURCE_BASE):
                host_path = os.path.join(SOURCE_BASE, host)
                if not os.path.isdir(host_path):
                    continue
                logging.info(f"Processing host: {host}")

                # Process all subvolumes
                for subvol in os.listdir(host_path):
                    subvol_path = os.path.join(host_path, subvol)
                    if not os.path.isdir(subvol_path):
                        continue
                    logging.info(f"Processing subvolume: {subvol}")

                    # Get snapshot lists
                    local_snapshots = set(self.get_local_snapshots(subvol_path))
                    remote_snapshots = set(self.get_remote_snapshots(f"{DEST_BASE}/{host}/{subvol}"))

                    # Transfer missing snapshots, oldest first so each can
                    # serve as an incremental parent for the next.
                    missing_snapshots = sorted(local_snapshots - remote_snapshots, key=lambda x: int(x))
                    if missing_snapshots:
                        logging.info(f"Found {len(missing_snapshots)} new snapshots to transfer")
                        for snapshot in missing_snapshots:
                            logging.info(f"Transferring snapshot: {host}/{subvol}/{snapshot}")
                            self.transfer_snapshot(host, subvol, snapshot)

                    # Delete orphaned snapshots
                    orphaned_snapshots = remote_snapshots - local_snapshots
                    if orphaned_snapshots:
                        logging.info(f"Found {len(orphaned_snapshots)} orphaned snapshots to delete")
                        for snapshot in orphaned_snapshots:
                            logging.info(f"Deleting orphaned {host}/{subvol}/{snapshot}")
                            self.delete_remote_snapshot(host, subvol, snapshot)

            # Show final statistics
            elapsed_time = time.time() - self.stats['start_time']
            print("\nBackup Complete!")
            print(f"Transferred: {self.stats['transferred']} snapshots")
            print(f"Deleted: {self.stats['deleted']} snapshots")
            print(f"Total time: {str(timedelta(seconds=int(elapsed_time)))}")
        except Exception as e:
            logging.error(f"Backup failed: {e}")
            logging.error(traceback.format_exc())
            sys.exit(1)


if __name__ == "__main__":
    backup = BTRFSBackup()
    backup.run()