#!/usr/bin/env python3
"""
BTRFS Backup Script
===================
A comprehensive backup solution for BTRFS snapshots with incremental transfer support.

Description:
------------
This script performs automated backup of BTRFS snapshots from a local host to a remote
destination. It supports incremental transfers and cleanup of orphaned snapshots.

Snapshot layout (both local and remote) is assumed to be the one produced by
``btrfs receive``::

    <BASE>/<host>/<subvolume>/<numeric-id>/snapshot
"""

import json
import logging
import os
import signal
import subprocess
import sys
import time
import traceback
from datetime import timedelta
from pathlib import Path

# Configuration constants.
SOURCE_BASE = "/volume1/BTRFS_Receives/hosts"
DEST_HOST = "root@synology-backup"
DEST_BASE = "/volume1/BTRFS_Receives/hosts"
LOG_FILE = "/var/log/btrfs_backup.log"

# Initialize logging with a detailed format. Raise level to logging.DEBUG for
# verbose command tracing. delay=True postpones opening LOG_FILE until the
# first record is actually written, so importing this module does not fail
# when /var/log is not writable (e.g. during testing).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d - %(funcName)s()] - %(message)s',
    handlers=[
        logging.FileHandler(LOG_FILE, delay=True),
        logging.StreamHandler()
    ]
)


class BTRFSBackup:
    """Main class for handling BTRFS snapshot backups."""

    def __init__(self):
        # Counters for the final summary. 'errors' counts operations that
        # raised before the failure was propagated to the caller.
        self.stats = {
            'transferred': 0,
            'deleted': 0,
            'errors': 0,
            'start_time': None
        }
        signal.signal(signal.SIGINT, self._signal_handler)
        signal.signal(signal.SIGTERM, self._signal_handler)

    def _signal_handler(self, signum, frame):
        """Handle interrupt signals gracefully."""
        logging.warning("Received interrupt signal. Cleaning up...")
        sys.exit(1)

    def run_command(self, command, shell=False, check=True):
        """Execute a system command and return its CompletedProcess.

        Args:
            command: argv list, or a command string when shell=True.
            shell: Passed through to subprocess.run().
            check: When True, a non-zero exit raises CalledProcessError.

        Raises:
            subprocess.CalledProcessError: If check is True and the command
                fails; the failure is logged before re-raising.
        """
        try:
            cmd_str = command if isinstance(command, str) else ' '.join(command)
            logging.debug(f"Executing command: {cmd_str}")

            result = subprocess.run(
                command,
                shell=shell,
                check=check,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )

            logging.debug(f"Command output: {result.stdout}")
            if result.stderr:
                logging.debug(f"Command stderr: {result.stderr}")

            return result
        except subprocess.CalledProcessError as e:
            logging.error(f"Command failed: {e.cmd}")
            logging.error(f"Return code: {e.returncode}")
            logging.error(f"Error output: {e.stderr}")
            self.stats['errors'] += 1
            raise

    def get_local_snapshots(self, path):
        """Return the sorted list of local snapshot IDs under *path*.

        A snapshot is a directory with a numeric name that contains a
        ``snapshot`` subdirectory. Non-numeric directories are skipped,
        mirroring the filtering done by get_remote_snapshots(), so a stray
        directory cannot crash the numeric sort.

        Returns:
            list[str]: Snapshot IDs sorted numerically (e.g. ["2", "10"]).
        """
        try:
            logging.debug(f"Scanning for local snapshots in: {path}")
            snapshots = []
            base = Path(path)
            if base.exists():
                for item in base.glob("*/snapshot"):
                    if item.is_dir():
                        name = item.parent.name
                        try:
                            int(name)
                        except ValueError:
                            logging.debug(f"Skipping non-numeric directory: {name}")
                            continue
                        snapshots.append(name)

            sorted_snapshots = sorted(snapshots, key=lambda x: int(x))
            logging.debug(f"Found local snapshots: {json.dumps(sorted_snapshots)}")
            return sorted_snapshots
        except Exception as e:
            logging.error(f"Error getting local snapshots: {e}")
            logging.error(traceback.format_exc())
            raise

    def get_remote_snapshots(self, path):
        """Return the sorted list of remote snapshot IDs under *path*.

        Uses ``find -maxdepth 1`` over SSH to list candidate directories,
        then verifies each one contains a ``snapshot`` subdirectory.

        NOTE(review): this issues one extra SSH round-trip per candidate
        directory, and interpolates *path* unquoted into a shell command —
        assumes paths contain no shell metacharacters (true for the
        numeric layout this script manages).

        Returns:
            list[str]: Snapshot IDs sorted numerically; [] if the remote
            listing fails (e.g. the directory does not exist yet).
        """
        try:
            logging.debug(f"Scanning for remote snapshots in: {path}")
            # find with -maxdepth 1 locates snapshot directories at the
            # correct level without recursing into snapshot contents.
            cmd = f"ssh {DEST_HOST} 'find {path} -maxdepth 1 -type d'"
            result = self.run_command(cmd, shell=True, check=False)

            snapshots = []
            if result.returncode == 0:
                for line in result.stdout.splitlines():
                    if line.strip():
                        dirname = os.path.basename(line.strip())
                        try:
                            # Only numeric directory names are snapshots.
                            int(dirname)
                            # Verify the snapshot subdirectory exists remotely.
                            verify_cmd = f"ssh {DEST_HOST} '[ -d {line.strip()}/snapshot ] && echo exists'"
                            verify_result = self.run_command(verify_cmd, shell=True, check=False)
                            if verify_result.returncode == 0 and verify_result.stdout.strip() == 'exists':
                                snapshots.append(dirname)
                        except ValueError:
                            logging.debug(f"Skipping non-numeric directory: {dirname}")

                sorted_snapshots = sorted(snapshots, key=lambda x: int(x))
                logging.debug(f"Found remote snapshots: {json.dumps(sorted_snapshots)}")
                return sorted_snapshots
            else:
                logging.debug(f"Find command failed with return code {result.returncode}")
                logging.debug(f"Error output: {result.stderr}")
                return []
        except Exception as e:
            logging.error(f"Error getting remote snapshots: {e}")
            logging.error(traceback.format_exc())
            raise

    def verify_btrfs_subvolume(self, path):
        """Return True if *path* is a valid local BTRFS subvolume."""
        try:
            logging.debug(f"Verifying BTRFS subvolume: {path}")
            result = self.run_command(['btrfs', 'subvolume', 'show', path], check=False)
            is_valid = result.returncode == 0
            logging.debug(f"Subvolume verification result: {'valid' if is_valid else 'invalid'}")
            return is_valid
        except Exception as e:
            logging.error(f"Error verifying subvolume: {e}")
            return False

    def get_parent_snapshot(self, host, subvol, snapshot):
        """Find the best parent snapshot for an incremental send.

        The parent must exist both locally and remotely (so ``btrfs send -p``
        and the remote receive share a common base), be numerically older
        than *snapshot*, and be a valid local subvolume.

        Returns:
            str | None: The parent snapshot ID, or None to request a full
            (non-incremental) send.
        """
        try:
            logging.debug(f"Finding parent snapshot for {host}/{subvol}/{snapshot}")

            local_path = f"{SOURCE_BASE}/{host}/{subvol}"
            remote_path = f"{DEST_BASE}/{host}/{subvol}"

            logging.debug(f"Scanning local path: {local_path}")
            logging.debug(f"Scanning remote path: {remote_path}")

            local_snapshots = set(self.get_local_snapshots(local_path))
            remote_snapshots = set(self.get_remote_snapshots(remote_path))

            logging.debug(f"Local snapshots: {json.dumps(list(local_snapshots))}")
            logging.debug(f"Remote snapshots: {json.dumps(list(remote_snapshots))}")

            # Only snapshots present on both sides can act as a parent.
            common_snapshots = local_snapshots & remote_snapshots
            logging.debug(f"Common snapshots: {json.dumps(list(common_snapshots))}")

            if not common_snapshots:
                logging.info("No common snapshots found - will perform full send")
                return None

            current = int(snapshot)
            logging.debug(f"Current snapshot number: {current}")

            # Candidates are common snapshots strictly older than current.
            potential_parents = [int(s) for s in common_snapshots if int(s) < current]
            logging.debug(f"Potential parent snapshots: {json.dumps(potential_parents)}")

            if not potential_parents:
                logging.info("No valid parent snapshots found - will perform full send")
                return None

            # Prefer the most recent candidate: smallest incremental delta.
            latest_parent = str(max(potential_parents))
            logging.debug(f"Selected latest potential parent: {latest_parent}")

            parent_path = f"{SOURCE_BASE}/{host}/{subvol}/{latest_parent}/snapshot"
            logging.debug(f"Checking parent path: {parent_path}")

            if not os.path.exists(parent_path):
                logging.warning(f"Parent snapshot path {parent_path} does not exist")
                return None

            # The path must be an actual subvolume or btrfs send -p will fail.
            if not self.verify_btrfs_subvolume(parent_path):
                logging.warning(f"Parent snapshot {latest_parent} is not a valid BTRFS subvolume")
                return None

            logging.info(f"Found valid parent snapshot: {latest_parent}")
            return latest_parent

        except Exception as e:
            logging.error(f"Error finding parent snapshot: {e}")
            logging.error(traceback.format_exc())
            return None

    def transfer_snapshot(self, host, subvol, snapshot):
        """Transfer a single snapshot to the backup destination.

        Pipes ``btrfs send`` (local) into ``btrfs receive`` (over SSH),
        using an incremental send when a common parent exists.

        Raises:
            subprocess.CalledProcessError: If either side of the pipeline
                exits non-zero.
        """
        source_path = f"{SOURCE_BASE}/{host}/{subvol}/{snapshot}/snapshot"
        dest_path = f"{DEST_BASE}/{host}/{subvol}/{snapshot}"

        try:
            logging.debug(f"Starting transfer for snapshot: {host}/{subvol}/{snapshot}")
            logging.debug(f"Source path: {source_path}")
            logging.debug(f"Destination path: {dest_path}")

            # Create destination directory.
            self.run_command(f"ssh {DEST_HOST} 'mkdir -p {dest_path}'", shell=True)

            # Find parent snapshot for incremental transfer.
            parent = self.get_parent_snapshot(host, subvol, snapshot)
            logging.debug(f"Parent snapshot found: {parent}")

            # Prepare the send command (incremental when a parent exists).
            if parent:
                parent_path = f"{SOURCE_BASE}/{host}/{subvol}/{parent}/snapshot"
                logging.info(f"Performing incremental transfer using parent snapshot: {parent}")
                logging.debug(f"Parent path: {parent_path}")
                send_cmd = f"btrfs send -p {parent_path} {source_path}"
            else:
                logging.info("Performing full transfer (no parent snapshot found)")
                send_cmd = f"btrfs send {source_path}"

            receive_cmd = f"ssh {DEST_HOST} 'btrfs receive {dest_path}'"

            logging.debug(f"Send command: {send_cmd}")
            logging.debug(f"Receive command: {receive_cmd}")

            # Execute the transfer as a sender -> receiver pipeline.
            # NOTE: send_cmd.split() assumes paths without whitespace.
            send_process = subprocess.Popen(
                send_cmd.split(),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )

            receive_process = subprocess.Popen(
                receive_cmd,
                shell=True,
                stdin=send_process.stdout,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )

            # Close our copy of the pipe so the sender sees SIGPIPE if the
            # receiver dies early.
            send_process.stdout.close()

            # Drain the receiver FIRST: communicate() pulls the whole
            # stream through the pipeline. Reading the sender's stderr to
            # completion before the transfer finishes could deadlock if
            # the sender blocked on a full stderr pipe.
            receive_stdout, receive_stderr = receive_process.communicate()
            send_stderr = send_process.stderr.read()
            send_process.stderr.close()

            send_retcode = send_process.wait()
            receive_retcode = receive_process.wait()

            logging.debug(f"Send process return code: {send_retcode}")
            logging.debug(f"Receive process return code: {receive_retcode}")

            if send_stderr:
                logging.debug(f"Send process stderr: {send_stderr}")
            if receive_stderr:
                logging.debug(f"Receive process stderr: {receive_stderr}")

            if send_retcode != 0 or receive_retcode != 0:
                raise subprocess.CalledProcessError(
                    send_retcode or receive_retcode,
                    send_cmd,
                    output=receive_stdout,
                    stderr=send_stderr + receive_stderr
                )

            self.stats['transferred'] += 1
            logging.info(f"Successfully transferred snapshot {snapshot}")

        except Exception as e:
            logging.error(f"Error transferring snapshot {snapshot}: {e}")
            logging.error(traceback.format_exc())
            self.stats['errors'] += 1
            raise

    def delete_remote_snapshot(self, host, subvol, snapshot):
        """Delete an orphaned snapshot from the backup destination.

        Removes the remote subvolume first (plain rm cannot delete a
        subvolume), then the now-empty numbered directory.
        """
        try:
            path = f"{DEST_BASE}/{host}/{subvol}/{snapshot}"
            logging.debug(f"Attempting to delete remote snapshot: {path}")

            cmd = f"ssh {DEST_HOST} 'btrfs subvolume delete {path}/snapshot && rm -rf {path}'"
            self.run_command(cmd, shell=True)
            self.stats['deleted'] += 1
            logging.info(f"Deleted orphaned snapshot: {path}")
        except Exception as e:
            logging.error(f"Error deleting remote snapshot: {e}")
            logging.error(traceback.format_exc())
            self.stats['errors'] += 1
            raise

    def show_inventory(self):
        """Display the current local/remote snapshot inventory."""
        print("\nCurrent Snapshot Inventory:")
        print("==========================")

        for host in os.listdir(SOURCE_BASE):
            host_path = os.path.join(SOURCE_BASE, host)
            if not os.path.isdir(host_path):
                continue

            print(f"\nHost: {host}")
            for subvol in os.listdir(host_path):
                subvol_path = os.path.join(host_path, subvol)
                if not os.path.isdir(subvol_path):
                    continue

                print(f"  Subvolume: {subvol}")
                local_snapshots = set(self.get_local_snapshots(subvol_path))
                remote_snapshots = set(self.get_remote_snapshots(f"{DEST_BASE}/{host}/{subvol}"))

                all_snapshots = sorted(local_snapshots | remote_snapshots, key=lambda x: int(x))
                for snapshot in all_snapshots:
                    local_status = "✅" if snapshot in local_snapshots else "❌"
                    remote_status = "✅" if snapshot in remote_snapshots else "❌"
                    print(f"    {snapshot}: Local: [{local_status}] Remote: [{remote_status}]")

    def run(self):
        """Main execution method: sync every host/subvolume, then report.

        For each subvolume: transfer snapshots missing remotely (oldest
        first, so each can serve as parent for the next), then delete
        remote snapshots no longer present locally.
        """
        try:
            self.stats['start_time'] = time.time()
            logging.info("Starting BTRFS backup process")

            # Show inventory before making changes.
            self.show_inventory()

            # Process all hosts.
            for host in os.listdir(SOURCE_BASE):
                host_path = os.path.join(SOURCE_BASE, host)
                if not os.path.isdir(host_path):
                    continue

                logging.info(f"Processing host: {host}")
                # Process all subvolumes.
                for subvol in os.listdir(host_path):
                    subvol_path = os.path.join(host_path, subvol)
                    if not os.path.isdir(subvol_path):
                        continue

                    logging.info(f"Processing subvolume: {subvol}")
                    # Get snapshot lists.
                    local_snapshots = set(self.get_local_snapshots(subvol_path))
                    remote_snapshots = set(self.get_remote_snapshots(f"{DEST_BASE}/{host}/{subvol}"))

                    # Transfer missing snapshots, oldest first.
                    missing_snapshots = sorted(local_snapshots - remote_snapshots, key=lambda x: int(x))
                    if missing_snapshots:
                        logging.info(f"Found {len(missing_snapshots)} new snapshots to transfer")
                        for snapshot in missing_snapshots:
                            logging.info(f"Transferring snapshot: {host}/{subvol}/{snapshot}")
                            self.transfer_snapshot(host, subvol, snapshot)

                    # Delete orphaned snapshots.
                    orphaned_snapshots = remote_snapshots - local_snapshots
                    if orphaned_snapshots:
                        logging.info(f"Found {len(orphaned_snapshots)} orphaned snapshots to delete")
                        for snapshot in orphaned_snapshots:
                            logging.info(f"Deleting orphaned {host}/{subvol}/{snapshot}")
                            self.delete_remote_snapshot(host, subvol, snapshot)

            # Show final statistics.
            elapsed_time = time.time() - self.stats['start_time']
            print("\nBackup Complete!")
            print(f"Transferred: {self.stats['transferred']} snapshots")
            print(f"Deleted: {self.stats['deleted']} snapshots")
            print(f"Total time: {str(timedelta(seconds=int(elapsed_time)))}")

        except Exception as e:
            logging.error(f"Backup failed: {e}")
            logging.error(traceback.format_exc())
            sys.exit(1)


if __name__ == "__main__":
    backup = BTRFSBackup()
    backup.run()