Add sync-synology-btrfs.py backup script

This commit is contained in:
albert 2024-12-26 02:39:55 +01:00
parent fa8fa57b0f
commit d5949acda7
Signed by: albert
GPG key ID: 3895DD267CA11BA9

View file

@@ -0,0 +1,392 @@
#!/usr/bin/env python3
"""
BTRFS Backup Script
==================
A comprehensive backup solution for BTRFS snapshots with incremental transfer support.
Description:
------------
This script performs automated backup of BTRFS snapshots from a local host to a remote
destination. It supports incremental transfers and cleanup of orphaned snapshots.
"""
import os
import sys
import subprocess
import logging
import time
from datetime import datetime, timedelta
from pathlib import Path
import shutil
import traceback
import signal
import json
# Configuration constants: local snapshot tree, SSH destination, mirror root.
SOURCE_BASE = "/volume1/BTRFS_Receives/hosts"
DEST_HOST = "root@synology-backup"
DEST_BASE = "/volume1/BTRFS_Receives/hosts"
LOG_FILE = "/var/log/btrfs_backup.log"

# Log to both the log file and the console; the format embeds file, line and
# function name so failed remote transfers can be traced to the exact call.
logging.basicConfig(
    level=logging.INFO,  # raise to logging.DEBUG for per-command tracing
    format='%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d - %(funcName)s()] - %(message)s',
    handlers=[
        logging.FileHandler(LOG_FILE),
        logging.StreamHandler()
    ]
)
class BTRFSBackup:
    """Mirror local BTRFS snapshots to a remote host over SSH.

    Snapshots are laid out as ``SOURCE_BASE/<host>/<subvol>/<N>/snapshot``
    where ``<N>`` is a numeric generation id; the remote side mirrors the
    same layout under ``DEST_BASE`` on ``DEST_HOST``.  Missing snapshots are
    transferred with ``btrfs send`` piped into ``ssh ... btrfs receive``
    (incrementally when a common parent exists on both sides), and remote
    snapshots with no local counterpart are deleted.
    """

    def __init__(self):
        # Running counters, reported at the end of run().
        self.stats = {
            'transferred': 0,
            'deleted': 0,
            'errors': 0,
            'start_time': None
        }
        # Exit cleanly on Ctrl-C / service stop instead of dying mid-transfer
        # with a raw traceback.
        signal.signal(signal.SIGINT, self._signal_handler)
        signal.signal(signal.SIGTERM, self._signal_handler)

    def _signal_handler(self, signum, frame):
        """Handle SIGINT/SIGTERM: log a warning and exit with status 1."""
        logging.warning("Received interrupt signal. Cleaning up...")
        sys.exit(1)

    def run_command(self, command, shell=False, check=True):
        """Execute a system command and return its CompletedProcess.

        ``command`` may be a string (with ``shell=True``) or an argv list.
        Output is captured as text and logged at DEBUG level.

        Raises:
            subprocess.CalledProcessError: when ``check`` is True and the
                command exits non-zero (logged before re-raising).
        """
        try:
            cmd_str = command if isinstance(command, str) else ' '.join(command)
            logging.debug(f"Executing command: {cmd_str}")
            result = subprocess.run(
                command,
                shell=shell,
                check=check,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )
            logging.debug(f"Command output: {result.stdout}")
            if result.stderr:
                logging.debug(f"Command stderr: {result.stderr}")
            return result
        except subprocess.CalledProcessError as e:
            logging.error(f"Command failed: {e.cmd}")
            logging.error(f"Return code: {e.returncode}")
            logging.error(f"Error output: {e.stderr}")
            raise

    def get_local_snapshots(self, path):
        """Return numeric snapshot ids found under *path*, sorted ascending.

        A snapshot is a directory ``<N>/snapshot`` directly below *path*.
        Non-numeric directory names are skipped, matching the behaviour of
        get_remote_snapshots (previously they crashed the numeric sort with
        a ValueError).
        """
        try:
            logging.debug(f"Scanning for local snapshots in: {path}")
            snapshots = []
            path = Path(path)
            if path.exists():
                for item in path.glob("*/snapshot"):
                    if item.is_dir():
                        name = item.parent.name
                        try:
                            int(name)  # keep only numeric generation ids
                        except ValueError:
                            logging.debug(f"Skipping non-numeric directory: {name}")
                            continue
                        snapshots.append(name)
            sorted_snapshots = sorted(snapshots, key=lambda x: int(x))
            logging.debug(f"Found local snapshots: {json.dumps(sorted_snapshots)}")
            return sorted_snapshots
        except Exception as e:
            logging.error(f"Error getting local snapshots: {e}")
            logging.error(traceback.format_exc())
            raise

    def get_remote_snapshots(self, path):
        """Return numeric snapshot ids present on DEST_HOST under *path*.

        Lists candidate directories via ``find -maxdepth 1``, keeps only
        numeric names, and confirms each contains a ``snapshot`` subdirectory
        with a second ssh round-trip.  Returns [] when the remote path does
        not exist (find exits non-zero).
        """
        try:
            logging.debug(f"Scanning for remote snapshots in: {path}")
            # maxdepth 1 limits the listing to the generation-id level.
            cmd = f"ssh {DEST_HOST} 'find {path} -maxdepth 1 -type d'"
            result = self.run_command(cmd, shell=True, check=False)
            if result.returncode != 0:
                logging.debug(f"Find command failed with return code {result.returncode}")
                logging.debug(f"Error output: {result.stderr}")
                return []
            snapshots = []
            for line in result.stdout.splitlines():
                if not line.strip():
                    continue
                dirname = os.path.basename(line.strip())
                try:
                    int(dirname)  # only numeric generation ids qualify
                except ValueError:
                    logging.debug(f"Skipping non-numeric directory: {dirname}")
                    continue
                # A valid snapshot dir must contain a 'snapshot' subvolume dir.
                verify_cmd = f"ssh {DEST_HOST} '[ -d {line.strip()}/snapshot ] && echo exists'"
                verify_result = self.run_command(verify_cmd, shell=True, check=False)
                if verify_result.returncode == 0 and verify_result.stdout.strip() == 'exists':
                    snapshots.append(dirname)
            sorted_snapshots = sorted(snapshots, key=lambda x: int(x))
            logging.debug(f"Found remote snapshots: {json.dumps(sorted_snapshots)}")
            return sorted_snapshots
        except Exception as e:
            logging.error(f"Error getting remote snapshots: {e}")
            logging.error(traceback.format_exc())
            raise

    def verify_btrfs_subvolume(self, path):
        """Return True if *path* is a valid local BTRFS subvolume."""
        try:
            logging.debug(f"Verifying BTRFS subvolume: {path}")
            result = self.run_command(['btrfs', 'subvolume', 'show', path], check=False)
            is_valid = result.returncode == 0
            logging.debug(f"Subvolume verification result: {'valid' if is_valid else 'invalid'}")
            return is_valid
        except Exception as e:
            logging.error(f"Error verifying subvolume: {e}")
            return False

    def get_parent_snapshot(self, host, subvol, snapshot):
        """Return the newest snapshot id older than *snapshot* that exists on
        both sides and is a valid local subvolume, or None for a full send.

        Any error here is swallowed (returns None) so a failed parent lookup
        degrades to a full transfer instead of aborting the backup.
        """
        try:
            logging.debug(f"Finding parent snapshot for {host}/{subvol}/{snapshot}")
            local_path = f"{SOURCE_BASE}/{host}/{subvol}"
            remote_path = f"{DEST_BASE}/{host}/{subvol}"
            logging.debug(f"Scanning local path: {local_path}")
            logging.debug(f"Scanning remote path: {remote_path}")
            local_snapshots = set(self.get_local_snapshots(local_path))
            remote_snapshots = set(self.get_remote_snapshots(remote_path))
            logging.debug(f"Local snapshots: {json.dumps(list(local_snapshots))}")
            logging.debug(f"Remote snapshots: {json.dumps(list(remote_snapshots))}")
            # Only snapshots present on BOTH sides can serve as a send parent.
            common_snapshots = local_snapshots & remote_snapshots
            logging.debug(f"Common snapshots: {json.dumps(list(common_snapshots))}")
            if not common_snapshots:
                logging.info("No common snapshots found - will perform full send")
                return None
            current = int(snapshot)
            logging.debug(f"Current snapshot number: {current}")
            # A parent must be strictly older than the snapshot being sent.
            potential_parents = [int(s) for s in common_snapshots if int(s) < current]
            logging.debug(f"Potential parent snapshots: {json.dumps(potential_parents)}")
            if not potential_parents:
                logging.info("No valid parent snapshots found - will perform full send")
                return None
            latest_parent = str(max(potential_parents))
            logging.debug(f"Selected latest potential parent: {latest_parent}")
            parent_path = f"{SOURCE_BASE}/{host}/{subvol}/{latest_parent}/snapshot"
            logging.debug(f"Checking parent path: {parent_path}")
            if not os.path.exists(parent_path):
                logging.warning(f"Parent snapshot path {parent_path} does not exist")
                return None
            # btrfs send -p requires the parent to be a real subvolume.
            if not self.verify_btrfs_subvolume(parent_path):
                logging.warning(f"Parent snapshot {latest_parent} is not a valid BTRFS subvolume")
                return None
            logging.info(f"Found valid parent snapshot: {latest_parent}")
            return latest_parent
        except Exception as e:
            logging.error(f"Error finding parent snapshot: {e}")
            logging.error(traceback.format_exc())
            return None

    def transfer_snapshot(self, host, subvol, snapshot):
        """Send one snapshot to the backup destination via ssh.

        Uses an incremental ``btrfs send -p`` when a usable parent exists,
        otherwise a full send.  Raises on any transfer failure.
        """
        source_path = f"{SOURCE_BASE}/{host}/{subvol}/{snapshot}/snapshot"
        dest_path = f"{DEST_BASE}/{host}/{subvol}/{snapshot}"
        try:
            logging.debug(f"Starting transfer for snapshot: {host}/{subvol}/{snapshot}")
            logging.debug(f"Source path: {source_path}")
            logging.debug(f"Destination path: {dest_path}")
            # btrfs receive needs the target directory to exist.
            self.run_command(f"ssh {DEST_HOST} 'mkdir -p {dest_path}'", shell=True)
            parent = self.get_parent_snapshot(host, subvol, snapshot)
            logging.debug(f"Parent snapshot found: {parent}")
            if parent:
                parent_path = f"{SOURCE_BASE}/{host}/{subvol}/{parent}/snapshot"
                logging.info(f"Performing incremental transfer using parent snapshot: {parent}")
                logging.debug(f"Parent path: {parent_path}")
                send_cmd = f"btrfs send -p {parent_path} {source_path}"
            else:
                logging.info("Performing full transfer (no parent snapshot found)")
                send_cmd = f"btrfs send {source_path}"
            receive_cmd = f"ssh {DEST_HOST} 'btrfs receive {dest_path}'"
            logging.debug(f"Send command: {send_cmd}")
            logging.debug(f"Receive command: {receive_cmd}")
            # Pipe: btrfs send | ssh ... btrfs receive
            send_process = subprocess.Popen(
                send_cmd.split(),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            receive_process = subprocess.Popen(
                receive_cmd,
                shell=True,
                stdin=send_process.stdout,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            # Drop our copy of the write end so send gets SIGPIPE if receive dies.
            send_process.stdout.close()
            # Drain both processes with communicate() to avoid deadlocking on
            # a full stderr pipe buffer (previously send.stderr was read in
            # full before the receive side had been drained).
            receive_stdout, receive_stderr = receive_process.communicate()
            _, send_stderr = send_process.communicate()
            send_retcode = send_process.returncode
            receive_retcode = receive_process.returncode
            logging.debug(f"Send process return code: {send_retcode}")
            logging.debug(f"Receive process return code: {receive_retcode}")
            if send_stderr:
                logging.debug(f"Send process stderr: {send_stderr}")
            if receive_stderr:
                logging.debug(f"Receive process stderr: {receive_stderr}")
            if send_retcode != 0 or receive_retcode != 0:
                raise subprocess.CalledProcessError(
                    send_retcode or receive_retcode,
                    send_cmd,
                    output=receive_stdout,
                    stderr=send_stderr + receive_stderr
                )
            self.stats['transferred'] += 1
            logging.info(f"Successfully transferred snapshot {snapshot}")
        except Exception as e:
            logging.error(f"Error transferring snapshot {snapshot}: {e}")
            logging.error(traceback.format_exc())
            raise

    def delete_remote_snapshot(self, host, subvol, snapshot):
        """Delete an orphaned snapshot (subvolume + wrapper dir) on DEST_HOST."""
        try:
            path = f"{DEST_BASE}/{host}/{subvol}/{snapshot}"
            logging.debug(f"Attempting to delete remote snapshot: {path}")
            # The subvolume must be deleted with btrfs before the plain
            # directory around it can be removed.
            cmd = f"ssh {DEST_HOST} 'btrfs subvolume delete {path}/snapshot && rm -rf {path}'"
            self.run_command(cmd, shell=True)
            self.stats['deleted'] += 1
            logging.info(f"Deleted orphaned snapshot: {path}")
        except Exception as e:
            logging.error(f"Error deleting remote snapshot: {e}")
            logging.error(traceback.format_exc())
            raise

    def show_inventory(self):
        """Print a local/remote presence table for every known snapshot."""
        print("\nCurrent Snapshot Inventory:")
        print("==========================")
        for host in os.listdir(SOURCE_BASE):
            host_path = os.path.join(SOURCE_BASE, host)
            if not os.path.isdir(host_path):
                continue
            print(f"\nHost: {host}")
            for subvol in os.listdir(host_path):
                subvol_path = os.path.join(host_path, subvol)
                if not os.path.isdir(subvol_path):
                    continue
                print(f"  Subvolume: {subvol}")
                local_snapshots = set(self.get_local_snapshots(subvol_path))
                remote_snapshots = set(self.get_remote_snapshots(f"{DEST_BASE}/{host}/{subvol}"))
                all_snapshots = sorted(local_snapshots | remote_snapshots, key=lambda x: int(x))
                for snapshot in all_snapshots:
                    # The original check/cross glyphs were lost to a bad
                    # encoding (both branches printed ""); restored here.
                    local_status = "✓" if snapshot in local_snapshots else "✗"
                    remote_status = "✓" if snapshot in remote_snapshots else "✗"
                    print(f"    {snapshot}: Local: [{local_status}] Remote: [{remote_status}]")

    def run(self):
        """Run one full backup pass over every host and subvolume.

        For each subvolume: transfer snapshots missing on the remote side
        (oldest first, so incremental parents exist), then delete remote
        snapshots with no local counterpart.  Exits the process with status 1
        on any unhandled failure.
        """
        try:
            self.stats['start_time'] = time.time()
            logging.info("Starting BTRFS backup process")
            self.show_inventory()
            for host in os.listdir(SOURCE_BASE):
                host_path = os.path.join(SOURCE_BASE, host)
                if not os.path.isdir(host_path):
                    continue
                logging.info(f"Processing host: {host}")
                for subvol in os.listdir(host_path):
                    subvol_path = os.path.join(host_path, subvol)
                    if not os.path.isdir(subvol_path):
                        continue
                    logging.info(f"Processing subvolume: {subvol}")
                    local_snapshots = set(self.get_local_snapshots(subvol_path))
                    remote_snapshots = set(self.get_remote_snapshots(f"{DEST_BASE}/{host}/{subvol}"))
                    # Oldest-first so each transfer can use the previous one
                    # as its incremental parent.
                    missing_snapshots = sorted(local_snapshots - remote_snapshots, key=lambda x: int(x))
                    if missing_snapshots:
                        logging.info(f"Found {len(missing_snapshots)} new snapshots to transfer")
                        for snapshot in missing_snapshots:
                            logging.info(f"Transferring snapshot: {host}/{subvol}/{snapshot}")
                            self.transfer_snapshot(host, subvol, snapshot)
                    orphaned_snapshots = remote_snapshots - local_snapshots
                    if orphaned_snapshots:
                        logging.info(f"Found {len(orphaned_snapshots)} orphaned snapshots to delete")
                        for snapshot in orphaned_snapshots:
                            logging.info(f"Deleting orphaned {host}/{subvol}/{snapshot}")
                            self.delete_remote_snapshot(host, subvol, snapshot)
            elapsed_time = time.time() - self.stats['start_time']
            print("\nBackup Complete!")
            print(f"Transferred: {self.stats['transferred']} snapshots")
            print(f"Deleted: {self.stats['deleted']} snapshots")
            print(f"Total time: {str(timedelta(seconds=int(elapsed_time)))}")
        except Exception as e:
            logging.error(f"Backup failed: {e}")
            logging.error(traceback.format_exc())
            sys.exit(1)
if __name__ == "__main__":
    # Construct the backup manager and run a single pass in one expression.
    BTRFSBackup().run()