Add functionality to find files not already stored in backup.

This commit is contained in:
Wout Bouckaert 2024-08-15 20:38:40 -06:00
parent cb599b32b9
commit a6d37ee2ff
No known key found for this signature in database

View File

@ -228,7 +228,7 @@ class BackupManager:
self.fail_backup(why, backup_config, server)
# Create backup manifest for server files.
backup_manifest = self.create_snapshot_backup_manifest(
backup_manifest, count_of_files = self.create_snapshot_backup_manifest(
pathlib.Path(server.server_path)
)
@ -237,6 +237,11 @@ class BackupManager:
backup_manifest, backup_target_location, backup_config["backup_id"]
)
# Find files that are not already stored in the backup repository.
files_to_save = self.find_files_not_in_repository(
backup_manifest, backup_target_location
)
@staticmethod
def ensure_snapshot_directory_is_valid(backup_path: pathlib.Path) -> bool:
backup_path.mkdir(exist_ok=True)
@ -278,7 +283,7 @@ class BackupManager:
# Return path with base removed.
return str(desired_path)[len(str(base.absolute())) + 1 :]
def create_snapshot_backup_manifest(self, backup_dir: pathlib.Path) -> dict:
def create_snapshot_backup_manifest(self, backup_dir: pathlib.Path) -> (dict, int):
"""
Creates dict showing all directories in backup source as a relative path, and
all files with their hashes as a relative path. All returned paths are relative
@ -292,6 +297,7 @@ class BackupManager:
"""
output = {"directories": [], "files": []}
files_count = 0
# Iterate over backups source dir.
for p in backup_dir.rglob("*"):
@ -308,6 +314,8 @@ class BackupManager:
else:
# For files.
files_count += 1
# We must store file hash and path to file.
# calculate_file_hash_blake2b returns bytes, b64 is stored as a string.
file_hash = helper.crypto_helper.bytes_to_b64(
@ -318,7 +326,7 @@ class BackupManager:
output["files"].append(
(file_hash, str(self.get_local_path_with_base(p, backup_dir)))
)
return output
return output, files_count
@staticmethod
def create_depends_file_from_backup_manifest(
@ -354,3 +362,50 @@ class BackupManager:
# Iterate through files and add b64 hashes to file.
for depended_file in manifest["files"]:
f.write(depended_file[0] + "\n")
def find_files_not_in_repository(
    self, backup_manifest: dict, backup_repository: pathlib.Path
) -> list[tuple[str, str]]:
    """
    Discovers which files are not already contained in the backup repository by
    hash.

    Args:
        self: self
        backup_manifest: Backup manifest as generated by
            create_snapshot_backup_manifest. Only the "files" entry is read.
        backup_repository: Path to the backup storage location or backup
            "repository."

    Returns: List of manifest file entries, in manifest order, whose hash has no
        corresponding path in the repository. Each entry is returned exactly as
        it appears in the manifest: (file hash, file name).
    """
    # An entry is missing when the repository path derived from its hash does
    # not exist on disk.
    return [
        file_entry
        for file_entry in backup_manifest["files"]
        if not self.get_path_from_hash(file_entry[0], backup_repository).exists()
    ]
@staticmethod
def get_path_from_hash(file_hash: str, repository: pathlib.Path) -> pathlib.Path:
"""
Get file path in backup repository based on file hash and path to the backup
repository.
Args:
file_hash: Hash of target file.
repository: Path to the backup repository.
Returns: Path to where file should be stored.
"""
# Example:
# Repo path: /path/to/backup/repo/
# Hash: 1234...890
# Example: /path/to/backup/repo/data/12/34...890
return repository / "data" / str(file_hash[:2]) / str(file_hash[-126:])