NOTE: line breaks and indentation were reconstructed from a whitespace-collapsed
copy of this patch; confirm context-line indentation against the target file.

diff --git a/app/classes/shared/backup_mgr.py b/app/classes/shared/backup_mgr.py
index 380d7b07..8b7afc4b 100644
--- a/app/classes/shared/backup_mgr.py
+++ b/app/classes/shared/backup_mgr.py
@@ -228,7 +228,7 @@ class BackupManager:
             self.fail_backup(why, backup_config, server)
 
         # Create backup manifest for server files.
-        backup_manifest = self.create_snapshot_backup_manifest(
+        backup_manifest, count_of_files = self.create_snapshot_backup_manifest(
             pathlib.Path(server.server_path)
         )
 
@@ -237,6 +237,11 @@ class BackupManager:
             backup_manifest, backup_target_location, backup_config["backup_id"]
         )
 
+        # Find files that are not already stored in the backup repository.
+        files_to_save = self.find_files_not_in_repository(
+            backup_manifest, backup_target_location
+        )
+
     @staticmethod
     def ensure_snapshot_directory_is_valid(backup_path: pathlib.Path) -> bool:
         backup_path.mkdir(exist_ok=True)
@@ -278,7 +283,9 @@ class BackupManager:
         # Return path with base remove
         return str(desired_path)[len(str(base.absolute())) + 1 :]
 
-    def create_snapshot_backup_manifest(self, backup_dir: pathlib.Path) -> dict:
+    def create_snapshot_backup_manifest(
+        self, backup_dir: pathlib.Path
+    ) -> tuple[dict, int]:
         """
         Creates dict showing all directories in backup source as a relative path, and
         all files with their hashes as a relative path. All returned paths are relative
@@ -292,6 +299,7 @@ class BackupManager:
 
         """
         output = {"directories": [], "files": []}
+        files_count = 0
 
         # Iterate over backups source dir.
         for p in backup_dir.rglob("*"):
@@ -308,6 +316,8 @@ class BackupManager:
 
             else:
                 # For files.
+                files_count += 1
+
                 # We must store file hash and path to file.
                 # calculate_file_hash_blake2b returns bytes, b64 is stored as a string.
                 file_hash = helper.crypto_helper.bytes_to_b64(
@@ -318,7 +328,7 @@ class BackupManager:
                 output["files"].append(
                     (file_hash, str(self.get_local_path_with_base(p, backup_dir)))
                 )
-        return output
+        return output, files_count
 
     @staticmethod
     def create_depends_file_from_backup_manifest(
@@ -354,3 +364,56 @@ class BackupManager:
             # Iterate through files and add b64 hashes to file.
             for depended_file in manifest["files"]:
                 f.write(depended_file[0] + "\n")
+
+    def find_files_not_in_repository(
+        self, backup_manifest: dict, backup_repository: pathlib.Path
+    ) -> list[tuple[str, str]]:
+        """
+        Discovers what files are not already contained in the backup repository by hash.
+        Returns a list of files that are not in the repository in backup manifest
+        format.
+
+        Args:
+            self: self
+            backup_manifest: backup manifest as generated by
+            create_snapshot_backup_manifest.
+            backup_repository: Path to the backup storage location or backup
+            "repository."
+
+        Returns: List of files that are not in the repository in backup manifest format.
+        [(file hash, file name), ...]
+
+        """
+        output = []
+
+        # If the file does not exist in the repository, add it to the output list.
+        for file_tuple in backup_manifest["files"]:
+            file_path = self.get_path_from_hash(file_tuple[0], backup_repository)
+            if not file_path.exists():
+                output.append(file_tuple)
+        return output
+
+    @staticmethod
+    def get_path_from_hash(file_hash: str, repository: pathlib.Path) -> pathlib.Path:
+        """
+        Get file path in backup repository based on file hash and path to the backup
+        repository.
+
+        Args:
+            file_hash: Hash of target file.
+            repository: Path to the backup repository.
+
+        Returns: Path to where file should be stored.
+
+        """
+        # Example:
+        # Repo path: /path/to/backup/repo/
+        # Hash: 1234...890
+        # Result: /path/to/backup/repo/data/12/34...890
+        #
+        # The first two characters name the subdirectory and the rest names the file.
+        # Slice with [2:] rather than a fixed negative offset so the split holds for
+        # any hash length.
+        # NOTE(review): standard base64 output can contain "/", which would add an
+        # extra path component here. Confirm the hash encoding is filesystem-safe.
+        return repository / "data" / file_hash[:2] / file_hash[2:]