Speed up hashing

This commit is contained in:
Brandon Rising 2024-03-07 13:59:02 -05:00
parent ad70cdfe87
commit e1c16c33a4

View File

@ -12,6 +12,8 @@ import hashlib
import os import os
from pathlib import Path from pathlib import Path
from typing import Callable, Literal, Optional, Union from typing import Callable, Literal, Optional, Union
from concurrent.futures import ThreadPoolExecutor, as_completed
from blake3 import blake3 from blake3 import blake3
@ -105,12 +107,12 @@ class ModelHash:
""" """
model_component_paths = self._get_file_paths(dir, self._file_filter) model_component_paths = self._get_file_paths(dir, self._file_filter)
component_hashes: list[str] = [] # Use ThreadPoolExecutor to hash files in parallel
for component in sorted(model_component_paths): with ThreadPoolExecutor() as executor:
component_hashes.append(self._hash_file(component)) future_to_component = {executor.submit(self._hash_file, component): component for component in sorted(model_component_paths)}
component_hashes = [future.result() for future in as_completed(future_to_component)]
# BLAKE3 is cryptographically secure. We may as well fall back on a secure algorithm # BLAKE3 to hash the hashes
# for the composite hash
composite_hasher = blake3() composite_hasher = blake3()
for h in component_hashes: for h in component_hashes:
composite_hasher.update(h.encode("utf-8")) composite_hasher.update(h.encode("utf-8"))