feat(mm): add algorithm prefix to hashes

For example:
- md5:a0cd925fc063f98dbf029eee315060c3
- sha1:9e362940e5603fdc60566ea100a288ba2fe48b8c
- blake3:ce3f0c5f3c05d119f4a5dcaf209b50d3149046a0d3a9adee9fed4c83cad6b4d0
This commit is contained in:
psychedelicious
2024-03-14 11:32:33 +11:00
parent a4be935458
commit 9fcd67b5c0
2 changed files with 47 additions and 17 deletions

View File

@ -9,14 +9,15 @@ from blake3 import blake3
from invokeai.backend.model_hash.model_hash import HASHING_ALGORITHMS, MODEL_FILE_EXTENSIONS, ModelHash
# Pairs of (hashing algorithm name, expected hash string).
# NOTE(review): every algorithm is listed twice below - once with a bare hex digest and once
# with the same digest behind an "<algorithm>:" prefix. This looks like the removed and added
# sides of a diff hunk whose +/- markers were lost; confirm which entries are live.
test_cases: list[tuple[HASHING_ALGORITHMS, str]] = [
("md5", "a0cd925fc063f98dbf029eee315060c3"),
("sha1", "9e362940e5603fdc60566ea100a288ba2fe48b8c"),
("sha256", "6dbdb6a147ad4d808455652bf5a10120161678395f6bfbd21eb6fe4e731aceeb"),
("md5", "md5:a0cd925fc063f98dbf029eee315060c3"),
("sha1", "sha1:9e362940e5603fdc60566ea100a288ba2fe48b8c"),
("sha256", "sha256:6dbdb6a147ad4d808455652bf5a10120161678395f6bfbd21eb6fe4e731aceeb"),
(
"sha512",
# NOTE(review): as written this tuple has three elements (bare and prefixed digest), which
# does not match the declared tuple[HASHING_ALGORITHMS, str] shape - presumably only one
# of the two strings is meant to remain; confirm.
"c4a10476b21e00042f638ad5755c561d91f2bb599d3504d25409495e1c7eda94543332a1a90fbb4efdaf9ee462c33e0336b5eae4acfb1fa0b186af452dd67dc6",
"sha512:c4a10476b21e00042f638ad5755c561d91f2bb599d3504d25409495e1c7eda94543332a1a90fbb4efdaf9ee462c33e0336b5eae4acfb1fa0b186af452dd67dc6",
),
("blake3", "ce3f0c5f3c05d119f4a5dcaf209b50d3149046a0d3a9adee9fed4c83cad6b4d0"),
("blake3", "blake3:ce3f0c5f3c05d119f4a5dcaf209b50d3149046a0d3a9adee9fed4c83cad6b4d0"),
# "blake3_single" is listed with the same "blake3:"-prefixed value as "blake3".
("blake3_single", "blake3:ce3f0c5f3c05d119f4a5dcaf209b50d3149046a0d3a9adee9fed4c83cad6b4d0"),
]
@ -24,11 +25,11 @@ test_cases: list[tuple[HASHING_ALGORITHMS, str]] = [
def test_model_hash_hashes_file(tmp_path: Path, algorithm: HASHING_ALGORITHMS, expected_hash: str):
# Writes the text "model data" to a file named "test" under tmp_path, hashes that file with
# ModelHash(algorithm), and asserts the result equals expected_hash.
file = Path(tmp_path / "test")
file.write_text("model data")
# NOTE(review): the `md5` pair and the `hash_` pair below differ only in the variable name -
# presumably the old and new sides of a rename whose diff markers were lost; confirm.
md5 = ModelHash(algorithm).hash(file)
assert md5 == expected_hash
hash_ = ModelHash(algorithm).hash(file)
assert hash_ == expected_hash
# Hashes a directory of five small files with ModelHash(algorithm) and compares the result
# against a composite hash rebuilt by hand inside the test.
# NOTE(review): the two decorators below parametrize the same argument and differ only by the
# extra "blake3_single" entry - presumably the old and new versions of one line; confirm.
@pytest.mark.parametrize("algorithm", ["md5", "sha1", "sha256", "sha512", "blake3"])
@pytest.mark.parametrize("algorithm", ["md5", "sha1", "sha256", "sha512", "blake3", "blake3_single"])
def test_model_hash_hashes_dir(tmp_path: Path, algorithm: HASHING_ALGORITHMS):
model_hash = ModelHash(algorithm)
# Five paths under tmp_path named 0.bin through 4.bin.
files = [Path(tmp_path, f"{i}.bin") for i in range(5)]
@ -36,15 +37,33 @ def test_model_hash_hashes_dir(tmp_path: Path, algorithm: HASHING_ALGORITHMS):
# Every file gets the same content, the text "data".
for f in files:
f.write_text("data")
# NOTE(review): `md5` and `hash_` hold the same call result; looks like a rename shown as
# both sides of a diff - confirm which name is live.
md5 = model_hash.hash(tmp_path)
hash_ = model_hash.hash(tmp_path)
# Manual implementation of composite hash - always uses BLAKE3
# Per-file hashes are collected in sorted path order from ModelHash._get_file_paths using
# ModelHash._default_file_filter, each computed with model_hash._hash_file.
component_hashes: list[str] = []
for f in sorted(ModelHash._get_file_paths(tmp_path, ModelHash._default_file_filter)):
component_hashes.append(model_hash._hash_file(f))
composite_hasher = blake3()
# NOTE(review): two loop headers follow (`for f in files` with `h = model_hash.hash(f)`, and
# `for h in component_hashes`); presumably only one feeds the update call - confirm.
for f in files:
h = model_hash.hash(f)
for h in component_hashes:
# Each component hash string is fed to the BLAKE3 hasher as UTF-8 bytes.
composite_hasher.update(h.encode("utf-8"))
# NOTE(review): the first assert compares against the bare hex digest, the second against the
# digest with ModelHash._get_prefix(algorithm) prepended; both cannot hold for a non-empty
# prefix, so presumably only one is live - confirm.
assert md5 == composite_hasher.hexdigest()
assert hash_ == ModelHash._get_prefix(algorithm) + composite_hasher.hexdigest()
@pytest.mark.parametrize(
    "algorithm,expected_prefix",
    [
        ("md5", "md5:"),
        ("sha1", "sha1:"),
        ("sha256", "sha256:"),
        ("sha512", "sha512:"),
        ("blake3", "blake3:"),
        ("blake3_single", "blake3:"),
    ],
)
def test_model_hash_gets_prefix(algorithm: HASHING_ALGORITHMS, expected_prefix: str):
    """Check that ModelHash._get_prefix returns the expected prefix for each algorithm.

    The cases pair every algorithm name with "<name>:", except "blake3_single",
    which is paired with the "blake3:" prefix.
    """
    actual_prefix = ModelHash._get_prefix(algorithm)
    assert actual_prefix == expected_prefix
def test_model_hash_blake3_matches_blake3_single(tmp_path: Path):