Don't check if file exists in download_file

This is done outside the function.
This commit is contained in:
Ivan Habunek 2024-08-28 12:38:40 +02:00
parent 936c6a9da1
commit a808b7d8ec
No known key found for this signature in database
GPG Key ID: 01DB3DD0D824504C

View File

@@ -4,7 +4,7 @@ import os
 import time
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import List, Optional, Tuple
+from typing import List, Optional
 import httpx
@@ -146,16 +146,12 @@ async def download_all(
     await asyncio.gather(*tasks)

-def download_file(url: str, target: Path, retries: int = RETRY_COUNT) -> Tuple[int, bool]:
-    if target.exists():
-        from_disk = True
-        return os.path.getsize(target), from_disk
-    from_disk = False
+def download_file(url: str, target: Path, retries: int = RETRY_COUNT) -> None:
+    """Download URL to given target path with retries"""
     error_message = ""
     for _ in range(retries):
         try:
-            return _do_download_file(url, target), from_disk
+            return _do_download_file(url, target)
         except httpx.HTTPStatusError as ex:
             error_message = f"Server responded with HTTP {ex.response.status_code}"
         except httpx.RequestError as ex:
@@ -164,15 +160,13 @@ def download_file(url: str, target: Path, retries: int = RETRY_COUNT) -> Tuple[i
     raise ConsoleError(f"Failed downloading after {retries} attempts: {error_message}")

-def _do_download_file(url: str, target: Path):
+def _do_download_file(url: str, target: Path) -> None:
     tmp_path = Path(str(target) + ".tmp")
-    size = 0
     with httpx.stream("GET", url, timeout=TIMEOUT, follow_redirects=True) as response:
         response.raise_for_status()
         with open(tmp_path, "wb") as f:
             for chunk in response.iter_bytes(chunk_size=CHUNK_SIZE):
                 f.write(chunk)
-                size += len(chunk)
     os.rename(tmp_path, target)
-    return size