# Copyright (c) 2023, Lincoln D. Stein
"""Implementation of multithreaded download queue for invokeai."""

import os
import re
import threading
import time
import traceback
from pathlib import Path
from queue import Empty, PriorityQueue
from typing import Any, Dict, List, Literal, Optional, Set

import requests
from pydantic.networks import AnyHttpUrl
from requests import HTTPError
from tqdm import tqdm

from invokeai.app.services.config import InvokeAIAppConfig, get_config
from invokeai.app.services.events.events_base import EventServiceBase
from invokeai.app.util.misc import get_iso_timestamp
from invokeai.backend.model_manager.metadata import RemoteModelFile
from invokeai.backend.util.logging import InvokeAILogger

from .download_base import (
    DownloadEventHandler,
    DownloadExceptionHandler,
    DownloadJob,
    DownloadJobBase,
    DownloadJobCancelledException,
    DownloadJobStatus,
    DownloadQueueServiceBase,
    MultiFileDownloadJob,
    ServiceInactiveException,
    UnknownJobIDException,
)

# Maximum number of bytes to download during each call to requests.iter_content()
DOWNLOAD_CHUNK_SIZE = 100000


class DownloadQueueService(DownloadQueueServiceBase):
    """Class for queued download of models."""

    def __init__(
        self,
        max_parallel_dl: int = 5,
        app_config: Optional[InvokeAIAppConfig] = None,
        event_bus: Optional["EventServiceBase"] = None,
        requests_session: Optional[requests.sessions.Session] = None,
    ):
        """
        Initialize DownloadQueue.

        :param app_config: InvokeAIAppConfig object
        :param max_parallel_dl: Number of simultaneous downloads allowed [5].
        :param event_bus: Optional EventServiceBase on which download events are emitted.
        :param requests_session: Optional requests.sessions.Session object, for unit tests.
        """
        self._app_config = app_config or get_config()
        self._jobs: Dict[int, DownloadJob] = {}
        self._download_part2parent: Dict[AnyHttpUrl, MultiFileDownloadJob] = {}
        self._next_job_id = 0
        self._queue: PriorityQueue[DownloadJob] = PriorityQueue()
        self._stop_event = threading.Event()
        self._job_terminated_event = threading.Event()
        self._worker_pool: Set[threading.Thread] = set()
        self._lock = threading.Lock()
        self._logger = InvokeAILogger.get_logger("DownloadQueueService")
        self._event_bus = event_bus
        self._requests = requests_session or requests.Session()
        self._accept_download_requests = False
        self._max_parallel_dl = max_parallel_dl

    def start(self, *args: Any, **kwargs: Any) -> None:
        """Start the download worker threads."""
        with self._lock:
            if self._worker_pool:
                raise Exception("Attempt to start the download service twice")
            self._stop_event.clear()
            self._start_workers(self._max_parallel_dl)
            self._accept_download_requests = True

    def stop(self, *args: Any, **kwargs: Any) -> None:
        """Stop the download worker threads."""
        with self._lock:
            if not self._worker_pool:
                raise Exception("Attempt to stop the download service before it was started")
            self._accept_download_requests = False  # reject attempts to add new jobs to queue
            queued_jobs = [x for x in self.list_jobs() if x.status == DownloadJobStatus.WAITING]
            active_jobs = [x for x in self.list_jobs() if x.status == DownloadJobStatus.RUNNING]
            if queued_jobs:
                self._logger.warning(f"Cancelling {len(queued_jobs)} queued downloads")
            if active_jobs:
                self._logger.info(f"Waiting for {len(active_jobs)} active download jobs to complete")
            with self._queue.mutex:
                self._queue.queue.clear()
            self.cancel_all_jobs()
            self._stop_event.set()
            for thread in self._worker_pool:
                thread.join()
            self._worker_pool.clear()

    def submit_download_job(
        self,
        job: DownloadJob,
        on_start: Optional[DownloadEventHandler] = None,
        on_progress: Optional[DownloadEventHandler] = None,
        on_complete: Optional[DownloadEventHandler] = None,
        on_cancelled: Optional[DownloadEventHandler] = None,
        on_error: Optional[DownloadExceptionHandler] = None,
    ) -> None:
        """Enqueue a download job."""
        if not self._accept_download_requests:
            raise ServiceInactiveException(
                "The download service is not currently accepting requests. Please call start() to initialize the service."
            )
        job.id = self._next_id()
        job.set_callbacks(
            on_start=on_start,
            on_progress=on_progress,
            on_complete=on_complete,
            on_cancelled=on_cancelled,
            on_error=on_error,
        )
        self._jobs[job.id] = job
        self._queue.put(job)
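
    # Illustrative sketch of the two-step API, for callers that construct the
    # DownloadJob themselves; `queue` stands for a started DownloadQueueService:
    #
    #   job = DownloadJob(source=AnyHttpUrl("https://example.com/model.safetensors"), dest=Path("/tmp"))
    #   queue.submit_download_job(job, on_complete=lambda j: print(j.download_path))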

    def download(
        self,
        source: AnyHttpUrl,
        dest: Path,
        priority: int = 10,
        access_token: Optional[str] = None,
        on_start: Optional[DownloadEventHandler] = None,
        on_progress: Optional[DownloadEventHandler] = None,
        on_complete: Optional[DownloadEventHandler] = None,
        on_cancelled: Optional[DownloadEventHandler] = None,
        on_error: Optional[DownloadExceptionHandler] = None,
    ) -> DownloadJob:
        """Create and enqueue a download job and return it."""
        if not self._accept_download_requests:
            raise ServiceInactiveException(
                "The download service is not currently accepting requests. Please call start() to initialize the service."
            )
        job = DownloadJob(
            source=source,
            dest=dest,
            priority=priority,
            access_token=access_token or self._lookup_access_token(source),
        )
        self.submit_download_job(
            job,
            on_start=on_start,
            on_progress=on_progress,
            on_complete=on_complete,
            on_cancelled=on_cancelled,
            on_error=on_error,
        )
        return job
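
    # Illustrative callback sketch. Handlers receive the DownloadJob itself, so
    # progress can be read off job.bytes / job.total_bytes (field names taken from
    # their use elsewhere in this module):
    #
    #   def show_progress(job: DownloadJob) -> None:
    #       print(f"{job.source}: {job.bytes}/{job.total_bytes}")
    #
    #   queue.download(source=url, dest=Path("/tmp"), on_progress=show_progress)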

    def multifile_download(
        self,
        parts: List[RemoteModelFile],
        dest: Path,
        access_token: Optional[str] = None,
        submit_job: bool = True,
        on_start: Optional[DownloadEventHandler] = None,
        on_progress: Optional[DownloadEventHandler] = None,
        on_complete: Optional[DownloadEventHandler] = None,
        on_cancelled: Optional[DownloadEventHandler] = None,
        on_error: Optional[DownloadExceptionHandler] = None,
    ) -> MultiFileDownloadJob:
        """
        Create a MultiFileDownloadJob from a list of remote files and return it.

        :param parts: RemoteModelFile objects to fetch; each part's path must be relative to dest.
        :param dest: Directory into which the parts are downloaded.
        :param access_token: Optional access token applied to every part.
        :param submit_job: If True (the default), enqueue the job immediately.
        """
        mfdj = MultiFileDownloadJob(dest=dest, id=self._next_id())
        mfdj.set_callbacks(
            on_start=on_start,
            on_progress=on_progress,
            on_complete=on_complete,
            on_cancelled=on_cancelled,
            on_error=on_error,
        )

        for part in parts:
            url = part.url
            path = dest / part.path
            assert path.is_relative_to(dest), "only relative download paths accepted"
            job = DownloadJob(
                source=url,
                dest=path,
                access_token=access_token,
            )
            mfdj.download_parts.add(job)
            self._download_part2parent[job.source] = mfdj
        if submit_job:
            self.submit_multifile_download(mfdj)
        return mfdj
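
    # Illustrative sketch of a multifile download; `files` stands for a list of
    # RemoteModelFile objects obtained from model metadata:
    #
    #   mf_job = queue.multifile_download(parts=files, dest=Path("/tmp/models"))
    #   queue.wait_for_job(mf_job)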

    def submit_multifile_download(self, job: MultiFileDownloadJob) -> None:
        """Enqueue a previously-created MultiFileDownloadJob by submitting each of its parts."""
        for download_job in job.download_parts:
            self.submit_download_job(
                download_job,
                on_start=self._mfd_started,
                on_progress=self._mfd_progress,
                on_complete=self._mfd_complete,
                on_cancelled=self._mfd_cancelled,
                on_error=self._mfd_error,
            )

    def join(self) -> None:
        """Wait for all jobs to complete."""
        self._queue.join()

    def _next_id(self) -> int:
        with self._lock:
            id = self._next_job_id
            self._next_job_id += 1
        return id

    def list_jobs(self) -> List[DownloadJob]:
        """List all the jobs."""
        return list(self._jobs.values())

    def prune_jobs(self) -> None:
        """Prune completed and errored queue items from the job list."""
        with self._lock:
            to_delete = set()
            for job_id, job in self._jobs.items():
                if job.in_terminal_state:
                    to_delete.add(job_id)
            for job_id in to_delete:
                del self._jobs[job_id]

    def id_to_job(self, id: int) -> DownloadJob:
        """Translate a job ID into a DownloadJob object."""
        try:
            return self._jobs[id]
        except KeyError as excp:
            raise UnknownJobIDException("Unrecognized job") from excp

    def cancel_job(self, job: DownloadJobBase) -> None:
        """
        Cancel the indicated job.

        If it is running, it will be stopped.
        Its status will be set to DownloadJobStatus.CANCELLED.
        """
        if job.status in [DownloadJobStatus.WAITING, DownloadJobStatus.RUNNING]:
            job.cancel()

    def cancel_all_jobs(self) -> None:
        """Cancel all jobs that are not already in a terminal state."""
        for job in self._jobs.values():
            if not job.in_terminal_state:
                self.cancel_job(job)

    def wait_for_job(self, job: DownloadJobBase, timeout: int = 0) -> DownloadJobBase:
        """Block until the indicated job reaches a terminal state, or until the timeout limit is reached."""
        start = time.time()
        while not job.in_terminal_state:
            if self._job_terminated_event.wait(timeout=0.25):  # in case we miss an event
                self._job_terminated_event.clear()
            if timeout > 0 and time.time() - start > timeout:
                raise TimeoutError("Timeout exceeded")
        return job
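
    # Illustrative sketch: a timeout of 0 (the default) waits indefinitely;
    # otherwise TimeoutError is raised once `timeout` seconds have elapsed.
    #
    #   job = queue.download(source=url, dest=Path("/tmp"))
    #   queue.wait_for_job(job, timeout=300)  # give up after five minutes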

    def _start_workers(self, max_workers: int) -> None:
        """Start the requested number of worker threads."""
        self._stop_event.clear()
        for i in range(0, max_workers):  # noqa B007
            worker = threading.Thread(target=self._download_next_item, daemon=True)
            self._logger.debug(f"Download queue worker thread {worker.name} starting.")
            worker.start()
            self._worker_pool.add(worker)

    def _download_next_item(self) -> None:
        """Worker thread gets next job on priority queue."""
        done = False
        while not done:
            if self._stop_event.is_set():
                done = True
                continue
            try:
                job = self._queue.get(timeout=1)
            except Empty:
                continue
            try:
                job.job_started = get_iso_timestamp()
                self._do_download(job)
                self._signal_job_complete(job)
            except DownloadJobCancelledException:
                self._signal_job_cancelled(job)
                self._cleanup_cancelled_job(job)
            except Exception as excp:
                job.error_type = excp.__class__.__name__ + f"({str(excp)})"
                job.error = traceback.format_exc()
                self._signal_job_error(job, excp)
            finally:
                job.job_ended = get_iso_timestamp()
                self._job_terminated_event.set()  # signal a change to terminal state
                self._download_part2parent.pop(job.source, None)  # if this is a subpart of a multipart job, remove it
                self._queue.task_done()

        self._logger.debug(f"Download queue worker thread {threading.current_thread().name} exiting.")

    def _do_download(self, job: DownloadJob) -> None:
        """Do the actual download."""
        url = job.source
        header = {"Authorization": f"Bearer {job.access_token}"} if job.access_token else {}
        open_mode = "wb"

        # Make a streaming request. This will retrieve headers including
        # content-length and content-disposition, but not fetch any content itself
        resp = self._requests.get(str(url), headers=header, stream=True)
        if not resp.ok:
            raise HTTPError(resp.reason)

        job.content_type = resp.headers.get("Content-Type")
        content_length = int(resp.headers.get("content-length", 0))
        job.total_bytes = content_length

        if job.dest.is_dir():
            file_name = os.path.basename(str(url.path))  # default is to use the last bit of the URL

            if match := re.search('filename="(.+)"', resp.headers.get("Content-Disposition", "")):
                remote_name = match.group(1)
                if self._validate_filename(job.dest.as_posix(), remote_name):
                    file_name = remote_name

            job.download_path = job.dest / file_name

        else:
            job.dest.parent.mkdir(parents=True, exist_ok=True)
            job.download_path = job.dest

        assert job.download_path

        # Don't clobber an existing file. See commit 82c2c85202f88c6d24ff84710f297cfc6ae174af
        # for code that instead resumes an interrupted download.
        if job.download_path.exists():
            raise OSError(f"[Errno 17] File {job.download_path} exists")

        # append ".downloading" to the path
        in_progress_path = self._in_progress_path(job.download_path)

        # signal caller that the download is starting. At this point, key fields such as
        # download_path and total_bytes will be populated. We call it here because the caller
        # might discover that the local file is already complete and generate a COMPLETED status.
        self._signal_job_started(job)

        # "range not satisfiable" - local file is at least as large as the remote file
        if resp.status_code == 416 or (content_length > 0 and job.bytes >= content_length):
            self._logger.warning(f"{job.download_path}: complete file found. Skipping.")
            return

        # "partial content" - local file is smaller than remote file
        elif resp.status_code == 206 or job.bytes > 0:
            self._logger.warning(f"{job.download_path}: partial file found. Resuming.")

        # some other error
        elif resp.status_code != 200:
            raise HTTPError(resp.reason)

        self._logger.debug(f"{job.source}: Downloading {job.download_path}")
        report_delta = job.total_bytes / 100  # report every 1% change
        last_report_bytes = 0

        # DOWNLOAD LOOP
        with open(in_progress_path, open_mode) as file:
            for data in resp.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                if job.cancelled:
                    raise DownloadJobCancelledException("Job was cancelled at caller's request")

                job.bytes += file.write(data)
                if (job.bytes - last_report_bytes >= report_delta) or (job.bytes >= job.total_bytes):
                    last_report_bytes = job.bytes
                    self._signal_job_progress(job)

        # if we get here we are done and can rename the file to the original dest
        self._logger.debug(f"{job.source}: saved to {job.download_path} (bytes={job.bytes})")
        in_progress_path.rename(job.download_path)

    def _validate_filename(self, directory: str, filename: str) -> bool:
        """Return True if the server-supplied filename is safe to use within `directory`."""
        pc_name_max = get_pc_name_max(directory)
        pc_path_max = get_pc_path_max(directory)
        if "/" in filename:
            return False
        if filename.startswith(".."):
            return False
        if len(filename) > pc_name_max:
            return False
        if len(os.path.join(directory, filename)) > pc_path_max:
            return False
        return True
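
    # For illustration (hypothetical values): _validate_filename("/models", "model.safetensors")
    # returns True, while _validate_filename("/models", "../escape") and any name containing
    # a "/" return False.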

    def _in_progress_path(self, path: Path) -> Path:
        """Return the temporary '<name>.downloading' path used while the file is being fetched."""
        return path.with_name(path.name + ".downloading")

    def _lookup_access_token(self, source: AnyHttpUrl) -> Optional[str]:
        # Pull the token from config if it exists and matches the URL
        token = None
        for pair in self._app_config.remote_api_tokens or []:
            if re.search(pair.url_regex, str(source)):
                token = pair.token
                break
        return token
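
    # The pairs consulted above come from the application config. A rough sketch of
    # the expected shape (the exact schema lives in InvokeAIAppConfig; only the
    # url_regex/token attribute names are taken from the code above):
    #
    #   remote_api_tokens:
    #     - url_regex: "civitai.com"
    #       token: "<your-api-token>"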

    def _signal_job_started(self, job: DownloadJob) -> None:
        job.status = DownloadJobStatus.RUNNING
        self._execute_cb(job, "on_start")
        if self._event_bus:
            self._event_bus.emit_download_started(job)

    def _signal_job_progress(self, job: DownloadJob) -> None:
        self._execute_cb(job, "on_progress")
        if self._event_bus:
            self._event_bus.emit_download_progress(job)

    def _signal_job_complete(self, job: DownloadJob) -> None:
        job.status = DownloadJobStatus.COMPLETED
        self._execute_cb(job, "on_complete")
        if self._event_bus:
            self._event_bus.emit_download_complete(job)

    def _signal_job_cancelled(self, job: DownloadJob) -> None:
        if job.status not in [DownloadJobStatus.RUNNING, DownloadJobStatus.WAITING]:
            return
        job.status = DownloadJobStatus.CANCELLED
        self._execute_cb(job, "on_cancelled")
        if self._event_bus:
            self._event_bus.emit_download_cancelled(job)

        # if multifile download, then signal the parent
        if parent_job := self._download_part2parent.get(job.source, None):
            if not parent_job.in_terminal_state:
                parent_job.status = DownloadJobStatus.CANCELLED
                self._execute_cb(parent_job, "on_cancelled")

    def _signal_job_error(self, job: DownloadJob, excp: Optional[Exception] = None) -> None:
        job.status = DownloadJobStatus.ERROR
        self._logger.error(f"{str(job.source)}: {traceback.format_exception(excp)}")
        self._execute_cb(job, "on_error", excp)

        if self._event_bus:
            self._event_bus.emit_download_error(job)

    def _cleanup_cancelled_job(self, job: DownloadJob) -> None:
        self._logger.debug(f"Cleaning up leftover files from cancelled download job {job.download_path}")
        try:
            if job.download_path:
                partial_file = self._in_progress_path(job.download_path)
                partial_file.unlink()
        except OSError as excp:
            self._logger.warning(excp)

    ########################################
    # callbacks used for multifile downloads
    ########################################
    def _mfd_started(self, download_job: DownloadJob) -> None:
        self._logger.info(f"File download started: {download_job.source}")
        with self._lock:
            mf_job = self._download_part2parent[download_job.source]
            if mf_job.waiting:
                mf_job.total_bytes = sum(x.total_bytes for x in mf_job.download_parts)
                mf_job.status = DownloadJobStatus.RUNNING
                assert download_job.download_path is not None
                path_relative_to_destdir = download_job.download_path.relative_to(mf_job.dest)
                mf_job.download_path = (
                    mf_job.dest / path_relative_to_destdir.parts[0]
                )  # keep just the first component of the path
                self._execute_cb(mf_job, "on_start")

    def _mfd_progress(self, download_job: DownloadJob) -> None:
        with self._lock:
            mf_job = self._download_part2parent[download_job.source]
            if mf_job.cancelled:
                for part in mf_job.download_parts:
                    self.cancel_job(part)
            elif mf_job.running:
                mf_job.total_bytes = sum(x.total_bytes for x in mf_job.download_parts)
                mf_job.bytes = sum(x.bytes for x in mf_job.download_parts)  # bytes downloaded so far, not the total size
                self._execute_cb(mf_job, "on_progress")

    def _mfd_complete(self, download_job: DownloadJob) -> None:
        self._logger.info(f"Download complete: {download_job.source}")
        with self._lock:
            mf_job = self._download_part2parent[download_job.source]

            # are there any more active jobs left in this task?
            if mf_job.running and all(x.complete for x in mf_job.download_parts):
                mf_job.status = DownloadJobStatus.COMPLETED
                self._execute_cb(mf_job, "on_complete")

            # we're done with this sub-job
            self._job_terminated_event.set()

    def _mfd_cancelled(self, download_job: DownloadJob) -> None:
        with self._lock:
            mf_job = self._download_part2parent[download_job.source]
            assert mf_job is not None

            if not mf_job.in_terminal_state:
                self._logger.warning(f"Download cancelled: {download_job.source}")
                mf_job.cancel()

            for s in mf_job.download_parts:
                self.cancel_job(s)

    def _mfd_error(self, download_job: DownloadJob, excp: Optional[Exception] = None) -> None:
        with self._lock:
            mf_job = self._download_part2parent[download_job.source]
            assert mf_job is not None
            if not mf_job.in_terminal_state:
                mf_job.status = download_job.status
                mf_job.error = download_job.error
                mf_job.error_type = download_job.error_type
                self._execute_cb(mf_job, "on_error", excp)
                self._logger.error(
                    f"Cancelling {mf_job.dest} due to an error while downloading {download_job.source}: {str(excp)}"
                )
                for s in [x for x in mf_job.download_parts if x.running]:
                    self.cancel_job(s)
                self._download_part2parent.pop(download_job.source)
                self._job_terminated_event.set()

    def _execute_cb(
        self,
        job: DownloadJob | MultiFileDownloadJob,
        callback_name: Literal[
            "on_start",
            "on_progress",
            "on_complete",
            "on_cancelled",
            "on_error",
        ],
        excp: Optional[Exception] = None,
    ) -> None:
        if callback := getattr(job, callback_name, None):
            args = [job, excp] if excp else [job]
            try:
                callback(*args)
            except Exception as e:
                self._logger.error(
                    f"An error occurred while processing the {callback_name} callback: {traceback.format_exception(e)}"
                )
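
    # A compatible handler is simply a callable taking the job (plus the exception,
    # for on_error). A minimal sketch, matching how callbacks are invoked above:
    #
    #   def on_complete(job: DownloadJob) -> None:
    #       print(f"{job.source} saved to {job.download_path}")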


def get_pc_name_max(directory: str) -> int:
    """Return the maximum file name length for the filesystem containing `directory`."""
    if hasattr(os, "pathconf"):
        try:
            return os.pathconf(directory, "PC_NAME_MAX")
        except OSError:
            # macOS w/ external drives raise OSError
            pass
    return 260  # hardcoded for windows


def get_pc_path_max(directory: str) -> int:
    """Return the maximum path length for the filesystem containing `directory`."""
    if hasattr(os, "pathconf"):
        try:
            return os.pathconf(directory, "PC_PATH_MAX")
        except OSError:
            # some platforms may not have this value
            pass
    return 32767  # hardcoded for windows with long names enabled


# Example on_progress event handler to display a TQDM status bar
# Activate with:
#   download_service.download('http://foo.bar/baz', '/tmp', on_progress=TqdmProgress().update)
class TqdmProgress(object):
    """TQDM-based progress bar object to use in on_progress handlers."""

    _bars: Dict[int, tqdm]  # type: ignore
    _last: Dict[int, int]  # last bytes downloaded

    def __init__(self) -> None:  # noqa D107
        self._bars = {}
        self._last = {}

    def update(self, job: DownloadJob) -> None:  # noqa D102
        job_id = job.id
        # new job
        if job_id not in self._bars:
            assert job.download_path
            dest = Path(job.download_path).name
            self._bars[job_id] = tqdm(
                desc=dest,
                initial=0,
                total=job.total_bytes,
                unit="iB",
                unit_scale=True,
            )
            self._last[job_id] = 0
        self._bars[job_id].update(job.bytes - self._last[job_id])
        self._last[job_id] = job.bytes
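
# The same handler can be attached to a multifile download (illustrative sketch;
# this assumes MultiFileDownloadJob exposes the same id/bytes/total_bytes/download_path
# fields used by update() above):
#
#   mf_job = queue.multifile_download(parts=files, dest=Path('/tmp/models'),
#                                     on_progress=TqdmProgress().update)
#   queue.wait_for_job(mf_job)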