From 81846764a1cc77db9890aaee9fd9cf25dd4db01c Mon Sep 17 00:00:00 2001 From: Ivan Habunek Date: Sun, 14 Aug 2022 10:02:41 +0200 Subject: [PATCH] Don't download already downloaded files --- tests/test_progress.py | 14 +++++++++++--- twitchdl/http.py | 6 ++++++ twitchdl/progress.py | 21 +++++++++++++++++---- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/tests/test_progress.py b/tests/test_progress.py index dcb711f..8b2b672 100644 --- a/tests/test_progress.py +++ b/tests/test_progress.py @@ -19,26 +19,32 @@ def test_downloaded(): progress.start(3, 300) assert progress.downloaded == 0 + assert progress.progress_bytes == 0 assert progress.progress_perc == 0 progress.advance(1, 100) assert progress.downloaded == 100 + assert progress.progress_bytes == 100 assert progress.progress_perc == 11 progress.advance(2, 200) assert progress.downloaded == 300 + assert progress.progress_bytes == 300 assert progress.progress_perc == 33 progress.advance(3, 150) assert progress.downloaded == 450 + assert progress.progress_bytes == 450 assert progress.progress_perc == 50 progress.advance(1, 50) assert progress.downloaded == 500 + assert progress.progress_bytes == 500 assert progress.progress_perc == 55 progress.abort(2) - assert progress.downloaded == 300 + assert progress.downloaded == 500 + assert progress.progress_bytes == 300 assert progress.progress_perc == 33 progress.start(2, 300) @@ -47,14 +53,16 @@ def test_downloaded(): progress.advance(2, 300) progress.advance(3, 150) - assert progress.downloaded == 900 + assert progress.downloaded == 1100 + assert progress.progress_bytes == 900 assert progress.progress_perc == 100 progress.end(1) progress.end(2) progress.end(3) - assert progress.downloaded == 900 + assert progress.downloaded == 1100 + assert progress.progress_bytes == 900 assert progress.progress_perc == 100 diff --git a/twitchdl/http.py b/twitchdl/http.py index 2452b6a..02b6a2a 100644 --- a/twitchdl/http.py +++ b/twitchdl/http.py @@ -1,6 +1,7 @@ import asyncio import httpx import logging +import os import time from typing import List, Optional, Union @@ -91,6 +92,11 @@ async def download_with_retries( token_bucket: AnyTokenBucket, ): async with semaphore: + if os.path.exists(target): + size = os.path.getsize(target) + progress.already_downloaded(task_id, size) + return + for n in range(RETRY_COUNT): try: return await download(client, task_id, source, target, progress, token_bucket) diff --git a/twitchdl/progress.py b/twitchdl/progress.py index 5ce57c6..d34d09b 100644 --- a/twitchdl/progress.py +++ b/twitchdl/progress.py @@ -29,6 +29,7 @@ class Progress: vod_count: int downloaded: int = 0 estimated_total: Optional[int] = None + progress_bytes: int = 0 progress_perc: int = 0 remaining_time: Optional[int] = None speed: Optional[float] = None @@ -54,10 +55,22 @@ class Progress: raise ValueError(f"Task {task_id}: cannot advance, not started") self.downloaded += chunk_size + self.progress_bytes += chunk_size self.tasks[task_id].advance(chunk_size) self._calculate_progress() self.print() + def already_downloaded(self, task_id: int, size: int): + if task_id in self.tasks: + raise ValueError(f"Task {task_id}: cannot mark as downloaded, already started") + + self.tasks[task_id] = Task(task_id, size) + self.progress_bytes += size + self.vod_downloaded_count += 1 + self._calculate_total() + self._calculate_progress() + self.print() + def abort(self, task_id: int): logger.debug(f"#{task_id} abort") @@ -65,7 +78,7 @@ class Progress: raise ValueError(f"Task {task_id}: cannot abort, not started") del self.tasks[task_id] - self.downloaded = sum(t.downloaded for t in self.tasks.values()) + self.progress_bytes = sum(t.downloaded for t in self.tasks.values()) self._calculate_total() self._calculate_progress() @@ -89,15 +102,15 @@ class Progress: def _calculate_progress(self): elapsed_time = time.time() - self.start_time - self.progress_perc = int(100 * self.downloaded / self.estimated_total) if self.estimated_total else 0 + self.progress_perc = int(100 * self.progress_bytes / self.estimated_total) if self.estimated_total else 0 self.speed = self.downloaded / elapsed_time if elapsed_time else None - self.remaining_time = int((self.estimated_total - self.downloaded) / self.speed) if self.estimated_total and self.speed else None + self.remaining_time = int((self.estimated_total - self.progress_bytes) / self.speed) if self.estimated_total and self.speed else None def print(self): progress = " ".join([ f"Downloaded {self.vod_downloaded_count}/{self.vod_count} VODs", f"({self.progress_perc}%)", - f"{format_size(self.downloaded)}", + f"{format_size(self.progress_bytes)}", f"of ~{format_size(self.estimated_total)}" if self.estimated_total else "", f"at {format_size(self.speed)}/s" if self.speed else "", f"remaining ~{format_duration(self.remaining_time)}" if self.remaining_time is not None else "",