Don't download already downloaded files

This commit is contained in:
Ivan Habunek 2022-08-14 10:02:41 +02:00
parent 23f1a74aa6
commit 81846764a1
No known key found for this signature in database
GPG Key ID: CDBD63C43A30BB95
3 changed files with 34 additions and 7 deletions

View File

@@ -19,26 +19,32 @@ def test_downloaded():
progress.start(3, 300) progress.start(3, 300)
assert progress.downloaded == 0 assert progress.downloaded == 0
assert progress.progress_bytes == 0
assert progress.progress_perc == 0 assert progress.progress_perc == 0
progress.advance(1, 100) progress.advance(1, 100)
assert progress.downloaded == 100 assert progress.downloaded == 100
assert progress.progress_bytes == 100
assert progress.progress_perc == 11 assert progress.progress_perc == 11
progress.advance(2, 200) progress.advance(2, 200)
assert progress.downloaded == 300 assert progress.downloaded == 300
assert progress.progress_bytes == 300
assert progress.progress_perc == 33 assert progress.progress_perc == 33
progress.advance(3, 150) progress.advance(3, 150)
assert progress.downloaded == 450 assert progress.downloaded == 450
assert progress.progress_bytes == 450
assert progress.progress_perc == 50 assert progress.progress_perc == 50
progress.advance(1, 50) progress.advance(1, 50)
assert progress.downloaded == 500 assert progress.downloaded == 500
assert progress.progress_bytes == 500
assert progress.progress_perc == 55 assert progress.progress_perc == 55
progress.abort(2) progress.abort(2)
assert progress.downloaded == 300 assert progress.downloaded == 500
assert progress.progress_bytes == 300
assert progress.progress_perc == 33 assert progress.progress_perc == 33
progress.start(2, 300) progress.start(2, 300)
@@ -47,14 +53,16 @@ def test_downloaded():
progress.advance(2, 300) progress.advance(2, 300)
progress.advance(3, 150) progress.advance(3, 150)
assert progress.downloaded == 900 assert progress.downloaded == 1100
assert progress.progress_bytes == 900
assert progress.progress_perc == 100 assert progress.progress_perc == 100
progress.end(1) progress.end(1)
progress.end(2) progress.end(2)
progress.end(3) progress.end(3)
assert progress.downloaded == 900 assert progress.downloaded == 1100
assert progress.progress_bytes == 900
assert progress.progress_perc == 100 assert progress.progress_perc == 100

View File

@@ -1,6 +1,7 @@
import asyncio import asyncio
import httpx import httpx
import logging import logging
import os
import time import time
from typing import List, Optional, Union from typing import List, Optional, Union
@@ -91,6 +92,11 @@ async def download_with_retries(
token_bucket: AnyTokenBucket, token_bucket: AnyTokenBucket,
): ):
async with semaphore: async with semaphore:
if os.path.exists(target):
size = os.path.getsize(target)
progress.already_downloaded(task_id, size)
return
for n in range(RETRY_COUNT): for n in range(RETRY_COUNT):
try: try:
return await download(client, task_id, source, target, progress, token_bucket) return await download(client, task_id, source, target, progress, token_bucket)

View File

@@ -29,6 +29,7 @@ class Progress:
vod_count: int vod_count: int
downloaded: int = 0 downloaded: int = 0
estimated_total: Optional[int] = None estimated_total: Optional[int] = None
progress_bytes: int = 0
progress_perc: int = 0 progress_perc: int = 0
remaining_time: Optional[int] = None remaining_time: Optional[int] = None
speed: Optional[float] = None speed: Optional[float] = None
@@ -54,10 +55,22 @@ class Progress:
raise ValueError(f"Task {task_id}: cannot advance, not started") raise ValueError(f"Task {task_id}: cannot advance, not started")
self.downloaded += chunk_size self.downloaded += chunk_size
self.progress_bytes += chunk_size
self.tasks[task_id].advance(chunk_size) self.tasks[task_id].advance(chunk_size)
self._calculate_progress() self._calculate_progress()
self.print() self.print()
def already_downloaded(self, task_id: int, size: int):
    """Register a task whose target file already exists, so it is skipped.

    The file's size is counted towards ``progress_bytes`` and the finished
    VOD counter, but not towards ``downloaded``, which feeds the transfer
    speed calculation and must only reflect bytes fetched in this run.

    Args:
        task_id: Identifier of the task being marked as done.
        size: Size in bytes of the file already present on disk.

    Raises:
        ValueError: If a task with this id has already been registered.
    """
    if task_id in self.tasks:
        raise ValueError(f"Task {task_id}: cannot mark as downloaded, already started")

    task = Task(task_id, size)
    # `abort` rebuilds progress_bytes from the per-task `downloaded` values,
    # so the bytes must be recorded on the task as well; otherwise aborting
    # any other task would silently drop this file from the progress total.
    # NOTE(review): assumes Task.advance(n) adds n to task.downloaded, as its
    # use in `advance` suggests - confirm against the Task definition.
    task.advance(size)
    self.tasks[task_id] = task

    self.progress_bytes += size
    self.vod_downloaded_count += 1
    self._calculate_total()
    self._calculate_progress()
    self.print()
def abort(self, task_id: int): def abort(self, task_id: int):
logger.debug(f"#{task_id} abort") logger.debug(f"#{task_id} abort")
@@ -65,7 +78,7 @@ class Progress:
raise ValueError(f"Task {task_id}: cannot abort, not started") raise ValueError(f"Task {task_id}: cannot abort, not started")
del self.tasks[task_id] del self.tasks[task_id]
self.downloaded = sum(t.downloaded for t in self.tasks.values()) self.progress_bytes = sum(t.downloaded for t in self.tasks.values())
self._calculate_total() self._calculate_total()
self._calculate_progress() self._calculate_progress()
@@ -89,15 +102,15 @@ class Progress:
def _calculate_progress(self): def _calculate_progress(self):
elapsed_time = time.time() - self.start_time elapsed_time = time.time() - self.start_time
self.progress_perc = int(100 * self.downloaded / self.estimated_total) if self.estimated_total else 0 self.progress_perc = int(100 * self.progress_bytes / self.estimated_total) if self.estimated_total else 0
self.speed = self.downloaded / elapsed_time if elapsed_time else None self.speed = self.downloaded / elapsed_time if elapsed_time else None
self.remaining_time = int((self.estimated_total - self.downloaded) / self.speed) if self.estimated_total and self.speed else None self.remaining_time = int((self.estimated_total - self.progress_bytes) / self.speed) if self.estimated_total and self.speed else None
def print(self): def print(self):
progress = " ".join([ progress = " ".join([
f"Downloaded {self.vod_downloaded_count}/{self.vod_count} VODs", f"Downloaded {self.vod_downloaded_count}/{self.vod_count} VODs",
f"({self.progress_perc}%)", f"({self.progress_perc}%)",
f"<cyan>{format_size(self.downloaded)}</cyan>", f"<cyan>{format_size(self.progress_bytes)}</cyan>",
f"of <cyan>~{format_size(self.estimated_total)}</cyan>" if self.estimated_total else "", f"of <cyan>~{format_size(self.estimated_total)}</cyan>" if self.estimated_total else "",
f"at <cyan>{format_size(self.speed)}/s</cyan>" if self.speed else "", f"at <cyan>{format_size(self.speed)}/s</cyan>" if self.speed else "",
f"remaining <cyan>~{format_duration(self.remaining_time)}</cyan>" if self.remaining_time is not None else "", f"remaining <cyan>~{format_duration(self.remaining_time)}</cyan>" if self.remaining_time is not None else "",