Source code for voxcity.downloader.utils

# Utility functions for downloading files from various sources
import time
import requests
from ..utils.logging import get_logger

__all__ = ["download_file"]

_logger = get_logger(__name__)


[docs] def download_file(url, filename, *, timeout=60, max_retries=3, initial_delay=2.0, backoff_factor=2.0, chunk_size=8192): """Download a file from a URL and save it locally with retry and streaming. Uses streaming to avoid loading large files entirely into memory and retries on transient network failures with exponential backoff. Args: url (str): URL of the file to download. filename (str): Local path where the downloaded file will be saved. timeout (int): Request timeout in seconds (default 60). max_retries (int): Number of retry attempts on failure (default 3). initial_delay (float): Seconds to wait before the first retry (default 2.0). backoff_factor (float): Multiplier for delay between retries (default 2.0). chunk_size (int): Bytes per chunk when streaming (default 8192). Raises: requests.HTTPError: If download fails after all retries. Example: >>> download_file('https://example.com/file.pdf', 'local_file.pdf') """ last_error = None for attempt in range(max_retries): if attempt > 0: delay = initial_delay * (backoff_factor ** (attempt - 1)) _logger.info("Retry %d/%d: waiting %.1fs...", attempt, max_retries - 1, delay) time.sleep(delay) try: with requests.get(url, stream=True, timeout=timeout) as response: response.raise_for_status() with open(filename, 'wb') as file: for chunk in response.iter_content(chunk_size=chunk_size): file.write(chunk) _logger.info("File downloaded successfully and saved as %s", filename) return except (requests.RequestException, OSError) as exc: last_error = exc _logger.warning("Download attempt %d failed: %s", attempt + 1, exc) raise requests.HTTPError( f"Failed to download {url} after {max_retries} attempts: {last_error}" )