Initial Commit

Andy
2025-07-18 00:46:05 +00:00
commit d37014f53f
94 changed files with 17458 additions and 0 deletions

View File (unshackle/core/downloaders/__init__.py)

@@ -0,0 +1,6 @@
from .aria2c import aria2c
from .curl_impersonate import curl_impersonate
from .n_m3u8dl_re import n_m3u8dl_re
from .requests import requests
__all__ = ("aria2c", "curl_impersonate", "requests", "n_m3u8dl_re")

View File (unshackle/core/downloaders/aria2c.py)

@@ -0,0 +1,331 @@
import os
import subprocess
import textwrap
import time
from functools import partial
from http.cookiejar import CookieJar
from pathlib import Path
from typing import Any, Callable, Generator, MutableMapping, Optional, Union
from urllib.parse import urlparse
import requests
from Crypto.Random import get_random_bytes
from requests import Session
from requests.cookies import cookiejar_from_dict, get_cookie_header
from rich import filesize
from rich.text import Text
from unshackle.core import binaries
from unshackle.core.config import config
from unshackle.core.console import console
from unshackle.core.constants import DOWNLOAD_CANCELLED
from unshackle.core.utilities import get_extension, get_free_port
def rpc(caller: Callable, secret: str, method: str, params: Optional[list[Any]] = None) -> Any:
"""Make a call to Aria2's JSON-RPC API."""
try:
rpc_res = caller(
json={
"jsonrpc": "2.0",
"id": get_random_bytes(16).hex(),
"method": method,
"params": [f"token:{secret}", *(params or [])],
}
).json()
if rpc_res.get("code"):
# wrap to console width - padding - '[Aria2c]: '
error_pretty = "\n ".join(
textwrap.wrap(
f"RPC Error: {rpc_res['message']} ({rpc_res['code']})".strip(),
width=console.width - 20,
initial_indent="",
)
)
console.log(Text.from_ansi("\n[Aria2c]: " + error_pretty))
return rpc_res["result"]
except requests.exceptions.ConnectionError:
# absorb; the aria2c process likely ended while this RPC call was in flight
return
def download(
urls: Union[str, list[str], dict[str, Any], list[dict[str, Any]]],
output_dir: Path,
filename: str,
headers: Optional[MutableMapping[str, Union[str, bytes]]] = None,
cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None,
) -> Generator[dict[str, Any], None, None]:
if not urls:
raise ValueError("urls must be provided and not empty")
elif not isinstance(urls, (str, dict, list)):
raise TypeError(f"Expected urls to be {str} or {dict} or a list of one of them, not {type(urls)}")
if not output_dir:
raise ValueError("output_dir must be provided")
elif not isinstance(output_dir, Path):
raise TypeError(f"Expected output_dir to be {Path}, not {type(output_dir)}")
if not filename:
raise ValueError("filename must be provided")
elif not isinstance(filename, str):
raise TypeError(f"Expected filename to be {str}, not {type(filename)}")
if not isinstance(headers, (MutableMapping, type(None))):
raise TypeError(f"Expected headers to be {MutableMapping}, not {type(headers)}")
if not isinstance(cookies, (MutableMapping, CookieJar, type(None))):
raise TypeError(f"Expected cookies to be {MutableMapping} or {CookieJar}, not {type(cookies)}")
if not isinstance(proxy, (str, type(None))):
raise TypeError(f"Expected proxy to be {str}, not {type(proxy)}")
if not max_workers:
max_workers = min(32, (os.cpu_count() or 1) + 4)
elif not isinstance(max_workers, int):
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
if not isinstance(urls, list):
urls = [urls]
if not binaries.Aria2:
raise EnvironmentError("Aria2c executable not found...")
if proxy and not proxy.lower().startswith("http://"):
raise ValueError("Only HTTP proxies are supported by aria2(c)")
if cookies and not isinstance(cookies, CookieJar):
cookies = cookiejar_from_dict(cookies)
url_files = []
for i, url in enumerate(urls):
if isinstance(url, str):
url_data = {"url": url}
else:
url_data: dict[str, Any] = url
url_filename = filename.format(i=i, ext=get_extension(url_data["url"]))
url_text = url_data["url"]
url_text += f"\n\tdir={output_dir}"
url_text += f"\n\tout={url_filename}"
if cookies:
mock_request = requests.Request(url=url_data["url"])
cookie_header = get_cookie_header(cookies, mock_request)
if cookie_header:
url_text += f"\n\theader=Cookie: {cookie_header}"
for key, value in url_data.items():
if key == "url":
continue
if key == "headers":
for header_name, header_value in value.items():
url_text += f"\n\theader={header_name}: {header_value}"
else:
url_text += f"\n\t{key}={value}"
url_files.append(url_text)
url_file = "\n".join(url_files)
rpc_port = get_free_port()
rpc_secret = get_random_bytes(16).hex()
rpc_uri = f"http://127.0.0.1:{rpc_port}/jsonrpc"
rpc_session = Session()
max_concurrent_downloads = int(config.aria2c.get("max_concurrent_downloads", max_workers))
max_connection_per_server = int(config.aria2c.get("max_connection_per_server", 1))
split = int(config.aria2c.get("split", 5))
file_allocation = config.aria2c.get("file_allocation", "prealloc")
if len(urls) > 1:
split = 1
file_allocation = "none"
arguments = [
# [Basic Options]
"--input-file",
"-",
"--all-proxy",
proxy or "",
"--continue=true",
# [Connection Options]
f"--max-concurrent-downloads={max_concurrent_downloads}",
f"--max-connection-per-server={max_connection_per_server}",
f"--split={split}", # each split uses their own connection
"--max-file-not-found=5", # counted towards --max-tries
"--max-tries=5",
"--retry-wait=2",
# [Advanced Options]
"--allow-overwrite=true",
"--auto-file-renaming=false",
"--console-log-level=warn",
"--download-result=default",
f"--file-allocation={file_allocation}",
"--summary-interval=0",
# [RPC Options]
"--enable-rpc=true",
f"--rpc-listen-port={rpc_port}",
f"--rpc-secret={rpc_secret}",
]
for header, value in (headers or {}).items():
if header.lower() == "cookie":
raise ValueError("You cannot set Cookies as a header manually, please use the `cookies` param.")
if header.lower() == "accept-encoding":
# we cannot set an allowed encoding, or it will return compressed
# and the code is not set up to uncompress the data
continue
if header.lower() == "referer":
arguments.extend(["--referer", value])
continue
if header.lower() == "user-agent":
arguments.extend(["--user-agent", value])
continue
arguments.extend(["--header", f"{header}: {value}"])
yield dict(total=len(urls))
try:
p = subprocess.Popen([binaries.Aria2, *arguments], stdin=subprocess.PIPE, stdout=subprocess.DEVNULL)
p.stdin.write(url_file.encode())
p.stdin.close()
while p.poll() is None:
global_stats: dict[str, Any] = (
rpc(caller=partial(rpc_session.post, url=rpc_uri), secret=rpc_secret, method="aria2.getGlobalStat")
or {}
)
number_stopped = int(global_stats.get("numStoppedTotal", 0))
download_speed = int(global_stats.get("downloadSpeed", -1))
if number_stopped:
yield dict(completed=number_stopped)
if download_speed != -1:
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
stopped_downloads: list[dict[str, Any]] = (
rpc(
caller=partial(rpc_session.post, url=rpc_uri),
secret=rpc_secret,
method="aria2.tellStopped",
params=[0, 999999],
)
or []
)
for dl in stopped_downloads:
if dl["status"] == "error":
used_uri = next(
uri["uri"]
for file in dl["files"]
if file["selected"] == "true"
for uri in file["uris"]
if uri["status"] == "used"
)
error = f"Download Error (#{dl['gid']}): {dl['errorMessage']} ({dl['errorCode']}), {used_uri}"
error_pretty = "\n ".join(
textwrap.wrap(error, width=console.width - 20, initial_indent="")
)
console.log(Text.from_ansi("\n[Aria2c]: " + error_pretty))
raise ValueError(error)
if number_stopped == len(urls):
rpc(caller=partial(rpc_session.post, url=rpc_uri), secret=rpc_secret, method="aria2.shutdown")
break
time.sleep(1)
p.wait()
if p.returncode != 0:
raise subprocess.CalledProcessError(p.returncode, arguments)
except ConnectionResetError:
# interrupted while passing URI to download
raise KeyboardInterrupt()
except subprocess.CalledProcessError as e:
if e.returncode in (7, 0xC000013A):
# 7 is when Aria2(c) handled the CTRL+C
# 0xC000013A is when it never got the chance to
raise KeyboardInterrupt()
raise
except KeyboardInterrupt:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[yellow]CANCELLED")
raise
except Exception:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[red]FAILED")
raise
finally:
rpc(caller=partial(rpc_session.post, url=rpc_uri), secret=rpc_secret, method="aria2.shutdown")
def aria2c(
urls: Union[str, list[str], dict[str, Any], list[dict[str, Any]]],
output_dir: Path,
filename: str,
headers: Optional[MutableMapping[str, Union[str, bytes]]] = None,
cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None,
) -> Generator[dict[str, Any], None, None]:
"""
Download files using Aria2(c).
https://aria2.github.io
Yields the following download status updates while chunks are downloading:
- {total: 100} (100% download total)
- {completed: 1} (1% download progress out of 100%)
- {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s)
The data is in the same format accepted by rich's progress.update() function.
Parameters:
urls: Web URL(s) to file(s) to download. You can use a dictionary with the key
"url" for the URI, and other keys for extra arguments to use per-URL.
output_dir: The folder to save the file into. If the save path's directory does
not exist then it will be made automatically.
filename: The filename or filename template to use for each file. The variables
you can use are `i` for the URL index and `ext` for the URL extension.
headers: A mapping of HTTP Header Key/Values to use for all downloads.
cookies: A mapping of Cookie Key/Values or a Cookie Jar to use for all downloads.
proxy: An optional proxy URI to route connections through for all downloads.
max_workers: The maximum number of threads to use for downloads. Defaults to
min(32, cpu_count + 4). Used for the --max-concurrent-downloads option.
"""
if proxy and not proxy.lower().startswith("http://"):
# aria2(c) only supports HTTP proxies, so relay the given proxy through a local HTTP proxy (pproxy)
proxy = urlparse(proxy)
port = get_free_port()
username, password = get_random_bytes(8).hex(), get_random_bytes(8).hex()
local_proxy = f"http://{username}:{password}@localhost:{port}"
scheme = {"https": "http+ssl", "socks5h": "socks"}.get(proxy.scheme, proxy.scheme)
remote_server = f"{scheme}://{proxy.hostname}"
if proxy.port:
remote_server += f":{proxy.port}"
if proxy.username or proxy.password:
remote_server += "#"
if proxy.username:
remote_server += proxy.username
if proxy.password:
remote_server += f":{proxy.password}"
p = subprocess.Popen(
["pproxy", "-l", f"http://:{port}#{username}:{password}", "-r", remote_server],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
try:
yield from download(urls, output_dir, filename, headers, cookies, local_proxy, max_workers)
finally:
p.kill()
p.wait()
return
yield from download(urls, output_dir, filename, headers, cookies, proxy, max_workers)
__all__ = ("aria2c",)

View File (unshackle/core/downloaders/curl_impersonate.py)

@@ -0,0 +1,259 @@
import math
import time
from concurrent import futures
from concurrent.futures.thread import ThreadPoolExecutor
from http.cookiejar import CookieJar
from pathlib import Path
from typing import Any, Generator, MutableMapping, Optional, Union
from curl_cffi.requests import Session
from rich import filesize
from unshackle.core.config import config
from unshackle.core.constants import DOWNLOAD_CANCELLED
from unshackle.core.utilities import get_extension
MAX_ATTEMPTS = 5
RETRY_WAIT = 2
CHUNK_SIZE = 1024
PROGRESS_WINDOW = 5
BROWSER = config.curl_impersonate.get("browser", "chrome124")
def download(url: str, save_path: Path, session: Session, **kwargs: Any) -> Generator[dict[str, Any], None, None]:
"""
Download files using Curl Impersonate.
https://github.com/lwthiker/curl-impersonate
Yields the following download status updates while chunks are downloading:
- {total: 123} (there are 123 chunks to download)
- {total: None} (there are an unknown number of chunks to download)
- {advance: 1} (one chunk was downloaded)
- {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s)
- {file_downloaded: Path(...), written: 1024} (download finished, has the save path and size)
The data is in the same format accepted by rich's progress.update() function. The
`downloaded` key is custom and is not natively accepted by all rich progress bars.
Parameters:
url: Web URL of a file to download.
save_path: The path to save the file to. If the save path's directory does not
exist then it will be made automatically.
session: The Curl-Impersonate (curl_cffi) Session to make HTTP requests with.
Useful to set Header, Cookie, and Proxy data. Connections are saved and
re-used with the session so long as the server keeps the connection alive.
kwargs: Any extra keyword arguments to pass to the session.get() call. Use this
for one-time request changes like a header, cookie, or proxy. For example,
to request Byte-ranges use e.g., `headers={"Range": "bytes=0-128"}`.
"""
save_dir = save_path.parent
control_file = save_path.with_name(f"{save_path.name}.!dev")
save_dir.mkdir(parents=True, exist_ok=True)
if control_file.exists():
# consider the file corrupt if the control file exists
save_path.unlink(missing_ok=True)
control_file.unlink()
elif save_path.exists():
# if it exists, and no control file, then it should be safe
yield dict(file_downloaded=save_path, written=save_path.stat().st_size)
# TODO: Design a control file format so we know how much of the file is missing
control_file.write_bytes(b"")
attempts = 1
try:
while True:
written = 0
download_sizes = []
last_speed_refresh = time.time()
try:
stream = session.get(url, stream=True, **kwargs)
stream.raise_for_status()
try:
content_length = int(stream.headers.get("Content-Length", "0"))
except ValueError:
content_length = 0
if content_length > 0:
yield dict(total=math.ceil(content_length / CHUNK_SIZE))
else:
# we have no data to calculate total chunks
yield dict(total=None) # indeterminate mode
with open(save_path, "wb") as f:
for chunk in stream.iter_content(chunk_size=CHUNK_SIZE):
download_size = len(chunk)
f.write(chunk)
written += download_size
yield dict(advance=1)
now = time.time()
time_since = now - last_speed_refresh
download_sizes.append(download_size)
if time_since > PROGRESS_WINDOW or download_size < CHUNK_SIZE:
data_size = sum(download_sizes)
download_speed = math.ceil(data_size / (time_since or 1))
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
last_speed_refresh = now
download_sizes.clear()
if content_length and written < content_length:
raise IOError(f"Failed to read {content_length} bytes from the track URI.")
yield dict(file_downloaded=save_path, written=written)
break
except Exception as e:
save_path.unlink(missing_ok=True)
if DOWNLOAD_CANCELLED.is_set() or attempts == MAX_ATTEMPTS:
raise e
time.sleep(RETRY_WAIT)
attempts += 1
finally:
control_file.unlink()
def curl_impersonate(
urls: Union[str, list[str], dict[str, Any], list[dict[str, Any]]],
output_dir: Path,
filename: str,
headers: Optional[MutableMapping[str, Union[str, bytes]]] = None,
cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None,
) -> Generator[dict[str, Any], None, None]:
"""
Download files using Curl Impersonate.
https://github.com/lwthiker/curl-impersonate
Yields the following download status updates while chunks are downloading:
- {total: 123} (there are 123 chunks to download)
- {total: None} (there are an unknown number of chunks to download)
- {advance: 1} (one chunk was downloaded)
- {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s)
- {file_downloaded: Path(...), written: 1024} (download finished, has the save path and size)
The data is in the same format accepted by rich's progress.update() function.
However, the `downloaded`, `file_downloaded` and `written` keys are custom and not
natively accepted by rich progress bars.
Parameters:
urls: Web URL(s) to file(s) to download. You can use a dictionary with the key
"url" for the URI, and other keys for extra arguments to use per-URL.
output_dir: The folder to save the file into. If the save path's directory does
not exist then it will be made automatically.
filename: The filename or filename template to use for each file. The variables
you can use are `i` for the URL index and `ext` for the URL extension.
headers: A mapping of HTTP Header Key/Values to use for all downloads.
cookies: A mapping of Cookie Key/Values or a Cookie Jar to use for all downloads.
proxy: An optional proxy URI to route connections through for all downloads.
max_workers: The maximum number of threads to use for downloads. Defaults to
min(32, cpu_count + 4).
"""
if not urls:
raise ValueError("urls must be provided and not empty")
elif not isinstance(urls, (str, dict, list)):
raise TypeError(f"Expected urls to be {str} or {dict} or a list of one of them, not {type(urls)}")
if not output_dir:
raise ValueError("output_dir must be provided")
elif not isinstance(output_dir, Path):
raise TypeError(f"Expected output_dir to be {Path}, not {type(output_dir)}")
if not filename:
raise ValueError("filename must be provided")
elif not isinstance(filename, str):
raise TypeError(f"Expected filename to be {str}, not {type(filename)}")
if not isinstance(headers, (MutableMapping, type(None))):
raise TypeError(f"Expected headers to be {MutableMapping}, not {type(headers)}")
if not isinstance(cookies, (MutableMapping, CookieJar, type(None))):
raise TypeError(f"Expected cookies to be {MutableMapping} or {CookieJar}, not {type(cookies)}")
if not isinstance(proxy, (str, type(None))):
raise TypeError(f"Expected proxy to be {str}, not {type(proxy)}")
if not isinstance(max_workers, (int, type(None))):
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
if not isinstance(urls, list):
urls = [urls]
urls = [
dict(save_path=save_path, **url) if isinstance(url, dict) else dict(url=url, save_path=save_path)
for i, url in enumerate(urls)
for save_path in [
output_dir / filename.format(i=i, ext=get_extension(url["url"] if isinstance(url, dict) else url))
]
]
session = Session(impersonate=BROWSER)
if headers:
headers = {k: v for k, v in headers.items() if k.lower() != "accept-encoding"}
session.headers.update(headers)
if cookies:
session.cookies.update(cookies)
if proxy:
session.proxies.update({"all": proxy})
yield dict(total=len(urls))
download_sizes = []
last_speed_refresh = time.time()
with ThreadPoolExecutor(max_workers=max_workers) as pool:
for i, future in enumerate(
futures.as_completed((pool.submit(download, session=session, **url) for url in urls))
):
file_path, download_size = None, None
try:
for status_update in future.result():
if status_update.get("file_downloaded") and status_update.get("written"):
file_path = status_update["file_downloaded"]
download_size = status_update["written"]
elif len(urls) == 1:
# these are per-chunk updates, only useful if it's one big file
yield status_update
except KeyboardInterrupt:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[yellow]CANCELLING")
pool.shutdown(wait=True, cancel_futures=True)
yield dict(downloaded="[yellow]CANCELLED")
# tell dl that it was cancelled
# the pool is already shut down, so exiting loop is fine
raise
except Exception:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[red]FAILING")
pool.shutdown(wait=True, cancel_futures=True)
yield dict(downloaded="[red]FAILED")
# tell dl that it failed
# the pool is already shut down, so exiting loop is fine
raise
else:
yield dict(file_downloaded=file_path)
yield dict(advance=1)
now = time.time()
time_since = now - last_speed_refresh
if download_size: # no size == skipped dl
download_sizes.append(download_size)
if download_sizes and (time_since > PROGRESS_WINDOW or i == len(urls) - 1):  # last completed future
data_size = sum(download_sizes)
download_speed = math.ceil(data_size / (time_since or 1))
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
last_speed_refresh = now
download_sizes.clear()
__all__ = ("curl_impersonate",)

View File (unshackle/core/downloaders/n_m3u8dl_re.py)

@@ -0,0 +1,299 @@
import logging
import os
import re
import subprocess
import warnings
from http.cookiejar import CookieJar
from itertools import chain
from pathlib import Path
from typing import Any, Generator, MutableMapping, Optional, Union
import requests
from requests.cookies import cookiejar_from_dict, get_cookie_header
from unshackle.core import binaries
from unshackle.core.config import config
from unshackle.core.console import console
from unshackle.core.constants import DOWNLOAD_CANCELLED
# Ignore FutureWarnings
warnings.simplefilter(action="ignore", category=FutureWarning)
AUDIO_CODEC_MAP = {"AAC": "mp4a", "AC3": "ac-3", "EC3": "ec-3"}
VIDEO_CODEC_MAP = {"AVC": "avc", "HEVC": "hvc", "DV": "dvh", "HLG": "hev"}
def track_selection(track: object) -> list[str]:
"""Return the N_m3u8DL-RE stream selection arguments for a track."""
if "dash" in track.data:
adaptation_set = track.data["dash"]["adaptation_set"]
representation = track.data["dash"]["representation"]
track_type = track.__class__.__name__
codec = track.codec.name
bitrate = track.bitrate // 1000
language = track.language
width = track.width if track_type == "Video" else None
height = track.height if track_type == "Video" else None
range = track.range.name if track_type == "Video" else None
elif "ism" in track.data:
stream_index = track.data["ism"]["stream_index"]
quality_level = track.data["ism"]["quality_level"]
track_type = track.__class__.__name__
codec = track.codec.name
bitrate = track.bitrate // 1000
language = track.language
width = track.width if track_type == "Video" else None
height = track.height if track_type == "Video" else None
range = track.range.name if track_type == "Video" else None
adaptation_set = stream_index
representation = quality_level
else:
return []
if track_type == "Audio":
codecs = AUDIO_CODEC_MAP.get(codec)
langs = adaptation_set.findall("lang") + representation.findall("lang")
track_ids = list(
set(
v
for x in chain(adaptation_set, representation)
for v in (x.get("audioTrackId"), x.get("id"))
if v is not None
)
)
roles = adaptation_set.findall("Role") + representation.findall("Role")
role = ":role=main" if next((i for i in roles if i.get("value").lower() == "main"), None) else ""
bandwidth = f"bwMin={bitrate}:bwMax={bitrate + 5}"
if langs:
track_selection = ["-sa", f"lang={language}:codecs={codecs}:{bandwidth}{role}"]
elif len(track_ids) == 1:
track_selection = ["-sa", f"id={track_ids[0]}"]
else:
track_selection = ["-sa", f"for=best{role}"]
return track_selection
if track_type == "Video":
# adjust codec based on range
codec_adjustments = {("HEVC", "DV"): "DV", ("HEVC", "HLG"): "HLG"}
codec = codec_adjustments.get((codec, range), codec)
codecs = VIDEO_CODEC_MAP.get(codec)
bandwidth = f"bwMin={bitrate}:bwMax={bitrate + 5}"
if width and height:
resolution = f"{width}x{height}"
elif width:
resolution = f"{width}*"
else:
resolution = "for=best"
if resolution.startswith("for="):
track_selection = ["-sv", resolution]
track_selection.append(f"codecs={codecs}:{bandwidth}")
else:
track_selection = ["-sv", f"res={resolution}:codecs={codecs}:{bandwidth}"]
return track_selection
def download(
urls: Union[str, dict[str, Any], list[str], list[dict[str, Any]]],
track: object,
output_dir: Path,
filename: str,
headers: Optional[MutableMapping[str, Union[str, bytes]]] = None,
cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None,
content_keys: Optional[dict[str, Any]] = None,
) -> Generator[dict[str, Any], None, None]:
if not urls:
raise ValueError("urls must be provided and not empty")
elif not isinstance(urls, (str, dict, list)):
raise TypeError(f"Expected urls to be {str} or {dict} or a list of one of them, not {type(urls)}")
if not output_dir:
raise ValueError("output_dir must be provided")
elif not isinstance(output_dir, Path):
raise TypeError(f"Expected output_dir to be {Path}, not {type(output_dir)}")
if not filename:
raise ValueError("filename must be provided")
elif not isinstance(filename, str):
raise TypeError(f"Expected filename to be {str}, not {type(filename)}")
if not isinstance(headers, (MutableMapping, type(None))):
raise TypeError(f"Expected headers to be {MutableMapping}, not {type(headers)}")
if not isinstance(cookies, (MutableMapping, CookieJar, type(None))):
raise TypeError(f"Expected cookies to be {MutableMapping} or {CookieJar}, not {type(cookies)}")
if not isinstance(proxy, (str, type(None))):
raise TypeError(f"Expected proxy to be {str}, not {type(proxy)}")
if not max_workers:
max_workers = min(32, (os.cpu_count() or 1) + 4)
elif not isinstance(max_workers, int):
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
if not isinstance(urls, list):
urls = [urls]
if not binaries.N_m3u8DL_RE:
raise EnvironmentError("N_m3u8DL-RE executable not found...")
if cookies and not isinstance(cookies, CookieJar):
cookies = cookiejar_from_dict(cookies)
track_type = track.__class__.__name__
thread_count = str(config.n_m3u8dl_re.get("thread_count", max_workers))
ad_keyword = config.n_m3u8dl_re.get("ad_keyword")
arguments = [
track.url,
"--save-dir",
output_dir,
"--tmp-dir",
output_dir,
"--thread-count",
thread_count,
"--no-log",
"--write-meta-json",
"false",
]
for header, value in (headers or {}).items():
if header.lower() in ("accept-encoding", "cookie"):
continue
arguments.extend(["--header", f"{header}: {value}"])
if cookies:
cookie_header = get_cookie_header(cookies, requests.Request(url=track.url))
if cookie_header:
arguments.extend(["--header", f"Cookie: {cookie_header}"])
if proxy:
arguments.extend(["--custom-proxy", proxy])
if content_keys:
for kid, key in content_keys.items():
keys = f"{kid.hex}:{key.lower()}"
arguments.extend(["--key", keys])
arguments.extend(["--use-shaka-packager"])
if ad_keyword:
arguments.extend(["--ad-keyword", ad_keyword])
if track.descriptor.name == "URL":
error = f"[N_m3u8DL-RE]: {track.descriptor} is currently not supported"
raise ValueError(error)
elif track.descriptor.name == "DASH":
arguments.extend(track_selection(track))
# TODO: improve this nonsense
percent_re = re.compile(r"(\d+\.\d+%)")
speed_re = re.compile(r"(?<!/)(\d+\.\d+MB)(?!.*\/)")
warn = re.compile(r"(WARN : Response.*)")
error = re.compile(r"(ERROR.*)")
size_patterns = [
re.compile(r"(\d+\.\d+MB/\d+\.\d+GB)"),
re.compile(r"(\d+\.\d+GB/\d+\.\d+GB)"),
re.compile(r"(\d+\.\d+MB/\d+\.\d+MB)"),
]
yield dict(total=100)
try:
with subprocess.Popen(
[binaries.N_m3u8DL_RE, *arguments], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
) as p:
for line in p.stdout:
output = line.strip()
if output:
percent = percent_re.search(output)
speed = speed_re.search(output)
size = next(
(pattern.search(output).group(1) for pattern in size_patterns if pattern.search(output)), ""
)
if speed:
yield dict(downloaded=f"{speed.group(1)}ps {size}")
if percent:
progress = int(percent.group(1).split(".")[0])
yield dict(completed=progress) if progress < 100 else dict(downloaded="Merging")
if warn.search(output):
console.log(f"{track_type} " + warn.search(output).group(1))
p.wait()
if p.returncode != 0:
if error.search(output):
raise ValueError(f"[N_m3u8DL-RE]: {error.search(output).group(1)}")
raise subprocess.CalledProcessError(p.returncode, arguments)
except ConnectionResetError:
# interrupted while passing URI to download
raise KeyboardInterrupt()
except KeyboardInterrupt:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[yellow]CANCELLED")
raise
except Exception:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[red]FAILED")
raise
def n_m3u8dl_re(
urls: Union[str, list[str], dict[str, Any], list[dict[str, Any]]],
track: object,
output_dir: Path,
filename: str,
headers: Optional[MutableMapping[str, Union[str, bytes]]] = None,
cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None,
content_keys: Optional[dict[str, Any]] = None,
) -> Generator[dict[str, Any], None, None]:
"""
Download files using N_m3u8DL-RE.
https://github.com/nilaoda/N_m3u8DL-RE
Yields the following download status updates while chunks are downloading:
- {total: 100} (100% download total)
- {completed: 1} (1% download progress out of 100%)
- {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s)
The data is in the same format accepted by rich's progress.update() function.
Parameters:
urls: Web URL(s) to file(s) to download. You can use a dictionary with the key
"url" for the URI, and other keys for extra arguments to use per-URL.
track: The track to download. Used to get track attributes for the selection
process. Note that Track.Descriptor.URL is not supported by N_m3u8DL-RE.
output_dir: The folder to save the file into. If the save path's directory does
not exist then it will be made automatically.
filename: The filename or filename template to use for each file. The variables
you can use are `i` for the URL index and `ext` for the URL extension.
headers: A mapping of HTTP Header Key/Values to use for the download.
cookies: A mapping of Cookie Key/Values or a Cookie Jar to use for the download.
proxy: An optional proxy URI to route connections through for the download.
max_workers: The maximum number of threads to use for downloads. Defaults to
min(32, cpu_count + 4). Passed as the --thread-count option unless overridden
by the thread_count config value.
content_keys: The content keys to use for decryption.
"""
track_type = track.__class__.__name__
log = logging.getLogger("N_m3u8DL-RE")
if proxy and not config.n_m3u8dl_re.get("use_proxy", True):
log.warning(f"{track_type}: Ignoring proxy as N_m3u8DL-RE is set to use_proxy=False")
proxy = None
yield from download(urls, track, output_dir, filename, headers, cookies, proxy, max_workers, content_keys)
__all__ = ("n_m3u8dl_re",)

View File (unshackle/core/downloaders/requests.py)

@@ -0,0 +1,266 @@
import math
import os
import time
from concurrent.futures import as_completed
from concurrent.futures.thread import ThreadPoolExecutor
from http.cookiejar import CookieJar
from pathlib import Path
from typing import Any, Generator, MutableMapping, Optional, Union
from requests import Session
from requests.adapters import HTTPAdapter
from rich import filesize
from unshackle.core.constants import DOWNLOAD_CANCELLED
from unshackle.core.utilities import get_extension
MAX_ATTEMPTS = 5
RETRY_WAIT = 2
CHUNK_SIZE = 1024
PROGRESS_WINDOW = 5
DOWNLOAD_SIZES = []
LAST_SPEED_REFRESH = time.time()
def download(
url: str, save_path: Path, session: Optional[Session] = None, segmented: bool = False, **kwargs: Any
) -> Generator[dict[str, Any], None, None]:
"""
Download a file using Python Requests.
https://requests.readthedocs.io
Yields the following download status updates while chunks are downloading:
- {total: 123} (there are 123 chunks to download)
- {total: None} (there are an unknown number of chunks to download)
- {advance: 1} (one chunk was downloaded)
- {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s)
- {file_downloaded: Path(...), written: 1024} (download finished, has the save path and size)
The data is in the same format accepted by rich's progress.update() function. The
`downloaded` key is custom and is not natively accepted by all rich progress bars.
Parameters:
url: Web URL of a file to download.
save_path: The path to save the file to. If the save path's directory does not
exist then it will be made automatically.
session: The Requests Session to make HTTP requests with. Useful to set Header,
Cookie, and Proxy data. Connections are saved and re-used with the session
so long as the server keeps the connection alive.
segmented: Whether the download is a segment or part of one larger file.
kwargs: Any extra keyword arguments to pass to the session.get() call. Use this
for one-time request changes like a header, cookie, or proxy. For example,
to request Byte-ranges use e.g., `headers={"Range": "bytes=0-128"}`.
"""
global LAST_SPEED_REFRESH
session = session or Session()
save_dir = save_path.parent
control_file = save_path.with_name(f"{save_path.name}.!dev")
save_dir.mkdir(parents=True, exist_ok=True)
if control_file.exists():
# consider the file corrupt if the control file exists
save_path.unlink(missing_ok=True)
control_file.unlink()
elif save_path.exists():
# if it exists, and no control file, then it should be safe
yield dict(file_downloaded=save_path, written=save_path.stat().st_size)
# TODO: This should return, potential recovery bug
# TODO: Design a control file format so we know how much of the file is missing
control_file.write_bytes(b"")
attempts = 1
try:
while True:
written = 0
# these are for single-url speed calcs only
download_sizes = []
last_speed_refresh = time.time()
try:
stream = session.get(url, stream=True, **kwargs)
stream.raise_for_status()
if not segmented:
try:
content_length = int(stream.headers.get("Content-Length", "0"))
except ValueError:
content_length = 0
if content_length > 0:
yield dict(total=math.ceil(content_length / CHUNK_SIZE))
else:
# we have no data to calculate total chunks
yield dict(total=None) # indeterminate mode
with open(save_path, "wb") as f:
for chunk in stream.iter_content(chunk_size=CHUNK_SIZE):
download_size = len(chunk)
f.write(chunk)
written += download_size
if not segmented:
yield dict(advance=1)
now = time.time()
time_since = now - last_speed_refresh
download_sizes.append(download_size)
if time_since > PROGRESS_WINDOW or download_size < CHUNK_SIZE:
data_size = sum(download_sizes)
download_speed = math.ceil(data_size / (time_since or 1))
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
last_speed_refresh = now
download_sizes.clear()
if content_length and written < content_length:
raise IOError(f"Failed to read {content_length} bytes from the track URI.")
yield dict(file_downloaded=save_path, written=written)
if segmented:
yield dict(advance=1)
now = time.time()
time_since = now - LAST_SPEED_REFRESH
if written: # no size == skipped dl
DOWNLOAD_SIZES.append(written)
if DOWNLOAD_SIZES and time_since > PROGRESS_WINDOW:
data_size = sum(DOWNLOAD_SIZES)
download_speed = math.ceil(data_size / (time_since or 1))
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
LAST_SPEED_REFRESH = now
DOWNLOAD_SIZES.clear()
break
except Exception as e:
save_path.unlink(missing_ok=True)
if DOWNLOAD_CANCELLED.is_set() or attempts == MAX_ATTEMPTS:
raise e
time.sleep(RETRY_WAIT)
attempts += 1
finally:
control_file.unlink()
def requests(
urls: Union[str, list[str], dict[str, Any], list[dict[str, Any]]],
output_dir: Path,
filename: str,
headers: Optional[MutableMapping[str, Union[str, bytes]]] = None,
cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None,
) -> Generator[dict[str, Any], None, None]:
"""
Download a file using Python Requests.
https://requests.readthedocs.io
Yields the following download status updates while chunks are downloading:
- {total: 123} (there are 123 chunks to download)
- {total: None} (there are an unknown number of chunks to download)
- {advance: 1} (one chunk was downloaded)
- {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s)
- {file_downloaded: Path(...), written: 1024} (download finished, has the save path and size)
The data is in the same format accepted by rich's progress.update() function.
However, the `downloaded`, `file_downloaded` and `written` keys are custom and not
natively accepted by rich progress bars.
Parameters:
urls: Web URL(s) to file(s) to download. You can use a dictionary with the key
"url" for the URI, and other keys for extra arguments to use per-URL.
output_dir: The folder to save the file into. If the save path's directory does
not exist then it will be made automatically.
filename: The filename or filename template to use for each file. The variables
you can use are `i` for the URL index and `ext` for the URL extension.
headers: A mapping of HTTP Header Key/Values to use for all downloads.
cookies: A mapping of Cookie Key/Values or a Cookie Jar to use for all downloads.
proxy: An optional proxy URI to route connections through for all downloads.
max_workers: The maximum number of threads to use for downloads. Defaults to
min(32, cpu_count + 4).
"""
if not urls:
raise ValueError("urls must be provided and not empty")
elif not isinstance(urls, (str, dict, list)):
raise TypeError(f"Expected urls to be {str} or {dict} or a list of one of them, not {type(urls)}")
if not output_dir:
raise ValueError("output_dir must be provided")
elif not isinstance(output_dir, Path):
raise TypeError(f"Expected output_dir to be {Path}, not {type(output_dir)}")
if not filename:
raise ValueError("filename must be provided")
elif not isinstance(filename, str):
raise TypeError(f"Expected filename to be {str}, not {type(filename)}")
if not isinstance(headers, (MutableMapping, type(None))):
raise TypeError(f"Expected headers to be {MutableMapping}, not {type(headers)}")
if not isinstance(cookies, (MutableMapping, CookieJar, type(None))):
raise TypeError(f"Expected cookies to be {MutableMapping} or {CookieJar}, not {type(cookies)}")
if not isinstance(proxy, (str, type(None))):
raise TypeError(f"Expected proxy to be {str}, not {type(proxy)}")
if not isinstance(max_workers, (int, type(None))):
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
if not isinstance(urls, list):
urls = [urls]
if not max_workers:
max_workers = min(32, (os.cpu_count() or 1) + 4)
urls = [
dict(save_path=save_path, **url) if isinstance(url, dict) else dict(url=url, save_path=save_path)
for i, url in enumerate(urls)
for save_path in [
output_dir / filename.format(i=i, ext=get_extension(url["url"] if isinstance(url, dict) else url))
]
]
session = Session()
session.mount("https://", HTTPAdapter(pool_connections=max_workers, pool_maxsize=max_workers, pool_block=True))
session.mount("http://", session.adapters["https://"])
if headers:
headers = {k: v for k, v in headers.items() if k.lower() != "accept-encoding"}
session.headers.update(headers)
if cookies:
session.cookies.update(cookies)
if proxy:
session.proxies.update({"all": proxy})
yield dict(total=len(urls))
try:
with ThreadPoolExecutor(max_workers=max_workers) as pool:
for future in as_completed(pool.submit(download, session=session, segmented=False, **url) for url in urls):
try:
yield from future.result()
except KeyboardInterrupt:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[yellow]CANCELLING")
pool.shutdown(wait=True, cancel_futures=True)
yield dict(downloaded="[yellow]CANCELLED")
# tell dl that it was cancelled
# the pool is already shut down, so exiting loop is fine
raise
except Exception:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[red]FAILING")
pool.shutdown(wait=True, cancel_futures=True)
yield dict(downloaded="[red]FAILED")
# tell dl that it failed
# the pool is already shut down, so exiting loop is fine
raise
finally:
DOWNLOAD_SIZES.clear()
__all__ = ("requests",)