Merge pull request #53 from CodeName393/main

Merging after code review - fixes binary path handling
This commit is contained in:
Sp5rky
2026-01-22 13:53:23 -07:00
committed by GitHub
25 changed files with 1029 additions and 142 deletions

View File

@@ -1376,6 +1376,7 @@ class dl:
kept_tracks.extend(title.tracks.subtitles)
if keep_chapters:
kept_tracks.extend(title.tracks.chapters)
kept_tracks.extend(title.tracks.attachments)
title.tracks = Tracks(kept_tracks)
@@ -1574,7 +1575,7 @@ class dl:
if subtitle.codec == Subtitle.Codec.SubStationAlphav4:
for line in subtitle.path.read_text("utf8").splitlines():
if line.startswith("Style: "):
font_names.append(line.removesuffix("Style: ").split(",")[1])
font_names.append(line.removeprefix("Style: ").split(",")[1].strip())
font_count, missing_fonts = self.attach_subtitle_fonts(
font_names, title, temp_font_files

View File

@@ -1,3 +1,4 @@
import re
import subprocess
from pathlib import Path
@@ -8,6 +9,11 @@ from unshackle.core import binaries
from unshackle.core.constants import context_settings
def _natural_sort_key(path: Path) -> list:
"""Sort key for natural sorting (S01E01 before S01E10)."""
return [int(part) if part.isdigit() else part.lower() for part in re.split(r"(\d+)", path.name)]
@click.group(short_help="Various helper scripts and programs.", context_settings=context_settings)
def util() -> None:
"""Various helper scripts and programs."""
@@ -49,7 +55,7 @@ def crop(path: Path, aspect: str, letter: bool, offset: int, preview: bool) -> N
raise click.ClickException('FFmpeg executable "ffmpeg" not found but is required.')
if path.is_dir():
paths = list(path.glob("*.mkv")) + list(path.glob("*.mp4"))
paths = sorted(list(path.glob("*.mkv")) + list(path.glob("*.mp4")), key=_natural_sort_key)
else:
paths = [path]
for video_path in paths:
@@ -140,7 +146,7 @@ def range_(path: Path, full: bool, preview: bool) -> None:
raise click.ClickException('FFmpeg executable "ffmpeg" not found but is required.')
if path.is_dir():
paths = list(path.glob("*.mkv")) + list(path.glob("*.mp4"))
paths = sorted(list(path.glob("*.mkv")) + list(path.glob("*.mp4")), key=_natural_sort_key)
else:
paths = [path]
for video_path in paths:
@@ -225,16 +231,18 @@ def test(path: Path, map_: str) -> None:
raise click.ClickException('FFmpeg executable "ffmpeg" not found but is required.')
if path.is_dir():
paths = list(path.glob("*.mkv")) + list(path.glob("*.mp4"))
paths = sorted(list(path.glob("*.mkv")) + list(path.glob("*.mp4")), key=_natural_sort_key)
else:
paths = [path]
for video_path in paths:
print("Starting...")
print(f"Testing: {video_path.name}")
p = subprocess.Popen(
[
binaries.FFMPEG,
"-hide_banner",
"-benchmark",
"-err_detect",
"+crccheck+bitstream+buffer+careful+compliant+aggressive",
"-i",
str(video_path),
"-map",
@@ -255,13 +263,13 @@ def test(path: Path, map_: str) -> None:
reached_output = True
if not reached_output:
continue
if line.startswith("["): # error of some kind
if line.startswith("[") and not line.startswith("[out#"):
errors += 1
stream, error = line.split("] ", maxsplit=1)
stream = stream.split(" @ ")[0]
line = f"{stream} ERROR: {error}"
print(line)
p.stderr.close()
print(f"Finished with {errors} Errors, Cleaning up...")
print(f"Finished with {errors} error(s)")
p.terminate()
p.wait()

View File

@@ -1 +1 @@
__version__ = "2.0.0"
__version__ = "2.3.0"

View File

@@ -95,6 +95,7 @@ class Config:
self.update_check_interval: int = kwargs.get("update_check_interval", 24)
self.scene_naming: bool = kwargs.get("scene_naming", True)
self.series_year: bool = kwargs.get("series_year", True)
self.unicode_filenames: bool = kwargs.get("unicode_filenames", False)
self.title_cache_time: int = kwargs.get("title_cache_time", 1800) # 30 minutes default
self.title_cache_max_retention: int = kwargs.get("title_cache_max_retention", 86400) # 24 hours default

View File

@@ -19,7 +19,7 @@ from unshackle.core import binaries
from unshackle.core.config import config
from unshackle.core.console import console
from unshackle.core.constants import DOWNLOAD_CANCELLED
from unshackle.core.utilities import get_extension, get_free_port
from unshackle.core.utilities import get_debug_logger, get_extension, get_free_port
def rpc(caller: Callable, secret: str, method: str, params: Optional[list[Any]] = None) -> Any:
@@ -58,6 +58,8 @@ def download(
proxy: Optional[str] = None,
max_workers: Optional[int] = None,
) -> Generator[dict[str, Any], None, None]:
debug_logger = get_debug_logger()
if not urls:
raise ValueError("urls must be provided and not empty")
elif not isinstance(urls, (str, dict, list)):
@@ -91,6 +93,13 @@ def download(
urls = [urls]
if not binaries.Aria2:
if debug_logger:
debug_logger.log(
level="ERROR",
operation="downloader_aria2c_binary_missing",
message="Aria2c executable not found in PATH or local binaries directory",
context={"searched_names": ["aria2c", "aria2"]},
)
raise EnvironmentError("Aria2c executable not found...")
if proxy and not proxy.lower().startswith("http://"):
@@ -180,6 +189,28 @@ def download(
continue
arguments.extend(["--header", f"{header}: {value}"])
if debug_logger:
first_url = urls[0] if isinstance(urls[0], str) else urls[0].get("url", "")
url_display = first_url[:200] + "..." if len(first_url) > 200 else first_url
debug_logger.log(
level="DEBUG",
operation="downloader_aria2c_start",
message="Starting Aria2c download",
context={
"binary_path": str(binaries.Aria2),
"url_count": len(urls),
"first_url": url_display,
"output_dir": str(output_dir),
"filename": filename,
"max_concurrent_downloads": max_concurrent_downloads,
"max_connection_per_server": max_connection_per_server,
"split": split,
"file_allocation": file_allocation,
"has_proxy": bool(proxy),
"rpc_port": rpc_port,
},
)
yield dict(total=len(urls))
try:
@@ -226,6 +257,20 @@ def download(
textwrap.wrap(error, width=console.width - 20, initial_indent="")
)
console.log(Text.from_ansi("\n[Aria2c]: " + error_pretty))
if debug_logger:
debug_logger.log(
level="ERROR",
operation="downloader_aria2c_download_error",
message=f"Aria2c download failed: {dl['errorMessage']}",
context={
"gid": dl["gid"],
"error_code": dl["errorCode"],
"error_message": dl["errorMessage"],
"used_uri": used_uri[:200] + "..." if len(used_uri) > 200 else used_uri,
"completed_length": dl.get("completedLength"),
"total_length": dl.get("totalLength"),
},
)
raise ValueError(error)
if number_stopped == len(urls):
@@ -237,7 +282,31 @@ def download(
p.wait()
if p.returncode != 0:
if debug_logger:
debug_logger.log(
level="ERROR",
operation="downloader_aria2c_failed",
message=f"Aria2c exited with code {p.returncode}",
context={
"returncode": p.returncode,
"url_count": len(urls),
"output_dir": str(output_dir),
},
)
raise subprocess.CalledProcessError(p.returncode, arguments)
if debug_logger:
debug_logger.log(
level="DEBUG",
operation="downloader_aria2c_complete",
message="Aria2c download completed successfully",
context={
"url_count": len(urls),
"output_dir": str(output_dir),
"filename": filename,
},
)
except ConnectionResetError:
# interrupted while passing URI to download
raise KeyboardInterrupt()
@@ -251,9 +320,20 @@ def download(
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[yellow]CANCELLED")
raise
except Exception:
except Exception as e:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[red]FAILED")
if debug_logger and not isinstance(e, (subprocess.CalledProcessError, ValueError)):
debug_logger.log(
level="ERROR",
operation="downloader_aria2c_exception",
message=f"Unexpected error during Aria2c download: {e}",
error=e,
context={
"url_count": len(urls),
"output_dir": str(output_dir),
},
)
raise
finally:
rpc(caller=partial(rpc_session.post, url=rpc_uri), secret=rpc_secret, method="aria2.shutdown")

View File

@@ -11,7 +11,7 @@ from rich import filesize
from unshackle.core.config import config
from unshackle.core.constants import DOWNLOAD_CANCELLED
from unshackle.core.utilities import get_extension
from unshackle.core.utilities import get_debug_logger, get_extension
MAX_ATTEMPTS = 5
RETRY_WAIT = 2
@@ -189,6 +189,8 @@ def curl_impersonate(
if not isinstance(max_workers, (int, type(None))):
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
debug_logger = get_debug_logger()
if not isinstance(urls, list):
urls = [urls]
@@ -209,6 +211,24 @@ def curl_impersonate(
if proxy:
session.proxies.update({"all": proxy})
if debug_logger:
first_url = urls[0].get("url", "") if urls else ""
url_display = first_url[:200] + "..." if len(first_url) > 200 else first_url
debug_logger.log(
level="DEBUG",
operation="downloader_curl_impersonate_start",
message="Starting curl_impersonate download",
context={
"url_count": len(urls),
"first_url": url_display,
"output_dir": str(output_dir),
"filename": filename,
"max_workers": max_workers,
"browser": BROWSER,
"has_proxy": bool(proxy),
},
)
yield dict(total=len(urls))
download_sizes = []
@@ -235,11 +255,23 @@ def curl_impersonate(
# tell dl that it was cancelled
# the pool is already shut down, so exiting loop is fine
raise
except Exception:
except Exception as e:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[red]FAILING")
pool.shutdown(wait=True, cancel_futures=True)
yield dict(downloaded="[red]FAILED")
if debug_logger:
debug_logger.log(
level="ERROR",
operation="downloader_curl_impersonate_failed",
message=f"curl_impersonate download failed: {e}",
error=e,
context={
"url_count": len(urls),
"output_dir": str(output_dir),
"browser": BROWSER,
},
)
# tell dl that it failed
# the pool is already shut down, so exiting loop is fine
raise
@@ -260,5 +292,17 @@ def curl_impersonate(
last_speed_refresh = now
download_sizes.clear()
if debug_logger:
debug_logger.log(
level="DEBUG",
operation="downloader_curl_impersonate_complete",
message="curl_impersonate download completed successfully",
context={
"url_count": len(urls),
"output_dir": str(output_dir),
"filename": filename,
},
)
__all__ = ("curl_impersonate",)

View File

@@ -10,15 +10,17 @@ import requests
from requests.cookies import cookiejar_from_dict, get_cookie_header
from unshackle.core import binaries
from unshackle.core.binaries import FFMPEG, ShakaPackager, Mp4decrypt
from unshackle.core.config import config
from unshackle.core.console import console
from unshackle.core.constants import DOWNLOAD_CANCELLED
from unshackle.core.utilities import get_debug_logger
PERCENT_RE = re.compile(r"(\d+\.\d+%)")
SPEED_RE = re.compile(r"(\d+\.\d+(?:MB|KB)ps)")
SIZE_RE = re.compile(r"(\d+\.\d+(?:MB|GB|KB)/\d+\.\d+(?:MB|GB|KB))")
WARN_RE = re.compile(r"(WARN : Response.*|WARN : One or more errors occurred.*)")
ERROR_RE = re.compile(r"(ERROR.*)")
ERROR_RE = re.compile(r"(\bERROR\b.*|\bFAILED\b.*|\bException\b.*)")
DECRYPTION_ENGINE = {
"shaka": "SHAKA_PACKAGER",
@@ -66,12 +68,17 @@ def get_track_selection_args(track: Any) -> list[str]:
parts = []
if track_type == "Audio":
if track_id := representation.get("id") or adaptation_set.get("audioTrackId"):
track_id = representation.get("id") or adaptation_set.get("audioTrackId")
lang = representation.get("lang") or adaptation_set.get("lang")
if track_id:
parts.append(rf'"id=\b{track_id}\b"')
if lang:
parts.append(f"lang={lang}")
else:
if codecs := representation.get("codecs"):
parts.append(f"codecs={codecs}")
if lang := representation.get("lang") or adaptation_set.get("lang"):
if lang:
parts.append(f"lang={lang}")
if bw := representation.get("bandwidth"):
bitrate = int(bw) // 1000
@@ -176,17 +183,33 @@ def build_download_args(
"--thread-count": thread_count,
"--download-retry-count": retry_count,
"--write-meta-json": False,
"--no-log": True,
}
if FFMPEG:
args["--ffmpeg-binary-path"] = str(FFMPEG)
if proxy:
args["--custom-proxy"] = proxy
if skip_merge:
args["--skip-merge"] = skip_merge
if ad_keyword:
args["--ad-keyword"] = ad_keyword
if content_keys:
args["--key"] = next((f"{kid.hex}:{key.lower()}" for kid, key in content_keys.items()), None)
args["--decryption-engine"] = DECRYPTION_ENGINE.get(config.decryption.lower()) or "SHAKA_PACKAGER"
decryption_config = config.decryption.lower()
engine_name = DECRYPTION_ENGINE.get(decryption_config) or "SHAKA_PACKAGER"
args["--decryption-engine"] = engine_name
binary_path = None
if engine_name == "SHAKA_PACKAGER":
if ShakaPackager:
binary_path = str(ShakaPackager)
elif engine_name == "MP4DECRYPT":
if Mp4decrypt:
binary_path = str(Mp4decrypt)
if binary_path:
args["--decryption-binary-path"] = binary_path
if custom_args:
args.update(custom_args)
@@ -224,6 +247,8 @@ def download(
content_keys: dict[str, Any] | None,
skip_merge: bool | None = False,
) -> Generator[dict[str, Any], None, None]:
debug_logger = get_debug_logger()
if not urls:
raise ValueError("urls must be provided and not empty")
if not isinstance(urls, (str, dict, list)):
@@ -275,7 +300,39 @@ def download(
skip_merge=skip_merge,
ad_keyword=ad_keyword,
)
arguments.extend(get_track_selection_args(track))
selection_args = get_track_selection_args(track)
arguments.extend(selection_args)
log_file_path: Path | None = None
if debug_logger:
log_file_path = output_dir / f".n_m3u8dl_re_{filename}.log"
arguments.extend(["--log-file-path", str(log_file_path)])
track_url_display = track.url[:200] + "..." if len(track.url) > 200 else track.url
debug_logger.log(
level="DEBUG",
operation="downloader_n_m3u8dl_re_start",
message="Starting N_m3u8DL-RE download",
context={
"binary_path": str(binaries.N_m3u8DL_RE),
"track_id": getattr(track, "id", None),
"track_type": track.__class__.__name__,
"track_url": track_url_display,
"output_dir": str(output_dir),
"filename": filename,
"thread_count": thread_count,
"retry_count": retry_count,
"has_content_keys": bool(content_keys),
"content_key_count": len(content_keys) if content_keys else 0,
"has_proxy": bool(proxy),
"skip_merge": skip_merge,
"has_custom_args": bool(track.downloader_args),
"selection_args": selection_args,
"descriptor": track.descriptor.name if hasattr(track, "descriptor") else None,
},
)
else:
arguments.extend(["--no-log", "true"])
yield {"total": 100}
yield {"downloaded": "Parsing streams..."}
@@ -310,11 +367,45 @@ def download(
yield {"completed": progress} if progress < 100 else {"downloaded": "Merging"}
process.wait()
if process.returncode != 0:
if debug_logger and log_file_path:
log_contents = ""
if log_file_path.exists():
try:
log_contents = log_file_path.read_text(encoding="utf-8", errors="replace")
except Exception:
log_contents = "<failed to read log file>"
debug_logger.log(
level="ERROR",
operation="downloader_n_m3u8dl_re_failed",
message=f"N_m3u8DL-RE exited with code {process.returncode}",
context={
"returncode": process.returncode,
"track_id": getattr(track, "id", None),
"track_type": track.__class__.__name__,
"last_line": last_line,
"log_file_contents": log_contents,
},
)
if error_match := ERROR_RE.search(last_line):
raise ValueError(f"[N_m3u8DL-RE]: {error_match.group(1)}")
raise subprocess.CalledProcessError(process.returncode, arguments)
if debug_logger:
debug_logger.log(
level="DEBUG",
operation="downloader_n_m3u8dl_re_complete",
message="N_m3u8DL-RE download completed successfully",
context={
"track_id": getattr(track, "id", None),
"track_type": track.__class__.__name__,
"output_dir": str(output_dir),
"filename": filename,
},
)
except ConnectionResetError:
# interrupted while passing URI to download
raise KeyboardInterrupt()
@@ -322,10 +413,35 @@ def download(
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield {"downloaded": "[yellow]CANCELLED"}
raise
except Exception:
except Exception as e:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield {"downloaded": "[red]FAILED"}
if debug_logger and log_file_path and not isinstance(e, (subprocess.CalledProcessError, ValueError)):
log_contents = ""
if log_file_path.exists():
try:
log_contents = log_file_path.read_text(encoding="utf-8", errors="replace")
except Exception:
log_contents = "<failed to read log file>"
debug_logger.log(
level="ERROR",
operation="downloader_n_m3u8dl_re_exception",
message=f"Unexpected error during N_m3u8DL-RE download: {e}",
error=e,
context={
"track_id": getattr(track, "id", None),
"track_type": track.__class__.__name__,
"log_file_contents": log_contents,
},
)
raise
finally:
if log_file_path and log_file_path.exists():
try:
log_file_path.unlink()
except Exception:
pass
def n_m3u8dl_re(

View File

@@ -12,7 +12,7 @@ from requests.adapters import HTTPAdapter
from rich import filesize
from unshackle.core.constants import DOWNLOAD_CANCELLED
from unshackle.core.utilities import get_extension
from unshackle.core.utilities import get_debug_logger, get_extension
MAX_ATTEMPTS = 5
RETRY_WAIT = 2
@@ -215,6 +215,8 @@ def requests(
if not isinstance(max_workers, (int, type(None))):
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
debug_logger = get_debug_logger()
if not isinstance(urls, list):
urls = [urls]
@@ -241,6 +243,23 @@ def requests(
if proxy:
session.proxies.update({"all": proxy})
if debug_logger:
first_url = urls[0].get("url", "") if urls else ""
url_display = first_url[:200] + "..." if len(first_url) > 200 else first_url
debug_logger.log(
level="DEBUG",
operation="downloader_requests_start",
message="Starting requests download",
context={
"url_count": len(urls),
"first_url": url_display,
"output_dir": str(output_dir),
"filename": filename,
"max_workers": max_workers,
"has_proxy": bool(proxy),
},
)
yield dict(total=len(urls))
try:
@@ -256,14 +275,37 @@ def requests(
# tell dl that it was cancelled
# the pool is already shut down, so exiting loop is fine
raise
except Exception:
except Exception as e:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[red]FAILING")
pool.shutdown(wait=True, cancel_futures=True)
yield dict(downloaded="[red]FAILED")
if debug_logger:
debug_logger.log(
level="ERROR",
operation="downloader_requests_failed",
message=f"Requests download failed: {e}",
error=e,
context={
"url_count": len(urls),
"output_dir": str(output_dir),
},
)
# tell dl that it failed
# the pool is already shut down, so exiting loop is fine
raise
if debug_logger:
debug_logger.log(
level="DEBUG",
operation="downloader_requests_complete",
message="Requests download completed successfully",
context={
"url_count": len(urls),
"output_dir": str(output_dir),
"filename": filename,
},
)
finally:
DOWNLOAD_SIZES.clear()

View File

@@ -168,7 +168,7 @@ class PlayReady:
pssh_boxes.extend(list(get_boxes(init_data, b"pssh")))
tenc_boxes.extend(list(get_boxes(init_data, b"tenc")))
pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SYSTEM_ID.bytes), None)
pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SYSTEM_ID), None)
if not pssh:
raise PlayReady.Exceptions.PSSHNotFound("PSSH was not found in track data.")
@@ -197,7 +197,7 @@ class PlayReady:
if enc_key_id:
kid = UUID(bytes=base64.b64decode(enc_key_id))
pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SYSTEM_ID.bytes), None)
pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SYSTEM_ID), None)
if not pssh:
raise PlayReady.Exceptions.PSSHNotFound("PSSH was not found in track data.")
@@ -415,7 +415,7 @@ class PlayReady:
p.wait()
if p.returncode != 0 or had_error:
raise subprocess.CalledProcessError(p.returncode, arguments)
raise subprocess.CalledProcessError(p.returncode, [binaries.ShakaPackager, *arguments])
path.unlink()
if not stream_skipped:

View File

@@ -100,9 +100,7 @@ class Widevine:
pssh_boxes.extend(list(get_boxes(init_data, b"pssh")))
tenc_boxes.extend(list(get_boxes(init_data, b"tenc")))
pssh_boxes.sort(key=lambda b: {PSSH.SystemId.Widevine: 0, PSSH.SystemId.PlayReady: 1}[b.system_ID])
pssh = next(iter(pssh_boxes), None)
pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SystemId.Widevine), None)
if not pssh:
raise Widevine.Exceptions.PSSHNotFound("PSSH was not found in track data.")
@@ -141,9 +139,7 @@ class Widevine:
if enc_key_id:
kid = UUID(bytes=base64.b64decode(enc_key_id))
pssh_boxes.sort(key=lambda b: {PSSH.SystemId.Widevine: 0, PSSH.SystemId.PlayReady: 1}[b.system_ID])
pssh = next(iter(pssh_boxes), None)
pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SystemId.Widevine), None)
if not pssh:
raise Widevine.Exceptions.PSSHNotFound("PSSH was not found in track data.")
@@ -371,7 +367,7 @@ class Widevine:
p.wait()
if p.returncode != 0 or had_error:
raise subprocess.CalledProcessError(p.returncode, arguments)
raise subprocess.CalledProcessError(p.returncode, [binaries.ShakaPackager, *arguments])
path.unlink()
if not stream_skipped:

View File

@@ -5,6 +5,7 @@ import html
import logging
import math
import re
import shutil
import sys
from copy import copy
from functools import partial
@@ -18,6 +19,7 @@ import requests
from curl_cffi.requests import Session as CurlSession
from langcodes import Language, tag_is_valid
from lxml.etree import Element, ElementTree
from pyplayready.cdm import Cdm as PlayReadyCdm
from pyplayready.system.pssh import PSSH as PR_PSSH
from pywidevine.cdm import Cdm as WidevineCdm
from pywidevine.pssh import PSSH
@@ -28,7 +30,7 @@ from unshackle.core.downloaders import requests as requests_downloader
from unshackle.core.drm import DRM_T, PlayReady, Widevine
from unshackle.core.events import events
from unshackle.core.tracks import Audio, Subtitle, Tracks, Video
from unshackle.core.utilities import is_close_match, try_ensure_utf8
from unshackle.core.utilities import get_debug_logger, is_close_match, try_ensure_utf8
from unshackle.core.utils.xml import load_xml
@@ -465,12 +467,23 @@ class DASH:
track.data["dash"]["timescale"] = int(segment_timescale)
track.data["dash"]["segment_durations"] = segment_durations
if not track.drm and isinstance(track, (Video, Audio)):
try:
track.drm = [Widevine.from_init_data(init_data)]
except Widevine.Exceptions.PSSHNotFound:
# it might not have Widevine DRM, or might not have found the PSSH
log.warning("No Widevine PSSH was found for this track, is it DRM free?")
if init_data and isinstance(track, (Video, Audio)):
if isinstance(cdm, PlayReadyCdm):
try:
track.drm = [PlayReady.from_init_data(init_data)]
except PlayReady.Exceptions.PSSHNotFound:
try:
track.drm = [Widevine.from_init_data(init_data)]
except Widevine.Exceptions.PSSHNotFound:
log.warning("No PlayReady or Widevine PSSH was found for this track, is it DRM free?")
else:
try:
track.drm = [Widevine.from_init_data(init_data)]
except Widevine.Exceptions.PSSHNotFound:
try:
track.drm = [PlayReady.from_init_data(init_data)]
except PlayReady.Exceptions.PSSHNotFound:
log.warning("No Widevine or PlayReady PSSH was found for this track, is it DRM free?")
if track.drm:
track_kid = track_kid or track.get_key_id(url=segments[0][0], session=session)
@@ -515,8 +528,35 @@ class DASH:
max_workers=max_workers,
)
skip_merge = False
if downloader.__name__ == "n_m3u8dl_re":
downloader_args.update({"filename": track.id, "track": track})
skip_merge = True
downloader_args.update(
{
"filename": track.id,
"track": track,
"content_keys": drm.content_keys if drm else None,
}
)
debug_logger = get_debug_logger()
if debug_logger:
debug_logger.log(
level="DEBUG",
operation="manifest_dash_download_start",
message="Starting DASH manifest download",
context={
"track_id": getattr(track, "id", None),
"track_type": track.__class__.__name__,
"total_segments": len(segments),
"downloader": downloader.__name__,
"has_drm": bool(track.drm),
"drm_types": [drm.__class__.__name__ for drm in (track.drm or [])],
"skip_merge": skip_merge,
"save_path": str(save_path),
"has_init_data": bool(init_data),
},
)
for status_update in downloader(**downloader_args):
file_downloaded = status_update.get("file_downloaded")
@@ -533,42 +573,56 @@ class DASH:
control_file.unlink()
segments_to_merge = [x for x in sorted(save_dir.iterdir()) if x.is_file()]
with open(save_path, "wb") as f:
if init_data:
f.write(init_data)
if len(segments_to_merge) > 1:
progress(downloaded="Merging", completed=0, total=len(segments_to_merge))
for segment_file in segments_to_merge:
segment_data = segment_file.read_bytes()
# TODO: fix encoding after decryption?
if (
not drm
and isinstance(track, Subtitle)
and track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML)
):
segment_data = try_ensure_utf8(segment_data)
segment_data = (
segment_data.decode("utf8")
.replace("&lrm;", html.unescape("&lrm;"))
.replace("&rlm;", html.unescape("&rlm;"))
.encode("utf8")
)
f.write(segment_data)
f.flush()
segment_file.unlink()
progress(advance=1)
if skip_merge:
# N_m3u8DL-RE handles merging and decryption internally
shutil.move(segments_to_merge[0], save_path)
if drm:
track.drm = None
events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=drm, segment=None)
else:
with open(save_path, "wb") as f:
if init_data:
f.write(init_data)
if len(segments_to_merge) > 1:
progress(downloaded="Merging", completed=0, total=len(segments_to_merge))
for segment_file in segments_to_merge:
segment_data = segment_file.read_bytes()
# TODO: fix encoding after decryption?
if (
not drm
and isinstance(track, Subtitle)
and track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML)
):
segment_data = try_ensure_utf8(segment_data)
segment_data = (
segment_data.decode("utf8")
.replace("&lrm;", html.unescape("&lrm;"))
.replace("&rlm;", html.unescape("&rlm;"))
.encode("utf8")
)
f.write(segment_data)
f.flush()
segment_file.unlink()
progress(advance=1)
track.path = save_path
events.emit(events.Types.TRACK_DOWNLOADED, track=track)
if drm:
if not skip_merge and drm:
progress(downloaded="Decrypting", completed=0, total=100)
drm.decrypt(save_path)
track.drm = None
events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=drm, segment=None)
progress(downloaded="Decrypting", advance=100)
save_dir.rmdir()
# Clean up empty segment directory
if save_dir.exists() and save_dir.name.endswith("_segments"):
try:
save_dir.rmdir()
except OSError:
# Directory might not be empty, try removing recursively
shutil.rmtree(save_dir, ignore_errors=True)
progress(downloaded="Downloaded")
@@ -736,6 +790,11 @@ class DASH:
@staticmethod
def get_drm(protections: list[Element]) -> list[DRM_T]:
drm: list[DRM_T] = []
PLACEHOLDER_KIDS = {
UUID("00000000-0000-0000-0000-000000000000"), # All zeros (key rotation default)
UUID("00010203-0405-0607-0809-0a0b0c0d0e0f"), # Sequential 0x00-0x0f
UUID("00010203-0405-0607-0809-101112131415"), # Shaka Packager test pattern
}
for protection in protections:
urn = (protection.get("schemeIdUri") or "").lower()
@@ -745,17 +804,27 @@ class DASH:
if not pssh_text:
continue
pssh = PSSH(pssh_text)
kid_attr = protection.get("kid") or protection.get("{urn:mpeg:cenc:2013}kid")
kid = UUID(bytes=base64.b64decode(kid_attr)) if kid_attr else None
kid = protection.get("kid")
if kid:
kid = UUID(bytes=base64.b64decode(kid))
if not kid:
default_kid_attr = protection.get("default_KID") or protection.get(
"{urn:mpeg:cenc:2013}default_KID"
)
kid = UUID(default_kid_attr) if default_kid_attr else None
default_kid = protection.get("default_KID")
if default_kid:
kid = UUID(default_kid)
if not kid:
kid = next(
(
UUID(p.get("default_KID") or p.get("{urn:mpeg:cenc:2013}default_KID"))
for p in protections
if p.get("default_KID") or p.get("{urn:mpeg:cenc:2013}default_KID")
),
None,
)
if not pssh.key_ids and not kid:
kid = next((UUID(p.get("default_KID")) for p in protections if p.get("default_KID")), None)
if kid and (not pssh.key_ids or all(k.int == 0 or k in PLACEHOLDER_KIDS for k in pssh.key_ids)):
pssh.set_key_ids([kid])
drm.append(Widevine(pssh=pssh, kid=kid))

View File

@@ -32,7 +32,7 @@ from unshackle.core.downloaders import requests as requests_downloader
from unshackle.core.drm import DRM_T, ClearKey, PlayReady, Widevine
from unshackle.core.events import events
from unshackle.core.tracks import Audio, Subtitle, Tracks, Video
from unshackle.core.utilities import get_extension, is_close_match, try_ensure_utf8
from unshackle.core.utilities import get_debug_logger, get_extension, is_close_match, try_ensure_utf8
class HLS:
@@ -350,6 +350,24 @@ class HLS:
}
)
debug_logger = get_debug_logger()
if debug_logger:
debug_logger.log(
level="DEBUG",
operation="manifest_hls_download_start",
message="Starting HLS manifest download",
context={
"track_id": getattr(track, "id", None),
"track_type": track.__class__.__name__,
"total_segments": total_segments,
"downloader": downloader.__name__,
"has_drm": bool(session_drm),
"drm_type": session_drm.__class__.__name__ if session_drm else None,
"skip_merge": skip_merge,
"save_path": str(save_path),
},
)
for status_update in downloader(**downloader_args):
file_downloaded = status_update.get("file_downloaded")
if file_downloaded:

View File

@@ -21,7 +21,7 @@ from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY,
from unshackle.core.drm import DRM_T, PlayReady, Widevine
from unshackle.core.events import events
from unshackle.core.tracks import Audio, Subtitle, Track, Tracks, Video
from unshackle.core.utilities import try_ensure_utf8
from unshackle.core.utilities import get_debug_logger, try_ensure_utf8
from unshackle.core.utils.xml import load_xml
@@ -283,6 +283,24 @@ class ISM:
}
)
debug_logger = get_debug_logger()
if debug_logger:
debug_logger.log(
level="DEBUG",
operation="manifest_ism_download_start",
message="Starting ISM manifest download",
context={
"track_id": getattr(track, "id", None),
"track_type": track.__class__.__name__,
"total_segments": len(segments),
"downloader": downloader.__name__,
"has_drm": bool(session_drm),
"drm_type": session_drm.__class__.__name__ if session_drm else None,
"skip_merge": skip_merge,
"save_path": str(save_path),
},
)
for status_update in downloader(**downloader_args):
file_downloaded = status_update.get("file_downloaded")
if file_downloaded:

View File

@@ -185,7 +185,10 @@ class Episode(Title):
if hdr_format:
if hdr_format_full.startswith("Dolby Vision"):
name += " DV"
if any(indicator in hdr_format_full for indicator in ["HDR10", "SMPTE ST 2086"]):
if any(
indicator in (hdr_format_full + " " + hdr_format)
for indicator in ["HDR10", "SMPTE ST 2086"]
):
name += " HDR"
else:
name += f" {DYNAMIC_RANGE_MAP.get(hdr_format)} "

View File

@@ -136,7 +136,10 @@ class Movie(Title):
if hdr_format:
if hdr_format_full.startswith("Dolby Vision"):
name += " DV"
if any(indicator in hdr_format_full for indicator in ["HDR10", "SMPTE ST 2086"]):
if any(
indicator in (hdr_format_full + " " + hdr_format)
for indicator in ["HDR10", "SMPTE ST 2086"]
):
name += " HDR"
else:
name += f" {DYNAMIC_RANGE_MAP.get(hdr_format)} "

View File

@@ -8,7 +8,7 @@ from pathlib import Path
from rich.padding import Padding
from rich.rule import Rule
from unshackle.core.binaries import DoviTool, HDR10PlusTool
from unshackle.core.binaries import FFMPEG, DoviTool, HDR10PlusTool
from unshackle.core.config import config
from unshackle.core.console import console
@@ -109,7 +109,7 @@ class Hybrid:
"""Simple ffmpeg execution without progress tracking"""
p = subprocess.run(
[
"ffmpeg",
str(FFMPEG) if FFMPEG else "ffmpeg",
"-nostdin",
"-i",
str(save_path),

View File

@@ -91,6 +91,12 @@ class Subtitle(Track):
return Subtitle.Codec.TimedTextMarkupLang
raise ValueError(f"The Content Profile '{profile}' is not a supported Subtitle Codec")
# WebVTT sanitization patterns (compiled once for performance)
_CUE_ID_PATTERN = re.compile(r"^[A-Za-z]+\d+$")
_TIMING_START_PATTERN = re.compile(r"^\d+:\d+[:\.]")
_TIMING_LINE_PATTERN = re.compile(r"^((?:\d+:)?\d+:\d+[.,]\d+)\s*-->\s*((?:\d+:)?\d+:\d+[.,]\d+)(.*)$")
_LINE_POS_PATTERN = re.compile(r"line:(\d+(?:\.\d+)?%?)")
def __init__(
self,
*args: Any,
@@ -239,6 +245,11 @@ class Subtitle(Track):
# Sanitize WebVTT timestamps before parsing
text = Subtitle.sanitize_webvtt_timestamps(text)
# Remove cue identifiers that confuse parsers like pysubs2
text = Subtitle.sanitize_webvtt_cue_identifiers(text)
# Merge overlapping cues with line positioning into single multi-line cues
text = Subtitle.merge_overlapping_webvtt_cues(text)
preserve_formatting = config.subtitle.get("preserve_formatting", True)
if preserve_formatting:
@@ -277,6 +288,240 @@ class Subtitle(Track):
# Replace negative timestamps with 00:00:00.000
return re.sub(r"(-\d+:\d+:\d+\.\d+)", "00:00:00.000", text)
@staticmethod
def has_webvtt_cue_identifiers(text: str) -> bool:
    """
    Report whether the WebVTT text contains removable cue identifiers.

    A line counts as a cue identifier when, after stripping whitespace, it
    matches ``Subtitle._CUE_ID_PATTERN`` (letters followed by digits) and the
    next non-blank line looks like a cue timing line.

    Parameters:
        text: The WebVTT content as string

    Returns:
        True if at least one cue identifier is found, False otherwise
    """
    rows = text.split("\n")
    total = len(rows)

    for index, raw in enumerate(rows):
        if not Subtitle._CUE_ID_PATTERN.match(raw.strip()):
            continue

        # Blank lines between the candidate and the timing line are skipped.
        following = next((rows[k] for k in range(index + 1, total) if rows[k].strip()), None)
        if following is None:
            continue

        if "-->" in following or Subtitle._TIMING_START_PATTERN.match(following.strip()):
            return True

    return False
@staticmethod
def sanitize_webvtt_cue_identifiers(text: str) -> str:
    """
    Strip WebVTT cue identifier lines that can confuse subtitle parsers.

    Some services put identifiers such as "Q0", "Q1", ... on their own line
    ahead of the timing line. Parsers like pysubs2 may read those as part of
    the previous cue's text, so they are dropped here. A line is only dropped
    when it matches ``Subtitle._CUE_ID_PATTERN`` and the next non-blank line
    looks like a timing line; every other line is kept verbatim.

    Parameters:
        text: The WebVTT content as string

    Returns:
        The WebVTT content without cue identifier lines. The input is
        returned unchanged when no identifiers are detected.
    """
    if not Subtitle.has_webvtt_cue_identifiers(text):
        return text

    rows = text.split("\n")
    total = len(rows)
    kept = []

    for index, raw in enumerate(rows):
        if Subtitle._CUE_ID_PATTERN.match(raw.strip()):
            # Look past any blank lines for the line that follows the candidate.
            following = next((rows[k] for k in range(index + 1, total) if rows[k].strip()), None)
            if following is not None and (
                "-->" in following or Subtitle._TIMING_START_PATTERN.match(following.strip())
            ):
                # Identifier confirmed: leave it out of the result.
                continue
        kept.append(raw)

    return "\n".join(kept)
@staticmethod
def _parse_vtt_time(t: str) -> int:
    """
    Convert a WebVTT timestamp to milliseconds.

    Accepts ``MM:SS(.fff)`` and ``HH:MM:SS(.fff)``; a comma is accepted in
    place of the decimal point. Any colon-separated fields beyond the third
    are ignored.

    Parameters:
        t: The timestamp string

    Returns:
        The timestamp in milliseconds, or 0 for malformed input
    """
    fields = t.replace(",", ".").split(":")
    if len(fields) < 2:
        return 0
    if len(fields) == 2:
        # No hours component supplied.
        fields.insert(0, "0")
    hours, minutes, seconds = fields[:3]

    whole, *fractions = seconds.split(".")
    try:
        # Pad or truncate the fraction to exactly three digits so that
        # ".5", ".50" and ".500" all mean 500 ms.
        millis = int(fractions[0].ljust(3, "0")[:3]) if fractions else 0
        return int(hours) * 3_600_000 + int(minutes) * 60_000 + int(whole) * 1000 + millis
    except (ValueError, IndexError):
        return 0
@staticmethod
def has_overlapping_webvtt_cues(text: str) -> bool:
    """
    Report whether the WebVTT text contains overlapping cues that need merging.

    Two consecutive timing lines count as overlapping when their start times
    are at most 50ms apart and their end times are equal, which indicates a
    multi-line subtitle that was split into separate cues.

    Parameters:
        text: The WebVTT content as string

    Returns:
        True if overlapping cues are detected, False otherwise
    """
    spans = []
    for row in text.split("\n"):
        found = Subtitle._TIMING_LINE_PATTERN.match(row)
        if found is None:
            continue
        spans.append((Subtitle._parse_vtt_time(found.group(1)), Subtitle._parse_vtt_time(found.group(2))))

    # Compare every timing line with the one that follows it.
    return any(
        abs(first_start - second_start) <= 50 and first_end == second_end
        for (first_start, first_end), (second_start, second_end) in zip(spans, spans[1:])
    )
@staticmethod
def merge_overlapping_webvtt_cues(text: str) -> str:
    """
    Merge WebVTT cues with near-identical timings into single multi-line cues.

    Some services emit one cue per line of a multi-line subtitle, with start
    times a millisecond or so apart and different ``line:`` positions.
    Consecutive cues are grouped when each starts within 50ms of the first
    cue of the group and shares its end time. A group's texts are joined in
    ascending ``line:`` order (lower value = higher on screen = first line);
    cues without a parsable ``line:`` setting are treated as 100.0. The merged
    cue takes the earliest start time of the group and carries no cue settings.

    When merging happens the whole document is rebuilt: everything before the
    first line containing "-->" is kept as the header, and after that only
    timing lines and their cue text are carried over.

    Parameters:
        text: The WebVTT content as string

    Returns:
        WebVTT content with overlapping cues merged. The input is returned
        unchanged when no overlapping cues are detected.
    """
    if not Subtitle.has_overlapping_webvtt_cues(text):
        return text

    rows = text.split("\n")
    total = len(rows)

    # Everything ahead of the first line containing "-->" is the header.
    first_cue = next((k for k, row in enumerate(rows) if "-->" in row), total)
    header_lines = rows[:first_cue]

    # Parse each timing line and the text lines that belong to it.
    cues = []
    pos = first_cue
    while pos < total:
        found = Subtitle._TIMING_LINE_PATTERN.match(rows[pos])
        pos += 1
        if found is None:
            continue

        start_str, end_str, settings = found.groups()
        placement = Subtitle._LINE_POS_PATTERN.search(settings)
        # Default to bottom when no line position is given.
        line_pos = float(placement.group(1).rstrip("%")) if placement else 100.0

        body = []
        while pos < total and rows[pos].strip() and "-->" not in rows[pos]:
            body.append(rows[pos])
            pos += 1

        cues.append(
            {
                "start_ms": Subtitle._parse_vtt_time(start_str),
                "end_ms": Subtitle._parse_vtt_time(end_str),
                "start_str": start_str,
                "end_str": end_str,
                "line_pos": line_pos,
                "content": "\n".join(body),
                "settings": settings,
            }
        )

    output = list(header_lines)
    if output and output[-1].strip():
        output.append("")

    # Group consecutive cues against the first cue of each group and emit them.
    cursor = 0
    while cursor < len(cues):
        anchor = cues[cursor]
        stop = cursor + 1
        while (
            stop < len(cues)
            and abs(anchor["start_ms"] - cues[stop]["start_ms"]) <= 50
            and anchor["end_ms"] == cues[stop]["end_ms"]
        ):
            stop += 1
        cluster = cues[cursor:stop]
        cursor = stop

        if len(cluster) == 1:
            begin, finish = anchor["start_str"], anchor["end_str"]
            content, settings = anchor["content"], anchor["settings"]
        else:
            # Lower line position = higher on screen = first line of the merged cue.
            ordered = sorted(cluster, key=lambda cue: cue["line_pos"])
            begin = min(ordered, key=lambda cue: cue["start_ms"])["start_str"]
            finish = ordered[0]["end_str"]
            content = "\n".join(cue["content"] for cue in ordered)
            settings = ""

        output.extend((f"{begin} --> {finish}{settings}", content, ""))

    return "\n".join(output)
@staticmethod
def sanitize_webvtt(text: str) -> str:
"""
@@ -565,13 +810,18 @@ class Subtitle(Track):
if binaries.SubtitleEdit and self.codec not in (Subtitle.Codec.fTTML, Subtitle.Codec.fVTT):
sub_edit_format = {
Subtitle.Codec.SubStationAlphav4: "AdvancedSubStationAlpha",
Subtitle.Codec.TimedTextMarkupLang: "TimedText1.0",
}.get(codec, codec.name)
Subtitle.Codec.SubRip: "subrip",
Subtitle.Codec.SubStationAlpha: "substationalpha",
Subtitle.Codec.SubStationAlphav4: "advancedsubstationalpha",
Subtitle.Codec.TimedTextMarkupLang: "timedtext1.0",
Subtitle.Codec.WebVTT: "webvtt",
Subtitle.Codec.SAMI: "sami",
Subtitle.Codec.MicroDVD: "microdvd",
}.get(codec, codec.name.lower())
sub_edit_args = [
binaries.SubtitleEdit,
"/Convert",
self.path,
str(binaries.SubtitleEdit),
"/convert",
str(self.path),
sub_edit_format,
f"/outputfilename:{output_path.name}",
"/encoding:utf8",
@@ -631,7 +881,7 @@ class Subtitle(Track):
text = try_ensure_utf8(data).decode("utf8")
text = text.replace("tt:", "")
# negative size values aren't allowed in TTML/DFXP spec, replace with 0
text = re.sub(r'"(-\d+(\.\d+)?(px|em|%|c|pt))"', '"0"', text)
text = re.sub(r"-(\d+(?:\.\d+)?)(px|em|%|c|pt)", r"0\2", text)
caption_set = pycaption.DFXPReader().read(text)
elif codec == Subtitle.Codec.fVTT:
caption_lists: dict[str, pycaption.CaptionList] = defaultdict(pycaption.CaptionList)
@@ -962,18 +1212,26 @@ class Subtitle(Track):
except Exception:
pass # Fall through to other methods
if binaries.SubtitleEdit:
if self.codec == Subtitle.Codec.SubStationAlphav4:
output_format = "AdvancedSubStationAlpha"
elif self.codec == Subtitle.Codec.TimedTextMarkupLang:
output_format = "TimedText1.0"
else:
output_format = self.codec.name
conversion_method = config.subtitle.get("conversion_method", "auto")
use_subtitleedit = sdh_method == "subtitleedit" or (
sdh_method == "auto" and conversion_method in ("auto", "subtitleedit")
)
if binaries.SubtitleEdit and use_subtitleedit:
output_format = {
Subtitle.Codec.SubRip: "subrip",
Subtitle.Codec.SubStationAlpha: "substationalpha",
Subtitle.Codec.SubStationAlphav4: "advancedsubstationalpha",
Subtitle.Codec.TimedTextMarkupLang: "timedtext1.0",
Subtitle.Codec.WebVTT: "webvtt",
Subtitle.Codec.SAMI: "sami",
Subtitle.Codec.MicroDVD: "microdvd",
}.get(self.codec, self.codec.name.lower())
subprocess.run(
[
binaries.SubtitleEdit,
"/Convert",
self.path,
str(binaries.SubtitleEdit),
"/convert",
str(self.path),
output_format,
"/encoding:utf8",
"/overwrite",
@@ -981,6 +1239,7 @@ class Subtitle(Track):
],
check=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
else:
if config.subtitle.get("convert_before_strip", True) and self.codec != Subtitle.Codec.SubRip:
@@ -1022,18 +1281,21 @@ class Subtitle(Track):
if not binaries.SubtitleEdit:
raise EnvironmentError("SubtitleEdit executable not found...")
if self.codec == Subtitle.Codec.SubStationAlphav4:
output_format = "AdvancedSubStationAlpha"
elif self.codec == Subtitle.Codec.TimedTextMarkupLang:
output_format = "TimedText1.0"
else:
output_format = self.codec.name
output_format = {
Subtitle.Codec.SubRip: "subrip",
Subtitle.Codec.SubStationAlpha: "substationalpha",
Subtitle.Codec.SubStationAlphav4: "advancedsubstationalpha",
Subtitle.Codec.TimedTextMarkupLang: "timedtext1.0",
Subtitle.Codec.WebVTT: "webvtt",
Subtitle.Codec.SAMI: "sami",
Subtitle.Codec.MicroDVD: "microdvd",
}.get(self.codec, self.codec.name.lower())
subprocess.run(
[
binaries.SubtitleEdit,
"/Convert",
self.path,
str(binaries.SubtitleEdit),
"/convert",
str(self.path),
output_format,
"/ReverseRtlStartEnd",
"/encoding:utf8",
@@ -1041,6 +1303,7 @@ class Subtitle(Track):
],
check=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)

View File

@@ -295,12 +295,23 @@ class Track:
try:
if not self.drm and track_type in ("Video", "Audio"):
# the service might not have explicitly defined the `drm` property
# try find widevine DRM information from the init data of URL
try:
self.drm = [Widevine.from_track(self, session)]
except Widevine.Exceptions.PSSHNotFound:
# it might not have Widevine DRM, or might not have found the PSSH
log.warning("No Widevine PSSH was found for this track, is it DRM free?")
# try find DRM information from the init data of URL based on CDM type
if isinstance(cdm, PlayReadyCdm):
try:
self.drm = [PlayReady.from_track(self, session)]
except PlayReady.Exceptions.PSSHNotFound:
try:
self.drm = [Widevine.from_track(self, session)]
except Widevine.Exceptions.PSSHNotFound:
log.warning("No PlayReady or Widevine PSSH was found for this track, is it DRM free?")
else:
try:
self.drm = [Widevine.from_track(self, session)]
except Widevine.Exceptions.PSSHNotFound:
try:
self.drm = [PlayReady.from_track(self, session)]
except PlayReady.Exceptions.PSSHNotFound:
log.warning("No Widevine or PlayReady PSSH was found for this track, is it DRM free?")
if self.drm:
track_kid = self.get_key_id(session=session)

View File

@@ -22,7 +22,7 @@ from unshackle.core.tracks.chapters import Chapter, Chapters
from unshackle.core.tracks.subtitle import Subtitle
from unshackle.core.tracks.track import Track
from unshackle.core.tracks.video import Video
from unshackle.core.utilities import is_close_match, sanitize_filename
from unshackle.core.utilities import get_debug_logger, is_close_match, sanitize_filename
from unshackle.core.utils.collections import as_list, flatten
@@ -507,6 +507,35 @@ class Tracks:
if not output_path:
raise ValueError("No tracks provided, at least one track must be provided.")
debug_logger = get_debug_logger()
if debug_logger:
debug_logger.log(
level="DEBUG",
operation="mux_start",
message="Starting mkvmerge muxing",
context={
"title": title,
"output_path": str(output_path),
"video_count": len(self.videos),
"audio_count": len(self.audio),
"subtitle_count": len(self.subtitles),
"attachment_count": len(self.attachments),
"has_chapters": bool(self.chapters),
"video_tracks": [
{"id": v.id, "codec": getattr(v, "codec", None), "language": str(v.language)}
for v in self.videos
],
"audio_tracks": [
{"id": a.id, "codec": getattr(a, "codec", None), "language": str(a.language)}
for a in self.audio
],
"subtitle_tracks": [
{"id": s.id, "codec": getattr(s, "codec", None), "language": str(s.language)}
for s in self.subtitles
],
},
)
# let potential failures go to caller, caller should handle
try:
errors = []
@@ -516,7 +545,33 @@ class Tracks:
errors.append(line)
if "progress" in line:
progress(total=100, completed=int(line.strip()[14:-1]))
return output_path, p.wait(), errors
returncode = p.wait()
if debug_logger:
if returncode != 0 or errors:
debug_logger.log(
level="ERROR",
operation="mux_failed",
message=f"mkvmerge exited with code {returncode}",
context={
"returncode": returncode,
"output_path": str(output_path),
"errors": errors,
},
)
else:
debug_logger.log(
level="DEBUG",
operation="mux_complete",
message="mkvmerge muxing completed successfully",
context={
"output_path": str(output_path),
"output_exists": output_path.exists() if output_path else False,
},
)
return output_path, returncode, errors
finally:
if chapters_path:
chapters_path.unlink()

View File

@@ -121,9 +121,14 @@ def sanitize_filename(filename: str, spacer: str = ".") -> str:
The spacer is safer to be a '.' for older DDL and p2p sharing spaces.
This includes web-served content via direct links and such.
Set `unicode_filenames: true` in config to preserve native language
characters (Korean, Japanese, Chinese, etc.) instead of transliterating
them to ASCII equivalents.
"""
# replace all non-ASCII characters with ASCII equivalents
filename = unidecode(filename)
# optionally replace non-ASCII characters with ASCII equivalents
if not config.unicode_filenames:
filename = unidecode(filename)
# remove or replace further characters as needed
filename = "".join(c for c in filename if unicodedata.category(c) != "Mn") # hidden characters

View File

@@ -114,32 +114,71 @@ class API(Vault):
return added or updated
def add_keys(self, service: str, kid_keys: dict[Union[UUID, str], str]) -> int:
data = self.session.post(
url=f"{self.uri}/{service.lower()}",
json={"content_keys": {str(kid).replace("-", ""): key for kid, key in kid_keys.items()}},
headers={"Accept": "application/json"},
).json()
# Normalize keys
normalized_keys = {str(kid).replace("-", ""): key for kid, key in kid_keys.items()}
kid_list = list(normalized_keys.keys())
code = int(data.get("code", 0))
message = data.get("message")
error = {
0: None,
1: Exceptions.AuthRejected,
2: Exceptions.TooManyRequests,
3: Exceptions.ServiceTagInvalid,
4: Exceptions.KeyIdInvalid,
5: Exceptions.ContentKeyInvalid,
}.get(code, ValueError)
if not kid_list:
return 0
if error:
raise error(f"{message} ({code})")
# Try batches starting at 500, stepping down by 100 on failure, fallback to 1
batch_size = 500
total_added = 0
i = 0
# each kid:key that was new to the vault (optional)
added = int(data.get("added"))
# each key for a kid that was changed/updated (optional)
updated = int(data.get("updated"))
while i < len(kid_list):
batch_kids = kid_list[i : i + batch_size]
batch_keys = {kid: normalized_keys[kid] for kid in batch_kids}
return added + updated
try:
response = self.session.post(
url=f"{self.uri}/{service.lower()}",
json={"content_keys": batch_keys},
headers={"Accept": "application/json"},
)
# Check for HTTP errors that suggest batch is too large
if response.status_code in (413, 414, 400) and batch_size > 1:
if batch_size > 100:
batch_size -= 100
else:
batch_size = 1
continue
data = response.json()
except Exception:
# JSON decode error or connection issue - try smaller batch
if batch_size > 1:
if batch_size > 100:
batch_size -= 100
else:
batch_size = 1
continue
raise
code = int(data.get("code", 0))
message = data.get("message")
error = {
0: None,
1: Exceptions.AuthRejected,
2: Exceptions.TooManyRequests,
3: Exceptions.ServiceTagInvalid,
4: Exceptions.KeyIdInvalid,
5: Exceptions.ContentKeyInvalid,
}.get(code, ValueError)
if error:
raise error(f"{message} ({code})")
# each kid:key that was new to the vault (optional)
added = int(data.get("added", 0))
# each key for a kid that was changed/updated (optional)
updated = int(data.get("updated", 0))
total_added += added + updated
i += batch_size
return total_added
def get_services(self) -> Iterator[str]:
data = self.session.post(url=self.uri, headers={"Accept": "application/json"}).json()

View File

@@ -119,9 +119,25 @@ class SQLite(Vault):
cursor = conn.cursor()
try:
placeholders = ",".join(["?"] * len(kid_keys))
cursor.execute(f"SELECT kid FROM `{service}` WHERE kid IN ({placeholders})", list(kid_keys.keys()))
existing_kids = {row[0] for row in cursor.fetchall()}
# Query existing KIDs in batches to avoid SQLite variable limit
# Try larger batch first (newer SQLite supports 32766), fall back to 500 if needed
existing_kids: set[str] = set()
kid_list = list(kid_keys.keys())
batch_size = 32000
i = 0
while i < len(kid_list):
batch = kid_list[i : i + batch_size]
placeholders = ",".join(["?"] * len(batch))
try:
cursor.execute(f"SELECT kid FROM `{service}` WHERE kid IN ({placeholders})", batch)
existing_kids.update(row[0] for row in cursor.fetchall())
i += batch_size
except sqlite3.OperationalError as e:
if "too many SQL variables" in str(e) and batch_size > 500:
batch_size = 500
continue
raise
new_keys = {kid: key for kid, key in kid_keys.items() if kid not in existing_kids}