Merge branch 'main' into main
This commit is contained in:
58
CHANGELOG.md
58
CHANGELOG.md
@@ -5,6 +5,64 @@ All notable changes to this project will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [2.3.0] - 2026-01-18
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- **Unicode Filenames Option**: New `unicode_filenames` config option to preserve native characters
|
||||||
|
- Allows disabling ASCII transliteration in filenames
|
||||||
|
- Preserves Korean, Japanese, Chinese, and other native language characters
|
||||||
|
- Closes #49
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- **WebVTT Cue Handling**: Handle WebVTT cue identifiers and overlapping multi-line cues
|
||||||
|
- Added detection and sanitization for cue identifiers (Q0, Q1, etc.) before timing lines
|
||||||
|
- Added merging of overlapping cues with different line positions into multi-line subtitles
|
||||||
|
- Fixes parsing issues with pysubs2/pycaption on certain WebVTT files
|
||||||
|
- **Widevine PSSH Filtering**: Filter Widevine PSSH by system ID instead of sorting
|
||||||
|
- Fixes KeyError crash when unsupported DRM systems are present in init segments
|
||||||
|
- **TTML Negative Values**: Handle negative values in multi-value TTML attributes
|
||||||
|
- Fixes pycaption parse errors for attributes like `tts:extent="-5% 7.5%"`
|
||||||
|
- Closes #47
|
||||||
|
- **ASS Font Names**: Strip whitespace from ASS font names
|
||||||
|
- Handles ASS subtitle files with spaces after commas in Style definitions
|
||||||
|
- Fixes #57
|
||||||
|
- **Shaka-Packager Error Messages**: Include shaka-packager binary path in error messages
|
||||||
|
- **N_m3u8DL-RE Merge and Decryption**: Handle merge and decryption properly
|
||||||
|
- Prevents audio corruption ("Box 'OG 2' size is too large") with DASH manifests
|
||||||
|
- Fixes duplicate init segment writing when using N_m3u8DL-RE
|
||||||
|
- **DASH Placeholder KIDs**: Handle placeholder KIDs and improve DRM init from segments
|
||||||
|
- Detects and replaces placeholder/test KIDs in Widevine PSSH
|
||||||
|
- Adds CENC namespace support for kid/default_KID attributes
|
||||||
|
- **PlayReady PSSH Comparison**: Correct PSSH system ID comparison in PlayReady
|
||||||
|
- Removes erroneous `.bytes` accessor from PSSH.SYSTEM_ID comparisons
|
||||||
|
|
||||||
|
## [2.2.0] - 2026-01-15
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- **CDM-Aware PlayReady Fallback Detection**: Intelligent DRM fallback based on selected CDM
|
||||||
|
- Adds PlayReady PSSH/KID extraction from track and init data with CDM-aware ordering
|
||||||
|
- When PlayReady CDM is selected, tries PlayReady first then falls back to Widevine
|
||||||
|
- When Widevine CDM is selected (default), tries Widevine first then falls back to PlayReady
|
||||||
|
- **Comprehensive Debug Logging**: Enhanced debug logging for downloaders and muxing
|
||||||
|
- Added detailed debug logging to aria2c, curl_impersonate, n_m3u8dl_re, and requests downloaders
|
||||||
|
- Enhanced manifest parsers (DASH, HLS, ISM) with debug logging
|
||||||
|
- Added debug logging to track muxing operations
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- **Hybrid DV+HDR10 Filename Detection**: Fixed HDR10 detection in hybrid Dolby Vision filenames
|
||||||
|
- Hybrid DV+HDR10 files were incorrectly named "DV.H.265" instead of "DV.HDR.H.265"
|
||||||
|
- Now checks both `hdr_format_full` and `hdr_format_commercial` fields for HDR10 indicators
|
||||||
|
- **Vault Adaptive Batch Sizing**: Improved bulk key operations with adaptive batch sizing
|
||||||
|
- Prevents query limit issues when retrieving large numbers of keys from vaults
|
||||||
|
- Dynamically adjusts batch sizes based on vault response characteristics
|
||||||
|
- **Test Command Improvements**: Enhanced test command error detection and sorting
|
||||||
|
- Improved error detection in test command output
|
||||||
|
- Added natural sorting for test results
|
||||||
|
|
||||||
## [2.1.0] - 2025-11-27
|
## [2.1.0] - 2025-11-27
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "unshackle"
|
name = "unshackle"
|
||||||
version = "2.1.0"
|
version = "2.3.0"
|
||||||
description = "Modular Movie, TV, and Music Archival Software."
|
description = "Modular Movie, TV, and Music Archival Software."
|
||||||
authors = [{ name = "unshackle team" }]
|
authors = [{ name = "unshackle team" }]
|
||||||
requires-python = ">=3.10,<3.13"
|
requires-python = ">=3.10,<3.13"
|
||||||
|
|||||||
@@ -1567,7 +1567,7 @@ class dl:
|
|||||||
if subtitle.codec == Subtitle.Codec.SubStationAlphav4:
|
if subtitle.codec == Subtitle.Codec.SubStationAlphav4:
|
||||||
for line in subtitle.path.read_text("utf8").splitlines():
|
for line in subtitle.path.read_text("utf8").splitlines():
|
||||||
if line.startswith("Style: "):
|
if line.startswith("Style: "):
|
||||||
font_names.append(line.removesuffix("Style: ").split(",")[1])
|
font_names.append(line.removeprefix("Style: ").split(",")[1].strip())
|
||||||
|
|
||||||
font_count, missing_fonts = self.attach_subtitle_fonts(
|
font_count, missing_fonts = self.attach_subtitle_fonts(
|
||||||
font_names, title, temp_font_files
|
font_names, title, temp_font_files
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@@ -8,6 +9,11 @@ from unshackle.core import binaries
|
|||||||
from unshackle.core.constants import context_settings
|
from unshackle.core.constants import context_settings
|
||||||
|
|
||||||
|
|
||||||
|
def _natural_sort_key(path: Path) -> list:
|
||||||
|
"""Sort key for natural sorting (S01E01 before S01E10)."""
|
||||||
|
return [int(part) if part.isdigit() else part.lower() for part in re.split(r"(\d+)", path.name)]
|
||||||
|
|
||||||
|
|
||||||
@click.group(short_help="Various helper scripts and programs.", context_settings=context_settings)
|
@click.group(short_help="Various helper scripts and programs.", context_settings=context_settings)
|
||||||
def util() -> None:
|
def util() -> None:
|
||||||
"""Various helper scripts and programs."""
|
"""Various helper scripts and programs."""
|
||||||
@@ -49,7 +55,7 @@ def crop(path: Path, aspect: str, letter: bool, offset: int, preview: bool) -> N
|
|||||||
raise click.ClickException('FFmpeg executable "ffmpeg" not found but is required.')
|
raise click.ClickException('FFmpeg executable "ffmpeg" not found but is required.')
|
||||||
|
|
||||||
if path.is_dir():
|
if path.is_dir():
|
||||||
paths = list(path.glob("*.mkv")) + list(path.glob("*.mp4"))
|
paths = sorted(list(path.glob("*.mkv")) + list(path.glob("*.mp4")), key=_natural_sort_key)
|
||||||
else:
|
else:
|
||||||
paths = [path]
|
paths = [path]
|
||||||
for video_path in paths:
|
for video_path in paths:
|
||||||
@@ -140,7 +146,7 @@ def range_(path: Path, full: bool, preview: bool) -> None:
|
|||||||
raise click.ClickException('FFmpeg executable "ffmpeg" not found but is required.')
|
raise click.ClickException('FFmpeg executable "ffmpeg" not found but is required.')
|
||||||
|
|
||||||
if path.is_dir():
|
if path.is_dir():
|
||||||
paths = list(path.glob("*.mkv")) + list(path.glob("*.mp4"))
|
paths = sorted(list(path.glob("*.mkv")) + list(path.glob("*.mp4")), key=_natural_sort_key)
|
||||||
else:
|
else:
|
||||||
paths = [path]
|
paths = [path]
|
||||||
for video_path in paths:
|
for video_path in paths:
|
||||||
@@ -225,16 +231,18 @@ def test(path: Path, map_: str) -> None:
|
|||||||
raise click.ClickException('FFmpeg executable "ffmpeg" not found but is required.')
|
raise click.ClickException('FFmpeg executable "ffmpeg" not found but is required.')
|
||||||
|
|
||||||
if path.is_dir():
|
if path.is_dir():
|
||||||
paths = list(path.glob("*.mkv")) + list(path.glob("*.mp4"))
|
paths = sorted(list(path.glob("*.mkv")) + list(path.glob("*.mp4")), key=_natural_sort_key)
|
||||||
else:
|
else:
|
||||||
paths = [path]
|
paths = [path]
|
||||||
for video_path in paths:
|
for video_path in paths:
|
||||||
print("Starting...")
|
print(f"Testing: {video_path.name}")
|
||||||
p = subprocess.Popen(
|
p = subprocess.Popen(
|
||||||
[
|
[
|
||||||
binaries.FFMPEG,
|
binaries.FFMPEG,
|
||||||
"-hide_banner",
|
"-hide_banner",
|
||||||
"-benchmark",
|
"-benchmark",
|
||||||
|
"-err_detect",
|
||||||
|
"+crccheck+bitstream+buffer+careful+compliant+aggressive",
|
||||||
"-i",
|
"-i",
|
||||||
str(video_path),
|
str(video_path),
|
||||||
"-map",
|
"-map",
|
||||||
@@ -255,13 +263,13 @@ def test(path: Path, map_: str) -> None:
|
|||||||
reached_output = True
|
reached_output = True
|
||||||
if not reached_output:
|
if not reached_output:
|
||||||
continue
|
continue
|
||||||
if line.startswith("["): # error of some kind
|
if line.startswith("[") and not line.startswith("[out#"):
|
||||||
errors += 1
|
errors += 1
|
||||||
stream, error = line.split("] ", maxsplit=1)
|
stream, error = line.split("] ", maxsplit=1)
|
||||||
stream = stream.split(" @ ")[0]
|
stream = stream.split(" @ ")[0]
|
||||||
line = f"{stream} ERROR: {error}"
|
line = f"{stream} ERROR: {error}"
|
||||||
print(line)
|
print(line)
|
||||||
p.stderr.close()
|
p.stderr.close()
|
||||||
print(f"Finished with {errors} Errors, Cleaning up...")
|
print(f"Finished with {errors} error(s)")
|
||||||
p.terminate()
|
p.terminate()
|
||||||
p.wait()
|
p.wait()
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
__version__ = "2.1.0"
|
__version__ = "2.3.0"
|
||||||
|
|||||||
@@ -95,6 +95,7 @@ class Config:
|
|||||||
self.update_check_interval: int = kwargs.get("update_check_interval", 24)
|
self.update_check_interval: int = kwargs.get("update_check_interval", 24)
|
||||||
self.scene_naming: bool = kwargs.get("scene_naming", True)
|
self.scene_naming: bool = kwargs.get("scene_naming", True)
|
||||||
self.series_year: bool = kwargs.get("series_year", True)
|
self.series_year: bool = kwargs.get("series_year", True)
|
||||||
|
self.unicode_filenames: bool = kwargs.get("unicode_filenames", False)
|
||||||
|
|
||||||
self.title_cache_time: int = kwargs.get("title_cache_time", 1800) # 30 minutes default
|
self.title_cache_time: int = kwargs.get("title_cache_time", 1800) # 30 minutes default
|
||||||
self.title_cache_max_retention: int = kwargs.get("title_cache_max_retention", 86400) # 24 hours default
|
self.title_cache_max_retention: int = kwargs.get("title_cache_max_retention", 86400) # 24 hours default
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ from unshackle.core import binaries
|
|||||||
from unshackle.core.config import config
|
from unshackle.core.config import config
|
||||||
from unshackle.core.console import console
|
from unshackle.core.console import console
|
||||||
from unshackle.core.constants import DOWNLOAD_CANCELLED
|
from unshackle.core.constants import DOWNLOAD_CANCELLED
|
||||||
from unshackle.core.utilities import get_extension, get_free_port
|
from unshackle.core.utilities import get_debug_logger, get_extension, get_free_port
|
||||||
|
|
||||||
|
|
||||||
def rpc(caller: Callable, secret: str, method: str, params: Optional[list[Any]] = None) -> Any:
|
def rpc(caller: Callable, secret: str, method: str, params: Optional[list[Any]] = None) -> Any:
|
||||||
@@ -58,6 +58,8 @@ def download(
|
|||||||
proxy: Optional[str] = None,
|
proxy: Optional[str] = None,
|
||||||
max_workers: Optional[int] = None,
|
max_workers: Optional[int] = None,
|
||||||
) -> Generator[dict[str, Any], None, None]:
|
) -> Generator[dict[str, Any], None, None]:
|
||||||
|
debug_logger = get_debug_logger()
|
||||||
|
|
||||||
if not urls:
|
if not urls:
|
||||||
raise ValueError("urls must be provided and not empty")
|
raise ValueError("urls must be provided and not empty")
|
||||||
elif not isinstance(urls, (str, dict, list)):
|
elif not isinstance(urls, (str, dict, list)):
|
||||||
@@ -91,6 +93,13 @@ def download(
|
|||||||
urls = [urls]
|
urls = [urls]
|
||||||
|
|
||||||
if not binaries.Aria2:
|
if not binaries.Aria2:
|
||||||
|
if debug_logger:
|
||||||
|
debug_logger.log(
|
||||||
|
level="ERROR",
|
||||||
|
operation="downloader_aria2c_binary_missing",
|
||||||
|
message="Aria2c executable not found in PATH or local binaries directory",
|
||||||
|
context={"searched_names": ["aria2c", "aria2"]},
|
||||||
|
)
|
||||||
raise EnvironmentError("Aria2c executable not found...")
|
raise EnvironmentError("Aria2c executable not found...")
|
||||||
|
|
||||||
if proxy and not proxy.lower().startswith("http://"):
|
if proxy and not proxy.lower().startswith("http://"):
|
||||||
@@ -180,6 +189,28 @@ def download(
|
|||||||
continue
|
continue
|
||||||
arguments.extend(["--header", f"{header}: {value}"])
|
arguments.extend(["--header", f"{header}: {value}"])
|
||||||
|
|
||||||
|
if debug_logger:
|
||||||
|
first_url = urls[0] if isinstance(urls[0], str) else urls[0].get("url", "")
|
||||||
|
url_display = first_url[:200] + "..." if len(first_url) > 200 else first_url
|
||||||
|
debug_logger.log(
|
||||||
|
level="DEBUG",
|
||||||
|
operation="downloader_aria2c_start",
|
||||||
|
message="Starting Aria2c download",
|
||||||
|
context={
|
||||||
|
"binary_path": str(binaries.Aria2),
|
||||||
|
"url_count": len(urls),
|
||||||
|
"first_url": url_display,
|
||||||
|
"output_dir": str(output_dir),
|
||||||
|
"filename": filename,
|
||||||
|
"max_concurrent_downloads": max_concurrent_downloads,
|
||||||
|
"max_connection_per_server": max_connection_per_server,
|
||||||
|
"split": split,
|
||||||
|
"file_allocation": file_allocation,
|
||||||
|
"has_proxy": bool(proxy),
|
||||||
|
"rpc_port": rpc_port,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
yield dict(total=len(urls))
|
yield dict(total=len(urls))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -226,6 +257,20 @@ def download(
|
|||||||
textwrap.wrap(error, width=console.width - 20, initial_indent="")
|
textwrap.wrap(error, width=console.width - 20, initial_indent="")
|
||||||
)
|
)
|
||||||
console.log(Text.from_ansi("\n[Aria2c]: " + error_pretty))
|
console.log(Text.from_ansi("\n[Aria2c]: " + error_pretty))
|
||||||
|
if debug_logger:
|
||||||
|
debug_logger.log(
|
||||||
|
level="ERROR",
|
||||||
|
operation="downloader_aria2c_download_error",
|
||||||
|
message=f"Aria2c download failed: {dl['errorMessage']}",
|
||||||
|
context={
|
||||||
|
"gid": dl["gid"],
|
||||||
|
"error_code": dl["errorCode"],
|
||||||
|
"error_message": dl["errorMessage"],
|
||||||
|
"used_uri": used_uri[:200] + "..." if len(used_uri) > 200 else used_uri,
|
||||||
|
"completed_length": dl.get("completedLength"),
|
||||||
|
"total_length": dl.get("totalLength"),
|
||||||
|
},
|
||||||
|
)
|
||||||
raise ValueError(error)
|
raise ValueError(error)
|
||||||
|
|
||||||
if number_stopped == len(urls):
|
if number_stopped == len(urls):
|
||||||
@@ -237,7 +282,31 @@ def download(
|
|||||||
p.wait()
|
p.wait()
|
||||||
|
|
||||||
if p.returncode != 0:
|
if p.returncode != 0:
|
||||||
|
if debug_logger:
|
||||||
|
debug_logger.log(
|
||||||
|
level="ERROR",
|
||||||
|
operation="downloader_aria2c_failed",
|
||||||
|
message=f"Aria2c exited with code {p.returncode}",
|
||||||
|
context={
|
||||||
|
"returncode": p.returncode,
|
||||||
|
"url_count": len(urls),
|
||||||
|
"output_dir": str(output_dir),
|
||||||
|
},
|
||||||
|
)
|
||||||
raise subprocess.CalledProcessError(p.returncode, arguments)
|
raise subprocess.CalledProcessError(p.returncode, arguments)
|
||||||
|
|
||||||
|
if debug_logger:
|
||||||
|
debug_logger.log(
|
||||||
|
level="DEBUG",
|
||||||
|
operation="downloader_aria2c_complete",
|
||||||
|
message="Aria2c download completed successfully",
|
||||||
|
context={
|
||||||
|
"url_count": len(urls),
|
||||||
|
"output_dir": str(output_dir),
|
||||||
|
"filename": filename,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
except ConnectionResetError:
|
except ConnectionResetError:
|
||||||
# interrupted while passing URI to download
|
# interrupted while passing URI to download
|
||||||
raise KeyboardInterrupt()
|
raise KeyboardInterrupt()
|
||||||
@@ -251,9 +320,20 @@ def download(
|
|||||||
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
||||||
yield dict(downloaded="[yellow]CANCELLED")
|
yield dict(downloaded="[yellow]CANCELLED")
|
||||||
raise
|
raise
|
||||||
except Exception:
|
except Exception as e:
|
||||||
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
||||||
yield dict(downloaded="[red]FAILED")
|
yield dict(downloaded="[red]FAILED")
|
||||||
|
if debug_logger and not isinstance(e, (subprocess.CalledProcessError, ValueError)):
|
||||||
|
debug_logger.log(
|
||||||
|
level="ERROR",
|
||||||
|
operation="downloader_aria2c_exception",
|
||||||
|
message=f"Unexpected error during Aria2c download: {e}",
|
||||||
|
error=e,
|
||||||
|
context={
|
||||||
|
"url_count": len(urls),
|
||||||
|
"output_dir": str(output_dir),
|
||||||
|
},
|
||||||
|
)
|
||||||
raise
|
raise
|
||||||
finally:
|
finally:
|
||||||
rpc(caller=partial(rpc_session.post, url=rpc_uri), secret=rpc_secret, method="aria2.shutdown")
|
rpc(caller=partial(rpc_session.post, url=rpc_uri), secret=rpc_secret, method="aria2.shutdown")
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ from rich import filesize
|
|||||||
|
|
||||||
from unshackle.core.config import config
|
from unshackle.core.config import config
|
||||||
from unshackle.core.constants import DOWNLOAD_CANCELLED
|
from unshackle.core.constants import DOWNLOAD_CANCELLED
|
||||||
from unshackle.core.utilities import get_extension
|
from unshackle.core.utilities import get_debug_logger, get_extension
|
||||||
|
|
||||||
MAX_ATTEMPTS = 5
|
MAX_ATTEMPTS = 5
|
||||||
RETRY_WAIT = 2
|
RETRY_WAIT = 2
|
||||||
@@ -189,6 +189,8 @@ def curl_impersonate(
|
|||||||
if not isinstance(max_workers, (int, type(None))):
|
if not isinstance(max_workers, (int, type(None))):
|
||||||
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
|
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
|
||||||
|
|
||||||
|
debug_logger = get_debug_logger()
|
||||||
|
|
||||||
if not isinstance(urls, list):
|
if not isinstance(urls, list):
|
||||||
urls = [urls]
|
urls = [urls]
|
||||||
|
|
||||||
@@ -209,6 +211,24 @@ def curl_impersonate(
|
|||||||
if proxy:
|
if proxy:
|
||||||
session.proxies.update({"all": proxy})
|
session.proxies.update({"all": proxy})
|
||||||
|
|
||||||
|
if debug_logger:
|
||||||
|
first_url = urls[0].get("url", "") if urls else ""
|
||||||
|
url_display = first_url[:200] + "..." if len(first_url) > 200 else first_url
|
||||||
|
debug_logger.log(
|
||||||
|
level="DEBUG",
|
||||||
|
operation="downloader_curl_impersonate_start",
|
||||||
|
message="Starting curl_impersonate download",
|
||||||
|
context={
|
||||||
|
"url_count": len(urls),
|
||||||
|
"first_url": url_display,
|
||||||
|
"output_dir": str(output_dir),
|
||||||
|
"filename": filename,
|
||||||
|
"max_workers": max_workers,
|
||||||
|
"browser": BROWSER,
|
||||||
|
"has_proxy": bool(proxy),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
yield dict(total=len(urls))
|
yield dict(total=len(urls))
|
||||||
|
|
||||||
download_sizes = []
|
download_sizes = []
|
||||||
@@ -235,11 +255,23 @@ def curl_impersonate(
|
|||||||
# tell dl that it was cancelled
|
# tell dl that it was cancelled
|
||||||
# the pool is already shut down, so exiting loop is fine
|
# the pool is already shut down, so exiting loop is fine
|
||||||
raise
|
raise
|
||||||
except Exception:
|
except Exception as e:
|
||||||
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
||||||
yield dict(downloaded="[red]FAILING")
|
yield dict(downloaded="[red]FAILING")
|
||||||
pool.shutdown(wait=True, cancel_futures=True)
|
pool.shutdown(wait=True, cancel_futures=True)
|
||||||
yield dict(downloaded="[red]FAILED")
|
yield dict(downloaded="[red]FAILED")
|
||||||
|
if debug_logger:
|
||||||
|
debug_logger.log(
|
||||||
|
level="ERROR",
|
||||||
|
operation="downloader_curl_impersonate_failed",
|
||||||
|
message=f"curl_impersonate download failed: {e}",
|
||||||
|
error=e,
|
||||||
|
context={
|
||||||
|
"url_count": len(urls),
|
||||||
|
"output_dir": str(output_dir),
|
||||||
|
"browser": BROWSER,
|
||||||
|
},
|
||||||
|
)
|
||||||
# tell dl that it failed
|
# tell dl that it failed
|
||||||
# the pool is already shut down, so exiting loop is fine
|
# the pool is already shut down, so exiting loop is fine
|
||||||
raise
|
raise
|
||||||
@@ -260,5 +292,17 @@ def curl_impersonate(
|
|||||||
last_speed_refresh = now
|
last_speed_refresh = now
|
||||||
download_sizes.clear()
|
download_sizes.clear()
|
||||||
|
|
||||||
|
if debug_logger:
|
||||||
|
debug_logger.log(
|
||||||
|
level="DEBUG",
|
||||||
|
operation="downloader_curl_impersonate_complete",
|
||||||
|
message="curl_impersonate download completed successfully",
|
||||||
|
context={
|
||||||
|
"url_count": len(urls),
|
||||||
|
"output_dir": str(output_dir),
|
||||||
|
"filename": filename,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
__all__ = ("curl_impersonate",)
|
__all__ = ("curl_impersonate",)
|
||||||
|
|||||||
@@ -10,9 +10,11 @@ import requests
|
|||||||
from requests.cookies import cookiejar_from_dict, get_cookie_header
|
from requests.cookies import cookiejar_from_dict, get_cookie_header
|
||||||
|
|
||||||
from unshackle.core import binaries
|
from unshackle.core import binaries
|
||||||
|
from unshackle.core.binaries import FFMPEG, ShakaPackager, Mp4decrypt
|
||||||
from unshackle.core.config import config
|
from unshackle.core.config import config
|
||||||
from unshackle.core.console import console
|
from unshackle.core.console import console
|
||||||
from unshackle.core.constants import DOWNLOAD_CANCELLED
|
from unshackle.core.constants import DOWNLOAD_CANCELLED
|
||||||
|
from unshackle.core.utilities import get_debug_logger
|
||||||
|
|
||||||
PERCENT_RE = re.compile(r"(\d+\.\d+%)")
|
PERCENT_RE = re.compile(r"(\d+\.\d+%)")
|
||||||
SPEED_RE = re.compile(r"(\d+\.\d+(?:MB|KB)ps)")
|
SPEED_RE = re.compile(r"(\d+\.\d+(?:MB|KB)ps)")
|
||||||
@@ -66,12 +68,17 @@ def get_track_selection_args(track: Any) -> list[str]:
|
|||||||
parts = []
|
parts = []
|
||||||
|
|
||||||
if track_type == "Audio":
|
if track_type == "Audio":
|
||||||
if track_id := representation.get("id") or adaptation_set.get("audioTrackId"):
|
track_id = representation.get("id") or adaptation_set.get("audioTrackId")
|
||||||
parts.append(rf"id={track_id}")
|
lang = representation.get("lang") or adaptation_set.get("lang")
|
||||||
|
|
||||||
|
if track_id:
|
||||||
|
parts.append(rf'"id=\b{track_id}\b"')
|
||||||
|
if lang:
|
||||||
|
parts.append(f"lang={lang}")
|
||||||
else:
|
else:
|
||||||
if codecs := representation.get("codecs"):
|
if codecs := representation.get("codecs"):
|
||||||
parts.append(f"codecs={codecs}")
|
parts.append(f"codecs={codecs}")
|
||||||
if lang := representation.get("lang") or adaptation_set.get("lang"):
|
if lang:
|
||||||
parts.append(f"lang={lang}")
|
parts.append(f"lang={lang}")
|
||||||
if bw := representation.get("bandwidth"):
|
if bw := representation.get("bandwidth"):
|
||||||
bitrate = int(bw) // 1000
|
bitrate = int(bw) // 1000
|
||||||
@@ -178,15 +185,32 @@ def build_download_args(
|
|||||||
"--write-meta-json": False,
|
"--write-meta-json": False,
|
||||||
"--no-log": True,
|
"--no-log": True,
|
||||||
}
|
}
|
||||||
|
if FFMPEG:
|
||||||
|
args["--ffmpeg-binary-path"] = str(FFMPEG)
|
||||||
if proxy:
|
if proxy:
|
||||||
args["--custom-proxy"] = proxy
|
args["--custom-proxy"] = proxy
|
||||||
if skip_merge:
|
if skip_merge:
|
||||||
args["--skip-merge"] = skip_merge
|
args["--skip-merge"] = skip_merge
|
||||||
if ad_keyword:
|
if ad_keyword:
|
||||||
args["--ad-keyword"] = ad_keyword
|
args["--ad-keyword"] = ad_keyword
|
||||||
|
|
||||||
if content_keys:
|
if content_keys:
|
||||||
args["--key"] = next((f"{kid.hex}:{key.lower()}" for kid, key in content_keys.items()), None)
|
args["--key"] = next((f"{kid.hex}:{key.lower()}" for kid, key in content_keys.items()), None)
|
||||||
args["--decryption-engine"] = DECRYPTION_ENGINE.get(config.decryption.lower()) or "SHAKA_PACKAGER"
|
|
||||||
|
decryption_config = config.decryption.lower()
|
||||||
|
engine_name = DECRYPTION_ENGINE.get(decryption_config) or "SHAKA_PACKAGER"
|
||||||
|
args["--decryption-engine"] = engine_name
|
||||||
|
|
||||||
|
binary_path = None
|
||||||
|
if engine_name == "SHAKA_PACKAGER":
|
||||||
|
if ShakaPackager:
|
||||||
|
binary_path = str(ShakaPackager)
|
||||||
|
elif engine_name == "MP4DECRYPT":
|
||||||
|
if Mp4decrypt:
|
||||||
|
binary_path = str(Mp4decrypt)
|
||||||
|
if binary_path:
|
||||||
|
args["--decryption-binary-path"] = binary_path
|
||||||
|
|
||||||
if custom_args:
|
if custom_args:
|
||||||
args.update(custom_args)
|
args.update(custom_args)
|
||||||
|
|
||||||
@@ -224,6 +248,8 @@ def download(
|
|||||||
content_keys: dict[str, Any] | None,
|
content_keys: dict[str, Any] | None,
|
||||||
skip_merge: bool | None = False,
|
skip_merge: bool | None = False,
|
||||||
) -> Generator[dict[str, Any], None, None]:
|
) -> Generator[dict[str, Any], None, None]:
|
||||||
|
debug_logger = get_debug_logger()
|
||||||
|
|
||||||
if not urls:
|
if not urls:
|
||||||
raise ValueError("urls must be provided and not empty")
|
raise ValueError("urls must be provided and not empty")
|
||||||
if not isinstance(urls, (str, dict, list)):
|
if not isinstance(urls, (str, dict, list)):
|
||||||
@@ -250,6 +276,18 @@ def download(
|
|||||||
|
|
||||||
if not binaries.N_m3u8DL_RE:
|
if not binaries.N_m3u8DL_RE:
|
||||||
raise EnvironmentError("N_m3u8DL-RE executable not found...")
|
raise EnvironmentError("N_m3u8DL-RE executable not found...")
|
||||||
|
|
||||||
|
decryption_engine = config.decryption.lower()
|
||||||
|
binary_path = None
|
||||||
|
|
||||||
|
if content_keys:
|
||||||
|
if decryption_engine == "shaka":
|
||||||
|
binary_path = binaries.ShakaPackager
|
||||||
|
elif decryption_engine == "mp4decrypt":
|
||||||
|
binary_path = binaries.Mp4decrypt
|
||||||
|
|
||||||
|
if binary_path:
|
||||||
|
binary_path = Path(binary_path)
|
||||||
|
|
||||||
effective_max_workers = max_workers or min(32, (os.cpu_count() or 1) + 4)
|
effective_max_workers = max_workers or min(32, (os.cpu_count() or 1) + 4)
|
||||||
|
|
||||||
@@ -275,11 +313,49 @@ def download(
|
|||||||
skip_merge=skip_merge,
|
skip_merge=skip_merge,
|
||||||
ad_keyword=ad_keyword,
|
ad_keyword=ad_keyword,
|
||||||
)
|
)
|
||||||
arguments.extend(get_track_selection_args(track))
|
selection_args = get_track_selection_args(track)
|
||||||
|
arguments.extend(selection_args)
|
||||||
|
|
||||||
|
log_file_path: Path | None = None
|
||||||
|
if debug_logger:
|
||||||
|
log_file_path = output_dir / f".n_m3u8dl_re_{filename}.log"
|
||||||
|
arguments.extend(["--log-file-path", str(log_file_path)])
|
||||||
|
|
||||||
|
track_url_display = track.url[:200] + "..." if len(track.url) > 200 else track.url
|
||||||
|
debug_logger.log(
|
||||||
|
level="DEBUG",
|
||||||
|
operation="downloader_n_m3u8dl_re_start",
|
||||||
|
message="Starting N_m3u8DL-RE download",
|
||||||
|
context={
|
||||||
|
"binary_path": str(binaries.N_m3u8DL_RE),
|
||||||
|
"track_id": getattr(track, "id", None),
|
||||||
|
"track_type": track.__class__.__name__,
|
||||||
|
"track_url": track_url_display,
|
||||||
|
"output_dir": str(output_dir),
|
||||||
|
"filename": filename,
|
||||||
|
"thread_count": thread_count,
|
||||||
|
"retry_count": retry_count,
|
||||||
|
"has_content_keys": bool(content_keys),
|
||||||
|
"content_key_count": len(content_keys) if content_keys else 0,
|
||||||
|
"has_proxy": bool(proxy),
|
||||||
|
"skip_merge": skip_merge,
|
||||||
|
"has_custom_args": bool(track.downloader_args),
|
||||||
|
"selection_args": selection_args,
|
||||||
|
"descriptor": track.descriptor.name if hasattr(track, "descriptor") else None,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
arguments.extend(["--no-log", "true"])
|
||||||
|
|
||||||
yield {"total": 100}
|
yield {"total": 100}
|
||||||
yield {"downloaded": "Parsing streams..."}
|
yield {"downloaded": "Parsing streams..."}
|
||||||
|
|
||||||
|
env = os.environ.copy()
|
||||||
|
|
||||||
|
if binary_path and binary_path.exists():
|
||||||
|
binary_dir = str(binary_path.parent)
|
||||||
|
env["PATH"] = binary_dir + os.pathsep + env["PATH"]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with subprocess.Popen(
|
with subprocess.Popen(
|
||||||
[binaries.N_m3u8DL_RE, *arguments],
|
[binaries.N_m3u8DL_RE, *arguments],
|
||||||
@@ -287,6 +363,7 @@ def download(
|
|||||||
stderr=subprocess.STDOUT,
|
stderr=subprocess.STDOUT,
|
||||||
text=True,
|
text=True,
|
||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
|
env=env, # Assign to virtual environment variables
|
||||||
) as process:
|
) as process:
|
||||||
last_line = ""
|
last_line = ""
|
||||||
track_type = track.__class__.__name__
|
track_type = track.__class__.__name__
|
||||||
@@ -297,12 +374,16 @@ def download(
|
|||||||
continue
|
continue
|
||||||
last_line = output
|
last_line = output
|
||||||
|
|
||||||
|
if ERROR_RE.search(output):
|
||||||
|
console.log(f"[N_m3u8DL-RE]: {output}")
|
||||||
|
|
||||||
if warn_match := WARN_RE.search(output):
|
if warn_match := WARN_RE.search(output):
|
||||||
console.log(f"{track_type} {warn_match.group(1)}")
|
console.log(f"{track_type} {warn_match.group(1)}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if speed_match := SPEED_RE.search(output):
|
if speed_match := SPEED_RE.search(output):
|
||||||
size = size_match.group(1) if (size_match := SIZE_RE.search(output)) else ""
|
size_match = SIZE_RE.search(output)
|
||||||
|
size = size_match.group(1) if size_match else ""
|
||||||
yield {"downloaded": f"{speed_match.group(1)} {size}"}
|
yield {"downloaded": f"{speed_match.group(1)} {size}"}
|
||||||
|
|
||||||
if percent_match := PERCENT_RE.search(output):
|
if percent_match := PERCENT_RE.search(output):
|
||||||
@@ -310,11 +391,45 @@ def download(
|
|||||||
yield {"completed": progress} if progress < 100 else {"downloaded": "Merging"}
|
yield {"completed": progress} if progress < 100 else {"downloaded": "Merging"}
|
||||||
|
|
||||||
process.wait()
|
process.wait()
|
||||||
|
|
||||||
if process.returncode != 0:
|
if process.returncode != 0:
|
||||||
|
if debug_logger and log_file_path:
|
||||||
|
log_contents = ""
|
||||||
|
if log_file_path.exists():
|
||||||
|
try:
|
||||||
|
log_contents = log_file_path.read_text(encoding="utf-8", errors="replace")
|
||||||
|
except Exception:
|
||||||
|
log_contents = "<failed to read log file>"
|
||||||
|
|
||||||
|
debug_logger.log(
|
||||||
|
level="ERROR",
|
||||||
|
operation="downloader_n_m3u8dl_re_failed",
|
||||||
|
message=f"N_m3u8DL-RE exited with code {process.returncode}",
|
||||||
|
context={
|
||||||
|
"returncode": process.returncode,
|
||||||
|
"track_id": getattr(track, "id", None),
|
||||||
|
"track_type": track.__class__.__name__,
|
||||||
|
"last_line": last_line,
|
||||||
|
"log_file_contents": log_contents,
|
||||||
|
},
|
||||||
|
)
|
||||||
if error_match := ERROR_RE.search(last_line):
|
if error_match := ERROR_RE.search(last_line):
|
||||||
raise ValueError(f"[N_m3u8DL-RE]: {error_match.group(1)}")
|
raise ValueError(f"[N_m3u8DL-RE]: {error_match.group(1)}")
|
||||||
raise subprocess.CalledProcessError(process.returncode, arguments)
|
raise subprocess.CalledProcessError(process.returncode, arguments)
|
||||||
|
|
||||||
|
if debug_logger:
|
||||||
|
debug_logger.log(
|
||||||
|
level="DEBUG",
|
||||||
|
operation="downloader_n_m3u8dl_re_complete",
|
||||||
|
message="N_m3u8DL-RE download completed successfully",
|
||||||
|
context={
|
||||||
|
"track_id": getattr(track, "id", None),
|
||||||
|
"track_type": track.__class__.__name__,
|
||||||
|
"output_dir": str(output_dir),
|
||||||
|
"filename": filename,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
except ConnectionResetError:
|
except ConnectionResetError:
|
||||||
# interrupted while passing URI to download
|
# interrupted while passing URI to download
|
||||||
raise KeyboardInterrupt()
|
raise KeyboardInterrupt()
|
||||||
@@ -322,10 +437,35 @@ def download(
|
|||||||
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
||||||
yield {"downloaded": "[yellow]CANCELLED"}
|
yield {"downloaded": "[yellow]CANCELLED"}
|
||||||
raise
|
raise
|
||||||
except Exception:
|
except Exception as e:
|
||||||
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
||||||
yield {"downloaded": "[red]FAILED"}
|
yield {"downloaded": "[red]FAILED"}
|
||||||
|
if debug_logger and log_file_path and not isinstance(e, (subprocess.CalledProcessError, ValueError)):
|
||||||
|
log_contents = ""
|
||||||
|
if log_file_path.exists():
|
||||||
|
try:
|
||||||
|
log_contents = log_file_path.read_text(encoding="utf-8", errors="replace")
|
||||||
|
except Exception:
|
||||||
|
log_contents = "<failed to read log file>"
|
||||||
|
|
||||||
|
debug_logger.log(
|
||||||
|
level="ERROR",
|
||||||
|
operation="downloader_n_m3u8dl_re_exception",
|
||||||
|
message=f"Unexpected error during N_m3u8DL-RE download: {e}",
|
||||||
|
error=e,
|
||||||
|
context={
|
||||||
|
"track_id": getattr(track, "id", None),
|
||||||
|
"track_type": track.__class__.__name__,
|
||||||
|
"log_file_contents": log_contents,
|
||||||
|
},
|
||||||
|
)
|
||||||
raise
|
raise
|
||||||
|
finally:
|
||||||
|
if log_file_path and log_file_path.exists():
|
||||||
|
try:
|
||||||
|
log_file_path.unlink()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def n_m3u8dl_re(
|
def n_m3u8dl_re(
|
||||||
@@ -382,4 +522,4 @@ def n_m3u8dl_re(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
__all__ = ("n_m3u8dl_re",)
|
__all__ = ("n_m3u8dl_re",)
|
||||||
@@ -12,7 +12,7 @@ from requests.adapters import HTTPAdapter
|
|||||||
from rich import filesize
|
from rich import filesize
|
||||||
|
|
||||||
from unshackle.core.constants import DOWNLOAD_CANCELLED
|
from unshackle.core.constants import DOWNLOAD_CANCELLED
|
||||||
from unshackle.core.utilities import get_extension
|
from unshackle.core.utilities import get_debug_logger, get_extension
|
||||||
|
|
||||||
MAX_ATTEMPTS = 5
|
MAX_ATTEMPTS = 5
|
||||||
RETRY_WAIT = 2
|
RETRY_WAIT = 2
|
||||||
@@ -215,6 +215,8 @@ def requests(
|
|||||||
if not isinstance(max_workers, (int, type(None))):
|
if not isinstance(max_workers, (int, type(None))):
|
||||||
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
|
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
|
||||||
|
|
||||||
|
debug_logger = get_debug_logger()
|
||||||
|
|
||||||
if not isinstance(urls, list):
|
if not isinstance(urls, list):
|
||||||
urls = [urls]
|
urls = [urls]
|
||||||
|
|
||||||
@@ -241,6 +243,23 @@ def requests(
|
|||||||
if proxy:
|
if proxy:
|
||||||
session.proxies.update({"all": proxy})
|
session.proxies.update({"all": proxy})
|
||||||
|
|
||||||
|
if debug_logger:
|
||||||
|
first_url = urls[0].get("url", "") if urls else ""
|
||||||
|
url_display = first_url[:200] + "..." if len(first_url) > 200 else first_url
|
||||||
|
debug_logger.log(
|
||||||
|
level="DEBUG",
|
||||||
|
operation="downloader_requests_start",
|
||||||
|
message="Starting requests download",
|
||||||
|
context={
|
||||||
|
"url_count": len(urls),
|
||||||
|
"first_url": url_display,
|
||||||
|
"output_dir": str(output_dir),
|
||||||
|
"filename": filename,
|
||||||
|
"max_workers": max_workers,
|
||||||
|
"has_proxy": bool(proxy),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
yield dict(total=len(urls))
|
yield dict(total=len(urls))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -256,14 +275,37 @@ def requests(
|
|||||||
# tell dl that it was cancelled
|
# tell dl that it was cancelled
|
||||||
# the pool is already shut down, so exiting loop is fine
|
# the pool is already shut down, so exiting loop is fine
|
||||||
raise
|
raise
|
||||||
except Exception:
|
except Exception as e:
|
||||||
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
||||||
yield dict(downloaded="[red]FAILING")
|
yield dict(downloaded="[red]FAILING")
|
||||||
pool.shutdown(wait=True, cancel_futures=True)
|
pool.shutdown(wait=True, cancel_futures=True)
|
||||||
yield dict(downloaded="[red]FAILED")
|
yield dict(downloaded="[red]FAILED")
|
||||||
|
if debug_logger:
|
||||||
|
debug_logger.log(
|
||||||
|
level="ERROR",
|
||||||
|
operation="downloader_requests_failed",
|
||||||
|
message=f"Requests download failed: {e}",
|
||||||
|
error=e,
|
||||||
|
context={
|
||||||
|
"url_count": len(urls),
|
||||||
|
"output_dir": str(output_dir),
|
||||||
|
},
|
||||||
|
)
|
||||||
# tell dl that it failed
|
# tell dl that it failed
|
||||||
# the pool is already shut down, so exiting loop is fine
|
# the pool is already shut down, so exiting loop is fine
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
if debug_logger:
|
||||||
|
debug_logger.log(
|
||||||
|
level="DEBUG",
|
||||||
|
operation="downloader_requests_complete",
|
||||||
|
message="Requests download completed successfully",
|
||||||
|
context={
|
||||||
|
"url_count": len(urls),
|
||||||
|
"output_dir": str(output_dir),
|
||||||
|
"filename": filename,
|
||||||
|
},
|
||||||
|
)
|
||||||
finally:
|
finally:
|
||||||
DOWNLOAD_SIZES.clear()
|
DOWNLOAD_SIZES.clear()
|
||||||
|
|
||||||
|
|||||||
@@ -168,7 +168,7 @@ class PlayReady:
|
|||||||
pssh_boxes.extend(list(get_boxes(init_data, b"pssh")))
|
pssh_boxes.extend(list(get_boxes(init_data, b"pssh")))
|
||||||
tenc_boxes.extend(list(get_boxes(init_data, b"tenc")))
|
tenc_boxes.extend(list(get_boxes(init_data, b"tenc")))
|
||||||
|
|
||||||
pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SYSTEM_ID.bytes), None)
|
pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SYSTEM_ID), None)
|
||||||
if not pssh:
|
if not pssh:
|
||||||
raise PlayReady.Exceptions.PSSHNotFound("PSSH was not found in track data.")
|
raise PlayReady.Exceptions.PSSHNotFound("PSSH was not found in track data.")
|
||||||
|
|
||||||
@@ -197,7 +197,7 @@ class PlayReady:
|
|||||||
if enc_key_id:
|
if enc_key_id:
|
||||||
kid = UUID(bytes=base64.b64decode(enc_key_id))
|
kid = UUID(bytes=base64.b64decode(enc_key_id))
|
||||||
|
|
||||||
pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SYSTEM_ID.bytes), None)
|
pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SYSTEM_ID), None)
|
||||||
if not pssh:
|
if not pssh:
|
||||||
raise PlayReady.Exceptions.PSSHNotFound("PSSH was not found in track data.")
|
raise PlayReady.Exceptions.PSSHNotFound("PSSH was not found in track data.")
|
||||||
|
|
||||||
@@ -415,7 +415,7 @@ class PlayReady:
|
|||||||
p.wait()
|
p.wait()
|
||||||
|
|
||||||
if p.returncode != 0 or had_error:
|
if p.returncode != 0 or had_error:
|
||||||
raise subprocess.CalledProcessError(p.returncode, arguments)
|
raise subprocess.CalledProcessError(p.returncode, [binaries.ShakaPackager, *arguments])
|
||||||
|
|
||||||
path.unlink()
|
path.unlink()
|
||||||
if not stream_skipped:
|
if not stream_skipped:
|
||||||
|
|||||||
@@ -100,9 +100,7 @@ class Widevine:
|
|||||||
pssh_boxes.extend(list(get_boxes(init_data, b"pssh")))
|
pssh_boxes.extend(list(get_boxes(init_data, b"pssh")))
|
||||||
tenc_boxes.extend(list(get_boxes(init_data, b"tenc")))
|
tenc_boxes.extend(list(get_boxes(init_data, b"tenc")))
|
||||||
|
|
||||||
pssh_boxes.sort(key=lambda b: {PSSH.SystemId.Widevine: 0, PSSH.SystemId.PlayReady: 1}[b.system_ID])
|
pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SystemId.Widevine), None)
|
||||||
|
|
||||||
pssh = next(iter(pssh_boxes), None)
|
|
||||||
if not pssh:
|
if not pssh:
|
||||||
raise Widevine.Exceptions.PSSHNotFound("PSSH was not found in track data.")
|
raise Widevine.Exceptions.PSSHNotFound("PSSH was not found in track data.")
|
||||||
|
|
||||||
@@ -141,9 +139,7 @@ class Widevine:
|
|||||||
if enc_key_id:
|
if enc_key_id:
|
||||||
kid = UUID(bytes=base64.b64decode(enc_key_id))
|
kid = UUID(bytes=base64.b64decode(enc_key_id))
|
||||||
|
|
||||||
pssh_boxes.sort(key=lambda b: {PSSH.SystemId.Widevine: 0, PSSH.SystemId.PlayReady: 1}[b.system_ID])
|
pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SystemId.Widevine), None)
|
||||||
|
|
||||||
pssh = next(iter(pssh_boxes), None)
|
|
||||||
if not pssh:
|
if not pssh:
|
||||||
raise Widevine.Exceptions.PSSHNotFound("PSSH was not found in track data.")
|
raise Widevine.Exceptions.PSSHNotFound("PSSH was not found in track data.")
|
||||||
|
|
||||||
@@ -371,7 +367,7 @@ class Widevine:
|
|||||||
p.wait()
|
p.wait()
|
||||||
|
|
||||||
if p.returncode != 0 or had_error:
|
if p.returncode != 0 or had_error:
|
||||||
raise subprocess.CalledProcessError(p.returncode, arguments)
|
raise subprocess.CalledProcessError(p.returncode, [binaries.ShakaPackager, *arguments])
|
||||||
|
|
||||||
path.unlink()
|
path.unlink()
|
||||||
if not stream_skipped:
|
if not stream_skipped:
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import html
|
|||||||
import logging
|
import logging
|
||||||
import math
|
import math
|
||||||
import re
|
import re
|
||||||
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
from copy import copy
|
from copy import copy
|
||||||
from functools import partial
|
from functools import partial
|
||||||
@@ -18,6 +19,7 @@ import requests
|
|||||||
from curl_cffi.requests import Session as CurlSession
|
from curl_cffi.requests import Session as CurlSession
|
||||||
from langcodes import Language, tag_is_valid
|
from langcodes import Language, tag_is_valid
|
||||||
from lxml.etree import Element, ElementTree
|
from lxml.etree import Element, ElementTree
|
||||||
|
from pyplayready.cdm import Cdm as PlayReadyCdm
|
||||||
from pyplayready.system.pssh import PSSH as PR_PSSH
|
from pyplayready.system.pssh import PSSH as PR_PSSH
|
||||||
from pywidevine.cdm import Cdm as WidevineCdm
|
from pywidevine.cdm import Cdm as WidevineCdm
|
||||||
from pywidevine.pssh import PSSH
|
from pywidevine.pssh import PSSH
|
||||||
@@ -28,7 +30,7 @@ from unshackle.core.downloaders import requests as requests_downloader
|
|||||||
from unshackle.core.drm import DRM_T, PlayReady, Widevine
|
from unshackle.core.drm import DRM_T, PlayReady, Widevine
|
||||||
from unshackle.core.events import events
|
from unshackle.core.events import events
|
||||||
from unshackle.core.tracks import Audio, Subtitle, Tracks, Video
|
from unshackle.core.tracks import Audio, Subtitle, Tracks, Video
|
||||||
from unshackle.core.utilities import is_close_match, try_ensure_utf8
|
from unshackle.core.utilities import get_debug_logger, is_close_match, try_ensure_utf8
|
||||||
from unshackle.core.utils.xml import load_xml
|
from unshackle.core.utils.xml import load_xml
|
||||||
|
|
||||||
|
|
||||||
@@ -465,12 +467,23 @@ class DASH:
|
|||||||
track.data["dash"]["timescale"] = int(segment_timescale)
|
track.data["dash"]["timescale"] = int(segment_timescale)
|
||||||
track.data["dash"]["segment_durations"] = segment_durations
|
track.data["dash"]["segment_durations"] = segment_durations
|
||||||
|
|
||||||
if not track.drm and isinstance(track, (Video, Audio)):
|
if init_data and isinstance(track, (Video, Audio)):
|
||||||
try:
|
if isinstance(cdm, PlayReadyCdm):
|
||||||
track.drm = [Widevine.from_init_data(init_data)]
|
try:
|
||||||
except Widevine.Exceptions.PSSHNotFound:
|
track.drm = [PlayReady.from_init_data(init_data)]
|
||||||
# it might not have Widevine DRM, or might not have found the PSSH
|
except PlayReady.Exceptions.PSSHNotFound:
|
||||||
log.warning("No Widevine PSSH was found for this track, is it DRM free?")
|
try:
|
||||||
|
track.drm = [Widevine.from_init_data(init_data)]
|
||||||
|
except Widevine.Exceptions.PSSHNotFound:
|
||||||
|
log.warning("No PlayReady or Widevine PSSH was found for this track, is it DRM free?")
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
track.drm = [Widevine.from_init_data(init_data)]
|
||||||
|
except Widevine.Exceptions.PSSHNotFound:
|
||||||
|
try:
|
||||||
|
track.drm = [PlayReady.from_init_data(init_data)]
|
||||||
|
except PlayReady.Exceptions.PSSHNotFound:
|
||||||
|
log.warning("No Widevine or PlayReady PSSH was found for this track, is it DRM free?")
|
||||||
|
|
||||||
if track.drm:
|
if track.drm:
|
||||||
track_kid = track_kid or track.get_key_id(url=segments[0][0], session=session)
|
track_kid = track_kid or track.get_key_id(url=segments[0][0], session=session)
|
||||||
@@ -515,8 +528,35 @@ class DASH:
|
|||||||
max_workers=max_workers,
|
max_workers=max_workers,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
skip_merge = False
|
||||||
if downloader.__name__ == "n_m3u8dl_re":
|
if downloader.__name__ == "n_m3u8dl_re":
|
||||||
downloader_args.update({"filename": track.id, "track": track})
|
skip_merge = True
|
||||||
|
downloader_args.update(
|
||||||
|
{
|
||||||
|
"filename": track.id,
|
||||||
|
"track": track,
|
||||||
|
"content_keys": drm.content_keys if drm else None,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
debug_logger = get_debug_logger()
|
||||||
|
if debug_logger:
|
||||||
|
debug_logger.log(
|
||||||
|
level="DEBUG",
|
||||||
|
operation="manifest_dash_download_start",
|
||||||
|
message="Starting DASH manifest download",
|
||||||
|
context={
|
||||||
|
"track_id": getattr(track, "id", None),
|
||||||
|
"track_type": track.__class__.__name__,
|
||||||
|
"total_segments": len(segments),
|
||||||
|
"downloader": downloader.__name__,
|
||||||
|
"has_drm": bool(track.drm),
|
||||||
|
"drm_types": [drm.__class__.__name__ for drm in (track.drm or [])],
|
||||||
|
"skip_merge": skip_merge,
|
||||||
|
"save_path": str(save_path),
|
||||||
|
"has_init_data": bool(init_data),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
for status_update in downloader(**downloader_args):
|
for status_update in downloader(**downloader_args):
|
||||||
file_downloaded = status_update.get("file_downloaded")
|
file_downloaded = status_update.get("file_downloaded")
|
||||||
@@ -533,42 +573,56 @@ class DASH:
|
|||||||
control_file.unlink()
|
control_file.unlink()
|
||||||
|
|
||||||
segments_to_merge = [x for x in sorted(save_dir.iterdir()) if x.is_file()]
|
segments_to_merge = [x for x in sorted(save_dir.iterdir()) if x.is_file()]
|
||||||
with open(save_path, "wb") as f:
|
|
||||||
if init_data:
|
if skip_merge:
|
||||||
f.write(init_data)
|
# N_m3u8DL-RE handles merging and decryption internally
|
||||||
if len(segments_to_merge) > 1:
|
shutil.move(segments_to_merge[0], save_path)
|
||||||
progress(downloaded="Merging", completed=0, total=len(segments_to_merge))
|
if drm:
|
||||||
for segment_file in segments_to_merge:
|
track.drm = None
|
||||||
segment_data = segment_file.read_bytes()
|
events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=drm, segment=None)
|
||||||
# TODO: fix encoding after decryption?
|
else:
|
||||||
if (
|
with open(save_path, "wb") as f:
|
||||||
not drm
|
if init_data:
|
||||||
and isinstance(track, Subtitle)
|
f.write(init_data)
|
||||||
and track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML)
|
if len(segments_to_merge) > 1:
|
||||||
):
|
progress(downloaded="Merging", completed=0, total=len(segments_to_merge))
|
||||||
segment_data = try_ensure_utf8(segment_data)
|
for segment_file in segments_to_merge:
|
||||||
segment_data = (
|
segment_data = segment_file.read_bytes()
|
||||||
segment_data.decode("utf8")
|
# TODO: fix encoding after decryption?
|
||||||
.replace("‎", html.unescape("‎"))
|
if (
|
||||||
.replace("‏", html.unescape("‏"))
|
not drm
|
||||||
.encode("utf8")
|
and isinstance(track, Subtitle)
|
||||||
)
|
and track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML)
|
||||||
f.write(segment_data)
|
):
|
||||||
f.flush()
|
segment_data = try_ensure_utf8(segment_data)
|
||||||
segment_file.unlink()
|
segment_data = (
|
||||||
progress(advance=1)
|
segment_data.decode("utf8")
|
||||||
|
.replace("‎", html.unescape("‎"))
|
||||||
|
.replace("‏", html.unescape("‏"))
|
||||||
|
.encode("utf8")
|
||||||
|
)
|
||||||
|
f.write(segment_data)
|
||||||
|
f.flush()
|
||||||
|
segment_file.unlink()
|
||||||
|
progress(advance=1)
|
||||||
|
|
||||||
track.path = save_path
|
track.path = save_path
|
||||||
events.emit(events.Types.TRACK_DOWNLOADED, track=track)
|
events.emit(events.Types.TRACK_DOWNLOADED, track=track)
|
||||||
|
|
||||||
if drm:
|
if not skip_merge and drm:
|
||||||
progress(downloaded="Decrypting", completed=0, total=100)
|
progress(downloaded="Decrypting", completed=0, total=100)
|
||||||
drm.decrypt(save_path)
|
drm.decrypt(save_path)
|
||||||
track.drm = None
|
track.drm = None
|
||||||
events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=drm, segment=None)
|
events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=drm, segment=None)
|
||||||
progress(downloaded="Decrypting", advance=100)
|
progress(downloaded="Decrypting", advance=100)
|
||||||
|
|
||||||
save_dir.rmdir()
|
# Clean up empty segment directory
|
||||||
|
if save_dir.exists() and save_dir.name.endswith("_segments"):
|
||||||
|
try:
|
||||||
|
save_dir.rmdir()
|
||||||
|
except OSError:
|
||||||
|
# Directory might not be empty, try removing recursively
|
||||||
|
shutil.rmtree(save_dir, ignore_errors=True)
|
||||||
|
|
||||||
progress(downloaded="Downloaded")
|
progress(downloaded="Downloaded")
|
||||||
|
|
||||||
@@ -736,6 +790,11 @@ class DASH:
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def get_drm(protections: list[Element]) -> list[DRM_T]:
|
def get_drm(protections: list[Element]) -> list[DRM_T]:
|
||||||
drm: list[DRM_T] = []
|
drm: list[DRM_T] = []
|
||||||
|
PLACEHOLDER_KIDS = {
|
||||||
|
UUID("00000000-0000-0000-0000-000000000000"), # All zeros (key rotation default)
|
||||||
|
UUID("00010203-0405-0607-0809-0a0b0c0d0e0f"), # Sequential 0x00-0x0f
|
||||||
|
UUID("00010203-0405-0607-0809-101112131415"), # Shaka Packager test pattern
|
||||||
|
}
|
||||||
|
|
||||||
for protection in protections:
|
for protection in protections:
|
||||||
urn = (protection.get("schemeIdUri") or "").lower()
|
urn = (protection.get("schemeIdUri") or "").lower()
|
||||||
@@ -745,17 +804,27 @@ class DASH:
|
|||||||
if not pssh_text:
|
if not pssh_text:
|
||||||
continue
|
continue
|
||||||
pssh = PSSH(pssh_text)
|
pssh = PSSH(pssh_text)
|
||||||
|
kid_attr = protection.get("kid") or protection.get("{urn:mpeg:cenc:2013}kid")
|
||||||
|
kid = UUID(bytes=base64.b64decode(kid_attr)) if kid_attr else None
|
||||||
|
|
||||||
kid = protection.get("kid")
|
if not kid:
|
||||||
if kid:
|
default_kid_attr = protection.get("default_KID") or protection.get(
|
||||||
kid = UUID(bytes=base64.b64decode(kid))
|
"{urn:mpeg:cenc:2013}default_KID"
|
||||||
|
)
|
||||||
|
kid = UUID(default_kid_attr) if default_kid_attr else None
|
||||||
|
|
||||||
default_kid = protection.get("default_KID")
|
if not kid:
|
||||||
if default_kid:
|
kid = next(
|
||||||
kid = UUID(default_kid)
|
(
|
||||||
|
UUID(p.get("default_KID") or p.get("{urn:mpeg:cenc:2013}default_KID"))
|
||||||
|
for p in protections
|
||||||
|
if p.get("default_KID") or p.get("{urn:mpeg:cenc:2013}default_KID")
|
||||||
|
),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
|
||||||
if not pssh.key_ids and not kid:
|
if kid and (not pssh.key_ids or all(k.int == 0 or k in PLACEHOLDER_KIDS for k in pssh.key_ids)):
|
||||||
kid = next((UUID(p.get("default_KID")) for p in protections if p.get("default_KID")), None)
|
pssh.set_key_ids([kid])
|
||||||
|
|
||||||
drm.append(Widevine(pssh=pssh, kid=kid))
|
drm.append(Widevine(pssh=pssh, kid=kid))
|
||||||
|
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ from unshackle.core.downloaders import requests as requests_downloader
|
|||||||
from unshackle.core.drm import DRM_T, ClearKey, PlayReady, Widevine
|
from unshackle.core.drm import DRM_T, ClearKey, PlayReady, Widevine
|
||||||
from unshackle.core.events import events
|
from unshackle.core.events import events
|
||||||
from unshackle.core.tracks import Audio, Subtitle, Tracks, Video
|
from unshackle.core.tracks import Audio, Subtitle, Tracks, Video
|
||||||
from unshackle.core.utilities import get_extension, is_close_match, try_ensure_utf8
|
from unshackle.core.utilities import get_debug_logger, get_extension, is_close_match, try_ensure_utf8
|
||||||
|
|
||||||
|
|
||||||
class HLS:
|
class HLS:
|
||||||
@@ -350,6 +350,24 @@ class HLS:
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
debug_logger = get_debug_logger()
|
||||||
|
if debug_logger:
|
||||||
|
debug_logger.log(
|
||||||
|
level="DEBUG",
|
||||||
|
operation="manifest_hls_download_start",
|
||||||
|
message="Starting HLS manifest download",
|
||||||
|
context={
|
||||||
|
"track_id": getattr(track, "id", None),
|
||||||
|
"track_type": track.__class__.__name__,
|
||||||
|
"total_segments": total_segments,
|
||||||
|
"downloader": downloader.__name__,
|
||||||
|
"has_drm": bool(session_drm),
|
||||||
|
"drm_type": session_drm.__class__.__name__ if session_drm else None,
|
||||||
|
"skip_merge": skip_merge,
|
||||||
|
"save_path": str(save_path),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
for status_update in downloader(**downloader_args):
|
for status_update in downloader(**downloader_args):
|
||||||
file_downloaded = status_update.get("file_downloaded")
|
file_downloaded = status_update.get("file_downloaded")
|
||||||
if file_downloaded:
|
if file_downloaded:
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY,
|
|||||||
from unshackle.core.drm import DRM_T, PlayReady, Widevine
|
from unshackle.core.drm import DRM_T, PlayReady, Widevine
|
||||||
from unshackle.core.events import events
|
from unshackle.core.events import events
|
||||||
from unshackle.core.tracks import Audio, Subtitle, Track, Tracks, Video
|
from unshackle.core.tracks import Audio, Subtitle, Track, Tracks, Video
|
||||||
from unshackle.core.utilities import try_ensure_utf8
|
from unshackle.core.utilities import get_debug_logger, try_ensure_utf8
|
||||||
from unshackle.core.utils.xml import load_xml
|
from unshackle.core.utils.xml import load_xml
|
||||||
|
|
||||||
|
|
||||||
@@ -283,6 +283,24 @@ class ISM:
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
debug_logger = get_debug_logger()
|
||||||
|
if debug_logger:
|
||||||
|
debug_logger.log(
|
||||||
|
level="DEBUG",
|
||||||
|
operation="manifest_ism_download_start",
|
||||||
|
message="Starting ISM manifest download",
|
||||||
|
context={
|
||||||
|
"track_id": getattr(track, "id", None),
|
||||||
|
"track_type": track.__class__.__name__,
|
||||||
|
"total_segments": len(segments),
|
||||||
|
"downloader": downloader.__name__,
|
||||||
|
"has_drm": bool(session_drm),
|
||||||
|
"drm_type": session_drm.__class__.__name__ if session_drm else None,
|
||||||
|
"skip_merge": skip_merge,
|
||||||
|
"save_path": str(save_path),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
for status_update in downloader(**downloader_args):
|
for status_update in downloader(**downloader_args):
|
||||||
file_downloaded = status_update.get("file_downloaded")
|
file_downloaded = status_update.get("file_downloaded")
|
||||||
if file_downloaded:
|
if file_downloaded:
|
||||||
|
|||||||
@@ -185,7 +185,10 @@ class Episode(Title):
|
|||||||
if hdr_format:
|
if hdr_format:
|
||||||
if hdr_format_full.startswith("Dolby Vision"):
|
if hdr_format_full.startswith("Dolby Vision"):
|
||||||
name += " DV"
|
name += " DV"
|
||||||
if any(indicator in hdr_format_full for indicator in ["HDR10", "SMPTE ST 2086"]):
|
if any(
|
||||||
|
indicator in (hdr_format_full + " " + hdr_format)
|
||||||
|
for indicator in ["HDR10", "SMPTE ST 2086"]
|
||||||
|
):
|
||||||
name += " HDR"
|
name += " HDR"
|
||||||
else:
|
else:
|
||||||
name += f" {DYNAMIC_RANGE_MAP.get(hdr_format)} "
|
name += f" {DYNAMIC_RANGE_MAP.get(hdr_format)} "
|
||||||
|
|||||||
@@ -136,7 +136,10 @@ class Movie(Title):
|
|||||||
if hdr_format:
|
if hdr_format:
|
||||||
if hdr_format_full.startswith("Dolby Vision"):
|
if hdr_format_full.startswith("Dolby Vision"):
|
||||||
name += " DV"
|
name += " DV"
|
||||||
if any(indicator in hdr_format_full for indicator in ["HDR10", "SMPTE ST 2086"]):
|
if any(
|
||||||
|
indicator in (hdr_format_full + " " + hdr_format)
|
||||||
|
for indicator in ["HDR10", "SMPTE ST 2086"]
|
||||||
|
):
|
||||||
name += " HDR"
|
name += " HDR"
|
||||||
else:
|
else:
|
||||||
name += f" {DYNAMIC_RANGE_MAP.get(hdr_format)} "
|
name += f" {DYNAMIC_RANGE_MAP.get(hdr_format)} "
|
||||||
|
|||||||
@@ -91,6 +91,12 @@ class Subtitle(Track):
|
|||||||
return Subtitle.Codec.TimedTextMarkupLang
|
return Subtitle.Codec.TimedTextMarkupLang
|
||||||
raise ValueError(f"The Content Profile '{profile}' is not a supported Subtitle Codec")
|
raise ValueError(f"The Content Profile '{profile}' is not a supported Subtitle Codec")
|
||||||
|
|
||||||
|
# WebVTT sanitization patterns (compiled once for performance)
|
||||||
|
_CUE_ID_PATTERN = re.compile(r"^[A-Za-z]+\d+$")
|
||||||
|
_TIMING_START_PATTERN = re.compile(r"^\d+:\d+[:\.]")
|
||||||
|
_TIMING_LINE_PATTERN = re.compile(r"^((?:\d+:)?\d+:\d+[.,]\d+)\s*-->\s*((?:\d+:)?\d+:\d+[.,]\d+)(.*)$")
|
||||||
|
_LINE_POS_PATTERN = re.compile(r"line:(\d+(?:\.\d+)?%?)")
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
*args: Any,
|
*args: Any,
|
||||||
@@ -239,6 +245,11 @@ class Subtitle(Track):
|
|||||||
|
|
||||||
# Sanitize WebVTT timestamps before parsing
|
# Sanitize WebVTT timestamps before parsing
|
||||||
text = Subtitle.sanitize_webvtt_timestamps(text)
|
text = Subtitle.sanitize_webvtt_timestamps(text)
|
||||||
|
# Remove cue identifiers that confuse parsers like pysubs2
|
||||||
|
text = Subtitle.sanitize_webvtt_cue_identifiers(text)
|
||||||
|
# Merge overlapping cues with line positioning into single multi-line cues
|
||||||
|
text = Subtitle.merge_overlapping_webvtt_cues(text)
|
||||||
|
|
||||||
preserve_formatting = config.subtitle.get("preserve_formatting", True)
|
preserve_formatting = config.subtitle.get("preserve_formatting", True)
|
||||||
|
|
||||||
if preserve_formatting:
|
if preserve_formatting:
|
||||||
@@ -277,6 +288,240 @@ class Subtitle(Track):
|
|||||||
# Replace negative timestamps with 00:00:00.000
|
# Replace negative timestamps with 00:00:00.000
|
||||||
return re.sub(r"(-\d+:\d+:\d+\.\d+)", "00:00:00.000", text)
|
return re.sub(r"(-\d+:\d+:\d+\.\d+)", "00:00:00.000", text)
|
||||||
|
|
||||||
|
@staticmethod
def has_webvtt_cue_identifiers(text: str) -> bool:
    """
    Report whether WebVTT content contains cue identifiers that need removal.

    A line is treated as a cue identifier when, once stripped, it matches
    ``_CUE_ID_PATTERN`` and the next non-blank line looks like a timing line
    (it contains "-->" or matches ``_TIMING_START_PATTERN``).

    Parameters:
        text: The WebVTT content as string

    Returns:
        True if at least one cue identifier is detected, False otherwise
    """
    rows = text.split("\n")
    total = len(rows)

    for idx, raw in enumerate(rows):
        if not Subtitle._CUE_ID_PATTERN.match(raw.strip()):
            continue

        # Step over blank lines to reach the line that follows the candidate
        nxt = idx + 1
        while nxt < total and not rows[nxt].strip():
            nxt += 1
        if nxt >= total:
            continue

        follower = rows[nxt]
        if "-->" in follower or Subtitle._TIMING_START_PATTERN.match(follower.strip()):
            return True

    return False
|
||||||
|
|
||||||
|
@staticmethod
def sanitize_webvtt_cue_identifiers(text: str) -> str:
    """
    Strip WebVTT cue identifiers that can confuse subtitle parsers.

    Some services place identifiers such as "Q0", "Q1", etc. on their own
    line ahead of the timing line. Parsers like pysubs2 may attach those
    identifiers to the previous cue's text, so they are dropped here.

    Parameters:
        text: The WebVTT content as string

    Returns:
        The content with cue identifier lines removed; the input is returned
        unchanged when no identifiers are detected
    """
    # Fast path: nothing to strip, hand back the very same string
    if not Subtitle.has_webvtt_cue_identifiers(text):
        return text

    rows = text.split("\n")
    total = len(rows)
    kept = []

    for idx, raw in enumerate(rows):
        if Subtitle._CUE_ID_PATTERN.match(raw.strip()):
            # Find the next non-blank line after the candidate identifier
            nxt = idx + 1
            while nxt < total and not rows[nxt].strip():
                nxt += 1
            if nxt < total:
                follower = rows[nxt]
                if "-->" in follower or Subtitle._TIMING_START_PATTERN.match(follower.strip()):
                    # Identifier directly precedes a timing line: drop it
                    continue

        kept.append(raw)

    return "\n".join(kept)
|
||||||
|
|
||||||
|
@staticmethod
def _parse_vtt_time(t: str) -> int:
    """
    Parse a WebVTT timestamp into milliseconds.

    Accepts "MM:SS.mmm" and "HH:MM:SS.mmm" forms. A comma is accepted as the
    decimal separator, and a fraction of any length is padded or truncated to
    millisecond precision (".5" and ".500" both mean 500 ms). Surrounding
    whitespace, such as a trailing carriage return, is ignored.

    Parameters:
        t: The timestamp text

    Returns:
        The timestamp in milliseconds, or 0 for malformed input (including
        non-string values)
    """
    if not isinstance(t, str):
        return 0

    try:
        # Strip first so padding the fraction never embeds stray whitespace
        parts = t.strip().replace(",", ".").split(":")
        if len(parts) == 2:
            h = "0"
            m, s = parts
        elif len(parts) >= 3:
            h, m, s = parts[:3]
        else:
            return 0

        sec_parts = s.split(".")
        secs = int(sec_parts[0])
        # Handle variable millisecond digits (e.g., .5 = 500ms, .50 = 500ms, .500 = 500ms)
        ms = int(sec_parts[1].ljust(3, "0")[:3]) if len(sec_parts) > 1 else 0
        return int(h) * 3600000 + int(m) * 60000 + secs * 1000 + ms
    except (ValueError, IndexError):
        return 0
|
||||||
|
|
||||||
|
@staticmethod
def has_overlapping_webvtt_cues(text: str) -> bool:
    """
    Report whether WebVTT content contains overlapping cues that need merging.

    Two consecutive timing lines count as overlapping when their start times
    are within 50ms of each other and their end times are equal, which
    indicates a multi-line subtitle split into separate cues.

    Parameters:
        text: The WebVTT content as string

    Returns:
        True if overlapping cues are detected, False otherwise
    """
    to_ms = Subtitle._parse_vtt_time

    # (start_ms, end_ms) for every timing line, in document order
    spans = [
        (to_ms(hit.group(1)), to_ms(hit.group(2)))
        for hit in map(Subtitle._TIMING_LINE_PATTERN.match, text.split("\n"))
        if hit
    ]

    # Compare each cue with its immediate successor
    return any(
        abs(first_start - second_start) <= 50 and first_end == second_end
        for (first_start, first_end), (second_start, second_end) in zip(spans, spans[1:])
    )
|
||||||
|
|
||||||
|
@staticmethod
def merge_overlapping_webvtt_cues(text: str) -> str:
    """
    Collapse runs of near-simultaneous WebVTT cues into single multi-line cues.

    Some services emit one cue per subtitle line, with start times a few
    milliseconds apart, identical end times and different ``line:`` positions.
    Consecutive cues whose start is within 50ms of the first cue of the run and
    whose end time matches are joined into one cue. The joined text is ordered
    by ``line:`` position (lower value first, i.e. higher on screen); cues
    without a ``line:`` setting are treated as position 100. Merged cues are
    written without cue settings; cues that are not merged keep theirs.

    Everything before the first line containing ``-->`` is kept as the header.
    After that, lines that are neither timing lines nor cue text are dropped.

    Parameters:
        text: The WebVTT content as string

    Returns:
        The rewritten WebVTT content, or the input unchanged when no
        overlapping cues are detected
    """
    if not Subtitle.has_overlapping_webvtt_cues(text):
        return text

    rows = text.split("\n")
    total = len(rows)

    # The header is every line up to (not including) the first timing arrow
    cursor = next((idx for idx, row in enumerate(rows) if "-->" in row), total)
    preamble = rows[:cursor]

    parsed = []
    while cursor < total:
        timing = Subtitle._TIMING_LINE_PATTERN.match(rows[cursor])
        cursor += 1
        if not timing:
            continue

        begin, finish, cue_settings = timing.groups()
        position = Subtitle._LINE_POS_PATTERN.search(cue_settings)
        # Default to bottom when the cue carries no line: setting
        vertical = float(position.group(1).rstrip("%")) if position else 100.0

        # Cue text runs until a blank line or the next timing arrow
        payload = []
        while cursor < total:
            candidate = rows[cursor]
            if not candidate.strip() or "-->" in candidate:
                break
            payload.append(candidate)
            cursor += 1

        parsed.append(
            {
                "start_ms": Subtitle._parse_vtt_time(begin),
                "end_ms": Subtitle._parse_vtt_time(finish),
                "start_str": begin,
                "end_str": finish,
                "line_pos": vertical,
                "content": "\n".join(payload),
                "settings": cue_settings,
            }
        )

    # Group consecutive cues against the first cue of each run
    # (within 50ms of each other with same end time)
    merged = []
    head = 0
    while head < len(parsed):
        anchor = parsed[head]
        tail = head + 1
        while (
            tail < len(parsed)
            and abs(anchor["start_ms"] - parsed[tail]["start_ms"]) <= 50
            and anchor["end_ms"] == parsed[tail]["end_ms"]
        ):
            tail += 1

        bundle = parsed[head:tail]
        if len(bundle) == 1:
            merged.append(
                {
                    "start_str": anchor["start_str"],
                    "end_str": anchor["end_str"],
                    "content": anchor["content"],
                    "settings": anchor["settings"],
                }
            )
        else:
            # Lower line position = higher on screen = earlier in the text
            bundle.sort(key=lambda cue: cue["line_pos"])
            # The merged cue starts when the earliest member starts
            first_shown = min(bundle, key=lambda cue: cue["start_ms"])
            merged.append(
                {
                    "start_str": first_shown["start_str"],
                    "end_str": bundle[0]["end_str"],
                    "content": "\n".join(cue["content"] for cue in bundle),
                    "settings": "",
                }
            )

        head = tail

    output = list(preamble)
    if output and output[-1].strip():
        # Keep a blank separator between the header and the first cue
        output.append("")

    for entry in merged:
        output.extend(
            (
                f"{entry['start_str']} --> {entry['end_str']}{entry['settings']}",
                entry["content"],
                "",
            )
        )

    return "\n".join(output)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def sanitize_webvtt(text: str) -> str:
|
def sanitize_webvtt(text: str) -> str:
|
||||||
"""
|
"""
|
||||||
@@ -565,13 +810,18 @@ class Subtitle(Track):
|
|||||||
|
|
||||||
if binaries.SubtitleEdit and self.codec not in (Subtitle.Codec.fTTML, Subtitle.Codec.fVTT):
|
if binaries.SubtitleEdit and self.codec not in (Subtitle.Codec.fTTML, Subtitle.Codec.fVTT):
|
||||||
sub_edit_format = {
|
sub_edit_format = {
|
||||||
Subtitle.Codec.SubStationAlphav4: "AdvancedSubStationAlpha",
|
Subtitle.Codec.SubRip: "subrip",
|
||||||
Subtitle.Codec.TimedTextMarkupLang: "TimedText1.0",
|
Subtitle.Codec.SubStationAlpha: "substationalpha",
|
||||||
}.get(codec, codec.name)
|
Subtitle.Codec.SubStationAlphav4: "advancedsubstationalpha",
|
||||||
|
Subtitle.Codec.TimedTextMarkupLang: "timedtext1.0",
|
||||||
|
Subtitle.Codec.WebVTT: "webvtt",
|
||||||
|
Subtitle.Codec.SAMI: "sami",
|
||||||
|
Subtitle.Codec.MicroDVD: "microdvd",
|
||||||
|
}.get(codec, codec.name.lower())
|
||||||
sub_edit_args = [
|
sub_edit_args = [
|
||||||
binaries.SubtitleEdit,
|
str(binaries.SubtitleEdit),
|
||||||
"/Convert",
|
"/convert",
|
||||||
self.path,
|
str(self.path),
|
||||||
sub_edit_format,
|
sub_edit_format,
|
||||||
f"/outputfilename:{output_path.name}",
|
f"/outputfilename:{output_path.name}",
|
||||||
"/encoding:utf8",
|
"/encoding:utf8",
|
||||||
@@ -631,7 +881,7 @@ class Subtitle(Track):
|
|||||||
text = try_ensure_utf8(data).decode("utf8")
|
text = try_ensure_utf8(data).decode("utf8")
|
||||||
text = text.replace("tt:", "")
|
text = text.replace("tt:", "")
|
||||||
# negative size values aren't allowed in TTML/DFXP spec, replace with 0
|
# negative size values aren't allowed in TTML/DFXP spec, replace with 0
|
||||||
text = re.sub(r'"(-\d+(\.\d+)?(px|em|%|c|pt))"', '"0"', text)
|
text = re.sub(r"-(\d+(?:\.\d+)?)(px|em|%|c|pt)", r"0\2", text)
|
||||||
caption_set = pycaption.DFXPReader().read(text)
|
caption_set = pycaption.DFXPReader().read(text)
|
||||||
elif codec == Subtitle.Codec.fVTT:
|
elif codec == Subtitle.Codec.fVTT:
|
||||||
caption_lists: dict[str, pycaption.CaptionList] = defaultdict(pycaption.CaptionList)
|
caption_lists: dict[str, pycaption.CaptionList] = defaultdict(pycaption.CaptionList)
|
||||||
@@ -962,18 +1212,26 @@ class Subtitle(Track):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass # Fall through to other methods
|
pass # Fall through to other methods
|
||||||
|
|
||||||
if binaries.SubtitleEdit:
|
conversion_method = config.subtitle.get("conversion_method", "auto")
|
||||||
if self.codec == Subtitle.Codec.SubStationAlphav4:
|
use_subtitleedit = sdh_method == "subtitleedit" or (
|
||||||
output_format = "AdvancedSubStationAlpha"
|
sdh_method == "auto" and conversion_method in ("auto", "subtitleedit")
|
||||||
elif self.codec == Subtitle.Codec.TimedTextMarkupLang:
|
)
|
||||||
output_format = "TimedText1.0"
|
|
||||||
else:
|
if binaries.SubtitleEdit and use_subtitleedit:
|
||||||
output_format = self.codec.name
|
output_format = {
|
||||||
|
Subtitle.Codec.SubRip: "subrip",
|
||||||
|
Subtitle.Codec.SubStationAlpha: "substationalpha",
|
||||||
|
Subtitle.Codec.SubStationAlphav4: "advancedsubstationalpha",
|
||||||
|
Subtitle.Codec.TimedTextMarkupLang: "timedtext1.0",
|
||||||
|
Subtitle.Codec.WebVTT: "webvtt",
|
||||||
|
Subtitle.Codec.SAMI: "sami",
|
||||||
|
Subtitle.Codec.MicroDVD: "microdvd",
|
||||||
|
}.get(self.codec, self.codec.name.lower())
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
[
|
[
|
||||||
binaries.SubtitleEdit,
|
str(binaries.SubtitleEdit),
|
||||||
"/Convert",
|
"/convert",
|
||||||
self.path,
|
str(self.path),
|
||||||
output_format,
|
output_format,
|
||||||
"/encoding:utf8",
|
"/encoding:utf8",
|
||||||
"/overwrite",
|
"/overwrite",
|
||||||
@@ -981,6 +1239,7 @@ class Subtitle(Track):
|
|||||||
],
|
],
|
||||||
check=True,
|
check=True,
|
||||||
stdout=subprocess.DEVNULL,
|
stdout=subprocess.DEVNULL,
|
||||||
|
stderr=subprocess.DEVNULL,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
if config.subtitle.get("convert_before_strip", True) and self.codec != Subtitle.Codec.SubRip:
|
if config.subtitle.get("convert_before_strip", True) and self.codec != Subtitle.Codec.SubRip:
|
||||||
@@ -1022,18 +1281,21 @@ class Subtitle(Track):
|
|||||||
if not binaries.SubtitleEdit:
|
if not binaries.SubtitleEdit:
|
||||||
raise EnvironmentError("SubtitleEdit executable not found...")
|
raise EnvironmentError("SubtitleEdit executable not found...")
|
||||||
|
|
||||||
if self.codec == Subtitle.Codec.SubStationAlphav4:
|
output_format = {
|
||||||
output_format = "AdvancedSubStationAlpha"
|
Subtitle.Codec.SubRip: "subrip",
|
||||||
elif self.codec == Subtitle.Codec.TimedTextMarkupLang:
|
Subtitle.Codec.SubStationAlpha: "substationalpha",
|
||||||
output_format = "TimedText1.0"
|
Subtitle.Codec.SubStationAlphav4: "advancedsubstationalpha",
|
||||||
else:
|
Subtitle.Codec.TimedTextMarkupLang: "timedtext1.0",
|
||||||
output_format = self.codec.name
|
Subtitle.Codec.WebVTT: "webvtt",
|
||||||
|
Subtitle.Codec.SAMI: "sami",
|
||||||
|
Subtitle.Codec.MicroDVD: "microdvd",
|
||||||
|
}.get(self.codec, self.codec.name.lower())
|
||||||
|
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
[
|
[
|
||||||
binaries.SubtitleEdit,
|
str(binaries.SubtitleEdit),
|
||||||
"/Convert",
|
"/convert",
|
||||||
self.path,
|
str(self.path),
|
||||||
output_format,
|
output_format,
|
||||||
"/ReverseRtlStartEnd",
|
"/ReverseRtlStartEnd",
|
||||||
"/encoding:utf8",
|
"/encoding:utf8",
|
||||||
@@ -1041,6 +1303,7 @@ class Subtitle(Track):
|
|||||||
],
|
],
|
||||||
check=True,
|
check=True,
|
||||||
stdout=subprocess.DEVNULL,
|
stdout=subprocess.DEVNULL,
|
||||||
|
stderr=subprocess.DEVNULL,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -295,12 +295,23 @@ class Track:
|
|||||||
try:
|
try:
|
||||||
if not self.drm and track_type in ("Video", "Audio"):
|
if not self.drm and track_type in ("Video", "Audio"):
|
||||||
# the service might not have explicitly defined the `drm` property
|
# the service might not have explicitly defined the `drm` property
|
||||||
# try find widevine DRM information from the init data of URL
|
# try find DRM information from the init data of URL based on CDM type
|
||||||
try:
|
if isinstance(cdm, PlayReadyCdm):
|
||||||
self.drm = [Widevine.from_track(self, session)]
|
try:
|
||||||
except Widevine.Exceptions.PSSHNotFound:
|
self.drm = [PlayReady.from_track(self, session)]
|
||||||
# it might not have Widevine DRM, or might not have found the PSSH
|
except PlayReady.Exceptions.PSSHNotFound:
|
||||||
log.warning("No Widevine PSSH was found for this track, is it DRM free?")
|
try:
|
||||||
|
self.drm = [Widevine.from_track(self, session)]
|
||||||
|
except Widevine.Exceptions.PSSHNotFound:
|
||||||
|
log.warning("No PlayReady or Widevine PSSH was found for this track, is it DRM free?")
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
self.drm = [Widevine.from_track(self, session)]
|
||||||
|
except Widevine.Exceptions.PSSHNotFound:
|
||||||
|
try:
|
||||||
|
self.drm = [PlayReady.from_track(self, session)]
|
||||||
|
except PlayReady.Exceptions.PSSHNotFound:
|
||||||
|
log.warning("No Widevine or PlayReady PSSH was found for this track, is it DRM free?")
|
||||||
|
|
||||||
if self.drm:
|
if self.drm:
|
||||||
track_kid = self.get_key_id(session=session)
|
track_kid = self.get_key_id(session=session)
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ from unshackle.core.tracks.chapters import Chapter, Chapters
|
|||||||
from unshackle.core.tracks.subtitle import Subtitle
|
from unshackle.core.tracks.subtitle import Subtitle
|
||||||
from unshackle.core.tracks.track import Track
|
from unshackle.core.tracks.track import Track
|
||||||
from unshackle.core.tracks.video import Video
|
from unshackle.core.tracks.video import Video
|
||||||
from unshackle.core.utilities import is_close_match, sanitize_filename
|
from unshackle.core.utilities import get_debug_logger, is_close_match, sanitize_filename
|
||||||
from unshackle.core.utils.collections import as_list, flatten
|
from unshackle.core.utils.collections import as_list, flatten
|
||||||
|
|
||||||
|
|
||||||
@@ -507,6 +507,35 @@ class Tracks:
|
|||||||
if not output_path:
|
if not output_path:
|
||||||
raise ValueError("No tracks provided, at least one track must be provided.")
|
raise ValueError("No tracks provided, at least one track must be provided.")
|
||||||
|
|
||||||
|
debug_logger = get_debug_logger()
|
||||||
|
if debug_logger:
|
||||||
|
debug_logger.log(
|
||||||
|
level="DEBUG",
|
||||||
|
operation="mux_start",
|
||||||
|
message="Starting mkvmerge muxing",
|
||||||
|
context={
|
||||||
|
"title": title,
|
||||||
|
"output_path": str(output_path),
|
||||||
|
"video_count": len(self.videos),
|
||||||
|
"audio_count": len(self.audio),
|
||||||
|
"subtitle_count": len(self.subtitles),
|
||||||
|
"attachment_count": len(self.attachments),
|
||||||
|
"has_chapters": bool(self.chapters),
|
||||||
|
"video_tracks": [
|
||||||
|
{"id": v.id, "codec": getattr(v, "codec", None), "language": str(v.language)}
|
||||||
|
for v in self.videos
|
||||||
|
],
|
||||||
|
"audio_tracks": [
|
||||||
|
{"id": a.id, "codec": getattr(a, "codec", None), "language": str(a.language)}
|
||||||
|
for a in self.audio
|
||||||
|
],
|
||||||
|
"subtitle_tracks": [
|
||||||
|
{"id": s.id, "codec": getattr(s, "codec", None), "language": str(s.language)}
|
||||||
|
for s in self.subtitles
|
||||||
|
],
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
# let potential failures go to caller, caller should handle
|
# let potential failures go to caller, caller should handle
|
||||||
try:
|
try:
|
||||||
errors = []
|
errors = []
|
||||||
@@ -516,7 +545,33 @@ class Tracks:
|
|||||||
errors.append(line)
|
errors.append(line)
|
||||||
if "progress" in line:
|
if "progress" in line:
|
||||||
progress(total=100, completed=int(line.strip()[14:-1]))
|
progress(total=100, completed=int(line.strip()[14:-1]))
|
||||||
return output_path, p.wait(), errors
|
|
||||||
|
returncode = p.wait()
|
||||||
|
|
||||||
|
if debug_logger:
|
||||||
|
if returncode != 0 or errors:
|
||||||
|
debug_logger.log(
|
||||||
|
level="ERROR",
|
||||||
|
operation="mux_failed",
|
||||||
|
message=f"mkvmerge exited with code {returncode}",
|
||||||
|
context={
|
||||||
|
"returncode": returncode,
|
||||||
|
"output_path": str(output_path),
|
||||||
|
"errors": errors,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
debug_logger.log(
|
||||||
|
level="DEBUG",
|
||||||
|
operation="mux_complete",
|
||||||
|
message="mkvmerge muxing completed successfully",
|
||||||
|
context={
|
||||||
|
"output_path": str(output_path),
|
||||||
|
"output_exists": output_path.exists() if output_path else False,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
return output_path, returncode, errors
|
||||||
finally:
|
finally:
|
||||||
if chapters_path:
|
if chapters_path:
|
||||||
chapters_path.unlink()
|
chapters_path.unlink()
|
||||||
|
|||||||
@@ -120,9 +120,14 @@ def sanitize_filename(filename: str, spacer: str = ".") -> str:
|
|||||||
|
|
||||||
The spacer is safer to be a '.' for older DDL and p2p sharing spaces.
|
The spacer is safer to be a '.' for older DDL and p2p sharing spaces.
|
||||||
This includes web-served content via direct links and such.
|
This includes web-served content via direct links and such.
|
||||||
|
|
||||||
|
Set `unicode_filenames: true` in config to preserve native language
|
||||||
|
characters (Korean, Japanese, Chinese, etc.) instead of transliterating
|
||||||
|
them to ASCII equivalents.
|
||||||
"""
|
"""
|
||||||
# replace all non-ASCII characters with ASCII equivalents
|
# optionally replace non-ASCII characters with ASCII equivalents
|
||||||
filename = unidecode(filename)
|
if not config.unicode_filenames:
|
||||||
|
filename = unidecode(filename)
|
||||||
|
|
||||||
# remove or replace further characters as needed
|
# remove or replace further characters as needed
|
||||||
filename = "".join(c for c in filename if unicodedata.category(c) != "Mn") # hidden characters
|
filename = "".join(c for c in filename if unicodedata.category(c) != "Mn") # hidden characters
|
||||||
|
|||||||
@@ -114,32 +114,71 @@ class API(Vault):
|
|||||||
return added or updated
|
return added or updated
|
||||||
|
|
||||||
def add_keys(self, service: str, kid_keys: dict[Union[UUID, str], str]) -> int:
|
def add_keys(self, service: str, kid_keys: dict[Union[UUID, str], str]) -> int:
|
||||||
data = self.session.post(
|
# Normalize keys
|
||||||
url=f"{self.uri}/{service.lower()}",
|
normalized_keys = {str(kid).replace("-", ""): key for kid, key in kid_keys.items()}
|
||||||
json={"content_keys": {str(kid).replace("-", ""): key for kid, key in kid_keys.items()}},
|
kid_list = list(normalized_keys.keys())
|
||||||
headers={"Accept": "application/json"},
|
|
||||||
).json()
|
|
||||||
|
|
||||||
code = int(data.get("code", 0))
|
if not kid_list:
|
||||||
message = data.get("message")
|
return 0
|
||||||
error = {
|
|
||||||
0: None,
|
|
||||||
1: Exceptions.AuthRejected,
|
|
||||||
2: Exceptions.TooManyRequests,
|
|
||||||
3: Exceptions.ServiceTagInvalid,
|
|
||||||
4: Exceptions.KeyIdInvalid,
|
|
||||||
5: Exceptions.ContentKeyInvalid,
|
|
||||||
}.get(code, ValueError)
|
|
||||||
|
|
||||||
if error:
|
# Try batches starting at 500, stepping down by 100 on failure, fallback to 1
|
||||||
raise error(f"{message} ({code})")
|
batch_size = 500
|
||||||
|
total_added = 0
|
||||||
|
i = 0
|
||||||
|
|
||||||
# each kid:key that was new to the vault (optional)
|
while i < len(kid_list):
|
||||||
added = int(data.get("added"))
|
batch_kids = kid_list[i : i + batch_size]
|
||||||
# each key for a kid that was changed/updated (optional)
|
batch_keys = {kid: normalized_keys[kid] for kid in batch_kids}
|
||||||
updated = int(data.get("updated"))
|
|
||||||
|
|
||||||
return added + updated
|
try:
|
||||||
|
response = self.session.post(
|
||||||
|
url=f"{self.uri}/{service.lower()}",
|
||||||
|
json={"content_keys": batch_keys},
|
||||||
|
headers={"Accept": "application/json"},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check for HTTP errors that suggest batch is too large
|
||||||
|
if response.status_code in (413, 414, 400) and batch_size > 1:
|
||||||
|
if batch_size > 100:
|
||||||
|
batch_size -= 100
|
||||||
|
else:
|
||||||
|
batch_size = 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
data = response.json()
|
||||||
|
except Exception:
|
||||||
|
# JSON decode error or connection issue - try smaller batch
|
||||||
|
if batch_size > 1:
|
||||||
|
if batch_size > 100:
|
||||||
|
batch_size -= 100
|
||||||
|
else:
|
||||||
|
batch_size = 1
|
||||||
|
continue
|
||||||
|
raise
|
||||||
|
|
||||||
|
code = int(data.get("code", 0))
|
||||||
|
message = data.get("message")
|
||||||
|
error = {
|
||||||
|
0: None,
|
||||||
|
1: Exceptions.AuthRejected,
|
||||||
|
2: Exceptions.TooManyRequests,
|
||||||
|
3: Exceptions.ServiceTagInvalid,
|
||||||
|
4: Exceptions.KeyIdInvalid,
|
||||||
|
5: Exceptions.ContentKeyInvalid,
|
||||||
|
}.get(code, ValueError)
|
||||||
|
|
||||||
|
if error:
|
||||||
|
raise error(f"{message} ({code})")
|
||||||
|
|
||||||
|
# each kid:key that was new to the vault (optional)
|
||||||
|
added = int(data.get("added", 0))
|
||||||
|
# each key for a kid that was changed/updated (optional)
|
||||||
|
updated = int(data.get("updated", 0))
|
||||||
|
|
||||||
|
total_added += added + updated
|
||||||
|
i += batch_size
|
||||||
|
|
||||||
|
return total_added
|
||||||
|
|
||||||
def get_services(self) -> Iterator[str]:
|
def get_services(self) -> Iterator[str]:
|
||||||
data = self.session.post(url=self.uri, headers={"Accept": "application/json"}).json()
|
data = self.session.post(url=self.uri, headers={"Accept": "application/json"}).json()
|
||||||
|
|||||||
@@ -119,9 +119,25 @@ class SQLite(Vault):
|
|||||||
cursor = conn.cursor()
|
cursor = conn.cursor()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
placeholders = ",".join(["?"] * len(kid_keys))
|
# Query existing KIDs in batches to avoid SQLite variable limit
|
||||||
cursor.execute(f"SELECT kid FROM `{service}` WHERE kid IN ({placeholders})", list(kid_keys.keys()))
|
# Try larger batch first (newer SQLite supports 32766), fall back to 500 if needed
|
||||||
existing_kids = {row[0] for row in cursor.fetchall()}
|
existing_kids: set[str] = set()
|
||||||
|
kid_list = list(kid_keys.keys())
|
||||||
|
batch_size = 32000
|
||||||
|
|
||||||
|
i = 0
|
||||||
|
while i < len(kid_list):
|
||||||
|
batch = kid_list[i : i + batch_size]
|
||||||
|
placeholders = ",".join(["?"] * len(batch))
|
||||||
|
try:
|
||||||
|
cursor.execute(f"SELECT kid FROM `{service}` WHERE kid IN ({placeholders})", batch)
|
||||||
|
existing_kids.update(row[0] for row in cursor.fetchall())
|
||||||
|
i += batch_size
|
||||||
|
except sqlite3.OperationalError as e:
|
||||||
|
if "too many SQL variables" in str(e) and batch_size > 500:
|
||||||
|
batch_size = 500
|
||||||
|
continue
|
||||||
|
raise
|
||||||
|
|
||||||
new_keys = {kid: key for kid, key in kid_keys.items() if kid not in existing_kids}
|
new_keys = {kid: key for kid, key in kid_keys.items() if kid not in existing_kids}
|
||||||
|
|
||||||
|
|||||||
2
uv.lock
generated
2
uv.lock
generated
@@ -1565,7 +1565,7 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unshackle"
|
name = "unshackle"
|
||||||
version = "2.1.0"
|
version = "2.3.0"
|
||||||
source = { editable = "." }
|
source = { editable = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "aiohttp-swagger3" },
|
{ name = "aiohttp-swagger3" },
|
||||||
|
|||||||
Reference in New Issue
Block a user