Merge branch 'main' into main
CHANGELOG.md | 58
@@ -5,6 +5,64 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [2.3.0] - 2026-01-18

### Added

- **Unicode Filenames Option**: New `unicode_filenames` config option to preserve native characters (see the sketch after this list)
  - Allows disabling ASCII transliteration in filenames
  - Preserves Korean, Japanese, Chinese, and other native language characters
  - Closes #49
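A minimal sketch of what the option changes, using a simplified stand-in for the `sanitize_filename` helper touched later in this diff (only the transliteration toggle mirrors the real change; the rest of the cleanup and the sample outputs are approximations):

```python
from unidecode import unidecode  # used by unshackle for ASCII transliteration

def sanitize_filename_sketch(filename: str, unicode_filenames: bool, spacer: str = ".") -> str:
    # simplified stand-in: the real helper does more cleanup than this
    if not unicode_filenames:
        # default behaviour: transliterate non-ASCII characters to ASCII equivalents
        filename = unidecode(filename)
    return filename.replace(" ", spacer)

print(sanitize_filename_sketch("오징어 게임 S01E01", unicode_filenames=False))  # roughly "ojingeo.geim.S01E01"
print(sanitize_filename_sketch("오징어 게임 S01E01", unicode_filenames=True))   # "오징어.게임.S01E01", characters preserved
```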

### Fixed

- **WebVTT Cue Handling**: Handle WebVTT cue identifiers and overlapping multi-line cues (see the illustration after this list)
  - Added detection and sanitization for cue identifiers (Q0, Q1, etc.) before timing lines
  - Added merging of overlapping cues with different line positions into multi-line subtitles
  - Fixes parsing issues with pysubs2/pycaption on certain WebVTT files
- **Widevine PSSH Filtering**: Filter Widevine PSSH by system ID instead of sorting
  - Fixes KeyError crash when unsupported DRM systems are present in init segments
- **TTML Negative Values**: Handle negative values in multi-value TTML attributes
  - Fixes pycaption parse errors for attributes like `tts:extent="-5% 7.5%"`
  - Closes #47
- **ASS Font Names**: Strip whitespace from ASS font names
  - Handles ASS subtitle files with spaces after commas in Style definitions
  - Fixes #57
- **Shaka-Packager Error Messages**: Include shaka-packager binary path in error messages
- **N_m3u8DL-RE Merge and Decryption**: Handle merge and decryption properly
  - Prevents audio corruption ("Box 'OG 2' size is too large") with DASH manifests
  - Fixes duplicate init segment writing when using N_m3u8DL-RE
- **DASH Placeholder KIDs**: Handle placeholder KIDs and improve DRM init from segments
  - Detects and replaces placeholder/test KIDs in Widevine PSSH
  - Adds CENC namespace support for kid/default_KID attributes
- **PlayReady PSSH Comparison**: Correct PSSH system ID comparison in PlayReady
  - Removes erroneous `.bytes` accessor from PSSH.SYSTEM_ID comparisons
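A minimal illustration of the WebVTT shapes the cue-handling fix targets (the sample is invented; the regex is the same `_CUE_ID_PATTERN` added to `Subtitle` later in this diff):

```python
import re

# Invented sample: a bare cue identifier ("Q1") before a timing line, plus two
# cues that start ~1 ms apart and share an end time (one cue per on-screen line).
vtt = """WEBVTT

Q1
00:00:01.000 --> 00:00:03.000 line:80%
First line of dialogue

00:00:01.001 --> 00:00:03.000 line:90%
Second line of dialogue
"""

cue_id = re.compile(r"^[A-Za-z]+\d+$")  # same pattern as Subtitle._CUE_ID_PATTERN below
print([line for line in vtt.splitlines() if cue_id.match(line.strip())])  # ['Q1']

# After Subtitle.sanitize_webvtt_cue_identifiers() and
# Subtitle.merge_overlapping_webvtt_cues(), the two cues become one multi-line
# cue, ordered by the line: position (lower % = higher on screen):
#   00:00:01.000 --> 00:00:03.000
#   First line of dialogue
#   Second line of dialogue
```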

## [2.2.0] - 2026-01-15

### Added

- **CDM-Aware PlayReady Fallback Detection**: Intelligent DRM fallback based on selected CDM
  - Adds PlayReady PSSH/KID extraction from track and init data with CDM-aware ordering
  - When PlayReady CDM is selected, tries PlayReady first then falls back to Widevine
  - When Widevine CDM is selected (default), tries Widevine first then falls back to PlayReady
- **Comprehensive Debug Logging**: Enhanced debug logging for downloaders and muxing
  - Added detailed debug logging to aria2c, curl_impersonate, n_m3u8dl_re, and requests downloaders
  - Enhanced manifest parsers (DASH, HLS, ISM) with debug logging
  - Added debug logging to track muxing operations

### Fixed

- **Hybrid DV+HDR10 Filename Detection**: Fixed HDR10 detection in hybrid Dolby Vision filenames
  - Hybrid DV+HDR10 files were incorrectly named "DV.H.265" instead of "DV.HDR.H.265"
  - Now checks both `hdr_format_full` and `hdr_format_commercial` fields for HDR10 indicators
- **Vault Adaptive Batch Sizing**: Improved bulk key operations with adaptive batch sizing
  - Prevents query limit issues when retrieving large numbers of keys from vaults
  - Dynamically adjusts batch sizes based on vault response characteristics
- **Test Command Improvements**: Enhanced test command error detection and sorting
  - Improved error detection in test command output
  - Added natural sorting for test results

## [2.1.0] - 2025-11-27

### Added
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "unshackle"
version = "2.1.0"
version = "2.3.0"
description = "Modular Movie, TV, and Music Archival Software."
authors = [{ name = "unshackle team" }]
requires-python = ">=3.10,<3.13"
@@ -1567,7 +1567,7 @@ class dl:
if subtitle.codec == Subtitle.Codec.SubStationAlphav4:
    for line in subtitle.path.read_text("utf8").splitlines():
        if line.startswith("Style: "):
            font_names.append(line.removesuffix("Style: ").split(",")[1])
            font_names.append(line.removeprefix("Style: ").split(",")[1].strip())

font_count, missing_fonts = self.attach_subtitle_fonts(
    font_names, title, temp_font_files
@@ -1,3 +1,4 @@
import re
import subprocess
from pathlib import Path

@@ -8,6 +9,11 @@ from unshackle.core import binaries
from unshackle.core.constants import context_settings


def _natural_sort_key(path: Path) -> list:
    """Sort key for natural sorting (S01E01 before S01E10)."""
    return [int(part) if part.isdigit() else part.lower() for part in re.split(r"(\d+)", path.name)]
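A quick usage sketch of the key above (file names invented, not part of the commit):

```python
from pathlib import Path

files = [Path("Show.S01E10.mkv"), Path("Show.S01E2.mkv"), Path("Show.S01E1.mkv")]
# A plain lexical sort would order E1, E10, E2; splitting out the digit runs
# lets the episode numbers compare numerically instead.
print([p.name for p in sorted(files, key=_natural_sort_key)])
# -> ['Show.S01E1.mkv', 'Show.S01E2.mkv', 'Show.S01E10.mkv']
```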


@click.group(short_help="Various helper scripts and programs.", context_settings=context_settings)
def util() -> None:
    """Various helper scripts and programs."""

@@ -49,7 +55,7 @@ def crop(path: Path, aspect: str, letter: bool, offset: int, preview: bool) -> N
    raise click.ClickException('FFmpeg executable "ffmpeg" not found but is required.')

if path.is_dir():
    paths = list(path.glob("*.mkv")) + list(path.glob("*.mp4"))
    paths = sorted(list(path.glob("*.mkv")) + list(path.glob("*.mp4")), key=_natural_sort_key)
else:
    paths = [path]
for video_path in paths:

@@ -140,7 +146,7 @@ def range_(path: Path, full: bool, preview: bool) -> None:
    raise click.ClickException('FFmpeg executable "ffmpeg" not found but is required.')

if path.is_dir():
    paths = list(path.glob("*.mkv")) + list(path.glob("*.mp4"))
    paths = sorted(list(path.glob("*.mkv")) + list(path.glob("*.mp4")), key=_natural_sort_key)
else:
    paths = [path]
for video_path in paths:

@@ -225,16 +231,18 @@ def test(path: Path, map_: str) -> None:
    raise click.ClickException('FFmpeg executable "ffmpeg" not found but is required.')

if path.is_dir():
    paths = list(path.glob("*.mkv")) + list(path.glob("*.mp4"))
    paths = sorted(list(path.glob("*.mkv")) + list(path.glob("*.mp4")), key=_natural_sort_key)
else:
    paths = [path]
for video_path in paths:
    print("Starting...")
    print(f"Testing: {video_path.name}")
    p = subprocess.Popen(
        [
            binaries.FFMPEG,
            "-hide_banner",
            "-benchmark",
            "-err_detect",
            "+crccheck+bitstream+buffer+careful+compliant+aggressive",
            "-i",
            str(video_path),
            "-map",

@@ -255,13 +263,13 @@ def test(path: Path, map_: str) -> None:
        reached_output = True
    if not reached_output:
        continue
    if line.startswith("["):  # error of some kind
    if line.startswith("[") and not line.startswith("[out#"):
        errors += 1
        stream, error = line.split("] ", maxsplit=1)
        stream = stream.split(" @ ")[0]
        line = f"{stream} ERROR: {error}"
    print(line)
p.stderr.close()
print(f"Finished with {errors} Errors, Cleaning up...")
print(f"Finished with {errors} error(s)")
p.terminate()
p.wait()
@@ -1 +1 @@
__version__ = "2.1.0"
__version__ = "2.3.0"
@@ -95,6 +95,7 @@ class Config:
self.update_check_interval: int = kwargs.get("update_check_interval", 24)
self.scene_naming: bool = kwargs.get("scene_naming", True)
self.series_year: bool = kwargs.get("series_year", True)
self.unicode_filenames: bool = kwargs.get("unicode_filenames", False)

self.title_cache_time: int = kwargs.get("title_cache_time", 1800)  # 30 minutes default
self.title_cache_max_retention: int = kwargs.get("title_cache_max_retention", 86400)  # 24 hours default
@@ -19,7 +19,7 @@ from unshackle.core import binaries
|
||||
from unshackle.core.config import config
|
||||
from unshackle.core.console import console
|
||||
from unshackle.core.constants import DOWNLOAD_CANCELLED
|
||||
from unshackle.core.utilities import get_extension, get_free_port
|
||||
from unshackle.core.utilities import get_debug_logger, get_extension, get_free_port
|
||||
|
||||
|
||||
def rpc(caller: Callable, secret: str, method: str, params: Optional[list[Any]] = None) -> Any:
|
||||
@@ -58,6 +58,8 @@ def download(
|
||||
proxy: Optional[str] = None,
|
||||
max_workers: Optional[int] = None,
|
||||
) -> Generator[dict[str, Any], None, None]:
|
||||
debug_logger = get_debug_logger()
|
||||
|
||||
if not urls:
|
||||
raise ValueError("urls must be provided and not empty")
|
||||
elif not isinstance(urls, (str, dict, list)):
|
||||
@@ -91,6 +93,13 @@ def download(
|
||||
urls = [urls]
|
||||
|
||||
if not binaries.Aria2:
|
||||
if debug_logger:
|
||||
debug_logger.log(
|
||||
level="ERROR",
|
||||
operation="downloader_aria2c_binary_missing",
|
||||
message="Aria2c executable not found in PATH or local binaries directory",
|
||||
context={"searched_names": ["aria2c", "aria2"]},
|
||||
)
|
||||
raise EnvironmentError("Aria2c executable not found...")
|
||||
|
||||
if proxy and not proxy.lower().startswith("http://"):
|
||||
@@ -180,6 +189,28 @@ def download(
|
||||
continue
|
||||
arguments.extend(["--header", f"{header}: {value}"])
|
||||
|
||||
if debug_logger:
|
||||
first_url = urls[0] if isinstance(urls[0], str) else urls[0].get("url", "")
|
||||
url_display = first_url[:200] + "..." if len(first_url) > 200 else first_url
|
||||
debug_logger.log(
|
||||
level="DEBUG",
|
||||
operation="downloader_aria2c_start",
|
||||
message="Starting Aria2c download",
|
||||
context={
|
||||
"binary_path": str(binaries.Aria2),
|
||||
"url_count": len(urls),
|
||||
"first_url": url_display,
|
||||
"output_dir": str(output_dir),
|
||||
"filename": filename,
|
||||
"max_concurrent_downloads": max_concurrent_downloads,
|
||||
"max_connection_per_server": max_connection_per_server,
|
||||
"split": split,
|
||||
"file_allocation": file_allocation,
|
||||
"has_proxy": bool(proxy),
|
||||
"rpc_port": rpc_port,
|
||||
},
|
||||
)
|
||||
|
||||
yield dict(total=len(urls))
|
||||
|
||||
try:
|
||||
@@ -226,6 +257,20 @@ def download(
|
||||
textwrap.wrap(error, width=console.width - 20, initial_indent="")
|
||||
)
|
||||
console.log(Text.from_ansi("\n[Aria2c]: " + error_pretty))
|
||||
if debug_logger:
|
||||
debug_logger.log(
|
||||
level="ERROR",
|
||||
operation="downloader_aria2c_download_error",
|
||||
message=f"Aria2c download failed: {dl['errorMessage']}",
|
||||
context={
|
||||
"gid": dl["gid"],
|
||||
"error_code": dl["errorCode"],
|
||||
"error_message": dl["errorMessage"],
|
||||
"used_uri": used_uri[:200] + "..." if len(used_uri) > 200 else used_uri,
|
||||
"completed_length": dl.get("completedLength"),
|
||||
"total_length": dl.get("totalLength"),
|
||||
},
|
||||
)
|
||||
raise ValueError(error)
|
||||
|
||||
if number_stopped == len(urls):
|
||||
@@ -237,7 +282,31 @@ def download(
|
||||
p.wait()
|
||||
|
||||
if p.returncode != 0:
|
||||
if debug_logger:
|
||||
debug_logger.log(
|
||||
level="ERROR",
|
||||
operation="downloader_aria2c_failed",
|
||||
message=f"Aria2c exited with code {p.returncode}",
|
||||
context={
|
||||
"returncode": p.returncode,
|
||||
"url_count": len(urls),
|
||||
"output_dir": str(output_dir),
|
||||
},
|
||||
)
|
||||
raise subprocess.CalledProcessError(p.returncode, arguments)
|
||||
|
||||
if debug_logger:
|
||||
debug_logger.log(
|
||||
level="DEBUG",
|
||||
operation="downloader_aria2c_complete",
|
||||
message="Aria2c download completed successfully",
|
||||
context={
|
||||
"url_count": len(urls),
|
||||
"output_dir": str(output_dir),
|
||||
"filename": filename,
|
||||
},
|
||||
)
|
||||
|
||||
except ConnectionResetError:
|
||||
# interrupted while passing URI to download
|
||||
raise KeyboardInterrupt()
|
||||
@@ -251,9 +320,20 @@ def download(
|
||||
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
||||
yield dict(downloaded="[yellow]CANCELLED")
|
||||
raise
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
||||
yield dict(downloaded="[red]FAILED")
|
||||
if debug_logger and not isinstance(e, (subprocess.CalledProcessError, ValueError)):
|
||||
debug_logger.log(
|
||||
level="ERROR",
|
||||
operation="downloader_aria2c_exception",
|
||||
message=f"Unexpected error during Aria2c download: {e}",
|
||||
error=e,
|
||||
context={
|
||||
"url_count": len(urls),
|
||||
"output_dir": str(output_dir),
|
||||
},
|
||||
)
|
||||
raise
|
||||
finally:
|
||||
rpc(caller=partial(rpc_session.post, url=rpc_uri), secret=rpc_secret, method="aria2.shutdown")
|
||||
|
||||
@@ -11,7 +11,7 @@ from rich import filesize
|
||||
|
||||
from unshackle.core.config import config
|
||||
from unshackle.core.constants import DOWNLOAD_CANCELLED
|
||||
from unshackle.core.utilities import get_extension
|
||||
from unshackle.core.utilities import get_debug_logger, get_extension
|
||||
|
||||
MAX_ATTEMPTS = 5
|
||||
RETRY_WAIT = 2
|
||||
@@ -189,6 +189,8 @@ def curl_impersonate(
|
||||
if not isinstance(max_workers, (int, type(None))):
|
||||
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
|
||||
|
||||
debug_logger = get_debug_logger()
|
||||
|
||||
if not isinstance(urls, list):
|
||||
urls = [urls]
|
||||
|
||||
@@ -209,6 +211,24 @@ def curl_impersonate(
|
||||
if proxy:
|
||||
session.proxies.update({"all": proxy})
|
||||
|
||||
if debug_logger:
|
||||
first_url = urls[0].get("url", "") if urls else ""
|
||||
url_display = first_url[:200] + "..." if len(first_url) > 200 else first_url
|
||||
debug_logger.log(
|
||||
level="DEBUG",
|
||||
operation="downloader_curl_impersonate_start",
|
||||
message="Starting curl_impersonate download",
|
||||
context={
|
||||
"url_count": len(urls),
|
||||
"first_url": url_display,
|
||||
"output_dir": str(output_dir),
|
||||
"filename": filename,
|
||||
"max_workers": max_workers,
|
||||
"browser": BROWSER,
|
||||
"has_proxy": bool(proxy),
|
||||
},
|
||||
)
|
||||
|
||||
yield dict(total=len(urls))
|
||||
|
||||
download_sizes = []
|
||||
@@ -235,11 +255,23 @@ def curl_impersonate(
|
||||
# tell dl that it was cancelled
|
||||
# the pool is already shut down, so exiting loop is fine
|
||||
raise
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
||||
yield dict(downloaded="[red]FAILING")
|
||||
pool.shutdown(wait=True, cancel_futures=True)
|
||||
yield dict(downloaded="[red]FAILED")
|
||||
if debug_logger:
|
||||
debug_logger.log(
|
||||
level="ERROR",
|
||||
operation="downloader_curl_impersonate_failed",
|
||||
message=f"curl_impersonate download failed: {e}",
|
||||
error=e,
|
||||
context={
|
||||
"url_count": len(urls),
|
||||
"output_dir": str(output_dir),
|
||||
"browser": BROWSER,
|
||||
},
|
||||
)
|
||||
# tell dl that it failed
|
||||
# the pool is already shut down, so exiting loop is fine
|
||||
raise
|
||||
@@ -260,5 +292,17 @@ def curl_impersonate(
|
||||
last_speed_refresh = now
|
||||
download_sizes.clear()
|
||||
|
||||
if debug_logger:
|
||||
debug_logger.log(
|
||||
level="DEBUG",
|
||||
operation="downloader_curl_impersonate_complete",
|
||||
message="curl_impersonate download completed successfully",
|
||||
context={
|
||||
"url_count": len(urls),
|
||||
"output_dir": str(output_dir),
|
||||
"filename": filename,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
__all__ = ("curl_impersonate",)
|
||||
|
||||
@@ -10,9 +10,11 @@ import requests
|
||||
from requests.cookies import cookiejar_from_dict, get_cookie_header
|
||||
|
||||
from unshackle.core import binaries
|
||||
from unshackle.core.binaries import FFMPEG, ShakaPackager, Mp4decrypt
|
||||
from unshackle.core.config import config
|
||||
from unshackle.core.console import console
|
||||
from unshackle.core.constants import DOWNLOAD_CANCELLED
|
||||
from unshackle.core.utilities import get_debug_logger
|
||||
|
||||
PERCENT_RE = re.compile(r"(\d+\.\d+%)")
|
||||
SPEED_RE = re.compile(r"(\d+\.\d+(?:MB|KB)ps)")
|
||||
@@ -66,12 +68,17 @@ def get_track_selection_args(track: Any) -> list[str]:
|
||||
parts = []
|
||||
|
||||
if track_type == "Audio":
|
||||
if track_id := representation.get("id") or adaptation_set.get("audioTrackId"):
|
||||
parts.append(rf"id={track_id}")
|
||||
track_id = representation.get("id") or adaptation_set.get("audioTrackId")
|
||||
lang = representation.get("lang") or adaptation_set.get("lang")
|
||||
|
||||
if track_id:
|
||||
parts.append(rf'"id=\b{track_id}\b"')
|
||||
if lang:
|
||||
parts.append(f"lang={lang}")
|
||||
else:
|
||||
if codecs := representation.get("codecs"):
|
||||
parts.append(f"codecs={codecs}")
|
||||
if lang := representation.get("lang") or adaptation_set.get("lang"):
|
||||
if lang:
|
||||
parts.append(f"lang={lang}")
|
||||
if bw := representation.get("bandwidth"):
|
||||
bitrate = int(bw) // 1000
|
||||
@@ -178,15 +185,32 @@ def build_download_args(
|
||||
"--write-meta-json": False,
|
||||
"--no-log": True,
|
||||
}
|
||||
if FFMPEG:
|
||||
args["--ffmpeg-binary-path"] = str(FFMPEG)
|
||||
if proxy:
|
||||
args["--custom-proxy"] = proxy
|
||||
if skip_merge:
|
||||
args["--skip-merge"] = skip_merge
|
||||
if ad_keyword:
|
||||
args["--ad-keyword"] = ad_keyword
|
||||
|
||||
if content_keys:
|
||||
args["--key"] = next((f"{kid.hex}:{key.lower()}" for kid, key in content_keys.items()), None)
|
||||
args["--decryption-engine"] = DECRYPTION_ENGINE.get(config.decryption.lower()) or "SHAKA_PACKAGER"
|
||||
|
||||
decryption_config = config.decryption.lower()
|
||||
engine_name = DECRYPTION_ENGINE.get(decryption_config) or "SHAKA_PACKAGER"
|
||||
args["--decryption-engine"] = engine_name
|
||||
|
||||
binary_path = None
|
||||
if engine_name == "SHAKA_PACKAGER":
|
||||
if ShakaPackager:
|
||||
binary_path = str(ShakaPackager)
|
||||
elif engine_name == "MP4DECRYPT":
|
||||
if Mp4decrypt:
|
||||
binary_path = str(Mp4decrypt)
|
||||
if binary_path:
|
||||
args["--decryption-binary-path"] = binary_path
|
||||
|
||||
if custom_args:
|
||||
args.update(custom_args)
|
||||
|
||||
@@ -224,6 +248,8 @@ def download(
|
||||
content_keys: dict[str, Any] | None,
|
||||
skip_merge: bool | None = False,
|
||||
) -> Generator[dict[str, Any], None, None]:
|
||||
debug_logger = get_debug_logger()
|
||||
|
||||
if not urls:
|
||||
raise ValueError("urls must be provided and not empty")
|
||||
if not isinstance(urls, (str, dict, list)):
|
||||
@@ -251,6 +277,18 @@ def download(
|
||||
if not binaries.N_m3u8DL_RE:
|
||||
raise EnvironmentError("N_m3u8DL-RE executable not found...")
|
||||
|
||||
decryption_engine = config.decryption.lower()
|
||||
binary_path = None
|
||||
|
||||
if content_keys:
|
||||
if decryption_engine == "shaka":
|
||||
binary_path = binaries.ShakaPackager
|
||||
elif decryption_engine == "mp4decrypt":
|
||||
binary_path = binaries.Mp4decrypt
|
||||
|
||||
if binary_path:
|
||||
binary_path = Path(binary_path)
|
||||
|
||||
effective_max_workers = max_workers or min(32, (os.cpu_count() or 1) + 4)
|
||||
|
||||
if proxy and not config.n_m3u8dl_re.get("use_proxy", True):
|
||||
@@ -275,11 +313,49 @@ def download(
|
||||
skip_merge=skip_merge,
|
||||
ad_keyword=ad_keyword,
|
||||
)
|
||||
arguments.extend(get_track_selection_args(track))
|
||||
selection_args = get_track_selection_args(track)
|
||||
arguments.extend(selection_args)
|
||||
|
||||
log_file_path: Path | None = None
|
||||
if debug_logger:
|
||||
log_file_path = output_dir / f".n_m3u8dl_re_{filename}.log"
|
||||
arguments.extend(["--log-file-path", str(log_file_path)])
|
||||
|
||||
track_url_display = track.url[:200] + "..." if len(track.url) > 200 else track.url
|
||||
debug_logger.log(
|
||||
level="DEBUG",
|
||||
operation="downloader_n_m3u8dl_re_start",
|
||||
message="Starting N_m3u8DL-RE download",
|
||||
context={
|
||||
"binary_path": str(binaries.N_m3u8DL_RE),
|
||||
"track_id": getattr(track, "id", None),
|
||||
"track_type": track.__class__.__name__,
|
||||
"track_url": track_url_display,
|
||||
"output_dir": str(output_dir),
|
||||
"filename": filename,
|
||||
"thread_count": thread_count,
|
||||
"retry_count": retry_count,
|
||||
"has_content_keys": bool(content_keys),
|
||||
"content_key_count": len(content_keys) if content_keys else 0,
|
||||
"has_proxy": bool(proxy),
|
||||
"skip_merge": skip_merge,
|
||||
"has_custom_args": bool(track.downloader_args),
|
||||
"selection_args": selection_args,
|
||||
"descriptor": track.descriptor.name if hasattr(track, "descriptor") else None,
|
||||
},
|
||||
)
|
||||
else:
|
||||
arguments.extend(["--no-log", "true"])
|
||||
|
||||
yield {"total": 100}
|
||||
yield {"downloaded": "Parsing streams..."}
|
||||
|
||||
env = os.environ.copy()
|
||||
|
||||
if binary_path and binary_path.exists():
|
||||
binary_dir = str(binary_path.parent)
|
||||
env["PATH"] = binary_dir + os.pathsep + env["PATH"]
|
||||
|
||||
try:
|
||||
with subprocess.Popen(
|
||||
[binaries.N_m3u8DL_RE, *arguments],
|
||||
@@ -287,6 +363,7 @@ def download(
|
||||
stderr=subprocess.STDOUT,
|
||||
text=True,
|
||||
encoding="utf-8",
|
||||
env=env, # Assign to virtual environment variables
|
||||
) as process:
|
||||
last_line = ""
|
||||
track_type = track.__class__.__name__
|
||||
@@ -297,12 +374,16 @@ def download(
|
||||
continue
|
||||
last_line = output
|
||||
|
||||
if ERROR_RE.search(output):
|
||||
console.log(f"[N_m3u8DL-RE]: {output}")
|
||||
|
||||
if warn_match := WARN_RE.search(output):
|
||||
console.log(f"{track_type} {warn_match.group(1)}")
|
||||
continue
|
||||
|
||||
if speed_match := SPEED_RE.search(output):
|
||||
size = size_match.group(1) if (size_match := SIZE_RE.search(output)) else ""
|
||||
size_match = SIZE_RE.search(output)
|
||||
size = size_match.group(1) if size_match else ""
|
||||
yield {"downloaded": f"{speed_match.group(1)} {size}"}
|
||||
|
||||
if percent_match := PERCENT_RE.search(output):
|
||||
@@ -310,11 +391,45 @@ def download(
|
||||
yield {"completed": progress} if progress < 100 else {"downloaded": "Merging"}
|
||||
|
||||
process.wait()
|
||||
|
||||
if process.returncode != 0:
|
||||
if debug_logger and log_file_path:
|
||||
log_contents = ""
|
||||
if log_file_path.exists():
|
||||
try:
|
||||
log_contents = log_file_path.read_text(encoding="utf-8", errors="replace")
|
||||
except Exception:
|
||||
log_contents = "<failed to read log file>"
|
||||
|
||||
debug_logger.log(
|
||||
level="ERROR",
|
||||
operation="downloader_n_m3u8dl_re_failed",
|
||||
message=f"N_m3u8DL-RE exited with code {process.returncode}",
|
||||
context={
|
||||
"returncode": process.returncode,
|
||||
"track_id": getattr(track, "id", None),
|
||||
"track_type": track.__class__.__name__,
|
||||
"last_line": last_line,
|
||||
"log_file_contents": log_contents,
|
||||
},
|
||||
)
|
||||
if error_match := ERROR_RE.search(last_line):
|
||||
raise ValueError(f"[N_m3u8DL-RE]: {error_match.group(1)}")
|
||||
raise subprocess.CalledProcessError(process.returncode, arguments)
|
||||
|
||||
if debug_logger:
|
||||
debug_logger.log(
|
||||
level="DEBUG",
|
||||
operation="downloader_n_m3u8dl_re_complete",
|
||||
message="N_m3u8DL-RE download completed successfully",
|
||||
context={
|
||||
"track_id": getattr(track, "id", None),
|
||||
"track_type": track.__class__.__name__,
|
||||
"output_dir": str(output_dir),
|
||||
"filename": filename,
|
||||
},
|
||||
)
|
||||
|
||||
except ConnectionResetError:
|
||||
# interrupted while passing URI to download
|
||||
raise KeyboardInterrupt()
|
||||
@@ -322,10 +437,35 @@ def download(
|
||||
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
||||
yield {"downloaded": "[yellow]CANCELLED"}
|
||||
raise
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
||||
yield {"downloaded": "[red]FAILED"}
|
||||
if debug_logger and log_file_path and not isinstance(e, (subprocess.CalledProcessError, ValueError)):
|
||||
log_contents = ""
|
||||
if log_file_path.exists():
|
||||
try:
|
||||
log_contents = log_file_path.read_text(encoding="utf-8", errors="replace")
|
||||
except Exception:
|
||||
log_contents = "<failed to read log file>"
|
||||
|
||||
debug_logger.log(
|
||||
level="ERROR",
|
||||
operation="downloader_n_m3u8dl_re_exception",
|
||||
message=f"Unexpected error during N_m3u8DL-RE download: {e}",
|
||||
error=e,
|
||||
context={
|
||||
"track_id": getattr(track, "id", None),
|
||||
"track_type": track.__class__.__name__,
|
||||
"log_file_contents": log_contents,
|
||||
},
|
||||
)
|
||||
raise
|
||||
finally:
|
||||
if log_file_path and log_file_path.exists():
|
||||
try:
|
||||
log_file_path.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def n_m3u8dl_re(
|
||||
|
||||
@@ -12,7 +12,7 @@ from requests.adapters import HTTPAdapter
|
||||
from rich import filesize
|
||||
|
||||
from unshackle.core.constants import DOWNLOAD_CANCELLED
|
||||
from unshackle.core.utilities import get_extension
|
||||
from unshackle.core.utilities import get_debug_logger, get_extension
|
||||
|
||||
MAX_ATTEMPTS = 5
|
||||
RETRY_WAIT = 2
|
||||
@@ -215,6 +215,8 @@ def requests(
|
||||
if not isinstance(max_workers, (int, type(None))):
|
||||
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
|
||||
|
||||
debug_logger = get_debug_logger()
|
||||
|
||||
if not isinstance(urls, list):
|
||||
urls = [urls]
|
||||
|
||||
@@ -241,6 +243,23 @@ def requests(
|
||||
if proxy:
|
||||
session.proxies.update({"all": proxy})
|
||||
|
||||
if debug_logger:
|
||||
first_url = urls[0].get("url", "") if urls else ""
|
||||
url_display = first_url[:200] + "..." if len(first_url) > 200 else first_url
|
||||
debug_logger.log(
|
||||
level="DEBUG",
|
||||
operation="downloader_requests_start",
|
||||
message="Starting requests download",
|
||||
context={
|
||||
"url_count": len(urls),
|
||||
"first_url": url_display,
|
||||
"output_dir": str(output_dir),
|
||||
"filename": filename,
|
||||
"max_workers": max_workers,
|
||||
"has_proxy": bool(proxy),
|
||||
},
|
||||
)
|
||||
|
||||
yield dict(total=len(urls))
|
||||
|
||||
try:
|
||||
@@ -256,14 +275,37 @@ def requests(
|
||||
# tell dl that it was cancelled
|
||||
# the pool is already shut down, so exiting loop is fine
|
||||
raise
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
||||
yield dict(downloaded="[red]FAILING")
|
||||
pool.shutdown(wait=True, cancel_futures=True)
|
||||
yield dict(downloaded="[red]FAILED")
|
||||
if debug_logger:
|
||||
debug_logger.log(
|
||||
level="ERROR",
|
||||
operation="downloader_requests_failed",
|
||||
message=f"Requests download failed: {e}",
|
||||
error=e,
|
||||
context={
|
||||
"url_count": len(urls),
|
||||
"output_dir": str(output_dir),
|
||||
},
|
||||
)
|
||||
# tell dl that it failed
|
||||
# the pool is already shut down, so exiting loop is fine
|
||||
raise
|
||||
|
||||
if debug_logger:
|
||||
debug_logger.log(
|
||||
level="DEBUG",
|
||||
operation="downloader_requests_complete",
|
||||
message="Requests download completed successfully",
|
||||
context={
|
||||
"url_count": len(urls),
|
||||
"output_dir": str(output_dir),
|
||||
"filename": filename,
|
||||
},
|
||||
)
|
||||
finally:
|
||||
DOWNLOAD_SIZES.clear()
|
||||
|
||||
|
||||
@@ -168,7 +168,7 @@ class PlayReady:
pssh_boxes.extend(list(get_boxes(init_data, b"pssh")))
tenc_boxes.extend(list(get_boxes(init_data, b"tenc")))

pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SYSTEM_ID.bytes), None)
pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SYSTEM_ID), None)
if not pssh:
    raise PlayReady.Exceptions.PSSHNotFound("PSSH was not found in track data.")

@@ -197,7 +197,7 @@ class PlayReady:
if enc_key_id:
    kid = UUID(bytes=base64.b64decode(enc_key_id))

pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SYSTEM_ID.bytes), None)
pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SYSTEM_ID), None)
if not pssh:
    raise PlayReady.Exceptions.PSSHNotFound("PSSH was not found in track data.")

@@ -415,7 +415,7 @@ class PlayReady:
p.wait()

if p.returncode != 0 or had_error:
    raise subprocess.CalledProcessError(p.returncode, arguments)
    raise subprocess.CalledProcessError(p.returncode, [binaries.ShakaPackager, *arguments])

path.unlink()
if not stream_skipped:
@@ -100,9 +100,7 @@ class Widevine:
pssh_boxes.extend(list(get_boxes(init_data, b"pssh")))
tenc_boxes.extend(list(get_boxes(init_data, b"tenc")))

pssh_boxes.sort(key=lambda b: {PSSH.SystemId.Widevine: 0, PSSH.SystemId.PlayReady: 1}[b.system_ID])

pssh = next(iter(pssh_boxes), None)
pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SystemId.Widevine), None)
if not pssh:
    raise Widevine.Exceptions.PSSHNotFound("PSSH was not found in track data.")

@@ -141,9 +139,7 @@ class Widevine:
if enc_key_id:
    kid = UUID(bytes=base64.b64decode(enc_key_id))

pssh_boxes.sort(key=lambda b: {PSSH.SystemId.Widevine: 0, PSSH.SystemId.PlayReady: 1}[b.system_ID])

pssh = next(iter(pssh_boxes), None)
pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SystemId.Widevine), None)
if not pssh:
    raise Widevine.Exceptions.PSSHNotFound("PSSH was not found in track data.")

@@ -371,7 +367,7 @@ class Widevine:
p.wait()

if p.returncode != 0 or had_error:
    raise subprocess.CalledProcessError(p.returncode, arguments)
    raise subprocess.CalledProcessError(p.returncode, [binaries.ShakaPackager, *arguments])

path.unlink()
if not stream_skipped:
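The Widevine hunks above replace a sort whose key was a fixed two-entry lookup table with a filter on the Widevine system ID; with the old code, any `pssh` box from another DRM system raised a KeyError inside the sort. A standalone sketch of the difference (the `Box` class and the third system ID are invented for illustration; the Widevine and PlayReady system IDs are the standard ones):

```python
from dataclasses import dataclass
from uuid import UUID

WIDEVINE = UUID("edef8ba9-79d6-4ace-a3c8-27dcd51d21ed")   # Widevine system ID
PLAYREADY = UUID("9a04f079-9840-4286-ab92-e65be0885f95")  # PlayReady system ID
OTHER = UUID("00000000-0000-0000-0000-000000000001")      # stand-in for an unsupported DRM system

@dataclass
class Box:  # minimal stand-in for a parsed pssh box
    system_ID: UUID

boxes = [Box(PLAYREADY), Box(OTHER), Box(WIDEVINE)]

# old approach: sorting with a fixed lookup table blows up on OTHER
try:
    boxes.sort(key=lambda b: {WIDEVINE: 0, PLAYREADY: 1}[b.system_ID])
except KeyError as e:
    print("sort-based selection fails:", e)

# new approach: just pick the first box for the system we actually want
pssh = next((b for b in boxes if b.system_ID == WIDEVINE), None)
print("filtered selection:", pssh)
```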
@@ -5,6 +5,7 @@ import html
import logging
import math
import re
import shutil
import sys
from copy import copy
from functools import partial

@@ -18,6 +19,7 @@ import requests
from curl_cffi.requests import Session as CurlSession
from langcodes import Language, tag_is_valid
from lxml.etree import Element, ElementTree
from pyplayready.cdm import Cdm as PlayReadyCdm
from pyplayready.system.pssh import PSSH as PR_PSSH
from pywidevine.cdm import Cdm as WidevineCdm
from pywidevine.pssh import PSSH

@@ -28,7 +30,7 @@ from unshackle.core.downloaders import requests as requests_downloader
from unshackle.core.drm import DRM_T, PlayReady, Widevine
from unshackle.core.events import events
from unshackle.core.tracks import Audio, Subtitle, Tracks, Video
from unshackle.core.utilities import is_close_match, try_ensure_utf8
from unshackle.core.utilities import get_debug_logger, is_close_match, try_ensure_utf8
from unshackle.core.utils.xml import load_xml
@@ -465,12 +467,23 @@ class DASH:
track.data["dash"]["timescale"] = int(segment_timescale)
track.data["dash"]["segment_durations"] = segment_durations

if not track.drm and isinstance(track, (Video, Audio)):
if init_data and isinstance(track, (Video, Audio)):
    if isinstance(cdm, PlayReadyCdm):
        try:
            track.drm = [PlayReady.from_init_data(init_data)]
        except PlayReady.Exceptions.PSSHNotFound:
            try:
                track.drm = [Widevine.from_init_data(init_data)]
            except Widevine.Exceptions.PSSHNotFound:
                # it might not have Widevine DRM, or might not have found the PSSH
                log.warning("No Widevine PSSH was found for this track, is it DRM free?")
                log.warning("No PlayReady or Widevine PSSH was found for this track, is it DRM free?")
    else:
        try:
            track.drm = [Widevine.from_init_data(init_data)]
        except Widevine.Exceptions.PSSHNotFound:
            try:
                track.drm = [PlayReady.from_init_data(init_data)]
            except PlayReady.Exceptions.PSSHNotFound:
                log.warning("No Widevine or PlayReady PSSH was found for this track, is it DRM free?")

if track.drm:
    track_kid = track_kid or track.get_key_id(url=segments[0][0], session=session)
@@ -515,8 +528,35 @@ class DASH:
|
||||
max_workers=max_workers,
|
||||
)
|
||||
|
||||
skip_merge = False
|
||||
if downloader.__name__ == "n_m3u8dl_re":
|
||||
downloader_args.update({"filename": track.id, "track": track})
|
||||
skip_merge = True
|
||||
downloader_args.update(
|
||||
{
|
||||
"filename": track.id,
|
||||
"track": track,
|
||||
"content_keys": drm.content_keys if drm else None,
|
||||
}
|
||||
)
|
||||
|
||||
debug_logger = get_debug_logger()
|
||||
if debug_logger:
|
||||
debug_logger.log(
|
||||
level="DEBUG",
|
||||
operation="manifest_dash_download_start",
|
||||
message="Starting DASH manifest download",
|
||||
context={
|
||||
"track_id": getattr(track, "id", None),
|
||||
"track_type": track.__class__.__name__,
|
||||
"total_segments": len(segments),
|
||||
"downloader": downloader.__name__,
|
||||
"has_drm": bool(track.drm),
|
||||
"drm_types": [drm.__class__.__name__ for drm in (track.drm or [])],
|
||||
"skip_merge": skip_merge,
|
||||
"save_path": str(save_path),
|
||||
"has_init_data": bool(init_data),
|
||||
},
|
||||
)
|
||||
|
||||
for status_update in downloader(**downloader_args):
|
||||
file_downloaded = status_update.get("file_downloaded")
|
||||
@@ -533,6 +573,14 @@ class DASH:
|
||||
control_file.unlink()
|
||||
|
||||
segments_to_merge = [x for x in sorted(save_dir.iterdir()) if x.is_file()]
|
||||
|
||||
if skip_merge:
|
||||
# N_m3u8DL-RE handles merging and decryption internally
|
||||
shutil.move(segments_to_merge[0], save_path)
|
||||
if drm:
|
||||
track.drm = None
|
||||
events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=drm, segment=None)
|
||||
else:
|
||||
with open(save_path, "wb") as f:
|
||||
if init_data:
|
||||
f.write(init_data)
|
||||
@@ -561,14 +609,20 @@ class DASH:
|
||||
track.path = save_path
|
||||
events.emit(events.Types.TRACK_DOWNLOADED, track=track)
|
||||
|
||||
if drm:
|
||||
if not skip_merge and drm:
|
||||
progress(downloaded="Decrypting", completed=0, total=100)
|
||||
drm.decrypt(save_path)
|
||||
track.drm = None
|
||||
events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=drm, segment=None)
|
||||
progress(downloaded="Decrypting", advance=100)
|
||||
|
||||
# Clean up empty segment directory
|
||||
if save_dir.exists() and save_dir.name.endswith("_segments"):
|
||||
try:
|
||||
save_dir.rmdir()
|
||||
except OSError:
|
||||
# Directory might not be empty, try removing recursively
|
||||
shutil.rmtree(save_dir, ignore_errors=True)
|
||||
|
||||
progress(downloaded="Downloaded")
|
||||
|
||||
@@ -736,6 +790,11 @@ class DASH:
@staticmethod
def get_drm(protections: list[Element]) -> list[DRM_T]:
    drm: list[DRM_T] = []
    PLACEHOLDER_KIDS = {
        UUID("00000000-0000-0000-0000-000000000000"),  # All zeros (key rotation default)
        UUID("00010203-0405-0607-0809-0a0b0c0d0e0f"),  # Sequential 0x00-0x0f
        UUID("00010203-0405-0607-0809-101112131415"),  # Shaka Packager test pattern
    }

    for protection in protections:
        urn = (protection.get("schemeIdUri") or "").lower()

@@ -745,17 +804,27 @@ class DASH:
if not pssh_text:
    continue
pssh = PSSH(pssh_text)
kid_attr = protection.get("kid") or protection.get("{urn:mpeg:cenc:2013}kid")
kid = UUID(bytes=base64.b64decode(kid_attr)) if kid_attr else None

kid = protection.get("kid")
if kid:
    kid = UUID(bytes=base64.b64decode(kid))
if not kid:
    default_kid_attr = protection.get("default_KID") or protection.get(
        "{urn:mpeg:cenc:2013}default_KID"
    )
    kid = UUID(default_kid_attr) if default_kid_attr else None

default_kid = protection.get("default_KID")
if default_kid:
    kid = UUID(default_kid)
if not kid:
    kid = next(
        (
            UUID(p.get("default_KID") or p.get("{urn:mpeg:cenc:2013}default_KID"))
            for p in protections
            if p.get("default_KID") or p.get("{urn:mpeg:cenc:2013}default_KID")
        ),
        None,
    )

if not pssh.key_ids and not kid:
    kid = next((UUID(p.get("default_KID")) for p in protections if p.get("default_KID")), None)
if kid and (not pssh.key_ids or all(k.int == 0 or k in PLACEHOLDER_KIDS for k in pssh.key_ids)):
    pssh.set_key_ids([kid])

drm.append(Widevine(pssh=pssh, kid=kid))
@@ -32,7 +32,7 @@ from unshackle.core.downloaders import requests as requests_downloader
|
||||
from unshackle.core.drm import DRM_T, ClearKey, PlayReady, Widevine
|
||||
from unshackle.core.events import events
|
||||
from unshackle.core.tracks import Audio, Subtitle, Tracks, Video
|
||||
from unshackle.core.utilities import get_extension, is_close_match, try_ensure_utf8
|
||||
from unshackle.core.utilities import get_debug_logger, get_extension, is_close_match, try_ensure_utf8
|
||||
|
||||
|
||||
class HLS:
|
||||
@@ -350,6 +350,24 @@ class HLS:
|
||||
}
|
||||
)
|
||||
|
||||
debug_logger = get_debug_logger()
|
||||
if debug_logger:
|
||||
debug_logger.log(
|
||||
level="DEBUG",
|
||||
operation="manifest_hls_download_start",
|
||||
message="Starting HLS manifest download",
|
||||
context={
|
||||
"track_id": getattr(track, "id", None),
|
||||
"track_type": track.__class__.__name__,
|
||||
"total_segments": total_segments,
|
||||
"downloader": downloader.__name__,
|
||||
"has_drm": bool(session_drm),
|
||||
"drm_type": session_drm.__class__.__name__ if session_drm else None,
|
||||
"skip_merge": skip_merge,
|
||||
"save_path": str(save_path),
|
||||
},
|
||||
)
|
||||
|
||||
for status_update in downloader(**downloader_args):
|
||||
file_downloaded = status_update.get("file_downloaded")
|
||||
if file_downloaded:
|
||||
|
||||
@@ -21,7 +21,7 @@ from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY,
|
||||
from unshackle.core.drm import DRM_T, PlayReady, Widevine
|
||||
from unshackle.core.events import events
|
||||
from unshackle.core.tracks import Audio, Subtitle, Track, Tracks, Video
|
||||
from unshackle.core.utilities import try_ensure_utf8
|
||||
from unshackle.core.utilities import get_debug_logger, try_ensure_utf8
|
||||
from unshackle.core.utils.xml import load_xml
|
||||
|
||||
|
||||
@@ -283,6 +283,24 @@ class ISM:
|
||||
}
|
||||
)
|
||||
|
||||
debug_logger = get_debug_logger()
|
||||
if debug_logger:
|
||||
debug_logger.log(
|
||||
level="DEBUG",
|
||||
operation="manifest_ism_download_start",
|
||||
message="Starting ISM manifest download",
|
||||
context={
|
||||
"track_id": getattr(track, "id", None),
|
||||
"track_type": track.__class__.__name__,
|
||||
"total_segments": len(segments),
|
||||
"downloader": downloader.__name__,
|
||||
"has_drm": bool(session_drm),
|
||||
"drm_type": session_drm.__class__.__name__ if session_drm else None,
|
||||
"skip_merge": skip_merge,
|
||||
"save_path": str(save_path),
|
||||
},
|
||||
)
|
||||
|
||||
for status_update in downloader(**downloader_args):
|
||||
file_downloaded = status_update.get("file_downloaded")
|
||||
if file_downloaded:
|
||||
|
||||
@@ -185,7 +185,10 @@ class Episode(Title):
if hdr_format:
    if hdr_format_full.startswith("Dolby Vision"):
        name += " DV"
    if any(indicator in hdr_format_full for indicator in ["HDR10", "SMPTE ST 2086"]):
    if any(
        indicator in (hdr_format_full + " " + hdr_format)
        for indicator in ["HDR10", "SMPTE ST 2086"]
    ):
        name += " HDR"
    else:
        name += f" {DYNAMIC_RANGE_MAP.get(hdr_format)} "

@@ -136,7 +136,10 @@ class Movie(Title):
if hdr_format:
    if hdr_format_full.startswith("Dolby Vision"):
        name += " DV"
    if any(indicator in hdr_format_full for indicator in ["HDR10", "SMPTE ST 2086"]):
    if any(
        indicator in (hdr_format_full + " " + hdr_format)
        for indicator in ["HDR10", "SMPTE ST 2086"]
    ):
        name += " HDR"
    else:
        name += f" {DYNAMIC_RANGE_MAP.get(hdr_format)} "
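To see what the widened check buys, here is a minimal trace with invented MediaInfo-style values for a hybrid Dolby Vision + HDR10 encode (the `hdr_format_full` / `hdr_format` strings are assumptions; the condition mirrors the new code above):

```python
# Invented MediaInfo-style values for a hybrid DV + HDR10 stream.
hdr_format_full = "Dolby Vision, Version 1.0, dvhe.08.06, BL+RPU"
hdr_format = "SMPTE ST 2086, HDR10 compatible"

name = ""
if hdr_format_full.startswith("Dolby Vision"):
    name += " DV"
# The old check scanned only hdr_format_full, so the HDR10 hint living in
# hdr_format was missed and the file ended up named just "DV".
if any(ind in (hdr_format_full + " " + hdr_format) for ind in ["HDR10", "SMPTE ST 2086"]):
    name += " HDR"

print(name.strip().replace(" ", "."))  # -> DV.HDR
```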
@@ -91,6 +91,12 @@ class Subtitle(Track):
    return Subtitle.Codec.TimedTextMarkupLang
raise ValueError(f"The Content Profile '{profile}' is not a supported Subtitle Codec")

# WebVTT sanitization patterns (compiled once for performance)
_CUE_ID_PATTERN = re.compile(r"^[A-Za-z]+\d+$")
_TIMING_START_PATTERN = re.compile(r"^\d+:\d+[:\.]")
_TIMING_LINE_PATTERN = re.compile(r"^((?:\d+:)?\d+:\d+[.,]\d+)\s*-->\s*((?:\d+:)?\d+:\d+[.,]\d+)(.*)$")
_LINE_POS_PATTERN = re.compile(r"line:(\d+(?:\.\d+)?%?)")

def __init__(
    self,
    *args: Any,
@@ -239,6 +245,11 @@ class Subtitle(Track):

# Sanitize WebVTT timestamps before parsing
text = Subtitle.sanitize_webvtt_timestamps(text)
# Remove cue identifiers that confuse parsers like pysubs2
text = Subtitle.sanitize_webvtt_cue_identifiers(text)
# Merge overlapping cues with line positioning into single multi-line cues
text = Subtitle.merge_overlapping_webvtt_cues(text)

preserve_formatting = config.subtitle.get("preserve_formatting", True)

if preserve_formatting:
@@ -277,6 +288,240 @@ class Subtitle(Track):
|
||||
# Replace negative timestamps with 00:00:00.000
|
||||
return re.sub(r"(-\d+:\d+:\d+\.\d+)", "00:00:00.000", text)
|
||||
|
||||
@staticmethod
|
||||
def has_webvtt_cue_identifiers(text: str) -> bool:
|
||||
"""
|
||||
Check if WebVTT content has cue identifiers that need removal.
|
||||
|
||||
Parameters:
|
||||
text: The WebVTT content as string
|
||||
|
||||
Returns:
|
||||
True if cue identifiers are detected, False otherwise
|
||||
"""
|
||||
lines = text.split("\n")
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
line = line.strip()
|
||||
if Subtitle._CUE_ID_PATTERN.match(line):
|
||||
# Look ahead to see if next non-empty line is a timing line
|
||||
j = i + 1
|
||||
while j < len(lines) and not lines[j].strip():
|
||||
j += 1
|
||||
if j < len(lines) and ("-->" in lines[j] or Subtitle._TIMING_START_PATTERN.match(lines[j].strip())):
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def sanitize_webvtt_cue_identifiers(text: str) -> str:
|
||||
"""
|
||||
Remove WebVTT cue identifiers that can confuse subtitle parsers.
|
||||
|
||||
Some services use cue identifiers like "Q0", "Q1", etc.
|
||||
that appear on their own line before the timing line. These can be
|
||||
incorrectly parsed as part of the previous cue's text content by
|
||||
some parsers (like pysubs2).
|
||||
|
||||
Parameters:
|
||||
text: The WebVTT content as string
|
||||
|
||||
Returns:
|
||||
Sanitized WebVTT content with cue identifiers removed
|
||||
"""
|
||||
if not Subtitle.has_webvtt_cue_identifiers(text):
|
||||
return text
|
||||
|
||||
lines = text.split("\n")
|
||||
sanitized_lines = []
|
||||
|
||||
i = 0
|
||||
while i < len(lines):
|
||||
line = lines[i].strip()
|
||||
|
||||
# Check if this line is a cue identifier followed by a timing line
|
||||
if Subtitle._CUE_ID_PATTERN.match(line):
|
||||
# Look ahead to see if next non-empty line is a timing line
|
||||
j = i + 1
|
||||
while j < len(lines) and not lines[j].strip():
|
||||
j += 1
|
||||
if j < len(lines) and ("-->" in lines[j] or Subtitle._TIMING_START_PATTERN.match(lines[j].strip())):
|
||||
# This is a cue identifier, skip it
|
||||
i += 1
|
||||
continue
|
||||
|
||||
sanitized_lines.append(lines[i])
|
||||
i += 1
|
||||
|
||||
return "\n".join(sanitized_lines)
|
||||
|
||||
@staticmethod
|
||||
def _parse_vtt_time(t: str) -> int:
|
||||
"""Parse WebVTT timestamp to milliseconds. Returns 0 for malformed input."""
|
||||
try:
|
||||
t = t.replace(",", ".")
|
||||
parts = t.split(":")
|
||||
if len(parts) == 2:
|
||||
m, s = parts
|
||||
h = "0"
|
||||
elif len(parts) >= 3:
|
||||
h, m, s = parts[:3]
|
||||
else:
|
||||
return 0
|
||||
sec_parts = s.split(".")
|
||||
secs = int(sec_parts[0])
|
||||
# Handle variable millisecond digits (e.g., .5 = 500ms, .50 = 500ms, .500 = 500ms)
|
||||
ms = int(sec_parts[1].ljust(3, "0")[:3]) if len(sec_parts) > 1 else 0
|
||||
return int(h) * 3600000 + int(m) * 60000 + secs * 1000 + ms
|
||||
except (ValueError, IndexError):
|
||||
return 0
|
||||
|
||||
@staticmethod
|
||||
def has_overlapping_webvtt_cues(text: str) -> bool:
|
||||
"""
|
||||
Check if WebVTT content has overlapping cues that need merging.
|
||||
|
||||
Detects cues with start times within 50ms of each other and the same end time,
|
||||
which indicates multi-line subtitles split into separate cues.
|
||||
|
||||
Parameters:
|
||||
text: The WebVTT content as string
|
||||
|
||||
Returns:
|
||||
True if overlapping cues are detected, False otherwise
|
||||
"""
|
||||
timings = []
|
||||
for line in text.split("\n"):
|
||||
match = Subtitle._TIMING_LINE_PATTERN.match(line)
|
||||
if match:
|
||||
start_str, end_str = match.group(1), match.group(2)
|
||||
timings.append((Subtitle._parse_vtt_time(start_str), Subtitle._parse_vtt_time(end_str)))
|
||||
|
||||
# Check for overlapping cues (within 50ms start, same end)
|
||||
for i in range(len(timings) - 1):
|
||||
curr_start, curr_end = timings[i]
|
||||
next_start, next_end = timings[i + 1]
|
||||
if abs(curr_start - next_start) <= 50 and curr_end == next_end:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def merge_overlapping_webvtt_cues(text: str) -> str:
|
||||
"""
|
||||
Merge WebVTT cues that have overlapping/near-identical times but different line positions.
|
||||
|
||||
Some services use separate cues for each line of a multi-line subtitle, with
|
||||
slightly different start times (1ms apart) and different line: positions.
|
||||
This merges them into single cues with proper line ordering based on the
|
||||
line: position (lower percentage = higher on screen = first line).
|
||||
|
||||
Parameters:
|
||||
text: The WebVTT content as string
|
||||
|
||||
Returns:
|
||||
WebVTT content with overlapping cues merged
|
||||
"""
|
||||
if not Subtitle.has_overlapping_webvtt_cues(text):
|
||||
return text
|
||||
|
||||
lines = text.split("\n")
|
||||
cues = []
|
||||
header_lines = []
|
||||
in_header = True
|
||||
i = 0
|
||||
|
||||
while i < len(lines):
|
||||
line = lines[i]
|
||||
|
||||
if in_header:
|
||||
if "-->" in line:
|
||||
in_header = False
|
||||
else:
|
||||
header_lines.append(line)
|
||||
i += 1
|
||||
continue
|
||||
|
||||
match = Subtitle._TIMING_LINE_PATTERN.match(line)
|
||||
if match:
|
||||
start_str, end_str, settings = match.groups()
|
||||
line_pos = 100.0 # Default to bottom
|
||||
line_match = Subtitle._LINE_POS_PATTERN.search(settings)
|
||||
if line_match:
|
||||
pos_str = line_match.group(1).rstrip("%")
|
||||
line_pos = float(pos_str)
|
||||
|
||||
content_lines = []
|
||||
i += 1
|
||||
while i < len(lines) and lines[i].strip() and "-->" not in lines[i]:
|
||||
content_lines.append(lines[i])
|
||||
i += 1
|
||||
|
||||
cues.append(
|
||||
{
|
||||
"start_ms": Subtitle._parse_vtt_time(start_str),
|
||||
"end_ms": Subtitle._parse_vtt_time(end_str),
|
||||
"start_str": start_str,
|
||||
"end_str": end_str,
|
||||
"line_pos": line_pos,
|
||||
"content": "\n".join(content_lines),
|
||||
"settings": settings,
|
||||
}
|
||||
)
|
||||
else:
|
||||
i += 1
|
||||
|
||||
# Merge overlapping cues (within 50ms of each other with same end time)
|
||||
merged_cues = []
|
||||
i = 0
|
||||
while i < len(cues):
|
||||
current = cues[i]
|
||||
group = [current]
|
||||
|
||||
j = i + 1
|
||||
while j < len(cues):
|
||||
other = cues[j]
|
||||
if abs(current["start_ms"] - other["start_ms"]) <= 50 and current["end_ms"] == other["end_ms"]:
|
||||
group.append(other)
|
||||
j += 1
|
||||
else:
|
||||
break
|
||||
|
||||
if len(group) > 1:
|
||||
# Sort by line position (lower % = higher on screen = first)
|
||||
group.sort(key=lambda x: x["line_pos"])
|
||||
# Use the earliest start time from the group
|
||||
earliest = min(group, key=lambda x: x["start_ms"])
|
||||
merged_cues.append(
|
||||
{
|
||||
"start_str": earliest["start_str"],
|
||||
"end_str": group[0]["end_str"],
|
||||
"content": "\n".join(c["content"] for c in group),
|
||||
"settings": "",
|
||||
}
|
||||
)
|
||||
else:
|
||||
merged_cues.append(
|
||||
{
|
||||
"start_str": current["start_str"],
|
||||
"end_str": current["end_str"],
|
||||
"content": current["content"],
|
||||
"settings": current["settings"],
|
||||
}
|
||||
)
|
||||
|
||||
i = j if len(group) > 1 else i + 1
|
||||
|
||||
result_lines = header_lines[:]
|
||||
if result_lines and result_lines[-1].strip():
|
||||
result_lines.append("")
|
||||
|
||||
for cue in merged_cues:
|
||||
result_lines.append(f"{cue['start_str']} --> {cue['end_str']}{cue['settings']}")
|
||||
result_lines.append(cue["content"])
|
||||
result_lines.append("")
|
||||
|
||||
return "\n".join(result_lines)
|
||||
|
||||
@staticmethod
|
||||
def sanitize_webvtt(text: str) -> str:
|
||||
"""
|
||||
@@ -565,13 +810,18 @@ class Subtitle(Track):
|
||||
|
||||
if binaries.SubtitleEdit and self.codec not in (Subtitle.Codec.fTTML, Subtitle.Codec.fVTT):
|
||||
sub_edit_format = {
|
||||
Subtitle.Codec.SubStationAlphav4: "AdvancedSubStationAlpha",
|
||||
Subtitle.Codec.TimedTextMarkupLang: "TimedText1.0",
|
||||
}.get(codec, codec.name)
|
||||
Subtitle.Codec.SubRip: "subrip",
|
||||
Subtitle.Codec.SubStationAlpha: "substationalpha",
|
||||
Subtitle.Codec.SubStationAlphav4: "advancedsubstationalpha",
|
||||
Subtitle.Codec.TimedTextMarkupLang: "timedtext1.0",
|
||||
Subtitle.Codec.WebVTT: "webvtt",
|
||||
Subtitle.Codec.SAMI: "sami",
|
||||
Subtitle.Codec.MicroDVD: "microdvd",
|
||||
}.get(codec, codec.name.lower())
|
||||
sub_edit_args = [
|
||||
binaries.SubtitleEdit,
|
||||
"/Convert",
|
||||
self.path,
|
||||
str(binaries.SubtitleEdit),
|
||||
"/convert",
|
||||
str(self.path),
|
||||
sub_edit_format,
|
||||
f"/outputfilename:{output_path.name}",
|
||||
"/encoding:utf8",
|
||||
@@ -631,7 +881,7 @@ class Subtitle(Track):
text = try_ensure_utf8(data).decode("utf8")
text = text.replace("tt:", "")
# negative size values aren't allowed in TTML/DFXP spec, replace with 0
text = re.sub(r'"(-\d+(\.\d+)?(px|em|%|c|pt))"', '"0"', text)
text = re.sub(r"-(\d+(?:\.\d+)?)(px|em|%|c|pt)", r"0\2", text)
caption_set = pycaption.DFXPReader().read(text)
elif codec == Subtitle.Codec.fVTT:
    caption_lists: dict[str, pycaption.CaptionList] = defaultdict(pycaption.CaptionList)
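A quick check of why the new pattern catches what the old one missed: the old regex required the whole quoted attribute value to be a single negative length, so a negative inside a two-value attribute like `tts:extent="-5% 7.5%"` never matched. (Standalone sketch; the sample attribute comes from the changelog entry above.)

```python
import re

sample = 'tts:extent="-5% 7.5%"'

old = re.sub(r'"(-\d+(\.\d+)?(px|em|%|c|pt))"', '"0"', sample)
new = re.sub(r"-(\d+(?:\.\d+)?)(px|em|%|c|pt)", r"0\2", sample)

print(old)  # tts:extent="-5% 7.5%"  (unchanged: the quote-anchored pattern does not match)
print(new)  # tts:extent="0% 7.5%"   (negative length clamped to 0, unit kept)
```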
@@ -962,18 +1212,26 @@ class Subtitle(Track):
|
||||
except Exception:
|
||||
pass # Fall through to other methods
|
||||
|
||||
if binaries.SubtitleEdit:
|
||||
if self.codec == Subtitle.Codec.SubStationAlphav4:
|
||||
output_format = "AdvancedSubStationAlpha"
|
||||
elif self.codec == Subtitle.Codec.TimedTextMarkupLang:
|
||||
output_format = "TimedText1.0"
|
||||
else:
|
||||
output_format = self.codec.name
|
||||
conversion_method = config.subtitle.get("conversion_method", "auto")
|
||||
use_subtitleedit = sdh_method == "subtitleedit" or (
|
||||
sdh_method == "auto" and conversion_method in ("auto", "subtitleedit")
|
||||
)
|
||||
|
||||
if binaries.SubtitleEdit and use_subtitleedit:
|
||||
output_format = {
|
||||
Subtitle.Codec.SubRip: "subrip",
|
||||
Subtitle.Codec.SubStationAlpha: "substationalpha",
|
||||
Subtitle.Codec.SubStationAlphav4: "advancedsubstationalpha",
|
||||
Subtitle.Codec.TimedTextMarkupLang: "timedtext1.0",
|
||||
Subtitle.Codec.WebVTT: "webvtt",
|
||||
Subtitle.Codec.SAMI: "sami",
|
||||
Subtitle.Codec.MicroDVD: "microdvd",
|
||||
}.get(self.codec, self.codec.name.lower())
|
||||
subprocess.run(
|
||||
[
|
||||
binaries.SubtitleEdit,
|
||||
"/Convert",
|
||||
self.path,
|
||||
str(binaries.SubtitleEdit),
|
||||
"/convert",
|
||||
str(self.path),
|
||||
output_format,
|
||||
"/encoding:utf8",
|
||||
"/overwrite",
|
||||
@@ -981,6 +1239,7 @@ class Subtitle(Track):
|
||||
],
|
||||
check=True,
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
)
|
||||
else:
|
||||
if config.subtitle.get("convert_before_strip", True) and self.codec != Subtitle.Codec.SubRip:
|
||||
@@ -1022,18 +1281,21 @@ class Subtitle(Track):
|
||||
if not binaries.SubtitleEdit:
|
||||
raise EnvironmentError("SubtitleEdit executable not found...")
|
||||
|
||||
if self.codec == Subtitle.Codec.SubStationAlphav4:
|
||||
output_format = "AdvancedSubStationAlpha"
|
||||
elif self.codec == Subtitle.Codec.TimedTextMarkupLang:
|
||||
output_format = "TimedText1.0"
|
||||
else:
|
||||
output_format = self.codec.name
|
||||
output_format = {
|
||||
Subtitle.Codec.SubRip: "subrip",
|
||||
Subtitle.Codec.SubStationAlpha: "substationalpha",
|
||||
Subtitle.Codec.SubStationAlphav4: "advancedsubstationalpha",
|
||||
Subtitle.Codec.TimedTextMarkupLang: "timedtext1.0",
|
||||
Subtitle.Codec.WebVTT: "webvtt",
|
||||
Subtitle.Codec.SAMI: "sami",
|
||||
Subtitle.Codec.MicroDVD: "microdvd",
|
||||
}.get(self.codec, self.codec.name.lower())
|
||||
|
||||
subprocess.run(
|
||||
[
|
||||
binaries.SubtitleEdit,
|
||||
"/Convert",
|
||||
self.path,
|
||||
str(binaries.SubtitleEdit),
|
||||
"/convert",
|
||||
str(self.path),
|
||||
output_format,
|
||||
"/ReverseRtlStartEnd",
|
||||
"/encoding:utf8",
|
||||
@@ -1041,6 +1303,7 @@ class Subtitle(Track):
|
||||
],
|
||||
check=True,
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -295,12 +295,23 @@ class Track:
try:
    if not self.drm and track_type in ("Video", "Audio"):
        # the service might not have explicitly defined the `drm` property
        # try find widevine DRM information from the init data of URL
        # try find DRM information from the init data of URL based on CDM type
        if isinstance(cdm, PlayReadyCdm):
            try:
                self.drm = [PlayReady.from_track(self, session)]
            except PlayReady.Exceptions.PSSHNotFound:
                try:
                    self.drm = [Widevine.from_track(self, session)]
                except Widevine.Exceptions.PSSHNotFound:
                    # it might not have Widevine DRM, or might not have found the PSSH
                    log.warning("No Widevine PSSH was found for this track, is it DRM free?")
                    log.warning("No PlayReady or Widevine PSSH was found for this track, is it DRM free?")
        else:
            try:
                self.drm = [Widevine.from_track(self, session)]
            except Widevine.Exceptions.PSSHNotFound:
                try:
                    self.drm = [PlayReady.from_track(self, session)]
                except PlayReady.Exceptions.PSSHNotFound:
                    log.warning("No Widevine or PlayReady PSSH was found for this track, is it DRM free?")

    if self.drm:
        track_kid = self.get_key_id(session=session)
@@ -22,7 +22,7 @@ from unshackle.core.tracks.chapters import Chapter, Chapters
|
||||
from unshackle.core.tracks.subtitle import Subtitle
|
||||
from unshackle.core.tracks.track import Track
|
||||
from unshackle.core.tracks.video import Video
|
||||
from unshackle.core.utilities import is_close_match, sanitize_filename
|
||||
from unshackle.core.utilities import get_debug_logger, is_close_match, sanitize_filename
|
||||
from unshackle.core.utils.collections import as_list, flatten
|
||||
|
||||
|
||||
@@ -507,6 +507,35 @@ class Tracks:
|
||||
if not output_path:
|
||||
raise ValueError("No tracks provided, at least one track must be provided.")
|
||||
|
||||
debug_logger = get_debug_logger()
|
||||
if debug_logger:
|
||||
debug_logger.log(
|
||||
level="DEBUG",
|
||||
operation="mux_start",
|
||||
message="Starting mkvmerge muxing",
|
||||
context={
|
||||
"title": title,
|
||||
"output_path": str(output_path),
|
||||
"video_count": len(self.videos),
|
||||
"audio_count": len(self.audio),
|
||||
"subtitle_count": len(self.subtitles),
|
||||
"attachment_count": len(self.attachments),
|
||||
"has_chapters": bool(self.chapters),
|
||||
"video_tracks": [
|
||||
{"id": v.id, "codec": getattr(v, "codec", None), "language": str(v.language)}
|
||||
for v in self.videos
|
||||
],
|
||||
"audio_tracks": [
|
||||
{"id": a.id, "codec": getattr(a, "codec", None), "language": str(a.language)}
|
||||
for a in self.audio
|
||||
],
|
||||
"subtitle_tracks": [
|
||||
{"id": s.id, "codec": getattr(s, "codec", None), "language": str(s.language)}
|
||||
for s in self.subtitles
|
||||
],
|
||||
},
|
||||
)
|
||||
|
||||
# let potential failures go to caller, caller should handle
|
||||
try:
|
||||
errors = []
|
||||
@@ -516,7 +545,33 @@ class Tracks:
|
||||
errors.append(line)
|
||||
if "progress" in line:
|
||||
progress(total=100, completed=int(line.strip()[14:-1]))
|
||||
return output_path, p.wait(), errors
|
||||
|
||||
returncode = p.wait()
|
||||
|
||||
if debug_logger:
|
||||
if returncode != 0 or errors:
|
||||
debug_logger.log(
|
||||
level="ERROR",
|
||||
operation="mux_failed",
|
||||
message=f"mkvmerge exited with code {returncode}",
|
||||
context={
|
||||
"returncode": returncode,
|
||||
"output_path": str(output_path),
|
||||
"errors": errors,
|
||||
},
|
||||
)
|
||||
else:
|
||||
debug_logger.log(
|
||||
level="DEBUG",
|
||||
operation="mux_complete",
|
||||
message="mkvmerge muxing completed successfully",
|
||||
context={
|
||||
"output_path": str(output_path),
|
||||
"output_exists": output_path.exists() if output_path else False,
|
||||
},
|
||||
)
|
||||
|
||||
return output_path, returncode, errors
|
||||
finally:
|
||||
if chapters_path:
|
||||
chapters_path.unlink()
|
||||
|
||||
@@ -120,8 +120,13 @@ def sanitize_filename(filename: str, spacer: str = ".") -> str:

The spacer is safer to be a '.' for older DDL and p2p sharing spaces.
This includes web-served content via direct links and such.

Set `unicode_filenames: true` in config to preserve native language
characters (Korean, Japanese, Chinese, etc.) instead of transliterating
them to ASCII equivalents.
"""
# replace all non-ASCII characters with ASCII equivalents
# optionally replace non-ASCII characters with ASCII equivalents
if not config.unicode_filenames:
    filename = unidecode(filename)

# remove or replace further characters as needed
@@ -114,11 +114,47 @@ class API(Vault):
    return added or updated

def add_keys(self, service: str, kid_keys: dict[Union[UUID, str], str]) -> int:
    data = self.session.post(
    # Normalize keys
    normalized_keys = {str(kid).replace("-", ""): key for kid, key in kid_keys.items()}
    kid_list = list(normalized_keys.keys())

    if not kid_list:
        return 0

    # Try batches starting at 500, stepping down by 100 on failure, fallback to 1
    batch_size = 500
    total_added = 0
    i = 0

    while i < len(kid_list):
        batch_kids = kid_list[i : i + batch_size]
        batch_keys = {kid: normalized_keys[kid] for kid in batch_kids}

        try:
            response = self.session.post(
                url=f"{self.uri}/{service.lower()}",
                json={"content_keys": {str(kid).replace("-", ""): key for kid, key in kid_keys.items()}},
                json={"content_keys": batch_keys},
                headers={"Accept": "application/json"},
            ).json()
            )

            # Check for HTTP errors that suggest batch is too large
            if response.status_code in (413, 414, 400) and batch_size > 1:
                if batch_size > 100:
                    batch_size -= 100
                else:
                    batch_size = 1
                continue

            data = response.json()
        except Exception:
            # JSON decode error or connection issue - try smaller batch
            if batch_size > 1:
                if batch_size > 100:
                    batch_size -= 100
                else:
                    batch_size = 1
                continue
            raise

        code = int(data.get("code", 0))
        message = data.get("message")

@@ -135,11 +171,14 @@ class API(Vault):
        raise error(f"{message} ({code})")

        # each kid:key that was new to the vault (optional)
        added = int(data.get("added"))
        added = int(data.get("added", 0))
        # each key for a kid that was changed/updated (optional)
        updated = int(data.get("updated"))
        updated = int(data.get("updated", 0))

        return added + updated
        total_added += added + updated
        i += batch_size

    return total_added

def get_services(self) -> Iterator[str]:
    data = self.session.post(url=self.uri, headers={"Accept": "application/json"}).json()
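The retry loop above only ever shrinks the batch; its size schedule can be traced in isolation (a sketch of the step-down rule only, no vault I/O):

```python
def next_batch_size(current: int) -> int:
    """Mirror of the rule above: step down by 100 while above 100, then drop to 1."""
    return current - 100 if current > 100 else 1

size, schedule = 500, [500]
while size > 1:
    size = next_batch_size(size)
    schedule.append(size)

print(schedule)  # [500, 400, 300, 200, 100, 1]
```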
@@ -119,9 +119,25 @@ class SQLite(Vault):
cursor = conn.cursor()

try:
    placeholders = ",".join(["?"] * len(kid_keys))
    cursor.execute(f"SELECT kid FROM `{service}` WHERE kid IN ({placeholders})", list(kid_keys.keys()))
    existing_kids = {row[0] for row in cursor.fetchall()}
    # Query existing KIDs in batches to avoid SQLite variable limit
    # Try larger batch first (newer SQLite supports 32766), fall back to 500 if needed
    existing_kids: set[str] = set()
    kid_list = list(kid_keys.keys())
    batch_size = 32000

    i = 0
    while i < len(kid_list):
        batch = kid_list[i : i + batch_size]
        placeholders = ",".join(["?"] * len(batch))
        try:
            cursor.execute(f"SELECT kid FROM `{service}` WHERE kid IN ({placeholders})", batch)
            existing_kids.update(row[0] for row in cursor.fetchall())
            i += batch_size
        except sqlite3.OperationalError as e:
            if "too many SQL variables" in str(e) and batch_size > 500:
                batch_size = 500
                continue
            raise

    new_keys = {kid: key for kid, key in kid_keys.items() if kid not in existing_kids}