feat(dl): extract closed captions from HLS manifests and improve CC extraction
- Parse CLOSED-CAPTIONS entries from HLS manifests and attach CC metadata (language, name, instream_id, characteristics) to video tracks
- Move CC extraction to run after decryption instead of before, fixing extraction failures on encrypted streams
- Extract CCs even when other subtitle tracks exist, using manifest CC language info instead of guessing
- Try ccextractor on the original file before repacking to preserve container-level CC data (e.g. c608 boxes) that ffmpeg remux strips
- Display deduplicated closed captions in --list output and download progress, positioned after subtitles
- Add closed_captions field to Video track class
This commit is contained in:
@@ -112,6 +112,15 @@ class HLS:
|
||||
session_drm = HLS.get_all_drm(session_keys)
|
||||
|
||||
audio_codecs_by_group_id: dict[str, Audio.Codec] = {}
|
||||
cc_by_group_id: dict[str, list[dict[str, Any]]] = {}
|
||||
for media in self.manifest.media:
|
||||
if media.type == "CLOSED-CAPTIONS":
|
||||
cc_by_group_id.setdefault(media.group_id, []).append({
|
||||
"language": media.language,
|
||||
"name": media.name,
|
||||
"instream_id": media.instream_id,
|
||||
"characteristics": media.characteristics,
|
||||
})
|
||||
tracks = Tracks()
|
||||
|
||||
for playlist in self.manifest.playlists:
|
||||
@@ -161,6 +170,9 @@ class HLS:
|
||||
width=playlist.stream_info.resolution[0] if playlist.stream_info.resolution else None,
|
||||
height=playlist.stream_info.resolution[1] if playlist.stream_info.resolution else None,
|
||||
fps=playlist.stream_info.frame_rate,
|
||||
closed_captions=cc_by_group_id.get(
|
||||
(playlist.stream_info.closed_captions or "").strip('"'), []
|
||||
),
|
||||
)
|
||||
if primary_track_type is Video
|
||||
else {}
|
||||
|
||||
Reference in New Issue
Block a user