Files
usk_schedule_downloader/lib/check_track_detail.py
2026-03-30 13:43:29 +07:00

247 lines
13 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import re
from langcodes import Language, tag_is_valid
def first_valid_bcp47(parts):
    """
    Scan parts in order and return the first token accepted as a BCP 47 tag.

    Each token is stripped of surrounding whitespace first. Tokens wrapped in
    square brackets (markers such as "[Original]") are skipped without being
    validated. Returns None when no token qualifies.
    """
    for raw in parts:
        candidate = raw.strip()
        # Bracketed markers are annotations, never language tags.
        is_marker = candidate.startswith("[") and candidate.endswith("]")
        # The token is passed to langcodes unchanged (no case normalisation).
        if not is_marker and tag_is_valid(candidate):
            return candidate
    return None
def extract_langs(text):
    """
    Collect the language tags of audio and subtitle tracks listed in a log.

    Every line is searched for a codec label in square brackets followed by
    "|" and a language tag, e.g. "[AAC] | ja-JP" or "[ASS] | en-US".

    Args:
        text: Log text to scan; tracks are expected one per line.

    Returns:
        A tuple (audio, subs) of lists holding the distinct tags found for
        audio and subtitle tracks, each in order of first appearance.
    """
    # Lowercase 2-3 letter primary language followed by any number of
    # script/region/variant subtags (zh-Hans, es-419, fil-PH). The trailing
    # lookahead rejects a partial hit inside a longer token, so "fil-PH" is
    # never truncated to "fi" and a plain word is never read as a tag.
    lang = r'([a-z]{2,3}(?:-[A-Za-z0-9]{2,8})*)(?![A-Za-z0-9])'
    audio_re = re.compile(
        rf'\[(AAC|DD\+?|AC-4|OPUS|VORB|DTS|ALAC|FLAC)\]\s*\|\s*{lang}'
    )
    sub_re = re.compile(
        rf'\[(SRT|SSA|ASS|VTT|TTML|SMI|SUB|MPL2|TMP|STPP|WVTT)\]\s*\|\s*{lang}'
    )

    audio = []
    subs = []
    for line in text.splitlines():
        # A line is checked against both patterns, exactly as before.
        for pattern, found in ((audio_re, audio), (sub_re, subs)):
            m = pattern.search(line)
            if m and m.group(2) not in found:
                found.append(m.group(2))
    return audio, subs
def check_langs_with_langcodes(stderr_text: str, audio_lang_cfg: list[str], sub_lang_cfg: list[str]):
    """
    Compare the configured audio/subtitle languages with those found in a log.

    Tags are pulled from stderr_text with extract_langs, then both the found
    tags and the configured entries are reduced to their primary language
    subtag (via langcodes) before being compared.

    Returns a dict with "audio" and "subtitle" entries, each holding:
        configured  - the configured list, passed through unchanged
        found_tags  - the tags as they appear in the log
        found_langs - sorted primary languages of the found tags
        exists_all  - True when every configured language was found
    """
    def primary(tag):
        # Primary language subtag of a tag, e.g. the "ja" part of "ja-JP".
        return Language.get(tag).language

    audio_tags, sub_tags = extract_langs(stderr_text)

    audio_primary = set(map(primary, audio_tags))
    sub_primary = set(map(primary, sub_tags))

    audio_report = {
        "configured": audio_lang_cfg,
        "found_tags": audio_tags,
        "found_langs": sorted(audio_primary),
        "exists_all": all(primary(cfg) in audio_primary for cfg in audio_lang_cfg),
    }
    subtitle_report = {
        "configured": sub_lang_cfg,
        "found_tags": sub_tags,
        "found_langs": sorted(sub_primary),
        "exists_all": all(primary(cfg) in sub_primary for cfg in sub_lang_cfg),
    }
    return {"audio": audio_report, "subtitle": subtitle_report}
def video_details(stderr_text: str):
    """
    Parse the video track lines of the 'All Tracks' part of a log.

    Only text before the first 'Selected Tracks' marker is examined; when the
    marker is absent the whole text is scanned. A track line looks like
    "VID | [H.264, SDR] | 1920x1080 @ 5,000 kb/s (376.04 MiB), 23.976 FPS".

    Args:
        stderr_text: Captured log output.

    Returns:
        A list with one dict per matched video line:
            codec      - codec label, stripped
            range      - dynamic-range label, or None when not present
            resolution - [width, height] as the digit strings from the log
            bitrate    - int kb/s, thousands separators removed
            framerate  - float, or None for a line reporting 'Unknown FPS'
            size       - e.g. '376.04 MiB', or None when not present
    """
    # Anchored to 'VID | [ ... ]' so a log-level marker such as [I] is never
    # mistaken for the codec bracket. The FPS position accepts either a number
    # or the literal 'Unknown'; in the latter case the fps group stays unset.
    vid_re = re.compile(r"""
        VID\s*\|\s*\[\s*(?P<codec>[^,\]]+)\s*(?:,\s*(?P<range>[^\]]+))?\]\s*\|\s*
        (?P<width>\d{3,4})x(?P<height>\d{3,4})\s*@\s*(?P<kbps>\d[\d,]*)\s*kb/s
        (?:\s*\((?P<size>[^()]*?(?:MiB|GiB)[^()]*)\))?\s*,\s*
        (?:(?P<fps>\d+(?:\.\d+)?)|Unknown)\s*FPS
    """, re.VERBOSE)

    # split() returns the full text as its first element when the marker is
    # missing, so no separate membership test is needed.
    all_section = stderr_text.split("Selected Tracks", 1)[0]

    results = []
    for m in vid_re.finditer(all_section):
        fps = m.group("fps")
        dyn_range = m.group("range")
        size = m.group("size")
        results.append({
            "codec": m.group("codec").strip(),
            "range": dyn_range.strip() if dyn_range else None,
            "resolution": [m.group("width"), m.group("height")],
            "bitrate": int(m.group("kbps").replace(",", "")),
            # The pattern guarantees a well-formed number whenever fps is set.
            "framerate": float(fps) if fps is not None else None,
            "size": size.strip() if size else None,
        })
    return results
def extract_chapters(stderr_text: str):
    """
    Pull chapter entries out of vinetrimmer-like log output.

    A chapter line has the form "CHP | [01] | 00:04:21.762 | intro"; anything
    may precede "CHP" on the line. Only text before the first
    'Selected Tracks:' marker is scanned, so the full 'All Tracks' listing is
    preferred when both listings are present.

    Returns a list of dicts such as
    {'index': '01', 'time': '00:04:21.762', 'name': 'intro'}; all values are
    strings taken from the log.
    """
    chapter_pattern = re.compile(
        r"""
        ^.*?\bCHP\b\s*\|\s*\[(?P<index>\d{1,3})\]\s*\|\s*
        (?P<time>\d{2}:\d{2}:\d{2}(?:\.\d{1,4})?)\s*\|\s*
        (?P<name>.+?)\s*$
        """,
        re.IGNORECASE | re.MULTILINE | re.VERBOSE
    )
    # Everything from the marker onwards is ignored.
    inventory, _, _ = stderr_text.partition("Selected Tracks:")
    # The pattern's named groups are exactly index, time and name.
    return [hit.groupdict() for hit in chapter_pattern.finditer(inventory)]
def extract_title(stderr_text: str) -> str | None:
TITLE_RE = re.compile(r"Getting tracks for\s+(?P<title>.+?)\s*\[", re.IGNORECASE)
m = TITLE_RE.search(stderr_text)
return m.group("title").strip() if m else None
def extract_file_path(stderr: str) -> str | None:
import re
m = re.search(r"File path -\s*\n([\s\S]*?)\n\s*\n", stderr)
if not m:
return None
return "".join(line.strip() for line in m.group(1).splitlines())
def main():
    """Run extract_file_path on a captured sample log and print the result."""
    # Captured downloader output used as a fixture for manual runs.
    sample_log = """
▄• ▄▌ ▐ ▄ .▄▄ · ▄ .▄ ▄▄▄· ▄▄· ▄ •▄ ▄▄▌ ▄▄▄ .
█▪██▌•█▌▐█▐█ ▀. ██▪▐█▐█ ▀█ ▐█ ▌▪█▌▄▌▪██• ▀▄.▀·
█▌▐█▌▐█▐▐▌▄▀▀▀█▄██▀▐█▄█▀▀█ ██ ▄▄▐▀▀▄·██▪ ▐▀▀▪▄
▐█▄█▌██▐█▌▐█▄▪▐███▌▐▀▐█ ▪▐▌▐███▌▐█.█▌▐█▌▐▌▐█▄▄▌
▀▀▀ ▀▀ █▪ ▀▀▀▀ ▀▀▀ · ▀ ▀ ·▀▀▀ ·▀ ▀.▀▀▀ ▀▀▀
v 4.0.0 - © 2025-2026 - github.com/unshackle-dl/unshackle
Service Config loaded
Loaded 1/1 Vaults
Loaded Widevine CDM: 8159 (L3)
─────────────────────────────── Service: CR ────────────────────────────────
Authenticated with Service
─ Series: Noble Reincarnation: Born Blessed, So Ill Obtain Ultimate Powe… ─
1 seasons, S1(12)
─ Noble Reincarnation: Born Blessed, So Ill Obtain Ultimate Power 2026 S… ─
1 Video
└── [H.264, SDR] | ja-JP | 1920x1080 @ 11038 kb/s, 23.976 FPS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 00:24 • Downloaded
2 Audio
├── [AAC] | ja-JP | 2.0 | 200 kb/s | Japanese
│ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 00:15 • Downloaded
└── [AAC] | th-TH | 2.0 | 201 kb/s | Thai
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 00:15 • Downloaded
2 Subtitles
├── [ASS] | th-TH | Thai
│ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 00:01 • Downloaded
└── [ASS] | en-US | English
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 00:01 • Downloaded
6 Chapters
├── 00:00:00.000 | Chapter 1
├── 00:02:11.000 | Chapter 2
├── 00:03:21.000 | Intro
├── 00:04:50.000 | Chapter 3
├── 00:21:56.000 | Credits
└── 00:23:27.000 | Chapter 4
Widevine(AAAAgnBzc2gAAAAA7e+LqXnWSs6jyCfc1R0h7QAAAGIIARIQx4rAibzIP...)
└── c78ac089bcc83e8ea8fe89729f1093c7:f4797a42fa189a1326dc3da31b8957ab*
from Local SQLite
Widevine(AAAAgnBzc2gAAAAA7e+LqXnWSs6jyCfc1R0h7QAAAGIIARIQrm5MD9N8M...)
└── ae6e4c0fd37c32d5be7a3188ce31a60b:d11e30c933334530a5e591e58978929c*
from Local SQLite
Track downloads finished in 0m24s
Using 'DejaVu Sans' as fallback for 'Arial Unicode MS'
Using 'Liberation Sans' as fallback for 'Arial'
Using 'Liberation Serif' as fallback for 'Times New Roman'
Using 'DejaVu Sans' as fallback for 'Trebuchet MS'
Attached 5 fonts for the Subtitles
Repacked one or more tracks with FFMPEG
Multiplexing... ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 00:03
🎉 Title downloaded in 0m29s!
File path -
/Entertainment_1/Downloads/USCK/Noble.Reincarnation.Born.Blessed.So.Il
l.Obtain.Ultimate.Power.2026.S01.1080p.CR.WEB-DL.DUAL.AAC2.0.H.264-[Se
Free]/Noble.Reincarnation.Born.Blessed.So.Ill.Obtain.Ultimate.Power.20
26.S01E11.Disinheritance.1080p.CR.WEB-DL.DUAL.AAC2.0.H.264-[SeFree].mk
v
Processed all titles in 0m33s
"""
    # The other extractors can be tried on the same sample (left disabled):
    # audio_lang_cfg = "ja,th".split(",")
    # sub_lang_cfg = "th,en".split(",")
    # print(extract_title(sample_log))
    # print(video_details(sample_log))
    # print(extract_chapters(sample_log))
    # print(check_langs_with_langcodes(sample_log, audio_lang_cfg, sub_lang_cfg))
    file_path = extract_file_path(sample_log)
    print(file_path)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()