247 lines
13 KiB
Python
247 lines
13 KiB
Python
import re
|
||
from langcodes import Language, tag_is_valid
|
||
|
||
def first_valid_bcp47(parts):
    """
    Pick the first usable BCP 47 tag out of an iterable of string tokens.

    Each token is whitespace-stripped. Tokens wrapped in square brackets
    (markers such as ``[Original]``) are skipped outright; the remaining ones
    are handed unchanged to ``tag_is_valid`` and the first one it accepts is
    returned in its stripped form. Returns None when nothing qualifies.
    """
    stripped = (raw.strip() for raw in parts)
    return next(
        (
            candidate
            for candidate in stripped
            # Bracketed markers are never language tags; skip before validating.
            if not (candidate.startswith("[") and candidate.endswith("]"))
            and tag_is_valid(candidate)
        ),
        None,
    )
|
||
|
||
def extract_langs(text):
    """
    Collect the language tags of audio and subtitle tracks listed in a log.

    A track line is recognised by its bracketed codec/format marker followed
    by a pipe and the language tag, e.g. ``[AAC] | ja-JP | 2.0 | ...`` or
    ``[ASS] | zh-Hans | Chinese``.

    The tag pattern accepts a 2-3 letter lowercase primary subtag followed by
    any number of ``-subtag`` parts, so tags such as ``fil-PH``, ``es-419``
    and ``zh-Hans`` are captured whole instead of being cut down to their
    first two letters. The tag must end at a non-alphanumeric character,
    which keeps ordinary words from being mistaken for a tag.

    Returns:
        A tuple ``(audio, subs)`` of two lists of tag strings, each in order
        of first appearance and without duplicates.
    """
    lang = r"(?P<lang>[a-z]{2,3}(?:-[A-Za-z0-9]{1,8})*)(?![A-Za-z0-9])"
    # Compile once per call rather than rebuilding the pattern for every line.
    audio_re = re.compile(
        rf"\[(?:AAC|DD\+?|AC-4|OPUS|VORB|DTS|ALAC|FLAC)\]\s*\|\s*{lang}"
    )
    sub_re = re.compile(
        rf"\[(?:SRT|SSA|ASS|VTT|TTML|SMI|SUB|MPL2|TMP|STPP|WVTT)\]\s*\|\s*{lang}"
    )

    # Dicts keep insertion order, giving ordered de-duplication for free.
    audio = {}
    subs = {}
    for line in text.splitlines():
        m_audio = audio_re.search(line)
        if m_audio:
            audio.setdefault(m_audio.group("lang"), None)

        m_sub = sub_re.search(line)
        if m_sub:
            subs.setdefault(m_sub.group("lang"), None)

    return list(audio), list(subs)
|
||
|
||
def check_langs_with_langcodes(stderr_text: str, audio_lang_cfg: list[str], sub_lang_cfg: list[str]):
    """
    Compare the languages found in a log against the configured ones.

    Track tags are pulled from ``stderr_text`` with ``extract_langs`` and both
    the found and the configured tags are reduced to their primary language
    subtag via ``Language.get(...).language`` before comparison.

    Returns:
        A dict with ``"audio"`` and ``"subtitle"`` entries, each holding
        ``configured`` (the list passed in), ``found_tags`` (tags as they
        appear in the log), ``found_langs`` (sorted primary subtags) and
        ``exists_all`` (True when every configured language was found).
    """

    def primary(tag):
        # Reduce a full tag to its primary language subtag.
        return Language.get(tag).language

    def report(configured, tags, primaries):
        return {
            "configured": configured,
            "found_tags": tags,
            "found_langs": sorted(primaries),
            "exists_all": all(primary(wanted) in primaries for wanted in configured),
        }

    found_audio, found_subs = extract_langs(stderr_text)

    # Normalise both sets up front, audio first, before building any report.
    audio_primaries = set(map(primary, found_audio))
    sub_primaries = set(map(primary, found_subs))

    return {
        "audio": report(audio_lang_cfg, found_audio, audio_primaries),
        "subtitle": report(sub_lang_cfg, found_subs, sub_primaries),
    }
|
||
|
||
def video_details(stderr_text: str):
    """
    Parse the video track lines of the 'All Tracks' part of a log.

    Only text before the first 'Selected Tracks' marker is scanned (the whole
    text when the marker is absent). A track line looks like::

        VID | [H.264, SDR] | 1920x1080 @ 11,038 kb/s (376.04 MiB), 23.976 FPS

    The size in parentheses is optional, and the frame rate may be the
    literal word ``Unknown``.

    Returns:
        A list of dicts, one per matched line, with keys:
        ``codec`` (str), ``range`` (str or None), ``resolution``
        (``[width, height]`` as digit strings), ``bitrate`` (int, kb/s),
        ``framerate`` (float, or None for 'Unknown FPS') and ``size``
        (e.g. ``'376.04 MiB'``, or None when not printed).
    """
    # Anchored to 'VID | [ ... ]' so a log-level marker such as [I] earlier on
    # the line is never mistaken for the codec bracket.
    vid_re = re.compile(r"""
        VID\s*\|\s*\[\s*(?P<codec>[^,\]]+)\s*(?:,\s*(?P<range>[^\]]+))?\]\s*\|\s*
        (?P<width>\d{3,4})x(?P<height>\d{3,4})\s*@\s*(?P<kbps>\d[\d,]*)\s*kb/s
        (?:\s*\((?P<size>[^()]*?(?:MiB|GiB)[^()]*)\))?\s*,\s*
        (?P<fps>\d+(?:\.\d+)?|Unknown)\s*FPS
    """, re.VERBOSE)

    # split() returns the whole text as element 0 when the marker is absent.
    all_section = stderr_text.split("Selected Tracks", 1)[0]

    results = []
    for m in vid_re.finditer(all_section):
        fps_text = m.group("fps")
        range_text = m.group("range")
        size_text = m.group("size")
        results.append({
            "codec": m.group("codec").strip(),
            "range": range_text.strip() if range_text else None,
            "resolution": [m.group("width"), m.group("height")],
            # Thousands separators are allowed in the log; drop them first.
            "bitrate": int(m.group("kbps").replace(",", "")),
            "framerate": None if fps_text == "Unknown" else float(fps_text),
            "size": size_text.strip() if size_text else None,
        })

    return results
|
||
|
||
def extract_chapters(stderr_text: str):
    """
    Parse chapter lines such as ``CHP | [01] | 00:04:21.762 | intro``.

    Only text before the first 'Selected Tracks' marker is scanned (the whole
    text when the marker is absent), so chapters listed once in the full
    inventory and again under the selected tracks are not reported twice.
    The marker is matched without a trailing colon, the same way
    ``video_details`` matches it, so both ``Selected Tracks`` and
    ``Selected Tracks:`` end the scanned section.

    Returns:
        A list of dicts in log order, e.g.
        ``{'index': '01', 'time': '00:04:21.762', 'name': 'intro'}``.
        All three values are strings; ``name`` has trailing whitespace removed.
    """
    chapter_re = re.compile(
        r"""
        ^.*?\bCHP\b\s*\|\s*\[(?P<index>\d{1,3})\]\s*\|\s*
        (?P<time>\d{2}:\d{2}:\d{2}(?:\.\d{1,4})?)\s*\|\s*
        (?P<name>.+?)\s*$
        """,
        re.IGNORECASE | re.MULTILINE | re.VERBOSE
    )
    # split() returns the whole text as element 0 when the marker is absent.
    section = stderr_text.split("Selected Tracks", 1)[0]
    return [
        {
            "index": m.group("index"),
            "time": m.group("time"),
            "name": m.group("name"),
        }
        for m in chapter_re.finditer(section)
    ]
|
||
|
||
def extract_title(stderr_text: str) -> str | None:
|
||
TITLE_RE = re.compile(r"Getting tracks for\s+(?P<title>.+?)\s*\[", re.IGNORECASE)
|
||
|
||
m = TITLE_RE.search(stderr_text)
|
||
return m.group("title").strip() if m else None
|
||
|
||
def extract_file_path(stderr: str) -> str | None:
|
||
import re
|
||
m = re.search(r"File path -\s*\n([\s\S]*?)\n\s*\n", stderr)
|
||
if not m:
|
||
return None
|
||
return "".join(line.strip() for line in m.group(1).splitlines())
|
||
|
||
def main():
    """
    Demo entry point: run ``extract_file_path`` on a captured sample log and
    print the result.
    """
    # Captured console output used as the fixture. The other parsers in this
    # module can be tried against the same text in the same way.
    sample_log = """
▄• ▄▌ ▐ ▄ .▄▄ · ▄ .▄ ▄▄▄· ▄▄· ▄ •▄ ▄▄▌ ▄▄▄ .
█▪██▌•█▌▐█▐█ ▀. ██▪▐█▐█ ▀█ ▐█ ▌▪█▌▄▌▪██• ▀▄.▀·
█▌▐█▌▐█▐▐▌▄▀▀▀█▄██▀▐█▄█▀▀█ ██ ▄▄▐▀▀▄·██▪ ▐▀▀▪▄
▐█▄█▌██▐█▌▐█▄▪▐███▌▐▀▐█ ▪▐▌▐███▌▐█.█▌▐█▌▐▌▐█▄▄▌
▀▀▀ ▀▀ █▪ ▀▀▀▀ ▀▀▀ · ▀ ▀ ·▀▀▀ ·▀ ▀.▀▀▀ ▀▀▀
v 4.0.0 - © 2025-2026 - github.com/unshackle-dl/unshackle

Service Config loaded
Loaded 1/1 Vaults
Loaded Widevine CDM: 8159 (L3)

─────────────────────────────── Service: CR ────────────────────────────────

Authenticated with Service

─ Series: Noble Reincarnation: Born Blessed, So I’ll Obtain Ultimate Powe… ─

1 seasons, S1(12)

─ Noble Reincarnation: Born Blessed, So I’ll Obtain Ultimate Power 2026 S… ─


1 Video
└── [H.264, SDR] | ja-JP | 1920x1080 @ 11038 kb/s, 23.976 FPS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 00:24 • Downloaded
2 Audio
├── [AAC] | ja-JP | 2.0 | 200 kb/s | Japanese
│ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 00:15 • Downloaded
└── [AAC] | th-TH | 2.0 | 201 kb/s | Thai
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 00:15 • Downloaded
2 Subtitles
├── [ASS] | th-TH | Thai
│ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 00:01 • Downloaded
└── [ASS] | en-US | English
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 00:01 • Downloaded
6 Chapters
├── 00:00:00.000 | Chapter 1
├── 00:02:11.000 | Chapter 2
├── 00:03:21.000 | Intro
├── 00:04:50.000 | Chapter 3
├── 00:21:56.000 | Credits
└── 00:23:27.000 | Chapter 4

Widevine(AAAAgnBzc2gAAAAA7e+LqXnWSs6jyCfc1R0h7QAAAGIIARIQx4rAibzIP...)
└── c78ac089bcc83e8ea8fe89729f1093c7:f4797a42fa189a1326dc3da31b8957ab*
from Local SQLite

Widevine(AAAAgnBzc2gAAAAA7e+LqXnWSs6jyCfc1R0h7QAAAGIIARIQrm5MD9N8M...)
└── ae6e4c0fd37c32d5be7a3188ce31a60b:d11e30c933334530a5e591e58978929c*
from Local SQLite

Track downloads finished in 0m24s
Using 'DejaVu Sans' as fallback for 'Arial Unicode MS'
Using 'Liberation Sans' as fallback for 'Arial'
Using 'Liberation Serif' as fallback for 'Times New Roman'
Using 'DejaVu Sans' as fallback for 'Trebuchet MS'
Attached 5 fonts for the Subtitles
Repacked one or more tracks with FFMPEG
Multiplexing... ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 00:03

🎉 Title downloaded in 0m29s!

File path -
/Entertainment_1/Downloads/USCK/Noble.Reincarnation.Born.Blessed.So.Il
l.Obtain.Ultimate.Power.2026.S01.1080p.CR.WEB-DL.DUAL.AAC2.0.H.264-[Se
Free]/Noble.Reincarnation.Born.Blessed.So.Ill.Obtain.Ultimate.Power.20
26.S01E11.Disinheritance.1080p.CR.WEB-DL.DUAL.AAC2.0.H.264-[SeFree].mk
v

Processed all titles in 0m33s
"""
    output_path = extract_file_path(sample_log)
    print(output_path)
|
||
|
||
|
||
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()