# usk_schedule_downloader/lib/check_track_detail.py

import re
from langcodes import Language, tag_is_valid

def first_valid_bcp47(parts):
    """
    Pick the first entry of ``parts`` that ``tag_is_valid`` accepts.

    Each entry is whitespace-stripped first. Entries fully wrapped in square
    brackets (markers such as ``[Original]``) are skipped without being
    validated. The stripped token is handed to ``tag_is_valid`` unchanged,
    with no case normalization.

    Returns the stripped token, or None when no entry qualifies.
    """
    stripped_tokens = (raw.strip() for raw in parts)
    return next(
        (
            candidate
            for candidate in stripped_tokens
            if not (candidate.startswith("[") and candidate.endswith("]"))
            and tag_is_valid(candidate)
        ),
        None,
    )

def extract_langs(text):
    """
    Collect the language tags of audio and subtitle tracks listed in a log.

    A track line is recognised by a bracketed codec/format marker followed by
    a ``|`` separator and a language tag, e.g. ``[AAC] | ja-JP | 2.0`` or
    ``[ASS] | en-US | English``.

    The tag pattern accepts a lowercase two- or three-letter primary language
    followed by any number of ``-subtag`` parts (``fil-PH``, ``es-419``,
    ``zh-Hant-TW``). The whole tag must be captured: a longer word is never
    truncated to its first letters, so ``fil`` is not read as ``fi``.

    Returns:
        A tuple ``(audio, subs)`` of two lists holding the distinct tags in
        first-seen order.
    """
    # Kept as a plain raw string: the {m,n} quantifiers must not end up inside
    # the f-strings below, where braces would be treated as replacement fields.
    lang = r'(?P<lang>[a-z]{2,3}(?:-[A-Za-z0-9]{1,8})*)(?![A-Za-z0-9])'
    audio_re = re.compile(
        rf'\[(?:AAC|DD\+?|AC-4|OPUS|VORB|DTS|ALAC|FLAC)\]\s*\|\s*{lang}'
    )
    sub_re = re.compile(
        rf'\[(?:SRT|SSA|ASS|VTT|TTML|SMI|SUB|MPL2|TMP|STPP|WVTT)\]\s*\|\s*{lang}'
    )

    audio = []
    subs = []
    for line in text.splitlines():
        # Only the first match per line is used for each track kind.
        for pattern, bucket in ((audio_re, audio), (sub_re, subs)):
            hit = pattern.search(line)
            if hit is None:
                continue
            tag = hit.group("lang")
            if tag not in bucket:
                bucket.append(tag)

    return audio, subs

def check_langs_with_langcodes(stderr_text: str, audio_lang_cfg: list[str], sub_lang_cfg: list[str]):
    """
    Compare the configured audio/subtitle languages with those found in a log.

    Track tags are pulled from ``stderr_text`` via ``extract_langs``. Both the
    found tags and the configured entries are reduced to the ``language``
    attribute of ``Language.get(...)`` before they are compared.

    Returns a dict with an ``"audio"`` and a ``"subtitle"`` entry, each holding:
        configured  - the configured list, passed through untouched
        found_tags  - tags as extracted from the log
        found_langs - sorted primary languages of the found tags
        exists_all  - True when every configured language was found
    """
    def primary_of(tag):
        return Language.get(tag).language

    audio_tags, sub_tags = extract_langs(stderr_text)

    # Reduce every found tag to its primary language before any comparison.
    audio_primary = set(map(primary_of, audio_tags))
    sub_primary = set(map(primary_of, sub_tags))

    audio_report = {
        "configured": audio_lang_cfg,
        "found_tags": audio_tags,
        "found_langs": sorted(audio_primary),
        "exists_all": all(primary_of(wanted) in audio_primary for wanted in audio_lang_cfg),
    }
    subtitle_report = {
        "configured": sub_lang_cfg,
        "found_tags": sub_tags,
        "found_langs": sorted(sub_primary),
        "exists_all": all(primary_of(wanted) in sub_primary for wanted in sub_lang_cfg),
    }
    return {"audio": audio_report, "subtitle": subtitle_report}

def video_details(stderr_text: str):
    """
    Parse the video track lines of the 'All Tracks' part of a log.

    Only the text before the first 'Selected Tracks' marker is scanned; when
    the marker is absent the whole text is scanned.

    Returns a list with one dict per matched ``VID | [...] | ...`` line:
        codec      - text before the first comma inside the brackets
        range      - text after that comma (e.g. 'SDR'), or None if absent
        resolution - [width, height] as the digit strings found in the log
        bitrate    - int kb/s, thousands separators removed
        framerate  - float, or None when the log says 'Unknown FPS'
        size       - e.g. '376.04 MiB', or None when no size is printed
    """
    # Anchored to 'VID | [ ... ]' so it never reads a log-level marker like [I].
    # The FPS field is either a number or the literal word 'Unknown'.
    # The bitrate must start with a digit so int() can never see an empty string.
    VID_RE = re.compile(r"""
        VID\s*\|\s*\[\s*(?P<codec>[^,\]]+)\s*(?:,\s*(?P<range>[^\]]+))?\]\s*\|\s*
        (?P<width>\d{3,4})x(?P<height>\d{3,4})\s*@\s*(?P<kbps>\d[\d,]*)\s*kb/s
        (?:\s*\((?P<size>[^()]*?(?:MiB|GiB)[^()]*)\))?\s*,\s*(?P<fps>\d+(?:\.\d+)?|Unknown)\s*FPS
    """, re.VERBOSE)

    # split() returns the whole text as element 0 when the marker is absent,
    # so no separate membership test is needed.
    all_section = stderr_text.split("Selected Tracks", 1)[0]

    results = []
    for m in VID_RE.finditer(all_section):
        fps_text = m.group("fps")
        range_text = m.group("range")
        size_text = m.group("size")

        results.append({
            "codec":      m.group("codec").strip(),
            "range":      range_text.strip() if range_text else None,
            "resolution": [m.group("width"), m.group("height")],
            "bitrate":    int(m.group("kbps").replace(",", "")),
            "framerate":  None if fps_text == "Unknown" else float(fps_text),
            "size":       size_text.strip() if size_text else None,
        })

    return results

def extract_chapters(stderr_text: str):
    """
    Pull chapter entries out of a log.

    A chapter line looks like ``CHP | [01] | 00:04:21.762 | intro``; anything
    may precede ``CHP`` on the line. Only the text before the first
    'Selected Tracks:' marker is scanned, or the whole text when the marker
    is absent.

    Returns a list of dicts with the string fields 'index', 'time' and 'name',
    in the order the lines appear.
    """
    chapter_line = re.compile(
        r"""
        ^.*?\bCHP\b\s*\|\s*\[(?P<index>\d{1,3})\]\s*\|\s*
        (?P<time>\d{2}:\d{2}:\d{2}(?:\.\d{1,4})?)\s*\|\s*
        (?P<name>.+?)\s*$
        """,
        re.IGNORECASE | re.MULTILINE | re.VERBOSE
    )
    # Everything up to the marker is the full track inventory.
    inventory, _, _ = stderr_text.partition("Selected Tracks:")
    return [
        {field: hit.group(field) for field in ("index", "time", "name")}
        for hit in chapter_line.finditer(inventory)
    ]

def extract_title(stderr_text: str) -> str | None:
    TITLE_RE = re.compile(r"Getting tracks for\s+(?P<title>.+?)\s*\[", re.IGNORECASE)

    m = TITLE_RE.search(stderr_text)
    return m.group("title").strip() if m else None

def extract_file_path(stderr: str) -> str | None:
    import re
    m = re.search(r"File path -\s*\n([\s\S]*?)\n\s*\n", stderr)
    if not m:
        return None
    return "".join(line.strip() for line in m.group(1).splitlines())

def main():
    """
    Demo entry point: run extract_file_path on a captured sample log and
    print the reassembled path.
    """
    # Example usage
    # Sample console output used as the input fixture. The path under
    # 'File path -' is wrapped across several lines.
    stderr_example = """
                ▄• ▄▌ ▐ ▄ .▄▄ ·  ▄ .▄ ▄▄▄·  ▄▄· ▄ •▄ ▄▄▌  ▄▄▄ .
                █▪██▌•█▌▐█▐█ ▀. ██▪▐█▐█ ▀█ ▐█ ▌▪█▌▄▌▪██•  ▀▄.▀·
                █▌▐█▌▐█▐▐▌▄▀▀▀█▄██▀▐█▄█▀▀█ ██ ▄▄▐▀▀▄·██▪  ▐▀▀▪▄
                ▐█▄█▌██▐█▌▐█▄▪▐███▌▐▀▐█ ▪▐▌▐███▌▐█.█▌▐█▌▐▌▐█▄▄▌
                 ▀▀▀ ▀▀ █▪ ▀▀▀▀ ▀▀▀ · ▀  ▀ ·▀▀▀ ·▀  ▀.▀▀▀  ▀▀▀
           v 4.0.0 - © 2025-2026 - github.com/unshackle-dl/unshackle

     Service Config loaded
     Loaded 1/1 Vaults
     Loaded Widevine CDM: 8159 (L3)

  ─────────────────────────────── Service: CR ────────────────────────────────

     Authenticated with Service

  ─ Series: Noble Reincarnation: Born Blessed, So I’ll Obtain Ultimate Powe… ─

     1 seasons, S1(12)

  ─ Noble Reincarnation: Born Blessed, So I’ll Obtain Ultimate Power 2026 S… ─


     1 Video
     └── [H.264, SDR] | ja-JP | 1920x1080 @ 11038 kb/s, 23.976 FPS
          ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 00:24 • Downloaded
     2 Audio
     ├── [AAC] | ja-JP | 2.0 | 200 kb/s | Japanese
     │    ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 00:15 • Downloaded
     └── [AAC] | th-TH | 2.0 | 201 kb/s | Thai
          ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 00:15 • Downloaded
     2 Subtitles
     ├── [ASS] | th-TH | Thai
     │    ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 00:01 • Downloaded
     └── [ASS] | en-US | English
          ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 00:01 • Downloaded
     6 Chapters
     ├── 00:00:00.000 | Chapter 1
     ├── 00:02:11.000 | Chapter 2
     ├── 00:03:21.000 | Intro
     ├── 00:04:50.000 | Chapter 3
     ├── 00:21:56.000 | Credits
     └── 00:23:27.000 | Chapter 4

     Widevine(AAAAgnBzc2gAAAAA7e+LqXnWSs6jyCfc1R0h7QAAAGIIARIQx4rAibzIP...)
     └── c78ac089bcc83e8ea8fe89729f1093c7:f4797a42fa189a1326dc3da31b8957ab*
         from Local SQLite

     Widevine(AAAAgnBzc2gAAAAA7e+LqXnWSs6jyCfc1R0h7QAAAGIIARIQrm5MD9N8M...)
     └── ae6e4c0fd37c32d5be7a3188ce31a60b:d11e30c933334530a5e591e58978929c*
         from Local SQLite

     Track downloads finished in 0m24s
     Using 'DejaVu Sans' as fallback for 'Arial Unicode MS'
     Using 'Liberation Sans' as fallback for 'Arial'
     Using 'Liberation Serif' as fallback for 'Times New Roman'
     Using 'DejaVu Sans' as fallback for 'Trebuchet MS'
     Attached 5 fonts for the Subtitles
     Repacked one or more tracks with FFMPEG
     Multiplexing...  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 00:03

     🎉 Title downloaded in 0m29s!

     File path -
     /Entertainment_1/Downloads/USCK/Noble.Reincarnation.Born.Blessed.So.Il
     l.Obtain.Ultimate.Power.2026.S01.1080p.CR.WEB-DL.DUAL.AAC2.0.H.264-[Se
     Free]/Noble.Reincarnation.Born.Blessed.So.Ill.Obtain.Ultimate.Power.20
     26.S01E11.Disinheritance.1080p.CR.WEB-DL.DUAL.AAC2.0.H.264-[SeFree].mk
     v

     Processed all titles in 0m33s
"""
    # Disabled demo calls for the other extractors in this module; only the
    # file-path extraction below is currently exercised.
    # audio_lang_cfg = "ja,th"
    # sub_lang_cfg = "th,en"

    # audio_lang_cfg= audio_lang_cfg.split(",")
    # sub_lang_cfg = sub_lang_cfg.split(",")

    # title = extract_title(stderr_example)
    # vid_details = video_details(stderr_example)
    # chapters = extract_chapters(stderr_example)
    # lang_result = check_langs_with_langcodes(stderr_example, audio_lang_cfg, sub_lang_cfg)

    # print(title)
    # print(vid_details)
    # print(chapters)
    # print(lang_result)
    # print("dsfasdf")
    print(extract_file_path(stderr_example))


# Run the demo only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()