add SeFree-Custom-Script

2026-03-31 12:57:14 +07:00
parent 99bacaff3f
commit ac89cbf545
12 changed files with 1478 additions and 3 deletions
--- a/SeFree-Custom-Script/extend_subtitle.py
+++ b/SeFree-Custom-Script/extend_subtitle.py
@@ -0,0 +1,307 @@
+
+#!/usr/bin/env python3
+"""
+Batch-shift text subtitles (ASS, SRT, TTML, VTT) by +N seconds.
+
+Primary method:
+  - FFmpeg with `-itsoffset` to create a shifted external subtitle file,
+    preserving the original format/extension.
+
+Fallback:
+  - Python per-format shifters for SRT, VTT, ASS, TTML (handles negative-time clamping).
+
+Outputs:
+  - Writes to <folder>/output/ keeping the same file names.
+
+Usage:
+  python extend_subtitle.py /path/to/folder 1
+  python extend_subtitle.py "/subs_folder" 1 --recursive --skip-existing
+"""
+
+import argparse
+import re
+import sys
+import shutil
+import subprocess
+from pathlib import Path
+from typing import List, Tuple, Optional
+import xml.etree.ElementTree as ET
+
+# Subtitle file extensions processed by default (compared case-insensitively).
+SUPPORTED_EXTS = [".srt", ".ass", ".vtt", ".ttml"]
+# Maps a lower-cased extension to the codec name passed to FFmpeg via `-c:s`.
+# A value of None makes ffmpeg_shift() fall back to `-c copy`.
+FFMPEG_CODEC_BY_EXT = {
+    ".srt": None,             # stream copy is used for SRT
+    ".ass": "ass",            # explicit ASS codec
+    ".vtt": "webvtt",         # explicit WebVTT codec
+    ".ttml": "ttml",          # may be missing in some FFmpeg builds; the Python fallback covers that
+}
+
+def check_binary(name: str):
+    if shutil.which(name) is None:
+        print(f"Error: '{name}' not found on PATH. Install it and try again.")
+        sys.exit(1)
+
+def ffmpeg_shift(input_sub: Path, output_sub: Path, seconds: float) -> int:
+    """
+    Try to shift a text subtitle with FFmpeg using -itsoffset.
+    Use -c:s <codec> when known; otherwise -c copy.
+    """
+    ext = input_sub.suffix.lower()
+    codec = FFMPEG_CODEC_BY_EXT.get(ext)
+    cmd = [
+        "ffmpeg", "-hide_banner", "-loglevel", "error", "-y",
+        "-itsoffset", str(seconds),
+        "-i", str(input_sub),
+    ]
+    if codec:
+        cmd += ["-c:s", codec]
+    else:
+        cmd += ["-c", "copy"]
+    cmd += [str(output_sub)]
+    print("FFmpeg shift:\n " + " ".join(map(str, cmd)))
+    return subprocess.run(cmd).returncode
+
+# ---------- Python fallback shifters ----------
+def clamp_ms(ms: int) -> int:
+    return max(ms, 0)
+
+# SRT: 00:00:05,123 --> 00:00:08,456
+SRT_TIME = re.compile(r"(\d{2}):(\d{2}):(\d{2}),(\d{3})")
+def srt_to_ms(m: re.Match) -> int:
+    h, mi, s, ms = map(int, m.groups())
+    return ((h * 3600 + mi * 60 + s) * 1000) + ms
+
+def ms_to_srt(ms: int) -> str:
+    ms = clamp_ms(ms)
+    h = ms // 3600000; ms %= 3600000
+    mi = ms // 60000; ms %= 60000
+    s = ms // 1000; ms %= 1000
+    return f"{h:02}:{mi:02}:{s:02},{ms:03}"
+
+def shift_srt_text(text: str, offset_ms: int) -> str:
+    out_lines = []
+    for line in text.splitlines():
+        if "-->" in line:
+            parts = line.split("-->")
+            left = SRT_TIME.search(parts[0])
+            right = SRT_TIME.search(parts[1])
+            if left and right:
+                l_ms = srt_to_ms(left) + offset_ms
+                r_ms = srt_to_ms(right) + offset_ms
+                new_line = f"{ms_to_srt(l_ms)} --> {ms_to_srt(r_ms)}"
+                out_lines.append(new_line)
+                continue
+        out_lines.append(line)
+    return "\n".join(out_lines)
+
+# VTT: WEBVTT header; times use '.' separator: 00:00:05.123 --> ...
+VTT_TIME = re.compile(r"(\d{2}):(\d{2}):(\d{2})\.(\d{3})")
+def vtt_to_ms(m: re.Match) -> int:
+    h, mi, s, ms = map(int, m.groups())
+    return ((h * 3600 + mi * 60 + s) * 1000) + ms
+
+def ms_to_vtt(ms: int) -> str:
+    ms = clamp_ms(ms)
+    h = ms // 3600000; ms %= 3600000
+    mi = ms // 60000; ms %= 60000
+    s = ms // 1000; ms %= 1000
+    return f"{h:02}:{mi:02}:{s:02}.{ms:03}"
+
+def shift_vtt_text(text: str, offset_ms: int) -> str:
+    out_lines = []
+    for i, line in enumerate(text.splitlines()):
+        if "-->" in line:
+            # Preserve cue settings like "line:-1 align:right" if they exist.
+            left, right = line.split("-->", 1)
+            # Left timestamp may have trailing settings; isolate the time token
+            lm = VTT_TIME.search(left)
+            rm = VTT_TIME.search(right)
+            if lm and rm:
+                l_ms = vtt_to_ms(lm) + offset_ms
+                r_ms = vtt_to_ms(rm) + offset_ms
+                # Replace only the matched portions; keep extra cue settings
+                left_new = VTT_TIME.sub(ms_to_vtt(l_ms), left, count=1)
+                right_new = VTT_TIME.sub(ms_to_vtt(r_ms), right, count=1)
+                out_lines.append(f"{left_new}-->{right_new}")
+                continue
+        out_lines.append(line)
+    return "\n".join(out_lines)
+
+# ASS: times appear in Dialogue events; format line defines field order.
+# Typical: "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text"
+# Dialogue: 0,00:00:05.12,00:00:08.34,Default,...
+ASS_TIME = re.compile(r"(\d{2}):(\d{2}):(\d{2})\.(\d{2})")
+def ass_to_cs(m: re.Match) -> int:
+    h, mi, s, cs = map(int, m.groups())
+    return ((h * 3600 + mi * 60 + s) * 100) + cs  # centiseconds
+
+def cs_to_ass(cs: int) -> str:
+    cs = max(cs, 0)
+    h = cs // (3600 * 100); cs %= (3600 * 100)
+    mi = cs // (60 * 100); cs %= (60 * 100)
+    s = cs // 100; cs %= 100
+    return f"{h:02}:{mi:02}:{s:02}.{cs:02}"
+
+def shift_ass_text(text: str, offset_ms: int) -> str:
+    offset_cs = int(round(offset_ms / 10.0))
+    out_lines = []
+    fmt_fields: Optional[List[str]] = None
+    for line in text.splitlines():
+        if line.startswith("Format:"):
+            # Capture field order for reference
+            fmt_fields = [f.strip() for f in line.split(":", 1)[1].split(",")]
+            out_lines.append(line)
+            continue
+        if line.startswith("Dialogue:"):
+            parts = line.split(":", 1)[1].split(",", maxsplit=len(fmt_fields) or 10)
+            # Heuristic: Start = field named "Start" or position 1; End = "End" or position 2
+            try:
+                if fmt_fields:
+                    start_idx = fmt_fields.index("Start")
+                    end_idx = fmt_fields.index("End")
+                else:
+                    start_idx, end_idx = 1, 2
+                sm = ASS_TIME.search(parts[start_idx])
+                em = ASS_TIME.search(parts[end_idx])
+                if sm and em:
+                    s_cs = ass_to_cs(sm) + offset_cs
+                    e_cs = ass_to_cs(em) + offset_cs
+                    parts[start_idx] = ASS_TIME.sub(cs_to_ass(s_cs), parts[start_idx], count=1)
+                    parts[end_idx] = ASS_TIME.sub(cs_to_ass(e_cs), parts[end_idx], count=1)
+                    out_lines.append("Dialogue:" + ",".join(parts))
+                    continue
+            except Exception:
+                pass
+        out_lines.append(line)
+    return "\n".join(out_lines)
+
+# TTML: XML; adjust begin/end/dur attributes when present.
+def parse_time_to_ms(value: str) -> Optional[int]:
+    """
+    Accept forms like 'HH:MM:SS.mmm' or 'HH:MM:SS:FF' (rare) or 'XmYsZms'
+    Keep to simplest: HH:MM:SS.mmm and HH:MM:SS for typical TTML.
+    """
+    m = re.match(r"^(\d{2}):(\d{2}):(\d{2})(?:\.(\d{1,3}))?$", value)
+    if m:
+        h, mi, s = map(int, m.groups()[:3])
+        ms = int((m.group(4) or "0").ljust(3, "0"))
+        return ((h * 3600 + mi * 60 + s) * 1000) + ms
+    return None
+
+def ms_to_ttml(ms: int) -> str:
+    ms = clamp_ms(ms)
+    h = ms // 3600000; ms %= 3600000
+    mi = ms // 60000; ms %= 60000
+    s = ms // 1000; ms %= 1000
+    return f"{h:02}:{mi:02}:{s:02}.{ms:03}"
+
+def shift_ttml_text(text: str, offset_ms: int) -> str:
+    try:
+        root = ET.fromstring(text)
+        # Common TTML namespaces vary; try to adjust attributes on any element
+        for elem in root.iter():
+            for attr in ("begin", "end", "dur"):
+                if attr in elem.attrib:
+                    val = elem.attrib[attr]
+                    ms = parse_time_to_ms(val)
+                    if ms is not None:
+                        if attr == "dur":
+                            # duration stays the same when prepending silence
+                            continue
+                        elem.attrib[attr] = ms_to_ttml(ms + offset_ms)
+        return ET.tostring(root, encoding="unicode")
+    except Exception:
+        # If parsing fails, return original text
+        return text
+
+def python_shift(input_sub: Path, output_sub: Path, seconds: float) -> bool:
+    """
+    Format-aware shifting when FFmpeg fails or for negative offset clamping.
+    """
+    ext = input_sub.suffix.lower()
+    text = input_sub.read_text(encoding="utf-8", errors="replace")
+    offset_ms = int(round(seconds * 1000))
+
+    if ext == ".srt":
+        out = shift_srt_text(text, offset_ms)
+    elif ext == ".vtt":
+        out = shift_vtt_text(text, offset_ms)
+        # Ensure WEBVTT header remains if present
+        if not out.lstrip().startswith("WEBVTT") and text.lstrip().startswith("WEBVTT"):
+            out = "WEBVTT\n\n" + out
+    elif ext == ".ass":
+        out = shift_ass_text(text, offset_ms)
+    elif ext == ".ttml":
+        out = shift_ttml_text(text, offset_ms)
+    else:
+        return False
+
+    output_sub.write_text(out, encoding="utf-8")
+    return True
+
+# ---------- batch ----------
+def find_sub_files(folder: Path, recursive: bool, exts: List[str]) -> List[Path]:
+    pattern = "**/*" if recursive else "*"
+    exts_norm = {e.lower() for e in exts}
+    return sorted([p for p in folder.glob(pattern) if p.is_file() and p.suffix.lower() in exts_norm])
+
+def process_one(file: Path, out_dir: Path, seconds: float, skip_existing: bool):
+    out_path = out_dir / file.name
+    if skip_existing and out_path.exists():
+        print(f"Skip (exists): {out_path}")
+        return
+
+    # 1) Try FFmpeg first
+    rc = ffmpeg_shift(file, out_path, seconds)
+    if rc == 0:
+        print(f"OK (ffmpeg): {file.name} -> {out_path.name}")
+        return
+
+    print(f"FFmpeg failed; using Python fallback for {file.name} …")
+    ok = python_shift(file, out_path, seconds)
+    if ok:
+        print(f"OK (python): {file.name} -> {out_path.name}")
+    else:
+        print(f"FAILED: {file}")
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Batch shift text subtitles by N seconds (ASS/SRT/TTML/VTT)."
+    )
+    parser.add_argument("folder", help="Folder containing subtitle files")
+    parser.add_argument("seconds", nargs="?", type=float, default=1.0,
+                        help="Constant time shift in seconds (default: +1.0)")
+    parser.add_argument("--recursive", action="store_true", help="Process subfolders")
+    parser.add_argument("--skip-existing", action="store_true", help="Skip if output already exists")
+    parser.add_argument("--exts", nargs="+", default=SUPPORTED_EXTS, help="Extensions to include")
+    parser.add_argument("--output-dir-name", default="output", help="Name of the output subfolder")
+    args = parser.parse_args()
+
+    check_binary("ffmpeg")
+
+    root = Path(args.folder).expanduser().resolve()
+    if not root.exists() or not root.is_dir():
+        print(f"Error: '{root}' is not a folder.")
+        sys.exit(1)
+
+    out_dir = root / args.output_dir_name
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    files = find_sub_files(root, args.recursive, args.exts)
+    files = [f for f in files if out_dir not in f.parents]
+    if not files:
+        print("No matching subtitle files found.")
+        sys.exit(0)
+
+    print(f"Found {len(files)} file(s). Output: {out_dir}")
+    for f in files:
+        try:
+            process_one(f, out_dir, args.seconds, args.skip_existing)
+        except KeyboardInterrupt:
+            print("\nInterrupted.")
+            sys.exit(130)
+        except Exception as e:
+            print(f"Error on '{f}': {e}")
+    print("Done.")
+
+# Run the CLI only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()