feat: add --exact-lang flag for precise language matching

New --exact-lang CLI flag that enables exact language code matching instead of fuzzy matching. This allows users to get specific regional variants without matching all related variants.

Examples:
- `-l es-419` normally matches all Spanish (es-ES, es-419, es-MX)
- `-l es-419 --exact-lang` matches ONLY es-419 (Latin American Spanish)

Fixes language detection issue where specific variants like es-419 (Latin American Spanish) would match all Spanish variants instead of just close regional variants.
This commit is contained in:
Andy
2025-10-08 01:54:30 +00:00
parent e9ba78cec3
commit 3f6a7e1f68
4 changed files with 33 additions and 8 deletions

View File

@@ -24,7 +24,7 @@ from unidecode import unidecode
from unshackle.core.cacher import Cacher
from unshackle.core.config import config
from unshackle.core.constants import LANGUAGE_MAX_DISTANCE
from unshackle.core.constants import LANGUAGE_EXACT_DISTANCE, LANGUAGE_MAX_DISTANCE
def rotate_log_file(log_path: Path, keep: int = 20) -> Path:
@@ -114,6 +114,14 @@ def is_close_match(language: Union[str, Language], languages: Sequence[Union[str
return closest_match(language, list(map(str, languages)))[1] <= LANGUAGE_MAX_DISTANCE
def is_exact_match(language: Union[str, Language], languages: Sequence[Union[str, Language, None]]) -> bool:
"""Check if a language is an exact match to any of the provided languages."""
languages = [x for x in languages if x]
if not languages:
return False
return closest_match(language, list(map(str, languages)))[1] <= LANGUAGE_EXACT_DISTANCE
def get_boxes(data: bytes, box_type: bytes, as_bytes: bool = False) -> Box:
"""
Scan a byte array for a wanted MP4/ISOBMFF box, then parse and yield each find.