refactor(providers): extract metadata providers into modular system
- Create `unshackle/core/providers/` package with abstract base class, IMDBApi (free, no key), SIMKL, and TMDB provider implementations
- Add consensus-based ID enrichment: cross-references IMDB IDs with TMDB and SIMKL, drops all data from providers that disagree on tmdb_id (likely resolved to wrong title)
- Cache enriched IDs alongside raw provider data so they survive cache round-trips
- Genericize TitleCacher with `cache_provider()`/`get_cached_provider()` replacing provider-specific methods; respect `--no-cache` flag
- Add `--imdb` CLI flag to dl command for direct IMDB ID lookup
This commit is contained in:
97
unshackle/core/providers/_base.py
Normal file
97
unshackle/core/providers/_base.py
Normal file
@@ -0,0 +1,97 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from difflib import SequenceMatcher
|
||||
from typing import Optional, Union
|
||||
|
||||
import requests
|
||||
from requests.adapters import HTTPAdapter, Retry
|
||||
|
||||
# Module-level logger for the metadata helpers in this file.
log = logging.getLogger("METADATA")

# Headers installed on every lazily created provider session.
HEADERS = {"User-Agent": "unshackle-tags/1.0"}

# One or more characters that are not ASCII letters or digits
# (case-insensitive); used to normalise titles before comparison.
STRIP_RE = re.compile(r"[^a-z0-9]+", re.IGNORECASE)

# A trailing four-digit year starting with 1 or 2, optionally wrapped in
# parentheses, together with any whitespace in front of it.
YEAR_RE = re.compile(r"\s*\(?[12][0-9]{3}\)?$")
|
||||
|
||||
|
||||
@dataclass
class ExternalIds:
    """Identifiers for one title as known to the external databases.

    Every field defaults to ``None``, meaning "not known".
    """

    # IMDb identifier, stored as a string.
    imdb_id: Optional[str] = None
    # TMDB numeric identifier.
    tmdb_id: Optional[int] = None
    # Which TMDB namespace ``tmdb_id`` belongs to: "movie" or "tv".
    tmdb_kind: Optional[str] = None
    # TVDB numeric identifier.
    tvdb_id: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
class MetadataResult:
    """Provider-independent description of a single title.

    All fields are optional so a provider can fill in only what it knows;
    ``external_ids`` always holds an ``ExternalIds`` instance (a fresh,
    empty one by default).
    """

    title: Optional[str] = None
    year: Optional[int] = None
    # Either "movie" or "tv".
    kind: Optional[str] = None
    # Each instance gets its own ExternalIds object via the factory.
    external_ids: ExternalIds = field(default_factory=ExternalIds)
    # Name of the provider that produced this result,
    # e.g. "tmdb", "simkl", "imdbapi".
    source: str = ""
    # The provider's original API response, kept for caching.
    raw: Optional[dict] = None
|
||||
|
||||
|
||||
class MetadataProvider(metaclass=ABCMeta):
    """Interface that every metadata provider implements.

    Concrete providers override ``NAME`` (used to derive the logger name)
    and ``REQUIRES_KEY``, and implement the four abstract methods below.
    An HTTP session with retry handling is created on first use and is
    available through :attr:`session`.
    """

    # Short provider identifier; upper-cased to build the logger name.
    NAME: str = ""
    # NOTE(review): presumably signals that the provider needs an API key;
    # this class itself never reads the flag.
    REQUIRES_KEY: bool = True

    def __init__(self) -> None:
        # The session is not built here; see the ``session`` property.
        self._session: Optional[requests.Session] = None
        self.log = logging.getLogger(f"METADATA.{self.NAME.upper()}")

    @property
    def session(self) -> requests.Session:
        """Return the provider's HTTP session, creating it on first access.

        The session carries the module's default ``HEADERS`` and retries
        GET/POST requests up to three times (backoff factor 1) on
        429/500/502/503/504 responses, for both HTTPS and HTTP URLs.
        """
        if self._session is not None:
            return self._session

        sess = self._session = requests.Session()
        sess.headers.update(HEADERS)

        retrying_adapter = HTTPAdapter(
            max_retries=Retry(
                total=3,
                backoff_factor=1,
                status_forcelist=[429, 500, 502, 503, 504],
                allowed_methods=["GET", "POST"],
            )
        )
        # One adapter instance serves both schemes.
        for scheme in ("https://", "http://"):
            sess.mount(scheme, retrying_adapter)
        return sess

    @abstractmethod
    def is_available(self) -> bool:
        """Return True if this provider has the credentials/keys it needs."""

    @abstractmethod
    def search(self, title: str, year: Optional[int], kind: str) -> Optional[MetadataResult]:
        """Search for a title and return metadata, or None on failure/no match."""

    @abstractmethod
    def get_by_id(self, provider_id: Union[int, str], kind: str) -> Optional[MetadataResult]:
        """Fetch metadata by this provider's native ID."""

    @abstractmethod
    def get_external_ids(self, provider_id: Union[int, str], kind: str) -> ExternalIds:
        """Fetch external IDs for a title by this provider's native ID."""
|
||||
|
||||
|
||||
def _clean(s: str) -> str:
    """Normalise ``s`` for comparison.

    Removes every run of characters matched by ``STRIP_RE`` and lower-cases
    what remains.
    """
    # Substitute first, then lower-case: the order is kept deliberately.
    kept = STRIP_RE.sub("", s)
    return kept.lower()
|
||||
|
||||
|
||||
def _strip_year(s: str) -> str:
    """Remove a trailing year such as ``" (2021)"`` or ``" 2021"`` from ``s``.

    The result is whitespace-trimmed. If removing the year would leave
    nothing (the whole title is a year, e.g. ``"1917"`` or ``"(2012)"``),
    the trimmed original is returned instead of an empty string.
    """
    without_year = YEAR_RE.sub("", s).strip()
    # Never hand back an empty title just because the title *is* a year.
    return without_year or s.strip()
|
||||
|
||||
|
||||
def fuzzy_match(a: str, b: str, threshold: float = 0.8) -> bool:
    """Return True if ``a`` and ``b`` are a close match.

    Both strings are normalised with ``_clean`` and compared with
    ``difflib.SequenceMatcher``; the match succeeds when the similarity
    ratio is at least ``threshold``.

    If normalisation removes *everything* from both inputs (for example two
    titles written entirely in a non-Latin script), the case-folded,
    whitespace-trimmed originals are compared instead. Otherwise two empty
    strings would be compared, which always yields a ratio of 1.0 and makes
    unrelated titles match.
    """
    left, right = _clean(a), _clean(b)
    if not left and not right:
        # Nothing survived normalisation; compare the raw text instead.
        left, right = a.strip().casefold(), b.strip().casefold()
    return SequenceMatcher(None, left, right).ratio() >= threshold
|
||||
Reference in New Issue
Block a user