refactor(providers): extract metadata providers into modular system
- Create `unshackle/core/providers/` package with abstract base class, IMDBApi (free, no key), SIMKL, and TMDB provider implementations
- Add consensus-based ID enrichment: cross-references IMDB IDs with TMDB and SIMKL, drops all data from providers that disagree on tmdb_id (likely resolved to wrong title)
- Cache enriched IDs alongside raw provider data so they survive cache round-trips
- Genericize TitleCacher with `cache_provider()`/`get_cached_provider()` replacing provider-specific methods; respect `--no-cache` flag
- Add `--imdb` CLI flag to dl command for direct IMDB ID lookup
This commit is contained in:
428
unshackle/core/providers/__init__.py
Normal file
428
unshackle/core/providers/__init__.py
Normal file
@@ -0,0 +1,428 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
|
||||
import requests
|
||||
|
||||
from unshackle.core.providers._base import ExternalIds, MetadataProvider, MetadataResult, fuzzy_match, log
|
||||
from unshackle.core.providers.imdbapi import IMDBApiProvider
|
||||
from unshackle.core.providers.simkl import SimklProvider
|
||||
from unshackle.core.providers.tmdb import TMDBProvider
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from unshackle.core.title_cacher import TitleCacher
|
||||
|
||||
# Ordered by priority: IMDBApi (free), SIMKL, TMDB
|
||||
ALL_PROVIDERS: list[type[MetadataProvider]] = [IMDBApiProvider, SimklProvider, TMDBProvider]
|
||||
|
||||
|
||||
def get_available_providers() -> list[MetadataProvider]:
    """Return one instance of each provider that has valid credentials.

    Providers are returned in ``ALL_PROVIDERS`` priority order. Each class is
    instantiated exactly once; the previous implementation created every
    provider twice (once for the availability check, once for the result),
    and the returned instance was not the one that had been checked.
    """
    instances = (cls() for cls in ALL_PROVIDERS)
    return [provider for provider in instances if provider.is_available()]
|
||||
|
||||
|
||||
def get_provider(name: str) -> Optional[MetadataProvider]:
    """Return an instantiated provider whose ``NAME`` equals *name*.

    Returns None both when no provider class carries that name and when the
    matching provider exists but lacks the credentials it needs.
    """
    matched = next((cls for cls in ALL_PROVIDERS if cls.NAME == name), None)
    if matched is None:
        return None
    provider = matched()
    if provider.is_available():
        return provider
    return None
|
||||
|
||||
|
||||
# -- Public API (replaces tags.py functions) --
|
||||
|
||||
|
||||
def search_metadata(
    title: str,
    year: Optional[int],
    kind: str,
    title_cacher: Optional[TitleCacher] = None,
    cache_title_id: Optional[str] = None,
    cache_region: Optional[str] = None,
    cache_account_hash: Optional[str] = None,
) -> Optional[MetadataResult]:
    """Search all available providers for metadata. Returns best match.

    Providers are consulted in ``ALL_PROVIDERS`` priority order and the first
    result whose title fuzzy-matches ``title`` wins.

    :param title: Title to search for (fuzzy-matched against provider results).
    :param year: Optional release year forwarded to provider searches.
    :param kind: Content kind, "movie" or "tv".
    :param title_cacher: Optional cache; with ``cache_title_id`` set, cached
        provider data is preferred and fresh results are written back.
    :param cache_title_id: Service-level title ID used as the cache key.
    :param cache_region: Optional region component of the cache key.
    :param cache_account_hash: Optional account component of the cache key.
    :returns: The first fuzzy-matching MetadataResult, or None.
    """
    # Check cache first — a valid cached hit short-circuits all network searches.
    if title_cacher and cache_title_id:
        for cls in ALL_PROVIDERS:
            p = cls()
            if not p.is_available():
                continue
            cached = title_cacher.get_cached_provider(p.NAME, cache_title_id, kind, cache_region, cache_account_hash)
            if cached:
                result = _cached_to_result(cached, p.NAME, kind)
                # Re-verify the cached title still fuzzy-matches the request in
                # case the cache entry was written for a different title.
                if result and result.title and fuzzy_match(result.title, title):
                    log.debug("Using cached %s data for %r", p.NAME, title)
                    return result

    # Search providers in priority order
    for cls in ALL_PROVIDERS:
        p = cls()
        if not p.is_available():
            continue
        try:
            result = p.search(title, year, kind)
        except (requests.RequestException, ValueError, KeyError) as exc:
            # One provider failing is non-fatal; fall through to the next.
            log.debug("%s search failed: %s", p.NAME, exc)
            continue
        if result and result.title and fuzzy_match(result.title, title):
            # Enrich with cross-referenced IDs if we have IMDB but missing TMDB/TVDB
            enrich_ids(result)
            # Cache the result (include enriched IDs so they survive round-trip)
            if title_cacher and cache_title_id and result.raw:
                try:
                    cache_data = result.raw
                    if result.external_ids.tmdb_id or result.external_ids.tvdb_id:
                        # "_enriched_ids" is restored by _cached_to_result for
                        # providers (e.g. imdbapi) whose raw payload has no IDs.
                        cache_data = {
                            **result.raw,
                            "_enriched_ids": _external_ids_to_dict(result.external_ids),
                        }
                    title_cacher.cache_provider(
                        p.NAME, cache_title_id, cache_data, kind, cache_region, cache_account_hash
                    )
                except Exception as exc:
                    # Caching is best-effort; never fail the search over it.
                    log.debug("Failed to cache %s data: %s", p.NAME, exc)
            return result

    return None
|
||||
|
||||
|
||||
def get_title_by_id(
    tmdb_id: int,
    kind: str,
    title_cacher: Optional[TitleCacher] = None,
    cache_title_id: Optional[str] = None,
    cache_region: Optional[str] = None,
    cache_account_hash: Optional[str] = None,
) -> Optional[str]:
    """Get title name by TMDB ID.

    Checks the cache first; on a miss, queries TMDB directly and writes the
    detail plus external IDs back to the cache (best-effort).

    :param tmdb_id: TMDB numeric ID for the title.
    :param kind: Content kind, "movie" or "tv".
    :returns: Title name, or None when TMDB is unavailable or has no match.
    """
    # Check cache first
    if title_cacher and cache_title_id:
        cached = title_cacher.get_cached_provider("tmdb", cache_title_id, kind, cache_region, cache_account_hash)
        if cached and cached.get("detail"):
            detail = cached["detail"]
            # TMDB uses "title" for movies and "name" for TV shows.
            tmdb_title = detail.get("title") or detail.get("name")
            if tmdb_title:
                log.debug("Using cached TMDB title: %r", tmdb_title)
                return tmdb_title

    tmdb = get_provider("tmdb")
    if not tmdb:
        return None
    result = tmdb.get_by_id(tmdb_id, kind)
    if not result:
        return None

    # Cache if possible
    if title_cacher and cache_title_id and result.raw:
        try:
            # External IDs are stored alongside the detail so later
            # fetch_external_ids() calls can be served from cache too.
            ext_ids = tmdb.get_external_ids(tmdb_id, kind)
            title_cacher.cache_provider(
                "tmdb",
                cache_title_id,
                {"detail": result.raw, "external_ids": _external_ids_to_dict(ext_ids)},
                kind,
                cache_region,
                cache_account_hash,
            )
        except Exception as exc:
            # Caching is best-effort; the lookup result is still returned.
            log.debug("Failed to cache TMDB data: %s", exc)

    return result.title
|
||||
|
||||
|
||||
def get_year_by_id(
    tmdb_id: int,
    kind: str,
    title_cacher: Optional[TitleCacher] = None,
    cache_title_id: Optional[str] = None,
    cache_region: Optional[str] = None,
    cache_account_hash: Optional[str] = None,
) -> Optional[int]:
    """Get release year by TMDB ID.

    Checks the cache first; on a miss, queries TMDB directly and writes the
    detail plus external IDs back to the cache (best-effort).

    :param tmdb_id: TMDB numeric ID for the title.
    :param kind: Content kind, "movie" or "tv".
    :returns: Release year, or None when TMDB is unavailable, has no match,
        or the date field is missing/unparseable.
    """
    # Check cache first
    if title_cacher and cache_title_id:
        cached = title_cacher.get_cached_provider("tmdb", cache_title_id, kind, cache_region, cache_account_hash)
        if cached and cached.get("detail"):
            detail = cached["detail"]
            # TMDB uses "release_date" for movies and "first_air_date" for TV.
            date = detail.get("release_date") or detail.get("first_air_date")
            # Year is parsed from the first 4 characters of the ISO date.
            if date and len(date) >= 4 and date[:4].isdigit():
                year = int(date[:4])
                log.debug("Using cached TMDB year: %d", year)
                return year

    tmdb = get_provider("tmdb")
    if not tmdb:
        return None
    result = tmdb.get_by_id(tmdb_id, kind)
    if not result:
        return None

    # Cache if possible
    if title_cacher and cache_title_id and result.raw:
        try:
            # External IDs are stored alongside the detail so later
            # fetch_external_ids() calls can be served from cache too.
            ext_ids = tmdb.get_external_ids(tmdb_id, kind)
            title_cacher.cache_provider(
                "tmdb",
                cache_title_id,
                {"detail": result.raw, "external_ids": _external_ids_to_dict(ext_ids)},
                kind,
                cache_region,
                cache_account_hash,
            )
        except Exception as exc:
            # Caching is best-effort; the lookup result is still returned.
            log.debug("Failed to cache TMDB data: %s", exc)

    return result.year
|
||||
|
||||
|
||||
def fetch_external_ids(
    tmdb_id: int,
    kind: str,
    title_cacher: Optional[TitleCacher] = None,
    cache_title_id: Optional[str] = None,
    cache_region: Optional[str] = None,
    cache_account_hash: Optional[str] = None,
) -> ExternalIds:
    """Get external IDs by TMDB ID.

    :param tmdb_id: TMDB numeric ID for the title.
    :param kind: Content kind, "movie" or "tv".
    :returns: ExternalIds; an empty ExternalIds (all fields None) when TMDB
        is unavailable. Never returns None.
    """
    # Check cache first
    if title_cacher and cache_title_id:
        cached = title_cacher.get_cached_provider("tmdb", cache_title_id, kind, cache_region, cache_account_hash)
        if cached and cached.get("external_ids"):
            log.debug("Using cached TMDB external IDs")
            raw = cached["external_ids"]
            # tmdb_id/tmdb_kind come from the arguments, not the cache payload.
            return ExternalIds(
                imdb_id=raw.get("imdb_id"),
                tmdb_id=tmdb_id,
                tmdb_kind=kind,
                tvdb_id=raw.get("tvdb_id"),
            )

    tmdb = get_provider("tmdb")
    if not tmdb:
        # No TMDB credentials configured — return an empty ID set.
        return ExternalIds()
    ext = tmdb.get_external_ids(tmdb_id, kind)

    # Cache if possible
    if title_cacher and cache_title_id:
        try:
            detail = None
            # The cache entry pairs external IDs with the title detail, so a
            # detail fetch is required before anything can be written back.
            result = tmdb.get_by_id(tmdb_id, kind)
            if result and result.raw:
                detail = result.raw
            if detail:
                title_cacher.cache_provider(
                    "tmdb",
                    cache_title_id,
                    {"detail": detail, "external_ids": _external_ids_to_dict(ext)},
                    kind,
                    cache_region,
                    cache_account_hash,
                )
        except Exception as exc:
            # Caching is best-effort; the fetched IDs are still returned.
            log.debug("Failed to cache TMDB data: %s", exc)

    return ext
|
||||
|
||||
|
||||
# -- Internal helpers --
|
||||
|
||||
|
||||
# Provider authority ranking for tie-breaking (lower index = more authoritative)
|
||||
_ENRICHMENT_PROVIDERS = ("tmdb", "simkl")
|
||||
_ENRICHMENT_AUTHORITY: dict[str, int] = {name: i for i, name in enumerate(_ENRICHMENT_PROVIDERS)}
|
||||
|
||||
|
||||
def enrich_ids(result: MetadataResult) -> None:
    """Enrich a MetadataResult by cross-referencing IMDB ID with available providers.

    Queries all available providers, cross-validates tmdb_id as anchor.
    If a provider returns a different tmdb_id than the authoritative source,
    ALL of that provider's data is dropped (likely resolved to wrong title).

    Mutates ``result.external_ids`` in place (fills only missing fields);
    returns nothing.
    """
    ids = result.external_ids
    if not ids.imdb_id:
        # Nothing to cross-reference without an IMDB ID to use as anchor.
        return
    if ids.tmdb_id and ids.tvdb_id:
        return  # already have everything

    kind = result.kind or "movie"

    # Step 1: Collect enrichment results from all available providers
    enrichments: list[tuple[str, ExternalIds]] = []
    for provider_name in _ENRICHMENT_PROVIDERS:
        p = get_provider(provider_name)
        if not p:
            continue
        try:
            # find_by_imdb_id only exists on the enrichment-capable providers
            # listed in _ENRICHMENT_PROVIDERS, hence the type-ignore.
            enriched = p.find_by_imdb_id(ids.imdb_id, kind)  # type: ignore[union-attr]
        except Exception as exc:
            # Enrichment is best-effort; a failing provider is simply skipped.
            log.debug("Enrichment via %s failed: %s", provider_name, exc)
            continue
        if enriched:
            enrichments.append((provider_name, enriched))

    if not enrichments:
        return

    # Step 2: Cross-validate using tmdb_id as anchor — drop providers that disagree
    validated = _validate_enrichments(enrichments)

    # Step 3: Merge validated data (fill gaps only)
    for _provider_name, ext in validated:
        if not ids.tmdb_id and ext.tmdb_id:
            ids.tmdb_id = ext.tmdb_id
            ids.tmdb_kind = ext.tmdb_kind or kind
        if not ids.tvdb_id and ext.tvdb_id:
            ids.tvdb_id = ext.tvdb_id
|
||||
|
||||
|
||||
def _validate_enrichments(
    enrichments: list[tuple[str, ExternalIds]],
) -> list[tuple[str, ExternalIds]]:
    """Drop providers whose tmdb_id conflicts with the authoritative value.

    If providers disagree on tmdb_id, the more authoritative source wins
    and ALL data from disagreeing providers is discarded (different tmdb_id
    means the provider likely resolved to a different title entirely).
    """
    from collections import Counter

    # Map each voting provider to the tmdb_id it reported.
    votes = {name: ext.tmdb_id for name, ext in enrichments if ext.tmdb_id is not None}

    if len(set(votes.values())) <= 1:
        # Unanimous, a single voter, or no voters at all — nothing to arbitrate.
        return enrichments

    # Determine the authoritative tmdb_id: majority wins; on a full split,
    # defer to the highest-authority provider.
    winner, winner_count = Counter(votes.values()).most_common(1)[0]
    if winner_count > 1:
        anchor = winner
    else:
        ranked = sorted(votes, key=lambda name: _ENRICHMENT_AUTHORITY.get(name, 99))
        anchor = votes[ranked[0]]

    # Keep non-voters and agreeing providers; drop everything else.
    kept: list[tuple[str, ExternalIds]] = []
    for name, ext in enrichments:
        if ext.tmdb_id is None or ext.tmdb_id == anchor:
            kept.append((name, ext))
        else:
            log.debug(
                "Dropping %s enrichment data: tmdb_id %s conflicts with "
                "authoritative value %s (likely resolved to wrong title)",
                name,
                ext.tmdb_id,
                anchor,
            )

    return kept
|
||||
|
||||
|
||||
def _external_ids_to_dict(ext: ExternalIds) -> dict:
    """Serialize the truthy fields of *ext* into a plain dict for caching."""
    pairs = (
        ("imdb_id", ext.imdb_id),
        ("tmdb_id", ext.tmdb_id),
        ("tmdb_kind", ext.tmdb_kind),
        ("tvdb_id", ext.tvdb_id),
    )
    # Falsy values (None, 0, "") are omitted, matching truthiness checks.
    return {key: value for key, value in pairs if value}
|
||||
|
||||
|
||||
def _cached_to_result(cached: dict, provider_name: str, kind: str) -> Optional[MetadataResult]:
    """Convert a cached provider dict back to a MetadataResult.

    Each provider caches a differently-shaped payload, so reconstruction is
    provider-specific. Returns None for unknown providers or unparsable data.
    """
    if provider_name == "tmdb":
        # TMDB cache entries have the shape {"detail": ..., "external_ids": ...}.
        detail = cached.get("detail", {})
        ext_raw = cached.get("external_ids", {})
        title = detail.get("title") or detail.get("name")
        date = detail.get("release_date") or detail.get("first_air_date")
        # Year is the first 4 characters of the ISO date, when parseable.
        year = int(date[:4]) if date and len(date) >= 4 and date[:4].isdigit() else None
        tmdb_id = detail.get("id")
        return MetadataResult(
            title=title,
            year=year,
            kind=kind,
            external_ids=ExternalIds(
                imdb_id=ext_raw.get("imdb_id"),
                tmdb_id=tmdb_id,
                tmdb_kind=kind,
                tvdb_id=ext_raw.get("tvdb_id"),
            ),
            source="tmdb",
            raw=cached,
        )
    elif provider_name == "simkl":
        # SIMKL payloads nest the title under "show" (TV) or "movie".
        response = cached.get("response", cached)
        if response.get("type") == "episode" and "show" in response:
            info = response["show"]
        elif response.get("type") == "movie" and "movie" in response:
            info = response["movie"]
        else:
            return None
        ids = info.get("ids", {})
        # NOTE(review): this tries tmdbtv before tmdb regardless of kind,
        # unlike SimklProvider._parse_response which picks by content type —
        # confirm the fallback order is intentional for cached movie entries.
        tmdb_id = ids.get("tmdbtv") or ids.get("tmdb") or ids.get("moviedb")
        if tmdb_id:
            tmdb_id = int(tmdb_id)
        return MetadataResult(
            title=info.get("title"),
            year=info.get("year"),
            kind=kind,
            external_ids=ExternalIds(
                imdb_id=ids.get("imdb"),
                tmdb_id=tmdb_id,
                tmdb_kind=kind,
                tvdb_id=ids.get("tvdb"),
            ),
            source="simkl",
            raw=cached,
        )
    elif provider_name == "imdbapi":
        title = cached.get("primaryTitle") or cached.get("originalTitle")
        year = cached.get("startYear")
        imdb_id = cached.get("id")
        # Restore enriched IDs that were saved alongside the raw data
        enriched = cached.get("_enriched_ids", {})
        return MetadataResult(
            title=title,
            year=year,
            kind=kind,
            external_ids=ExternalIds(
                imdb_id=imdb_id,
                tmdb_id=enriched.get("tmdb_id"),
                tmdb_kind=enriched.get("tmdb_kind"),
                tvdb_id=enriched.get("tvdb_id"),
            ),
            source="imdbapi",
            raw=cached,
        )
    # Unknown provider name — nothing we know how to reconstruct.
    return None
|
||||
|
||||
|
||||
__all__ = [
|
||||
"ALL_PROVIDERS",
|
||||
"ExternalIds",
|
||||
"MetadataProvider",
|
||||
"MetadataResult",
|
||||
"enrich_ids",
|
||||
"fetch_external_ids",
|
||||
"fuzzy_match",
|
||||
"get_available_providers",
|
||||
"get_provider",
|
||||
"get_title_by_id",
|
||||
"get_year_by_id",
|
||||
"search_metadata",
|
||||
]
|
||||
97
unshackle/core/providers/_base.py
Normal file
97
unshackle/core/providers/_base.py
Normal file
@@ -0,0 +1,97 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from difflib import SequenceMatcher
|
||||
from typing import Optional, Union
|
||||
|
||||
import requests
|
||||
from requests.adapters import HTTPAdapter, Retry
|
||||
|
||||
log = logging.getLogger("METADATA")
|
||||
|
||||
HEADERS = {"User-Agent": "unshackle-tags/1.0"}
|
||||
|
||||
STRIP_RE = re.compile(r"[^a-z0-9]+", re.I)
|
||||
YEAR_RE = re.compile(r"\s*\(?[12][0-9]{3}\)?$")
|
||||
|
||||
|
||||
@dataclass
class ExternalIds:
    """Normalized external IDs across providers."""

    imdb_id: Optional[str] = None  # IMDB ID string, e.g. "tt1375666"
    tmdb_id: Optional[int] = None  # TMDB numeric ID
    tmdb_kind: Optional[str] = None  # "movie" or "tv"
    tvdb_id: Optional[int] = None  # TVDB numeric ID
|
||||
|
||||
|
||||
@dataclass
class MetadataResult:
    """Unified metadata result from any provider."""

    title: Optional[str] = None  # resolved title name
    year: Optional[int] = None  # release / first-air year
    kind: Optional[str] = None  # "movie" or "tv"
    external_ids: ExternalIds = field(default_factory=ExternalIds)
    source: str = ""  # provider name, e.g. "tmdb", "simkl", "imdbapi"
    raw: Optional[dict] = None  # original API response for caching
|
||||
|
||||
|
||||
class MetadataProvider(metaclass=ABCMeta):
    """Abstract base for metadata providers."""

    NAME: str = ""  # short lowercase identifier, e.g. "tmdb"
    REQUIRES_KEY: bool = True  # whether the provider needs configured credentials

    def __init__(self) -> None:
        self.log = logging.getLogger(f"METADATA.{self.NAME.upper()}")
        # The HTTP session is created lazily on first use (see `session`).
        self._session: Optional[requests.Session] = None

    @property
    def session(self) -> requests.Session:
        """Lazily-built requests session with retries on transient HTTP errors."""
        if self._session is None:
            self._session = requests.Session()
            self._session.headers.update(HEADERS)
            # Retry up to 3 times with exponential backoff on rate limits
            # and server errors, for both GET and POST.
            retry = Retry(
                total=3,
                backoff_factor=1,
                status_forcelist=[429, 500, 502, 503, 504],
                allowed_methods=["GET", "POST"],
            )
            adapter = HTTPAdapter(max_retries=retry)
            self._session.mount("https://", adapter)
            self._session.mount("http://", adapter)
        return self._session

    @abstractmethod
    def is_available(self) -> bool:
        """Return True if this provider has the credentials/keys it needs."""

    @abstractmethod
    def search(self, title: str, year: Optional[int], kind: str) -> Optional[MetadataResult]:
        """Search for a title and return metadata, or None on failure/no match."""

    @abstractmethod
    def get_by_id(self, provider_id: Union[int, str], kind: str) -> Optional[MetadataResult]:
        """Fetch metadata by this provider's native ID."""

    @abstractmethod
    def get_external_ids(self, provider_id: Union[int, str], kind: str) -> ExternalIds:
        """Fetch external IDs for a title by this provider's native ID."""
|
||||
|
||||
|
||||
def _clean(s: str) -> str:
    """Lower-case *s* and strip all non-alphanumeric characters (for fuzzy comparison)."""
    return STRIP_RE.sub("", s).lower()
|
||||
|
||||
|
||||
def _strip_year(s: str) -> str:
    """Remove a trailing 4-digit year (optionally parenthesized) from a title string."""
    return YEAR_RE.sub("", s).strip()
|
||||
|
||||
|
||||
def fuzzy_match(a: str, b: str, threshold: float = 0.8) -> bool:
    """Return True if ``a`` and ``b`` are a close match.

    Both strings are normalized (lower-cased, non-alphanumerics stripped)
    before computing a SequenceMatcher similarity ratio in [0, 1].
    """
    ratio = SequenceMatcher(None, _clean(a), _clean(b)).ratio()
    return ratio >= threshold
|
||||
123
unshackle/core/providers/imdbapi.py
Normal file
123
unshackle/core/providers/imdbapi.py
Normal file
@@ -0,0 +1,123 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from difflib import SequenceMatcher
|
||||
from typing import Optional, Union
|
||||
|
||||
import requests
|
||||
|
||||
from unshackle.core.providers._base import ExternalIds, MetadataProvider, MetadataResult, _clean, fuzzy_match
|
||||
|
||||
# Mapping from our kind ("movie"/"tv") to imdbapi.dev title types
|
||||
KIND_TO_TYPES: dict[str, list[str]] = {
|
||||
"movie": ["movie"],
|
||||
"tv": ["tvSeries", "tvMiniSeries"],
|
||||
}
|
||||
|
||||
|
||||
class IMDBApiProvider(MetadataProvider):
    """IMDb metadata provider using imdbapi.dev (free, no API key)."""

    NAME = "imdbapi"
    REQUIRES_KEY = False
    BASE_URL = "https://api.imdbapi.dev"

    def is_available(self) -> bool:
        """Always available — imdbapi.dev requires no API key."""
        return True  # no key needed

    def search(self, title: str, year: Optional[int], kind: str) -> Optional[MetadataResult]:
        """Search imdbapi.dev for *title* and return the best fuzzy match.

        :param title: Title to look up.
        :param year: Optional release year; candidates more than 1 year off
            are never selected as the best match.
        :param kind: "movie" or "tv", mapped to imdbapi.dev title types.
        :returns: MetadataResult with only the IMDB ID populated, or None.
        """
        self.log.debug("Searching IMDBApi for %r (%s, %s)", title, kind, year)

        try:
            params: dict[str, str | int] = {"query": title, "limit": 20}
            r = self.session.get(
                f"{self.BASE_URL}/search/titles",
                params=params,
                timeout=30,
            )
            r.raise_for_status()
            data = r.json()
        except (requests.RequestException, ValueError) as exc:
            self.log.debug("IMDBApi search failed: %s", exc)
            return None

        results = data.get("titles") or data.get("results") or []
        if not results:
            self.log.debug("IMDBApi returned no results for %r", title)
            return None

        # Filter by type if possible. (Loop variable renamed: the original
        # comprehension's `r` shadowed the HTTP response object above.)
        type_filter = KIND_TO_TYPES.get(kind, [])
        filtered = [item for item in results if item.get("type") in type_filter] if type_filter else results
        candidates = filtered if filtered else results

        # Find best fuzzy match, optionally filtered by year
        best_match: Optional[dict] = None
        best_ratio = 0.0

        for candidate in candidates:
            primary = candidate.get("primaryTitle") or ""
            original = candidate.get("originalTitle") or ""

            for name in [primary, original]:
                if not name:
                    continue
                ratio = SequenceMatcher(None, _clean(title), _clean(name)).ratio()
                if ratio > best_ratio:
                    # If year provided, prefer matches within 1 year.
                    # NOTE: assumes startYear is an int — TODO confirm the API
                    # never returns it as a string.
                    candidate_year = candidate.get("startYear")
                    if year and candidate_year and abs(year - candidate_year) > 1:
                        continue
                    best_ratio = ratio
                    best_match = candidate

        if not best_match:
            self.log.debug("No matching result found in IMDBApi for %r", title)
            return None

        result_title = best_match.get("primaryTitle") or best_match.get("originalTitle")
        if not result_title or not fuzzy_match(result_title, title):
            self.log.debug("IMDBApi title mismatch: searched %r, got %r", title, result_title)
            return None

        imdb_id = best_match.get("id")
        result_year = best_match.get("startYear")

        self.log.debug("IMDBApi -> %s (ID %s)", result_title, imdb_id)

        return MetadataResult(
            title=result_title,
            year=result_year,
            kind=kind,
            external_ids=ExternalIds(imdb_id=imdb_id),
            source="imdbapi",
            raw=best_match,
        )

    def get_by_id(self, provider_id: Union[int, str], kind: str) -> Optional[MetadataResult]:
        """Fetch metadata by IMDB ID (e.g. 'tt1375666')."""
        imdb_id = str(provider_id)
        self.log.debug("Fetching IMDBApi title %s", imdb_id)

        try:
            r = self.session.get(f"{self.BASE_URL}/titles/{imdb_id}", timeout=30)
            r.raise_for_status()
            data = r.json()
        except (requests.RequestException, ValueError) as exc:
            self.log.debug("IMDBApi get_by_id failed: %s", exc)
            return None

        title = data.get("primaryTitle") or data.get("originalTitle")
        result_year = data.get("startYear")

        return MetadataResult(
            title=title,
            year=result_year,
            kind=kind,
            external_ids=ExternalIds(imdb_id=data.get("id")),
            source="imdbapi",
            raw=data,
        )

    def get_external_ids(self, provider_id: Union[int, str], kind: str) -> ExternalIds:
        """Return external IDs. For IMDB, the provider_id IS the IMDB ID."""
        return ExternalIds(imdb_id=str(provider_id))
|
||||
172
unshackle/core/providers/simkl.py
Normal file
172
unshackle/core/providers/simkl.py
Normal file
@@ -0,0 +1,172 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional, Union
|
||||
|
||||
import requests
|
||||
|
||||
from unshackle.core.config import config
|
||||
from unshackle.core.providers._base import ExternalIds, MetadataProvider, MetadataResult, fuzzy_match
|
||||
|
||||
|
||||
class SimklProvider(MetadataProvider):
    """SIMKL metadata provider (filename-based search)."""

    NAME = "simkl"
    REQUIRES_KEY = True
    BASE_URL = "https://api.simkl.com"

    def is_available(self) -> bool:
        """Available only when a SIMKL client ID is configured."""
        return bool(config.simkl_client_id)

    def search(self, title: str, year: Optional[int], kind: str) -> Optional[MetadataResult]:
        """Search SIMKL by synthesizing a plausible release filename.

        SIMKL's /search/file endpoint identifies titles from filenames, so a
        fake filename is built from the title/year plus a kind-specific suffix.
        """
        self.log.debug("Searching Simkl for %r (%s, %s)", title, kind, year)

        # Construct appropriate filename based on type
        filename = f"{title}"
        if year:
            filename = f"{title} {year}"
        if kind == "tv":
            filename += " S01E01.mkv"
        else:
            filename += " 2160p.mkv"

        try:
            headers = {"simkl-api-key": config.simkl_client_id}
            resp = self.session.post(
                f"{self.BASE_URL}/search/file", json={"file": filename}, headers=headers, timeout=30
            )
            resp.raise_for_status()
            data = resp.json()
            self.log.debug("Simkl API response received")
        except (requests.RequestException, ValueError) as exc:
            self.log.debug("Simkl search failed: %s", exc)
            return None

        # Handle case where SIMKL returns empty list (no results)
        if isinstance(data, list):
            self.log.debug("Simkl returned list (no matches) for %r", filename)
            return None

        return self._parse_response(data, title, year, kind)

    def get_by_id(self, provider_id: Union[int, str], kind: str) -> Optional[MetadataResult]:
        return None  # SIMKL has no direct ID lookup used here

    def get_external_ids(self, provider_id: Union[int, str], kind: str) -> ExternalIds:
        return ExternalIds()  # IDs come from search() response

    def find_by_imdb_id(self, imdb_id: str, kind: str) -> Optional[ExternalIds]:
        """Look up TMDB/TVDB IDs from an IMDB ID using SIMKL's /search/id and detail endpoints."""
        self.log.debug("Looking up IMDB ID %s on SIMKL", imdb_id)
        headers = {"simkl-api-key": config.simkl_client_id}

        try:
            r = self.session.get(f"{self.BASE_URL}/search/id", params={"imdb": imdb_id}, headers=headers, timeout=30)
            r.raise_for_status()
            data = r.json()
        except (requests.RequestException, ValueError) as exc:
            self.log.debug("SIMKL search/id failed: %s", exc)
            return None

        if not isinstance(data, list) or not data:
            self.log.debug("No SIMKL results for IMDB ID %s", imdb_id)
            return None

        # Only the first hit is considered — presumably SIMKL orders results
        # by relevance; confirm against the API docs.
        entry = data[0]
        simkl_id = entry.get("ids", {}).get("simkl")
        if not simkl_id:
            return None

        # Map SIMKL type to endpoint
        simkl_type = entry.get("type", "")
        endpoint = "tv" if simkl_type in ("tv", "anime") else "movies"

        # Fetch full details to get cross-referenced IDs
        try:
            r2 = self.session.get(
                f"{self.BASE_URL}/{endpoint}/{simkl_id}",
                params={"extended": "full"},
                headers=headers,
                timeout=30,
            )
            r2.raise_for_status()
            detail = r2.json()
        except (requests.RequestException, ValueError) as exc:
            self.log.debug("SIMKL detail fetch failed: %s", exc)
            return None

        ids = detail.get("ids", {})
        tmdb_id: Optional[int] = None
        raw_tmdb = ids.get("tmdb")
        if raw_tmdb:
            tmdb_id = int(raw_tmdb)

        tvdb_id: Optional[int] = None
        raw_tvdb = ids.get("tvdb")
        if raw_tvdb:
            tvdb_id = int(raw_tvdb)

        self.log.debug("SIMKL find -> TMDB %s, TVDB %s for IMDB %s", tmdb_id, tvdb_id, imdb_id)

        return ExternalIds(
            imdb_id=imdb_id,
            tmdb_id=tmdb_id,
            tmdb_kind=kind,
            tvdb_id=tvdb_id,
        )

    def _parse_response(
        self, data: dict, search_title: str, search_year: Optional[int], kind: str
    ) -> Optional[MetadataResult]:
        """Parse a SIMKL response into a MetadataResult."""
        # The title payload is nested under "show" (episode hit) or "movie".
        if data.get("type") == "episode" and "show" in data:
            info = data["show"]
            content_type = "tv"
        elif data.get("type") == "movie" and "movie" in data:
            info = data["movie"]
            content_type = "movie"
        else:
            # Unknown/unsupported payload shape.
            return None

        result_title = info.get("title")
        result_year = info.get("year")

        # Verify title matches
        if not result_title or not fuzzy_match(result_title, search_title):
            self.log.debug("Simkl title mismatch: searched %r, got %r", search_title, result_title)
            return None

        # Verify year if provided (allow 1 year difference)
        if search_year and result_year and abs(search_year - result_year) > 1:
            self.log.debug("Simkl year mismatch: searched %d, got %d", search_year, result_year)
            return None

        ids = info.get("ids", {})
        tmdb_id: Optional[int] = None
        # SIMKL keys the TMDB ID differently for TV ("tmdbtv") vs movies.
        if content_type == "tv":
            raw_tmdb = ids.get("tmdbtv")
        else:
            raw_tmdb = ids.get("tmdb") or ids.get("moviedb")
        if raw_tmdb:
            tmdb_id = int(raw_tmdb)

        tvdb_id: Optional[int] = None
        raw_tvdb = ids.get("tvdb")
        if raw_tvdb:
            tvdb_id = int(raw_tvdb)

        self.log.debug("Simkl -> %s (TMDB ID %s)", result_title, tmdb_id)

        return MetadataResult(
            title=result_title,
            year=result_year,
            kind=kind,
            external_ids=ExternalIds(
                imdb_id=ids.get("imdb"),
                tmdb_id=tmdb_id,
                tmdb_kind=kind,
                tvdb_id=tvdb_id,
            ),
            source="simkl",
            raw=data,
        )
|
||||
199
unshackle/core/providers/tmdb.py
Normal file
199
unshackle/core/providers/tmdb.py
Normal file
@@ -0,0 +1,199 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from difflib import SequenceMatcher
|
||||
from typing import Optional, Union
|
||||
|
||||
import requests
|
||||
|
||||
from unshackle.core.config import config
|
||||
from unshackle.core.providers._base import ExternalIds, MetadataProvider, MetadataResult, _clean, _strip_year
|
||||
|
||||
|
||||
class TMDBProvider(MetadataProvider):
|
||||
"""TMDB (The Movie Database) metadata provider."""
|
||||
|
||||
NAME = "tmdb"
|
||||
REQUIRES_KEY = True
|
||||
BASE_URL = "https://api.themoviedb.org/3"
|
||||
|
||||
def is_available(self) -> bool:
    """Available only when a TMDB API key is configured."""
    return bool(config.tmdb_api_key)
|
||||
|
||||
@property
def _api_key(self) -> str:
    """The configured TMDB API key (read fresh from config on each access)."""
    return config.tmdb_api_key
|
||||
|
||||
def search(self, title: str, year: Optional[int], kind: str) -> Optional[MetadataResult]:
    """Search TMDB for *title* and return the best fuzzy-matched result.

    :param title: Title to search (a trailing year is stripped first).
    :param year: Optional release year, sent as the kind-appropriate filter.
    :param kind: "movie" or "tv" — selects the TMDB search endpoint.
    :returns: MetadataResult including detail + external IDs, or None.
    """
    search_title = _strip_year(title)
    self.log.debug("Searching TMDB for %r (%s, %s)", search_title, kind, year)

    params: dict[str, str | int] = {"api_key": self._api_key, "query": search_title}
    if year is not None:
        # TMDB names the year filter differently per endpoint.
        params["year" if kind == "movie" else "first_air_date_year"] = year

    try:
        r = self.session.get(f"{self.BASE_URL}/search/{kind}", params=params, timeout=30)
        r.raise_for_status()
        results = r.json().get("results") or []
        self.log.debug("TMDB returned %d results", len(results))
        if not results:
            return None
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a malformed JSON body from r.json(); the other
        # providers already catch it, but it previously escaped this handler.
        self.log.warning("Failed to search TMDB for %s: %s", title, exc)
        return None

    # Pick the result whose (original) title/name best matches the query.
    best_ratio = 0.0
    best_id: Optional[int] = None
    best_title: Optional[str] = None
    for result in results:
        candidates = [
            result.get("title"),
            result.get("name"),
            result.get("original_title"),
            result.get("original_name"),
        ]
        candidates = [c for c in candidates if c]

        for candidate in candidates:
            ratio = SequenceMatcher(None, _clean(search_title), _clean(candidate)).ratio()
            if ratio > best_ratio:
                best_ratio = ratio
                best_id = result.get("id")
                best_title = candidate

    self.log.debug("Best candidate ratio %.2f for %r (ID %s)", best_ratio, best_title, best_id)

    if best_id is None:
        # No candidate had a usable name — fall back to TMDB's first result.
        first = results[0]
        best_id = first.get("id")
        best_title = first.get("title") or first.get("name")

    if best_id is None:
        return None

    # Fetch full detail for caching
    detail = self._fetch_detail(best_id, kind)
    ext_raw = self._fetch_external_ids_raw(best_id, kind)

    # Year is the first 4 characters of the ISO release/first-air date.
    date = (detail or {}).get("release_date") or (detail or {}).get("first_air_date")
    result_year = int(date[:4]) if date and len(date) >= 4 and date[:4].isdigit() else None

    ext = ExternalIds(
        imdb_id=ext_raw.get("imdb_id") if ext_raw else None,
        tmdb_id=best_id,
        tmdb_kind=kind,
        tvdb_id=ext_raw.get("tvdb_id") if ext_raw else None,
    )

    return MetadataResult(
        title=best_title,
        year=result_year,
        kind=kind,
        external_ids=ext,
        source="tmdb",
        raw={"detail": detail or {}, "external_ids": ext_raw or {}},
    )
|
||||
|
||||
def get_by_id(self, provider_id: Union[int, str], kind: str) -> Optional[MetadataResult]:
    """Fetch a TMDB title directly by its numeric ID.

    Returns a MetadataResult built from the detail payload, or None when
    the detail request fails or comes back empty.
    """
    tmdb_id = int(provider_id)
    detail = self._fetch_detail(tmdb_id, kind)
    if not detail:
        return None

    # Movies expose "title"/"release_date"; TV shows expose "name"/"first_air_date".
    display_title = detail.get("title") or detail.get("name")
    release = detail.get("release_date") or detail.get("first_air_date")
    release_year: Optional[int] = None
    if release and len(release) >= 4 and release[:4].isdigit():
        release_year = int(release[:4])

    return MetadataResult(
        title=display_title,
        year=release_year,
        kind=kind,
        external_ids=ExternalIds(tmdb_id=tmdb_id, tmdb_kind=kind),
        source="tmdb",
        raw=detail,
    )
||||
def get_external_ids(self, provider_id: Union[int, str], kind: str) -> ExternalIds:
    """Return the cross-service IDs (IMDB/TVDB) that TMDB knows for a title.

    Falls back to an ExternalIds carrying only the TMDB ID/kind when the
    external-IDs lookup fails or returns nothing.
    """
    tmdb_id = int(provider_id)
    raw = self._fetch_external_ids_raw(tmdb_id, kind)
    if not raw:
        return ExternalIds(tmdb_id=tmdb_id, tmdb_kind=kind)

    return ExternalIds(
        imdb_id=raw.get("imdb_id"),
        tmdb_id=tmdb_id,
        tmdb_kind=kind,
        tvdb_id=raw.get("tvdb_id"),
    )
||||
def find_by_imdb_id(self, imdb_id: str, kind: str) -> Optional[ExternalIds]:
    """Look up TMDB/TVDB IDs from an IMDB ID using TMDB's /find endpoint.

    Args:
        imdb_id: IMDB identifier (e.g. "tt0111161").
        kind: Preferred result type, "movie" or "tv". If no result of that
            kind exists, the other type is tried as a fallback.

    Returns:
        ExternalIds with the resolved TMDB ID (and TVDB ID when available),
        or None when the request fails or yields no results.
    """
    self.log.debug("Looking up IMDB ID %s on TMDB", imdb_id)
    try:
        r = self.session.get(
            f"{self.BASE_URL}/find/{imdb_id}",
            params={"api_key": self._api_key, "external_source": "imdb_id"},
            timeout=30,
        )
        r.raise_for_status()
        data = r.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError also catches malformed JSON bodies: on requests < 2.27
        # r.json() raises a plain JSONDecodeError, which is not a
        # RequestException and would otherwise escape this handler.
        self.log.debug("TMDB find by IMDB ID failed: %s", exc)
        return None

    # Check movie_results or tv_results based on kind
    if kind == "movie":
        results = data.get("movie_results") or []
    else:
        results = data.get("tv_results") or []

    if not results:
        # Try the other type as fallback
        fallback_key = "tv_results" if kind == "movie" else "movie_results"
        results = data.get(fallback_key) or []
        if results:
            # Flip the kind so the detail/external-ids fetch hits the right endpoint.
            kind = "tv" if kind == "movie" else "movie"

    if not results:
        self.log.debug("No TMDB results found for IMDB ID %s", imdb_id)
        return None

    # TMDB's /find results are ordered by relevance; take the first match.
    match = results[0]
    tmdb_id = match.get("id")
    if not tmdb_id:
        return None

    self.log.debug("TMDB find -> ID %s (%s) for IMDB %s", tmdb_id, kind, imdb_id)

    # Now fetch the full external IDs from TMDB to get TVDB etc.
    ext_raw = self._fetch_external_ids_raw(tmdb_id, kind)

    return ExternalIds(
        imdb_id=imdb_id,
        tmdb_id=tmdb_id,
        tmdb_kind=kind,
        tvdb_id=ext_raw.get("tvdb_id") if ext_raw else None,
    )
||||
def _fetch_detail(self, tmdb_id: int, kind: str) -> Optional[dict]:
    """Fetch the full TMDB detail record for a movie ("movie") or show ("tv").

    Returns the decoded JSON payload, or None on any network/HTTP/decode
    failure; failures are logged at debug level only (best-effort lookup).
    """
    try:
        r = self.session.get(
            f"{self.BASE_URL}/{kind}/{tmdb_id}",
            params={"api_key": self._api_key},
            timeout=30,
        )
        r.raise_for_status()
        return r.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError: on requests < 2.27, r.json() raises a plain
        # JSONDecodeError (not a RequestException) for malformed bodies.
        self.log.debug("Failed to fetch TMDB detail: %s", exc)
        return None
||||
def _fetch_external_ids_raw(self, tmdb_id: int, kind: str) -> Optional[dict]:
    """Fetch TMDB's raw external-IDs payload (imdb_id, tvdb_id, ...) for a title.

    Returns the decoded JSON payload, or None on any network/HTTP/decode
    failure; failures are logged at debug level only (best-effort lookup).
    """
    try:
        r = self.session.get(
            f"{self.BASE_URL}/{kind}/{tmdb_id}/external_ids",
            params={"api_key": self._api_key},
            timeout=30,
        )
        r.raise_for_status()
        return r.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError: on requests < 2.27, r.json() raises a plain
        # JSONDecodeError (not a RequestException) for malformed bodies.
        self.log.debug("Failed to fetch TMDB external IDs: %s", exc)
        return None