refactor(providers): extract metadata providers into modular system

- Create `unshackle/core/providers/` package with abstract base class, IMDBApi (free, no key), SIMKL, and TMDB provider implementations - Add consensus-based ID enrichment: cross-references IMDB IDs with TMDB and SIMKL, drops all data from providers that disagree on tmdb_id (likely resolved to wrong title) - Cache enriched IDs alongside raw provider data so they survive cache round-trips - Genericize TitleCacher with `cache_provider()`/`get_cached_provider()` replacing provider-specific methods; respect `--no-cache` flag - Add `--imdb` CLI flag to dl command for direct IMDB ID lookup
2026-02-25 19:02:18 -07:00
parent 42d6ef5765
commit 820db5f179
10 changed files with 1207 additions and 749 deletions
--- a/unshackle/core/providers/imdbapi.py
+++ b/unshackle/core/providers/imdbapi.py
@@ -0,0 +1,123 @@
+from __future__ import annotations
+
+from difflib import SequenceMatcher
+from typing import Optional, Union
+
+import requests
+
+from unshackle.core.providers._base import ExternalIds, MetadataProvider, MetadataResult, _clean, fuzzy_match
+
+# Mapping from our kind ("movie"/"tv") to imdbapi.dev title types
+KIND_TO_TYPES: dict[str, list[str]] = {
+    "movie": ["movie"],
+    "tv": ["tvSeries", "tvMiniSeries"],
+}
+
+
+class IMDBApiProvider(MetadataProvider):
+    """IMDb metadata provider using imdbapi.dev (free, no API key)."""
+
+    NAME = "imdbapi"
+    REQUIRES_KEY = False
+    BASE_URL = "https://api.imdbapi.dev"
+
+    def is_available(self) -> bool:
+        return True  # no key needed
+
+    def search(self, title: str, year: Optional[int], kind: str) -> Optional[MetadataResult]:
+        self.log.debug("Searching IMDBApi for %r (%s, %s)", title, kind, year)
+
+        try:
+            params: dict[str, str | int] = {"query": title, "limit": 20}
+            r = self.session.get(
+                f"{self.BASE_URL}/search/titles",
+                params=params,
+                timeout=30,
+            )
+            r.raise_for_status()
+            data = r.json()
+        except (requests.RequestException, ValueError) as exc:
+            self.log.debug("IMDBApi search failed: %s", exc)
+            return None
+
+        results = data.get("titles") or data.get("results") or []
+        if not results:
+            self.log.debug("IMDBApi returned no results for %r", title)
+            return None
+
+        # Filter by type if possible
+        type_filter = KIND_TO_TYPES.get(kind, [])
+        filtered = [r for r in results if r.get("type") in type_filter] if type_filter else results
+        candidates = filtered if filtered else results
+
+        # Find best fuzzy match, optionally filtered by year
+        best_match: Optional[dict] = None
+        best_ratio = 0.0
+
+        for candidate in candidates:
+            primary = candidate.get("primaryTitle") or ""
+            original = candidate.get("originalTitle") or ""
+
+            for name in [primary, original]:
+                if not name:
+                    continue
+                ratio = SequenceMatcher(None, _clean(title), _clean(name)).ratio()
+                if ratio > best_ratio:
+                    # If year provided, prefer matches within 1 year
+                    candidate_year = candidate.get("startYear")
+                    if year and candidate_year and abs(year - candidate_year) > 1:
+                        continue
+                    best_ratio = ratio
+                    best_match = candidate
+
+        if not best_match:
+            self.log.debug("No matching result found in IMDBApi for %r", title)
+            return None
+
+        result_title = best_match.get("primaryTitle") or best_match.get("originalTitle")
+        if not result_title or not fuzzy_match(result_title, title):
+            self.log.debug("IMDBApi title mismatch: searched %r, got %r", title, result_title)
+            return None
+
+        imdb_id = best_match.get("id")
+        result_year = best_match.get("startYear")
+
+        self.log.debug("IMDBApi -> %s (ID %s)", result_title, imdb_id)
+
+        return MetadataResult(
+            title=result_title,
+            year=result_year,
+            kind=kind,
+            external_ids=ExternalIds(imdb_id=imdb_id),
+            source="imdbapi",
+            raw=best_match,
+        )
+
+    def get_by_id(self, provider_id: Union[int, str], kind: str) -> Optional[MetadataResult]:
+        """Fetch metadata by IMDB ID (e.g. 'tt1375666')."""
+        imdb_id = str(provider_id)
+        self.log.debug("Fetching IMDBApi title %s", imdb_id)
+
+        try:
+            r = self.session.get(f"{self.BASE_URL}/titles/{imdb_id}", timeout=30)
+            r.raise_for_status()
+            data = r.json()
+        except (requests.RequestException, ValueError) as exc:
+            self.log.debug("IMDBApi get_by_id failed: %s", exc)
+            return None
+
+        title = data.get("primaryTitle") or data.get("originalTitle")
+        result_year = data.get("startYear")
+
+        return MetadataResult(
+            title=title,
+            year=result_year,
+            kind=kind,
+            external_ids=ExternalIds(imdb_id=data.get("id")),
+            source="imdbapi",
+            raw=data,
+        )
+
+    def get_external_ids(self, provider_id: Union[int, str], kind: str) -> ExternalIds:
+        """Return external IDs. For IMDB, the provider_id IS the IMDB ID."""
+        return ExternalIds(imdb_id=str(provider_id))