feat: Implement title caching system to reduce API calls

- Add configurable title caching with fallback support
- Cache titles for 30 minutes by default, with 24-hour fallback on API failures
- Add --no-cache and --reset-cache CLI flags for cache control
- Implement region-aware caching to handle geo-restricted content
- Use SHA256 hashing for cache keys to handle complex title IDs
- Add cache configuration variables to config system
- Document new caching options in example config

This caching system significantly reduces redundant API calls when debugging
or modifying CLI parameters, improving both performance and reliability.
This commit is contained in:
Andy
2025-08-06 17:08:58 +00:00
parent ead05d08ac
commit f0493292af
5 changed files with 318 additions and 1 deletions

View File

@@ -0,0 +1,240 @@
from __future__ import annotations
import hashlib
import logging
from datetime import datetime, timedelta
from typing import Optional
from unshackle.core.cacher import Cacher
from unshackle.core.config import config
from unshackle.core.titles import Titles_T
class TitleCacher:
"""
Handles caching of Title objects to reduce redundant API calls.
This wrapper provides:
- Region-aware caching to handle geo-restricted content
- Automatic fallback to cached data when API calls fail
- Cache lifetime extension during failures
- Cache hit/miss statistics for debugging
"""
def __init__(self, service_name: str):
self.service_name = service_name
self.log = logging.getLogger(f"{service_name}.TitleCache")
self.cacher = Cacher(service_name)
self.stats = {"hits": 0, "misses": 0, "fallbacks": 0}
def _generate_cache_key(
self, title_id: str, region: Optional[str] = None, account_hash: Optional[str] = None
) -> str:
"""
Generate a unique cache key for title data.
Args:
title_id: The title identifier
region: The region/proxy identifier
account_hash: Hash of account credentials (if applicable)
Returns:
A unique cache key string
"""
# Hash the title_id to handle complex IDs (URLs, dots, special chars)
# This ensures consistent length and filesystem-safe keys
title_hash = hashlib.sha256(title_id.encode()).hexdigest()[:16]
# Start with base key using hash
key_parts = ["titles", title_hash]
# Add region if available
if region:
key_parts.append(region.lower())
# Add account hash if available
if account_hash:
key_parts.append(account_hash[:8]) # Use first 8 chars of hash
# Join with underscores
cache_key = "_".join(key_parts)
# Log the mapping for debugging
self.log.debug(f"Cache key mapping: {title_id} -> {cache_key}")
return cache_key
def get_cached_titles(
self,
title_id: str,
fetch_function,
region: Optional[str] = None,
account_hash: Optional[str] = None,
no_cache: bool = False,
reset_cache: bool = False,
) -> Optional[Titles_T]:
"""
Get titles from cache or fetch from API with fallback support.
Args:
title_id: The title identifier
fetch_function: Function to call to fetch fresh titles
region: The region/proxy identifier
account_hash: Hash of account credentials
no_cache: Bypass cache completely
reset_cache: Clear cache before fetching
Returns:
Titles object (Movies, Series, or Album)
"""
# If caching is globally disabled or no_cache flag is set
if not config.title_cache_enabled or no_cache:
self.log.debug("Cache bypassed, fetching fresh titles")
return fetch_function()
# Generate cache key
cache_key = self._generate_cache_key(title_id, region, account_hash)
# If reset_cache flag is set, clear the cache entry
if reset_cache:
self.log.info(f"Clearing cache for {cache_key}")
cache_path = (config.directories.cache / self.service_name / cache_key).with_suffix(".json")
if cache_path.exists():
cache_path.unlink()
# Try to get from cache
cache = self.cacher.get(cache_key, version=1)
# Check if we have valid cached data
if cache and not cache.expired:
self.stats["hits"] += 1
self.log.debug(f"Cache hit for {title_id} (hits: {self.stats['hits']}, misses: {self.stats['misses']})")
return cache.data
# Cache miss or expired, try to fetch fresh data
self.stats["misses"] += 1
self.log.debug(f"Cache miss for {title_id}, fetching fresh data")
try:
# Attempt to fetch fresh titles
titles = fetch_function()
if titles:
# Successfully fetched, update cache
self.log.debug(f"Successfully fetched titles for {title_id}, updating cache")
cache = self.cacher.get(cache_key, version=1)
cache.set(titles, expiration=datetime.now() + timedelta(seconds=config.title_cache_time))
return titles
except Exception as e:
# API call failed, check if we have fallback cached data
if cache and cache.data:
# We have expired cached data, use it as fallback
current_time = datetime.now()
max_retention_time = cache.expiration + timedelta(
seconds=config.title_cache_max_retention - config.title_cache_time
)
if current_time < max_retention_time:
self.stats["fallbacks"] += 1
self.log.warning(
f"API call failed for {title_id}, using cached data as fallback "
f"(fallbacks: {self.stats['fallbacks']})"
)
self.log.debug(f"Error was: {e}")
# Extend cache lifetime
extended_expiration = current_time + timedelta(minutes=5)
if extended_expiration < max_retention_time:
cache.expiration = extended_expiration
cache.set(cache.data, expiration=extended_expiration)
return cache.data
else:
self.log.error(f"API call failed and cached data for {title_id} exceeded maximum retention time")
# Re-raise the exception if no fallback available
raise
def clear_all_title_cache(self):
"""Clear all title caches for this service."""
cache_dir = config.directories.cache / self.service_name
if cache_dir.exists():
for cache_file in cache_dir.glob("titles_*.json"):
cache_file.unlink()
self.log.info(f"Cleared cache file: {cache_file.name}")
def get_cache_stats(self) -> dict:
"""Get cache statistics."""
total = sum(self.stats.values())
if total > 0:
hit_rate = (self.stats["hits"] / total) * 100
else:
hit_rate = 0
return {
"hits": self.stats["hits"],
"misses": self.stats["misses"],
"fallbacks": self.stats["fallbacks"],
"hit_rate": f"{hit_rate:.1f}%",
}
def get_region_from_proxy(proxy_url: Optional[str]) -> Optional[str]:
"""
Extract region identifier from proxy URL.
Args:
proxy_url: The proxy URL string
Returns:
Region identifier or None
"""
if not proxy_url:
return None
# Try to extract region from common proxy patterns
# e.g., "us123.nordvpn.com", "gb-proxy.example.com"
import re
# Pattern for NordVPN style
nord_match = re.search(r"([a-z]{2})\d+\.nordvpn", proxy_url.lower())
if nord_match:
return nord_match.group(1)
# Pattern for country code at start
cc_match = re.search(r"([a-z]{2})[-_]", proxy_url.lower())
if cc_match:
return cc_match.group(1)
# Pattern for country code subdomain
subdomain_match = re.search(r"://([a-z]{2})\.", proxy_url.lower())
if subdomain_match:
return subdomain_match.group(1)
return None
def get_account_hash(credential) -> Optional[str]:
"""
Generate a hash for account identification.
Args:
credential: Credential object
Returns:
SHA1 hash of the credential or None
"""
if not credential:
return None
# Use existing sha1 property if available
if hasattr(credential, "sha1"):
return credential.sha1
# Otherwise generate hash from username
if hasattr(credential, "username"):
return hashlib.sha1(credential.username.encode()).hexdigest()
return None