"""Lyrics fetching service using LRCLIB API"""
import hashlib
import logging
import re
from datetime import timedelta
from typing import Any, Dict, Optional

import requests
from django.core.cache import cache
from django.utils import timezone

logger = logging.getLogger(__name__)


def _empty_lyrics_result(not_found: bool, error: Optional[str] = None) -> Dict[str, Any]:
    """Build a result dict that carries no lyrics.

    Args:
        not_found: Value stored under the ``not_found`` key.
        error: Optional error message; the ``error`` key is only present
            when a message is given.

    Returns:
        Dict with the same keys ``LRCLIBClient.get_lyrics`` returns.
    """
    result: Dict[str, Any] = {
        'synced_lyrics': '',
        'plain_lyrics': '',
        'instrumental': False,
        'language': '',
        'not_found': not_found,
    }
    if error is not None:
        result['error'] = error
    return result


class LyricsAPIError(Exception):
    """Exception for lyrics API errors"""


class LRCLIBClient:
    """Client for LRCLIB API (https://lrclib.net/)"""

    DEFAULT_INSTANCE = "https://lrclib.net"
    USER_AGENT = "SoundWave/1.0 (https://github.com/soundwave)"
    TIMEOUT = 10  # seconds

    # Line/word timestamps such as [01:23.45], [1:23.456] or [01:23].
    _TIMESTAMP_RE = re.compile(r'\[\d{1,3}:\d{2}(?:[.:]\d{1,3})?\]')
    # Metadata tags of the form [tag:value], e.g. [ar:Artist].
    _METADATA_RE = re.compile(r'\[[a-z]+:.*?\]')

    def __init__(self, instance_url: Optional[str] = None):
        """Create a client bound to one LRCLIB instance.

        Args:
            instance_url: Base URL of the instance; falls back to
                ``DEFAULT_INSTANCE`` when empty or ``None``. A trailing
                slash is removed.
        """
        self.instance_url = (instance_url or self.DEFAULT_INSTANCE).rstrip('/')
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': self.USER_AGENT,
        })

    def get_lyrics(
        self,
        title: str,
        artist_name: str,
        album_name: str = "",
        duration: int = 0
    ) -> Dict[str, Any]:
        """
        Fetch lyrics from LRCLIB API

        Args:
            title: Track title
            artist_name: Artist name
            album_name: Album name (optional)
            duration: Track duration in seconds

        Returns:
            Dict with keys:
                - synced_lyrics: LRC format lyrics with timestamps
                - plain_lyrics: Plain text lyrics
                - instrumental: Boolean if track is instrumental
                - language: Language code
                - not_found: True when the API answered 404

        Raises:
            LyricsAPIError: On timeout, any other request failure, or a
                response body that is not a JSON object.
        """
        # Build request parameters
        params = {
            'track_name': title,
            'artist_name': artist_name,
            'album_name': album_name,
            'duration': round(duration) if duration else 0,
        }

        # Make request
        api_endpoint = f"{self.instance_url}/api/get"

        try:
            response = self.session.get(
                api_endpoint,
                params=params,
                timeout=self.TIMEOUT
            )

            if response.status_code == 404:
                # No lyrics found
                return _empty_lyrics_result(not_found=True)

            response.raise_for_status()
            data = response.json()
        except requests.exceptions.Timeout as e:
            logger.error("LRCLIB API timeout for %s - %s", title, artist_name)
            raise LyricsAPIError("Request timeout") from e
        except requests.exceptions.RequestException as e:
            logger.error("LRCLIB API error for %s - %s: %s", title, artist_name, e)
            raise LyricsAPIError(f"API request failed: {e}") from e
        except ValueError as e:
            # Depending on the installed requests version a malformed body
            # may surface as a plain ValueError instead of a
            # RequestException; wrap it so callers see one error type.
            logger.error("LRCLIB API error for %s - %s: %s", title, artist_name, e)
            raise LyricsAPIError(f"API request failed: {e}") from e

        if not isinstance(data, dict):
            # .get() below would raise AttributeError on a list/str body.
            logger.error(
                "LRCLIB API error for %s - %s: unexpected response body",
                title, artist_name,
            )
            raise LyricsAPIError("API request failed: unexpected response body")

        # Extract lyrics data (null values are normalised to empty/False)
        synced = data.get('syncedLyrics') or ''
        plain = data.get('plainLyrics') or ''
        instrumental = bool(data.get('instrumental', False))
        language = data.get('lang') or ''

        # If we have synced lyrics but no plain, strip timestamps
        if synced and not plain:
            plain = self._strip_timestamps(synced)

        return {
            'synced_lyrics': synced,
            'plain_lyrics': plain,
            'instrumental': instrumental,
            'language': language,
            'not_found': False,
        }

    @staticmethod
    def _strip_timestamps(synced_lyrics: str) -> str:
        """Strip timestamps from LRC format lyrics

        Removes timestamp tags (with or without a fractional part) and
        ``[tag:value]`` metadata tags, then drops lines that end up empty.

        Args:
            synced_lyrics: Lyrics text in LRC format.

        Returns:
            The remaining text lines, stripped and joined with newlines.
        """
        lines = []
        for line in synced_lyrics.splitlines():
            cleaned = LRCLIBClient._TIMESTAMP_RE.sub('', line)
            cleaned = LRCLIBClient._METADATA_RE.sub('', cleaned).strip()
            if cleaned:
                lines.append(cleaned)
        return '\n'.join(lines)


class LyricsService:
    """Service for fetching and caching lyrics"""

    CACHE_TTL_FOUND = 86400 * 7   # 7 days for successful lookups
    CACHE_TTL_NOT_FOUND = 86400   # 1 day for "no lyrics" answers
    CACHE_TTL_ERROR = 3600        # 1 hour for API failures
    MAX_FETCH_ATTEMPTS = 3        # attempts before a track is skipped

    def __init__(self, lrclib_instance: Optional[str] = None):
        """Create the service.

        Args:
            lrclib_instance: Base URL passed to ``LRCLIBClient``.
        """
        self.client = LRCLIBClient(lrclib_instance)

    def fetch_lyrics(
        self,
        title: str,
        artist_name: str,
        album_name: str = "",
        duration: int = 0,
        use_cache: bool = True
    ) -> Dict[str, Any]:
        """
        Fetch lyrics with caching

        The result is always written to the cache, even when
        ``use_cache`` is False; the flag only controls whether an existing
        entry is read.

        Args:
            title: Track title
            artist_name: Artist name
            album_name: Album name
            duration: Duration in seconds
            use_cache: Whether to use cached results

        Returns:
            Dict with lyrics data. On an API failure the dict has empty
            lyrics, ``not_found`` set to True and an ``error`` message.
        """
        # Create cache key
        cache_key = self._make_cache_key(title, artist_name, album_name, duration)

        # Check cache first
        if use_cache:
            cached = cache.get(cache_key)
            if cached is not None:
                logger.debug("Cache hit for %s - %s", title, artist_name)
                return cached

        # Fetch from API
        logger.info("Fetching lyrics for %s - %s", title, artist_name)
        try:
            result = self.client.get_lyrics(title, artist_name, album_name, duration)
        except LyricsAPIError as e:
            logger.warning("Failed to fetch lyrics: %s", e)
            # Cache the error for a short time to avoid hammering the API
            error_result = _empty_lyrics_result(not_found=True, error=str(e))
            cache.set(cache_key, error_result, self.CACHE_TTL_ERROR)
            return error_result

        # Cache the result (even if not found, to avoid repeated requests)
        if result.get('not_found'):
            cache_timeout = self.CACHE_TTL_NOT_FOUND
        else:
            cache_timeout = self.CACHE_TTL_FOUND
        cache.set(cache_key, result, cache_timeout)
        return result

    @staticmethod
    def _make_cache_key(title: str, artist: str, album: str, duration: int) -> str:
        """Create cache key from track metadata

        Returns:
            ``lyrics:`` followed by the MD5 hex digest of the four values
            joined with ``|``. MD5 only shortens the key here; it is not
            used for security.
        """
        key_str = f"{title}|{artist}|{album}|{duration}"
        return f"lyrics:{hashlib.md5(key_str.encode()).hexdigest()}"

    def fetch_and_store_lyrics(self, audio_obj, force: bool = False):
        """
        Fetch lyrics and store in database

        Args:
            audio_obj: Audio model instance
            force: Force fetch even if already attempted. A forced fetch
                skips both the database cache and the Django cache.

        Returns:
            The ``Lyrics`` row for ``audio_obj``. Failures are recorded in
            its ``last_error`` field rather than raised.
        """
        # Imported here, as in the original, rather than at module level.
        from audio.models_lyrics import Lyrics, LyricsCache

        # Check if already attempted
        existing, _created = Lyrics.objects.get_or_create(audio=audio_obj)

        if (
            not force
            and existing.fetch_attempted
            and existing.fetch_attempts >= self.MAX_FETCH_ATTEMPTS
        ):
            logger.debug(
                "Skipping %s - already attempted %s times",
                audio_obj.title, existing.fetch_attempts,
            )
            return existing

        # A missing duration is treated as 0 instead of raising TypeError.
        duration_rounded = round(audio_obj.duration or 0)

        # Check database cache first
        if not force:
            cache_entry = LyricsCache.objects.filter(
                title=audio_obj.title,
                artist_name=audio_obj.channel_name,
                duration=duration_rounded
            ).first()

            if cache_entry:
                # Use cached data
                existing.synced_lyrics = cache_entry.synced_lyrics
                existing.plain_lyrics = cache_entry.plain_lyrics
                existing.is_instrumental = cache_entry.is_instrumental
                existing.language = cache_entry.language
                existing.source = cache_entry.source
                existing.fetch_attempted = True
                existing.save()

                # Update cache stats
                cache_entry.access_count += 1
                cache_entry.save()

                logger.info("Using cached lyrics for %s", audio_obj.title)
                return existing

        # Fetch from API
        try:
            result = self.fetch_lyrics(
                title=audio_obj.title,
                artist_name=audio_obj.channel_name,
                album_name="",  # YouTube doesn't provide album info
                duration=duration_rounded,
                # A forced refresh must not be answered from the cache.
                use_cache=not force
            )

            error = result.get('error', '')
            if not error:
                # Update lyrics entry. Skipped on API failure so a
                # transient error does not wipe lyrics stored earlier.
                existing.synced_lyrics = result.get('synced_lyrics', '')
                existing.plain_lyrics = result.get('plain_lyrics', '')
                existing.is_instrumental = result.get('instrumental', False)
                existing.language = result.get('language', '')
                existing.source = 'lrclib'
            existing.fetch_attempted = True
            existing.fetch_attempts += 1
            existing.last_error = error
            existing.save()
        except Exception as e:
            # Best-effort boundary: record the failure on the row instead
            # of propagating it to the caller.
            logger.exception("Error fetching lyrics for %s: %s", audio_obj.title, e)
            existing.fetch_attempted = True
            existing.fetch_attempts += 1
            existing.last_error = str(e)
            existing.save()
            return existing

        # Store in cache
        if not result.get('not_found'):
            try:
                LyricsCache.objects.update_or_create(
                    title=audio_obj.title,
                    artist_name=audio_obj.channel_name,
                    album_name="",
                    duration=duration_rounded,
                    defaults={
                        'synced_lyrics': result.get('synced_lyrics', ''),
                        'plain_lyrics': result.get('plain_lyrics', ''),
                        'is_instrumental': result.get('instrumental', False),
                        'language': result.get('language', ''),
                        'source': 'lrclib',
                        'not_found': False,
                    }
                )
            except Exception as e:
                # The Lyrics row is already saved; a cache write failure
                # must not count as a second failed fetch attempt.
                logger.warning(
                    "Could not cache lyrics for %s: %s", audio_obj.title, e
                )

        if not error:
            logger.info("Fetched lyrics for %s", audio_obj.title)
        return existing