"""Celery tasks for automatic lyrics fetching""" from celery import shared_task from django.utils import timezone from datetime import timedelta import logging logger = logging.getLogger(__name__) @shared_task(name="audio.fetch_lyrics_for_audio") def fetch_lyrics_for_audio(audio_youtube_id: str, force: bool = False): """ Fetch lyrics for a single audio track Args: audio_youtube_id: YouTube ID of the audio force: Force fetch even if already attempted """ from audio.models import Audio from audio.lyrics_service import LyricsService try: audio = Audio.objects.get(youtube_id=audio_youtube_id) service = LyricsService() service.fetch_and_store_lyrics(audio, force=force) logger.info(f"Fetched lyrics for {audio.title}") return {"status": "success", "youtube_id": audio_youtube_id} except Audio.DoesNotExist: logger.error(f"Audio not found: {audio_youtube_id}") return {"status": "error", "error": "Audio not found"} except Exception as e: logger.error(f"Error fetching lyrics for {audio_youtube_id}: {e}") return {"status": "error", "error": str(e)} @shared_task(name="audio.fetch_lyrics_batch") def fetch_lyrics_batch(audio_ids: list, delay_seconds: int = 2): """ Fetch lyrics for multiple audio tracks with delay between requests Args: audio_ids: List of YouTube IDs delay_seconds: Delay between requests to avoid rate limiting """ import time from audio.models import Audio from audio.lyrics_service import LyricsService service = LyricsService() results = { 'success': 0, 'failed': 0, 'skipped': 0, } for youtube_id in audio_ids: try: audio = Audio.objects.get(youtube_id=youtube_id) service.fetch_and_store_lyrics(audio, force=False) results['success'] += 1 logger.info(f"Fetched lyrics for {audio.title}") except Audio.DoesNotExist: results['skipped'] += 1 logger.warning(f"Audio not found: {youtube_id}") except Exception as e: results['failed'] += 1 logger.error(f"Error fetching lyrics for {youtube_id}: {e}") # Delay to avoid rate limiting if delay_seconds > 0: time.sleep(delay_seconds) return results @shared_task(name="audio.auto_fetch_lyrics") def auto_fetch_lyrics(limit: int = 50, max_attempts: int = 3): """ Automatically fetch lyrics for audio without lyrics This task should be scheduled to run periodically (e.g., every hour) Args: limit: Maximum number of tracks to process max_attempts: Skip tracks that have been attempted this many times """ from audio.models import Audio from audio.models_lyrics import Lyrics from audio.lyrics_service import LyricsService # Find audio without lyrics or with failed attempts audio_without_lyrics = Audio.objects.filter( downloaded=True ).exclude( lyrics__fetch_attempted=True, lyrics__fetch_attempts__gte=max_attempts )[:limit] if not audio_without_lyrics: logger.info("No audio tracks need lyrics fetching") return {"status": "no_work", "message": "No tracks need lyrics"} service = LyricsService() results = { 'processed': 0, 'success': 0, 'failed': 0, } for audio in audio_without_lyrics: try: lyrics = service.fetch_and_store_lyrics(audio, force=False) results['processed'] += 1 if lyrics.has_lyrics: results['success'] += 1 else: results['failed'] += 1 except Exception as e: logger.error(f"Error in auto-fetch for {audio.title}: {e}") results['failed'] += 1 # Small delay to be nice to the API import time time.sleep(1) logger.info(f"Auto-fetch completed: {results}") return results @shared_task(name="audio.cleanup_lyrics_cache") def cleanup_lyrics_cache(days_old: int = 30): """ Clean up old lyrics cache entries Args: days_old: Remove cache entries older than this many days """ from audio.models_lyrics import LyricsCache from django.utils import timezone from datetime import timedelta cutoff_date = timezone.now() - timedelta(days=days_old) # Delete old not_found entries deleted_count = LyricsCache.objects.filter( not_found=True, cached_date__lt=cutoff_date ).delete()[0] # Delete old unused entries (not accessed in the last N days) deleted_unused = LyricsCache.objects.filter( last_accessed__lt=cutoff_date, access_count=0 ).delete()[0] logger.info(f"Cleaned up {deleted_count} not_found and {deleted_unused} unused cache entries") return { 'deleted_not_found': deleted_count, 'deleted_unused': deleted_unused, } @shared_task(name="audio.refetch_failed_lyrics") def refetch_failed_lyrics(days_old: int = 7, limit: int = 20): """ Retry fetching lyrics for tracks that failed before Args: days_old: Retry tracks that failed more than this many days ago limit: Maximum number of tracks to retry """ from audio.models_lyrics import Lyrics from audio.lyrics_service import LyricsService from django.utils import timezone from datetime import timedelta cutoff_date = timezone.now() - timedelta(days=days_old) # Find tracks that failed but haven't been tried recently failed_lyrics = Lyrics.objects.filter( fetch_attempted=True, synced_lyrics='', plain_lyrics='', is_instrumental=False, fetched_date__lt=cutoff_date, fetch_attempts__lt=5 # Don't retry if attempted 5+ times )[:limit] service = LyricsService() results = { 'retried': 0, 'success': 0, 'failed': 0, } for lyrics in failed_lyrics: try: updated = service.fetch_and_store_lyrics(lyrics.audio, force=True) results['retried'] += 1 if updated.has_lyrics: results['success'] += 1 else: results['failed'] += 1 except Exception as e: logger.error(f"Error retrying lyrics for {lyrics.audio.title}: {e}") results['failed'] += 1 import time time.sleep(2) # Be nice to the API logger.info(f"Refetch completed: {results}") return results