Fix: Include backend/audio Django app in repository

2025-12-24 01:58:56 +00:00 · 2025-12-24 01:58:56 +00:00 · 644cfab298
commit 644cfab298
parent d04e726373
37 changed files with 6632 additions and 4 deletions
--- a/backend/audio/tasks_lyrics.py
+++ b/backend/audio/tasks_lyrics.py
@ -0,0 +1,217 @@
+"""Celery tasks for automatic lyrics fetching"""
+from celery import shared_task
+from django.utils import timezone
+from datetime import timedelta
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+@shared_task(name="audio.fetch_lyrics_for_audio")
+def fetch_lyrics_for_audio(audio_youtube_id: str, force: bool = False):
+    """
+    Look up one audio track by YouTube ID and run the lyrics service on it
+
+    Args:
+        audio_youtube_id: YouTube ID used to look up the Audio row
+        force: Passed through to LyricsService.fetch_and_store_lyrics
+
+    Returns:
+        {"status": "success", "youtube_id": ...} when the service call
+        returns, otherwise {"status": "error", "error": <message>}.
+        Exceptions are logged and reported in the dict, not re-raised.
+    """
+    from audio.models import Audio
+    from audio.lyrics_service import LyricsService
+
+    try:
+        audio = Audio.objects.get(youtube_id=audio_youtube_id)
+        LyricsService().fetch_and_store_lyrics(audio, force=force)
+        logger.info(f"Fetched lyrics for {audio.title}")
+        outcome = {"status": "success", "youtube_id": audio_youtube_id}
+    except Audio.DoesNotExist:
+        # Unknown ID: report it instead of failing the task
+        logger.error(f"Audio not found: {audio_youtube_id}")
+        outcome = {"status": "error", "error": "Audio not found"}
+    except Exception as e:
+        # Any other failure is turned into an error payload as well
+        logger.error(f"Error fetching lyrics for {audio_youtube_id}: {e}")
+        outcome = {"status": "error", "error": str(e)}
+
+    return outcome
+
+
+@shared_task(name="audio.fetch_lyrics_batch")
+def fetch_lyrics_batch(audio_ids: list, delay_seconds: int = 2):
+    """
+    Fetch lyrics for multiple audio tracks with delay between requests
+
+    Args:
+        audio_ids: List of YouTube IDs (None is treated as an empty list)
+        delay_seconds: Delay between requests to avoid rate limiting
+
+    Returns:
+        Dict of counters:
+            'success': service call returned an object with has_lyrics set
+            'failed': service call raised, or returned without lyrics
+            'skipped': no Audio row exists for the ID
+    """
+    import time
+    from audio.models import Audio
+    from audio.lyrics_service import LyricsService
+
+    service = LyricsService()
+    results = {
+        'success': 0,
+        'failed': 0,
+        'skipped': 0,
+    }
+    # Set once the service has been called, so only later calls are delayed
+    service_called = False
+
+    for youtube_id in audio_ids or []:
+        try:
+            audio = Audio.objects.get(youtube_id=youtube_id)
+
+            # Delay only between service calls: nothing to wait for before
+            # the first one, after the last one, or after an unknown ID
+            if service_called and delay_seconds > 0:
+                time.sleep(delay_seconds)
+            service_called = True
+
+            lyrics = service.fetch_and_store_lyrics(audio, force=False)
+            # Same success criterion as auto_fetch_lyrics / refetch_failed_lyrics
+            if lyrics.has_lyrics:
+                results['success'] += 1
+                logger.info(f"Fetched lyrics for {audio.title}")
+            else:
+                results['failed'] += 1
+                logger.info(f"No lyrics found for {audio.title}")
+        except Audio.DoesNotExist:
+            results['skipped'] += 1
+            logger.warning(f"Audio not found: {youtube_id}")
+        except Exception:
+            results['failed'] += 1
+            # logger.exception keeps the traceback with the error entry
+            logger.exception(f"Error fetching lyrics for {youtube_id}")
+
+    return results
+
+
+@shared_task(name="audio.auto_fetch_lyrics")
+def auto_fetch_lyrics(limit: int = 50, max_attempts: int = 3):
+    """
+    Automatically fetch lyrics for audio without lyrics
+
+    This task should be scheduled to run periodically (e.g., every hour)
+
+    Args:
+        limit: Maximum number of tracks to process
+        max_attempts: Skip tracks that have been attempted this many times
+
+    Returns:
+        {"status": "no_work", ...} when no track qualifies, otherwise a dict
+        of counters: 'processed' (service calls that returned), 'success'
+        (returned object has has_lyrics set) and 'failed' (no lyrics, or the
+        call raised).
+    """
+    import time
+    from audio.models import Audio
+    # NOTE(review): Lyrics is not referenced below; import kept as in the
+    # original in case it is what loads the model behind `lyrics__` lookups
+    from audio.models_lyrics import Lyrics  # noqa: F401
+    from audio.lyrics_service import LyricsService
+
+    # Downloaded tracks that still need lyrics: drop those that reached the
+    # attempt cap and those whose lyrics row already holds text or is marked
+    # instrumental. Without the last three excludes, finished tracks keep
+    # matching and can fill the `limit` slice on every run.
+    audio_without_lyrics = list(
+        Audio.objects.filter(
+            downloaded=True
+        ).exclude(
+            lyrics__fetch_attempted=True,
+            lyrics__fetch_attempts__gte=max_attempts
+        ).exclude(
+            lyrics__synced_lyrics__gt=''
+        ).exclude(
+            lyrics__plain_lyrics__gt=''
+        ).exclude(
+            lyrics__is_instrumental=True
+        )[:limit]
+    )
+    # NOTE(review): tracks with a failed attempt below the cap are still
+    # selected and passed with force=False — confirm the service retries them
+
+    if not audio_without_lyrics:
+        logger.info("No audio tracks need lyrics fetching")
+        return {"status": "no_work", "message": "No tracks need lyrics"}
+
+    service = LyricsService()
+    results = {
+        'processed': 0,
+        'success': 0,
+        'failed': 0,
+    }
+
+    for index, audio in enumerate(audio_without_lyrics):
+        # Small delay to be nice to the API; only between calls, so nothing
+        # is spent waiting after the final track
+        if index:
+            time.sleep(1)
+
+        try:
+            lyrics = service.fetch_and_store_lyrics(audio, force=False)
+            results['processed'] += 1
+
+            if lyrics.has_lyrics:
+                results['success'] += 1
+            else:
+                results['failed'] += 1
+
+        except Exception:
+            # logger.exception keeps the traceback with the error entry
+            logger.exception(f"Error in auto-fetch for {audio.title}")
+            results['failed'] += 1
+
+    logger.info(f"Auto-fetch completed: {results}")
+    return results
+
+
+@shared_task(name="audio.cleanup_lyrics_cache")
+def cleanup_lyrics_cache(days_old: int = 30):
+    """
+    Purge stale rows from the lyrics cache
+
+    Two groups are deleted, both measured against now minus days_old:
+    rows flagged not_found whose cached_date is older, and rows with
+    access_count 0 whose last_accessed is older.
+
+    Args:
+        days_old: Age threshold in days
+
+    Returns:
+        Dict with 'deleted_not_found' and 'deleted_unused', each taken from
+        the first element of the corresponding delete() result
+    """
+    from datetime import timedelta
+    from django.utils import timezone
+    from audio.models_lyrics import LyricsCache
+
+    threshold = timezone.now() - timedelta(days=days_old)
+
+    # Negative-cache rows that have aged out
+    stale_misses = LyricsCache.objects.filter(
+        not_found=True,
+        cached_date__lt=threshold,
+    )
+    deleted_count, _ = stale_misses.delete()
+
+    # Rows with a zero access count and an old last_accessed stamp
+    never_used = LyricsCache.objects.filter(
+        last_accessed__lt=threshold,
+        access_count=0,
+    )
+    deleted_unused, _ = never_used.delete()
+
+    logger.info(f"Cleaned up {deleted_count} not_found and {deleted_unused} unused cache entries")
+
+    summary = {}
+    summary['deleted_not_found'] = deleted_count
+    summary['deleted_unused'] = deleted_unused
+    return summary
+
+
+@shared_task(name="audio.refetch_failed_lyrics")
+def refetch_failed_lyrics(days_old: int = 7, limit: int = 20, max_attempts: int = 5):
+    """
+    Retry fetching lyrics for tracks that failed before
+
+    A track counts as failed when a fetch was attempted, both lyrics fields
+    are empty and it is not marked instrumental.
+
+    Args:
+        days_old: Retry tracks that failed more than this many days ago
+        limit: Maximum number of tracks to retry
+        max_attempts: Don't retry tracks already attempted this many times
+
+    Returns:
+        Dict of counters: 'retried' (service calls that returned), 'success'
+        (returned object has has_lyrics set) and 'failed' (still no lyrics,
+        or the call raised)
+    """
+    import time
+    from audio.models_lyrics import Lyrics
+    from audio.lyrics_service import LyricsService
+
+    cutoff_date = timezone.now() - timedelta(days=days_old)
+
+    # Find tracks that failed but haven't been tried recently
+    failed_lyrics = Lyrics.objects.filter(
+        fetch_attempted=True,
+        synced_lyrics='',
+        plain_lyrics='',
+        is_instrumental=False,
+        fetched_date__lt=cutoff_date,
+        fetch_attempts__lt=max_attempts,
+    )[:limit]
+
+    service = LyricsService()
+    results = {
+        'retried': 0,
+        'success': 0,
+        'failed': 0,
+    }
+
+    for index, lyrics in enumerate(failed_lyrics):
+        # Be nice to the API: pause between retries, not after the last one
+        if index:
+            time.sleep(2)
+
+        try:
+            # force=True because these rows were already attempted once
+            updated = service.fetch_and_store_lyrics(lyrics.audio, force=True)
+            results['retried'] += 1
+
+            if updated.has_lyrics:
+                results['success'] += 1
+            else:
+                results['failed'] += 1
+
+        except Exception:
+            # logger.exception keeps the traceback with the error entry
+            logger.exception(f"Error retrying lyrics for {lyrics.audio.title}")
+            results['failed'] += 1
+
+    logger.info(f"Refetch completed: {results}")
+    return results