Fix: Include backend/audio Django app in repository
This commit is contained in:
parent
d04e726373
commit
644cfab298
37 changed files with 6632 additions and 4 deletions
217
backend/audio/tasks_lyrics.py
Normal file
217
backend/audio/tasks_lyrics.py
Normal file
|
|
@ -0,0 +1,217 @@
|
|||
"""Celery tasks for automatic lyrics fetching"""
|
||||
from celery import shared_task
|
||||
from django.utils import timezone
|
||||
from datetime import timedelta
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@shared_task(name="audio.fetch_lyrics_for_audio")
|
||||
def fetch_lyrics_for_audio(audio_youtube_id: str, force: bool = False):
|
||||
"""
|
||||
Fetch lyrics for a single audio track
|
||||
|
||||
Args:
|
||||
audio_youtube_id: YouTube ID of the audio
|
||||
force: Force fetch even if already attempted
|
||||
"""
|
||||
from audio.models import Audio
|
||||
from audio.lyrics_service import LyricsService
|
||||
|
||||
try:
|
||||
audio = Audio.objects.get(youtube_id=audio_youtube_id)
|
||||
service = LyricsService()
|
||||
service.fetch_and_store_lyrics(audio, force=force)
|
||||
logger.info(f"Fetched lyrics for {audio.title}")
|
||||
return {"status": "success", "youtube_id": audio_youtube_id}
|
||||
except Audio.DoesNotExist:
|
||||
logger.error(f"Audio not found: {audio_youtube_id}")
|
||||
return {"status": "error", "error": "Audio not found"}
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching lyrics for {audio_youtube_id}: {e}")
|
||||
return {"status": "error", "error": str(e)}
|
||||
|
||||
|
||||
@shared_task(name="audio.fetch_lyrics_batch")
|
||||
def fetch_lyrics_batch(audio_ids: list, delay_seconds: int = 2):
|
||||
"""
|
||||
Fetch lyrics for multiple audio tracks with delay between requests
|
||||
|
||||
Args:
|
||||
audio_ids: List of YouTube IDs
|
||||
delay_seconds: Delay between requests to avoid rate limiting
|
||||
"""
|
||||
import time
|
||||
from audio.models import Audio
|
||||
from audio.lyrics_service import LyricsService
|
||||
|
||||
service = LyricsService()
|
||||
results = {
|
||||
'success': 0,
|
||||
'failed': 0,
|
||||
'skipped': 0,
|
||||
}
|
||||
|
||||
for youtube_id in audio_ids:
|
||||
try:
|
||||
audio = Audio.objects.get(youtube_id=youtube_id)
|
||||
service.fetch_and_store_lyrics(audio, force=False)
|
||||
results['success'] += 1
|
||||
logger.info(f"Fetched lyrics for {audio.title}")
|
||||
except Audio.DoesNotExist:
|
||||
results['skipped'] += 1
|
||||
logger.warning(f"Audio not found: {youtube_id}")
|
||||
except Exception as e:
|
||||
results['failed'] += 1
|
||||
logger.error(f"Error fetching lyrics for {youtube_id}: {e}")
|
||||
|
||||
# Delay to avoid rate limiting
|
||||
if delay_seconds > 0:
|
||||
time.sleep(delay_seconds)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
@shared_task(name="audio.auto_fetch_lyrics")
|
||||
def auto_fetch_lyrics(limit: int = 50, max_attempts: int = 3):
|
||||
"""
|
||||
Automatically fetch lyrics for audio without lyrics
|
||||
|
||||
This task should be scheduled to run periodically (e.g., every hour)
|
||||
|
||||
Args:
|
||||
limit: Maximum number of tracks to process
|
||||
max_attempts: Skip tracks that have been attempted this many times
|
||||
"""
|
||||
from audio.models import Audio
|
||||
from audio.models_lyrics import Lyrics
|
||||
from audio.lyrics_service import LyricsService
|
||||
|
||||
# Find audio without lyrics or with failed attempts
|
||||
audio_without_lyrics = Audio.objects.filter(
|
||||
downloaded=True
|
||||
).exclude(
|
||||
lyrics__fetch_attempted=True,
|
||||
lyrics__fetch_attempts__gte=max_attempts
|
||||
)[:limit]
|
||||
|
||||
if not audio_without_lyrics:
|
||||
logger.info("No audio tracks need lyrics fetching")
|
||||
return {"status": "no_work", "message": "No tracks need lyrics"}
|
||||
|
||||
service = LyricsService()
|
||||
results = {
|
||||
'processed': 0,
|
||||
'success': 0,
|
||||
'failed': 0,
|
||||
}
|
||||
|
||||
for audio in audio_without_lyrics:
|
||||
try:
|
||||
lyrics = service.fetch_and_store_lyrics(audio, force=False)
|
||||
results['processed'] += 1
|
||||
|
||||
if lyrics.has_lyrics:
|
||||
results['success'] += 1
|
||||
else:
|
||||
results['failed'] += 1
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in auto-fetch for {audio.title}: {e}")
|
||||
results['failed'] += 1
|
||||
|
||||
# Small delay to be nice to the API
|
||||
import time
|
||||
time.sleep(1)
|
||||
|
||||
logger.info(f"Auto-fetch completed: {results}")
|
||||
return results
|
||||
|
||||
|
||||
@shared_task(name="audio.cleanup_lyrics_cache")
|
||||
def cleanup_lyrics_cache(days_old: int = 30):
|
||||
"""
|
||||
Clean up old lyrics cache entries
|
||||
|
||||
Args:
|
||||
days_old: Remove cache entries older than this many days
|
||||
"""
|
||||
from audio.models_lyrics import LyricsCache
|
||||
from django.utils import timezone
|
||||
from datetime import timedelta
|
||||
|
||||
cutoff_date = timezone.now() - timedelta(days=days_old)
|
||||
|
||||
# Delete old not_found entries
|
||||
deleted_count = LyricsCache.objects.filter(
|
||||
not_found=True,
|
||||
cached_date__lt=cutoff_date
|
||||
).delete()[0]
|
||||
|
||||
# Delete old unused entries (not accessed in the last N days)
|
||||
deleted_unused = LyricsCache.objects.filter(
|
||||
last_accessed__lt=cutoff_date,
|
||||
access_count=0
|
||||
).delete()[0]
|
||||
|
||||
logger.info(f"Cleaned up {deleted_count} not_found and {deleted_unused} unused cache entries")
|
||||
|
||||
return {
|
||||
'deleted_not_found': deleted_count,
|
||||
'deleted_unused': deleted_unused,
|
||||
}
|
||||
|
||||
|
||||
@shared_task(name="audio.refetch_failed_lyrics")
|
||||
def refetch_failed_lyrics(days_old: int = 7, limit: int = 20):
|
||||
"""
|
||||
Retry fetching lyrics for tracks that failed before
|
||||
|
||||
Args:
|
||||
days_old: Retry tracks that failed more than this many days ago
|
||||
limit: Maximum number of tracks to retry
|
||||
"""
|
||||
from audio.models_lyrics import Lyrics
|
||||
from audio.lyrics_service import LyricsService
|
||||
from django.utils import timezone
|
||||
from datetime import timedelta
|
||||
|
||||
cutoff_date = timezone.now() - timedelta(days=days_old)
|
||||
|
||||
# Find tracks that failed but haven't been tried recently
|
||||
failed_lyrics = Lyrics.objects.filter(
|
||||
fetch_attempted=True,
|
||||
synced_lyrics='',
|
||||
plain_lyrics='',
|
||||
is_instrumental=False,
|
||||
fetched_date__lt=cutoff_date,
|
||||
fetch_attempts__lt=5 # Don't retry if attempted 5+ times
|
||||
)[:limit]
|
||||
|
||||
service = LyricsService()
|
||||
results = {
|
||||
'retried': 0,
|
||||
'success': 0,
|
||||
'failed': 0,
|
||||
}
|
||||
|
||||
for lyrics in failed_lyrics:
|
||||
try:
|
||||
updated = service.fetch_and_store_lyrics(lyrics.audio, force=True)
|
||||
results['retried'] += 1
|
||||
|
||||
if updated.has_lyrics:
|
||||
results['success'] += 1
|
||||
else:
|
||||
results['failed'] += 1
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error retrying lyrics for {lyrics.audio.title}: {e}")
|
||||
results['failed'] += 1
|
||||
|
||||
import time
|
||||
time.sleep(2) # Be nice to the API
|
||||
|
||||
logger.info(f"Refetch completed: {results}")
|
||||
return results
|
||||
Loading…
Add table
Add a link
Reference in a new issue