# soundwave/backend/audio/lyrics_service.py
#
# 287 lines
# 10 KiB
# Python

"""Lyrics fetching service using LRCLIB API"""
import requests
import logging
from typing import Optional, Dict, Any
from datetime import timedelta
from django.utils import timezone
from django.core.cache import cache
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class LRCLIBClient:
    """Client for LRCLIB API (https://lrclib.net/)"""

    DEFAULT_INSTANCE = "https://lrclib.net"
    USER_AGENT = "SoundWave/1.0 (https://github.com/soundwave)"
    TIMEOUT = 10  # seconds

    def __init__(self, instance_url: Optional[str] = None):
        """
        Create a client bound to one LRCLIB instance.

        Args:
            instance_url: Base URL of the instance; falls back to
                DEFAULT_INSTANCE when empty. A trailing slash is removed.
        """
        self.instance_url = (instance_url or self.DEFAULT_INSTANCE).rstrip('/')
        # One session per client so the User-Agent header is sent on every call.
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': self.USER_AGENT,
        })

    def get_lyrics(
        self,
        title: str,
        artist_name: str,
        album_name: str = "",
        duration: int = 0
    ) -> Dict[str, Any]:
        """
        Fetch lyrics from LRCLIB API

        Args:
            title: Track title
            artist_name: Artist name
            album_name: Album name (optional)
            duration: Track duration in seconds (optional, 0 = unknown)

        Returns:
            Dict with keys:
            - synced_lyrics: LRC format lyrics with timestamps
            - plain_lyrics: Plain text lyrics
            - instrumental: Boolean if track is instrumental
            - language: Language code
            - not_found: True when the API answered 404

        Raises:
            LyricsAPIError: On timeout, any other request failure, or a
                response body that is not a JSON object.
        """
        params: Dict[str, Any] = {
            'track_name': title,
            'artist_name': artist_name,
        }
        # Only send the optional filters when they carry real information;
        # an empty album or a zero duration would otherwise be used as a
        # literal value to match against.
        # NOTE(review): relies on the instance treating album_name and
        # duration as optional, per the public LRCLIB API docs - confirm for
        # self-hosted instances.
        if album_name:
            params['album_name'] = album_name
        if duration:
            params['duration'] = round(duration)

        api_endpoint = f"{self.instance_url}/api/get"
        try:
            response = self.session.get(
                api_endpoint,
                params=params,
                timeout=self.TIMEOUT
            )
            if response.status_code == 404:
                # No lyrics found
                return self._build_result(not_found=True)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.Timeout as e:
            logger.error("LRCLIB API timeout for %s - %s", title, artist_name)
            raise LyricsAPIError("Request timeout") from e
        except requests.exceptions.RequestException as e:
            logger.error(
                "LRCLIB API error for %s - %s: %s", title, artist_name, e
            )
            raise LyricsAPIError(f"API request failed: {e}") from e
        except ValueError as e:
            # Invalid JSON can surface as a plain ValueError rather than a
            # RequestException; keep the error type uniform for callers.
            logger.error(
                "LRCLIB API error for %s - %s: %s", title, artist_name, e
            )
            raise LyricsAPIError(f"API request failed: {e}") from e

        if not isinstance(data, dict):
            logger.error(
                "LRCLIB API error for %s - %s: unexpected payload type %s",
                title, artist_name, type(data).__name__,
            )
            raise LyricsAPIError("API request failed: unexpected response format")

        synced = data.get('syncedLyrics') or ''
        plain = data.get('plainLyrics') or ''
        # If we have synced lyrics but no plain, strip timestamps
        if synced and not plain:
            plain = self._strip_timestamps(synced)

        return self._build_result(
            synced_lyrics=synced,
            plain_lyrics=plain,
            # A JSON null must not leak through as None.
            instrumental=bool(data.get('instrumental', False)),
            language=data.get('lang') or '',
        )

    @staticmethod
    def _build_result(
        synced_lyrics: str = '',
        plain_lyrics: str = '',
        instrumental: bool = False,
        language: str = '',
        not_found: bool = False,
    ) -> Dict[str, Any]:
        """Assemble the result dict returned by get_lyrics."""
        return {
            'synced_lyrics': synced_lyrics,
            'plain_lyrics': plain_lyrics,
            'instrumental': instrumental,
            'language': language,
            'not_found': not_found,
        }

    @staticmethod
    def _strip_timestamps(synced_lyrics: str) -> str:
        """
        Strip timestamps from LRC format lyrics

        Removes line timestamps such as [mm:ss.xx], [mm:ss] and [m:ss:xx],
        word-level <mm:ss.xx> tags, and lowercase metadata tags of the form
        [tag:value]. Lines that end up empty are dropped.
        """
        import re

        time_body = r'\d{1,3}:\d{2}(?:[.:]\d{1,3})?'
        line_stamp = re.compile(r'\[' + time_body + r'\]')
        word_stamp = re.compile(r'<' + time_body + r'>')
        metadata_tag = re.compile(r'\[[a-z]+:.*?\]')

        lines = []
        for line in synced_lyrics.split('\n'):
            # Timestamps go first so that metadata matching never sees them.
            cleaned = line_stamp.sub('', line)
            cleaned = word_stamp.sub('', cleaned)
            cleaned = metadata_tag.sub('', cleaned).strip()
            if cleaned:
                lines.append(cleaned)
        return '\n'.join(lines)
class LyricsAPIError(Exception):
    """Exception for lyrics API errors"""
class LyricsService:
    """
    Service for fetching and caching lyrics

    fetch_lyrics caches API results in Django's cache; fetch_and_store_lyrics
    additionally persists them on Lyrics / LyricsCache model rows.
    """

    # Cache lifetimes in seconds.
    CACHE_TTL_FOUND = 86400 * 7  # 7 days
    CACHE_TTL_NOT_FOUND = 86400  # 1 day for not found
    CACHE_TTL_ERROR = 3600  # 1 hour, to avoid hammering the API
    # Non-forced fetching is skipped once this many attempts were recorded.
    MAX_FETCH_ATTEMPTS = 3

    def __init__(self, lrclib_instance: Optional[str] = None):
        """
        Args:
            lrclib_instance: Base URL passed through to LRCLIBClient.
        """
        self.client = LRCLIBClient(lrclib_instance)

    def fetch_lyrics(
        self,
        title: str,
        artist_name: str,
        album_name: str = "",
        duration: int = 0,
        use_cache: bool = True
    ) -> Dict[str, Any]:
        """
        Fetch lyrics with caching

        Args:
            title: Track title
            artist_name: Artist name
            album_name: Album name
            duration: Duration in seconds
            use_cache: Whether to read cached results. The fresh result is
                written to the cache either way.

        Returns:
            Dict with lyrics data. On an API failure the dict has empty
            lyrics, not_found=True and an extra 'error' message.
        """
        cache_key = self._make_cache_key(title, artist_name, album_name, duration)

        if use_cache:
            cached = cache.get(cache_key)
            if cached is not None:
                logger.debug("Cache hit for %s - %s", title, artist_name)
                return cached

        logger.info("Fetching lyrics for %s - %s", title, artist_name)
        try:
            result = self.client.get_lyrics(title, artist_name, album_name, duration)
        except LyricsAPIError as e:
            logger.warning("Failed to fetch lyrics: %s", e)
            # Cache the error for a short time to avoid hammering the API
            error_result = {
                'synced_lyrics': '',
                'plain_lyrics': '',
                'instrumental': False,
                'language': '',
                'not_found': True,
                'error': str(e),
            }
            cache.set(cache_key, error_result, self.CACHE_TTL_ERROR)
            return error_result

        # Cache the result (even if not found, to avoid repeated requests)
        if result.get('not_found'):
            cache_timeout = self.CACHE_TTL_NOT_FOUND
        else:
            cache_timeout = self.CACHE_TTL_FOUND
        cache.set(cache_key, result, cache_timeout)
        return result

    @staticmethod
    def _make_cache_key(title: str, artist: str, album: str, duration: int) -> str:
        """
        Create cache key from track metadata

        The duration is rounded to whole seconds so that 180 and 180.0 share
        one key; keys for integer durations are unchanged.
        """
        import hashlib

        duration_part = round(duration) if duration else 0
        key_str = f"{title}|{artist}|{album}|{duration_part}"
        # md5 is used only to get a short, fixed-length key.
        return f"lyrics:{hashlib.md5(key_str.encode()).hexdigest()}"

    def fetch_and_store_lyrics(self, audio_obj, force: bool = False):
        """
        Fetch lyrics and store in database

        Args:
            audio_obj: Audio model instance
            force: Force fetch even if already attempted. Also bypasses the
                LyricsCache table and the read side of the Django cache.

        Returns:
            The Lyrics row for audio_obj, updated with the outcome.
        """
        from audio.models_lyrics import Lyrics, LyricsCache

        existing, _created = Lyrics.objects.get_or_create(audio=audio_obj)

        attempts_exhausted = (
            existing.fetch_attempted
            and existing.fetch_attempts >= self.MAX_FETCH_ATTEMPTS
        )
        if not force and attempts_exhausted:
            logger.debug(
                "Skipping %s - already attempted %s times",
                audio_obj.title, existing.fetch_attempts,
            )
            return existing

        # A missing duration is treated as 0 instead of crashing round().
        duration_rounded = round(audio_obj.duration or 0)

        # Check database cache first (pointless when forcing a refresh)
        cache_entry = None
        if not force:
            cache_entry = LyricsCache.objects.filter(
                title=audio_obj.title,
                artist_name=audio_obj.channel_name,
                duration=duration_rounded
            ).first()

        if cache_entry:
            existing.synced_lyrics = cache_entry.synced_lyrics
            existing.plain_lyrics = cache_entry.plain_lyrics
            existing.is_instrumental = cache_entry.is_instrumental
            existing.language = cache_entry.language
            existing.source = cache_entry.source
            existing.fetch_attempted = True
            # The row now holds good data; drop any error from an earlier try.
            existing.last_error = ''
            existing.save()
            # Update cache stats
            cache_entry.access_count += 1
            cache_entry.save()
            logger.info("Using cached lyrics for %s", audio_obj.title)
            return existing

        # Fetch from API
        try:
            result = self.fetch_lyrics(
                title=audio_obj.title,
                artist_name=audio_obj.channel_name,
                album_name="",  # YouTube doesn't provide album info
                duration=duration_rounded,
                # A forced refresh must not be served a stale cached answer.
                use_cache=not force
            )
            failed = bool(result.get('error'))

            existing.fetch_attempted = True
            existing.fetch_attempts += 1
            existing.last_error = result.get('error', '')
            if not failed:
                # Only overwrite stored lyrics with a real API answer; a
                # transient failure must not wipe lyrics fetched earlier.
                existing.synced_lyrics = result.get('synced_lyrics', '')
                existing.plain_lyrics = result.get('plain_lyrics', '')
                existing.is_instrumental = result.get('instrumental', False)
                existing.language = result.get('language', '')
                existing.source = 'lrclib'
            existing.save()

            # Store in cache
            if not result.get('not_found'):
                LyricsCache.objects.update_or_create(
                    title=audio_obj.title,
                    artist_name=audio_obj.channel_name,
                    album_name="",
                    duration=duration_rounded,
                    defaults={
                        'synced_lyrics': result.get('synced_lyrics', ''),
                        'plain_lyrics': result.get('plain_lyrics', ''),
                        'is_instrumental': result.get('instrumental', False),
                        'language': result.get('language', ''),
                        'source': 'lrclib',
                        'not_found': result.get('not_found', False),
                    }
                )

            if not failed:
                logger.info("Fetched lyrics for %s", audio_obj.title)
            return existing
        except Exception as e:
            # Best-effort boundary: record the failure on the row rather than
            # propagating it to the caller.
            logger.exception("Error fetching lyrics for %s: %s", audio_obj.title, e)
            existing.fetch_attempted = True
            existing.fetch_attempts += 1
            existing.last_error = str(e)
            existing.save()
            return existing