287 lines
10 KiB
Python
287 lines
10 KiB
Python
"""Lyrics fetching service using LRCLIB API"""
|
|
import requests
|
|
import logging
|
|
from typing import Optional, Dict, Any
|
|
from datetime import timedelta
|
|
from django.utils import timezone
|
|
from django.core.cache import cache
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class LRCLIBClient:
    """Client for the LRCLIB lyrics API (https://lrclib.net/).

    Holds a ``requests.Session`` that carries the project User-Agent so that
    repeated lookups reuse the same connection pool.
    """

    DEFAULT_INSTANCE = "https://lrclib.net"
    USER_AGENT = "SoundWave/1.0 (https://github.com/soundwave)"
    TIMEOUT = 10  # seconds

    def __init__(self, instance_url: Optional[str] = None):
        """Create a client.

        Args:
            instance_url: Base URL of the LRCLIB instance. Falls back to
                ``DEFAULT_INSTANCE`` when empty or ``None``. A trailing
                slash is removed so endpoint paths can be appended safely.
        """
        self.instance_url = (instance_url or self.DEFAULT_INSTANCE).rstrip('/')
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': self.USER_AGENT,
        })

    def get_lyrics(
        self,
        title: str,
        artist_name: str,
        album_name: str = "",
        duration: int = 0
    ) -> Dict[str, Any]:
        """Fetch lyrics for one track from the ``/api/get`` endpoint.

        Args:
            title: Track title.
            artist_name: Artist name.
            album_name: Album name (optional).
            duration: Track duration in seconds; rounded before sending.

        Returns:
            Dict with keys:
                - synced_lyrics: LRC format lyrics with timestamps
                - plain_lyrics: Plain text lyrics (derived from the synced
                  lyrics when the API only returns those)
                - instrumental: Boolean if track is instrumental
                - language: Language code
                - not_found: True when the API answered 404

        Raises:
            LyricsAPIError: On timeout, any other request failure, or a
                response body that is not a JSON object.
        """
        params = {
            'track_name': title,
            'artist_name': artist_name,
            'album_name': album_name,
            'duration': round(duration) if duration else 0,
        }
        api_endpoint = f"{self.instance_url}/api/get"

        try:
            response = self.session.get(
                api_endpoint,
                params=params,
                timeout=self.TIMEOUT
            )

            if response.status_code == 404:
                # A 404 is the API's way of saying "no lyrics", not a failure.
                return {
                    'synced_lyrics': '',
                    'plain_lyrics': '',
                    'instrumental': False,
                    'language': '',
                    'not_found': True,
                }

            response.raise_for_status()
            data = response.json()

        except requests.exceptions.Timeout as e:
            logger.error("LRCLIB API timeout for %s - %s", title, artist_name)
            raise LyricsAPIError("Request timeout") from e

        except requests.exceptions.RequestException as e:
            logger.error(
                "LRCLIB API error for %s - %s: %s", title, artist_name, e
            )
            raise LyricsAPIError(f"API request failed: {e}") from e

        except ValueError as e:
            # Depending on the requests version, a non-JSON body surfaces as
            # a plain ValueError rather than a RequestException; callers only
            # handle LyricsAPIError, so translate it here.
            logger.error(
                "LRCLIB API returned invalid JSON for %s - %s: %s",
                title, artist_name, e
            )
            raise LyricsAPIError(f"Invalid API response: {e}") from e

        if not isinstance(data, dict):
            # e.g. a JSON list or null: .get() below would raise AttributeError.
            logger.error(
                "LRCLIB API returned unexpected payload for %s - %s",
                title, artist_name
            )
            raise LyricsAPIError("Invalid API response: expected a JSON object")

        # `or ''` normalises JSON null to an empty string.
        synced = data.get('syncedLyrics') or ''
        plain = data.get('plainLyrics') or ''
        instrumental = bool(data.get('instrumental', False))
        language = data.get('lang') or ''

        # If we have synced lyrics but no plain, strip timestamps
        if synced and not plain:
            plain = self._strip_timestamps(synced)

        return {
            'synced_lyrics': synced,
            'plain_lyrics': plain,
            'instrumental': instrumental,
            'language': language,
            'not_found': False,
        }

    @staticmethod
    def _strip_timestamps(synced_lyrics: str) -> str:
        """Return the text of LRC-format lyrics without tags.

        Removes time tags (``[mm:ss]``, ``[mm:ss.xx]``, ``[mm:ss.xxx]``, also
        with a one-digit minute or a ``:`` before the fraction) and lowercase
        metadata tags such as ``[ar:...]``. Lines that end up empty are
        dropped and the remaining lines are whitespace-trimmed.
        """
        import re

        # Compile once per call instead of resolving the pattern per line.
        time_tag = re.compile(r'\[\d{1,3}:\d{2}(?:[.:]\d{1,3})?\]')
        meta_tag = re.compile(r'\[[a-z]+:.*?\]')

        lines = []
        for line in synced_lyrics.split('\n'):
            cleaned = meta_tag.sub('', time_tag.sub('', line)).strip()
            if cleaned:
                lines.append(cleaned)
        return '\n'.join(lines)
|
|
|
|
|
|
class LyricsAPIError(Exception):
    """Raised when a request to the lyrics API fails (timeout or other request error)."""
|
|
|
|
|
|
class LyricsService:
    """Service for fetching lyrics and caching them.

    Two cache layers are used: the Django cache (``fetch_lyrics``) and the
    ``LyricsCache`` database table (``fetch_and_store_lyrics``).
    """

    # Django-cache lifetimes, in seconds.
    CACHE_TTL_FOUND = 86400 * 7   # 7 days
    CACHE_TTL_NOT_FOUND = 86400   # 1 day for not found
    CACHE_TTL_ERROR = 3600        # 1 hour
    # After this many attempts a track is skipped unless ``force`` is set.
    MAX_FETCH_ATTEMPTS = 3

    def __init__(self, lrclib_instance: Optional[str] = None):
        """Create the service.

        Args:
            lrclib_instance: Base URL handed to ``LRCLIBClient``; ``None``
                selects the client's default instance.
        """
        self.client = LRCLIBClient(lrclib_instance)

    def fetch_lyrics(
        self,
        title: str,
        artist_name: str,
        album_name: str = "",
        duration: int = 0,
        use_cache: bool = True
    ) -> Dict[str, Any]:
        """Fetch lyrics with caching.

        Args:
            title: Track title
            artist_name: Artist name
            album_name: Album name
            duration: Duration in seconds
            use_cache: Whether to return a cached result when one exists.
                The fresh result is written to the cache either way.

        Returns:
            Dict with lyrics data. API failures do not raise: they produce an
            empty result with ``not_found`` set and an extra ``error`` key.
        """
        cache_key = self._make_cache_key(title, artist_name, album_name, duration)

        if use_cache:
            cached = cache.get(cache_key)
            if cached is not None:
                logger.debug("Cache hit for %s - %s", title, artist_name)
                return cached

        logger.info("Fetching lyrics for %s - %s", title, artist_name)
        try:
            result = self.client.get_lyrics(title, artist_name, album_name, duration)
        except LyricsAPIError as e:
            logger.warning("Failed to fetch lyrics: %s", e)
            # Cache the error for a short time to avoid hammering the API
            error_result = {
                'synced_lyrics': '',
                'plain_lyrics': '',
                'instrumental': False,
                'language': '',
                'not_found': True,
                'error': str(e),
            }
            cache.set(cache_key, error_result, self.CACHE_TTL_ERROR)
            return error_result

        # Cache the result (even if not found, to avoid repeated requests)
        if result.get('not_found'):
            cache_timeout = self.CACHE_TTL_NOT_FOUND
        else:
            cache_timeout = self.CACHE_TTL_FOUND
        cache.set(cache_key, result, cache_timeout)
        return result

    @staticmethod
    def _make_cache_key(title: str, artist: str, album: str, duration: int) -> str:
        """Create cache key from track metadata.

        The metadata is joined with ``|`` and hashed so the key has a fixed
        length regardless of the input; MD5 is used only as a fingerprint
        here, not for security.
        """
        import hashlib

        key_str = f"{title}|{artist}|{album}|{duration}"
        return f"lyrics:{hashlib.md5(key_str.encode()).hexdigest()}"

    def fetch_and_store_lyrics(self, audio_obj, force: bool = False):
        """Fetch lyrics and store in database.

        Args:
            audio_obj: Audio model instance; ``title``, ``channel_name`` and
                ``duration`` are read from it.
            force: Force a fresh API fetch: ignores the attempt limit, the
                ``LyricsCache`` table and the Django cache.

        Returns:
            The ``Lyrics`` row for ``audio_obj`` (created if missing). Errors
            are recorded on the row (``last_error``) instead of being raised.
        """
        from audio.models_lyrics import Lyrics, LyricsCache

        existing, _ = Lyrics.objects.get_or_create(audio=audio_obj)

        # Check if already attempted
        if (
            not force
            and existing.fetch_attempted
            and existing.fetch_attempts >= self.MAX_FETCH_ATTEMPTS
        ):
            logger.debug(
                "Skipping %s - already attempted %s times",
                audio_obj.title, existing.fetch_attempts
            )
            return existing

        # A missing duration must not abort the lookup; 0 is what
        # fetch_lyrics already uses as its "unknown" default.
        duration_rounded = round(audio_obj.duration or 0)

        # Check database cache first
        if not force:
            cache_entry = LyricsCache.objects.filter(
                title=audio_obj.title,
                artist_name=audio_obj.channel_name,
                duration=duration_rounded
            ).first()

            if cache_entry:
                existing.synced_lyrics = cache_entry.synced_lyrics
                existing.plain_lyrics = cache_entry.plain_lyrics
                existing.is_instrumental = cache_entry.is_instrumental
                existing.language = cache_entry.language
                existing.source = cache_entry.source
                existing.fetch_attempted = True
                existing.save()

                # Update cache stats
                cache_entry.access_count += 1
                cache_entry.save()

                logger.info("Using cached lyrics for %s", audio_obj.title)
                return existing

        # Fetch from API
        try:
            result = self.fetch_lyrics(
                title=audio_obj.title,
                artist_name=audio_obj.channel_name,
                album_name="",  # YouTube doesn't provide album info
                duration=duration_rounded,
                # A forced refresh must reach the API instead of being
                # answered from a cached (possibly error/not-found) result.
                use_cache=not force
            )

            error = result.get('error', '')
            if not error:
                # Only replace stored lyrics with a real API answer; a
                # transient failure must not blank out lyrics we already have.
                existing.synced_lyrics = result.get('synced_lyrics', '')
                existing.plain_lyrics = result.get('plain_lyrics', '')
                existing.is_instrumental = result.get('instrumental', False)
                existing.language = result.get('language', '')
                existing.source = 'lrclib'
            existing.fetch_attempted = True
            existing.fetch_attempts += 1
            existing.last_error = error
            existing.save()

            # Store in cache
            if not result.get('not_found'):
                LyricsCache.objects.update_or_create(
                    title=audio_obj.title,
                    artist_name=audio_obj.channel_name,
                    album_name="",
                    duration=duration_rounded,
                    defaults={
                        'synced_lyrics': result.get('synced_lyrics', ''),
                        'plain_lyrics': result.get('plain_lyrics', ''),
                        'is_instrumental': result.get('instrumental', False),
                        'language': result.get('language', ''),
                        'source': 'lrclib',
                        'not_found': result.get('not_found', False),
                    }
                )

            logger.info("Fetched lyrics for %s", audio_obj.title)
            return existing

        except Exception as e:
            # Boundary handler: record the failure on the row so the attempt
            # counter advances, rather than propagating to the caller.
            logger.error("Error fetching lyrics for %s: %s", audio_obj.title, e)
            existing.fetch_attempted = True
            existing.fetch_attempts += 1
            existing.last_error = str(e)
            existing.save()
            return existing
|