- Full PWA support with offline capabilities - Comprehensive search across songs, playlists, and channels - Offline playlist manager with download tracking - Pre-built frontend for zero-build deployment - Docker-based deployment with docker compose - Material-UI dark theme interface - YouTube audio download and management - Multi-user authentication support
507 lines
19 KiB
Python
507 lines
19 KiB
Python
"""Celery tasks for background processing"""
|
|
|
|
from celery import shared_task
|
|
import yt_dlp
|
|
from audio.models import Audio
|
|
from channel.models import Channel
|
|
from download.models import DownloadQueue
|
|
from datetime import datetime
|
|
from django.utils import timezone
|
|
import os
|
|
|
|
|
|
@shared_task
def download_audio_task(queue_id):
    """Download one queued YouTube item as an audio-only ``.m4a`` file.

    Marks the queue entry ``downloading``, runs yt-dlp with the
    ``FFmpegExtractAudio`` post-processor, creates the matching ``Audio`` row
    if the owner does not have one for that video yet, schedules
    ``link_audio_to_playlists`` and finally marks the entry ``completed``.

    Args:
        queue_id: Primary key of the ``DownloadQueue`` row to process.

    Returns:
        A short status string containing the downloaded title.

    Raises:
        DownloadQueue.DoesNotExist: If no queue row has ``queue_id``.
        Exception: Any error raised while downloading or saving is recorded on
            the queue row (``status='failed'``, ``error_message``) and then
            re-raised.
    """
    # Look the row up *outside* the try block: if it does not exist there is
    # nothing to mark as failed, and the handler below would otherwise trip
    # over an unbound ``queue_item`` and hide the real error.
    queue_item = DownloadQueue.objects.get(id=queue_id)

    # Directory prefix used in the output template; stripped again below to
    # obtain the path stored on the Audio row.
    audio_root = '/app/audio/'

    try:
        queue_item.status = 'downloading'
        queue_item.started_date = timezone.now()
        queue_item.save()

        # yt-dlp options for AUDIO ONLY (no video)
        ydl_opts = {
            'format': 'bestaudio/best',  # Best audio quality, no video
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'm4a',
                'preferredquality': '192',
            }],
            'outtmpl': audio_root + '%(channel)s/%(title)s-%(id)s.%(ext)s',
            'quiet': True,
            'no_warnings': True,
            # NOTE(review): audio extraction is driven by the postprocessor
            # above; this key is kept unchanged from the original options —
            # confirm whether yt-dlp actually reads it before removing.
            'extract_audio': True,
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(queue_item.url, download=True)
            # prepare_filename() reflects the pre-conversion extension.
            source_filename = ydl.prepare_filename(info)

        # The post-processor is configured for m4a, so swap the extension.
        actual_filename = os.path.splitext(source_filename)[0] + '.m4a'

        if actual_filename.startswith(audio_root):
            # Store the path relative to the audio root.
            file_path = actual_filename[len(audio_root):]
        else:
            # Fallback to a constructed path if prepare_filename() returned
            # something outside the expected root.
            file_path = f"{info.get('channel', 'unknown')}/{info.get('title', 'unknown')}-{info['id']}.m4a"

        # Prefer the size of the converted file on disk; the metadata value
        # describes the originally selected format and may be missing.
        try:
            file_size = os.path.getsize(actual_filename)
        except OSError:
            file_size = info.get('filesize', 0) or 0

        # ``upload_date`` can be present but None; ``or`` covers both the
        # missing and the null case so strptime never receives None.
        upload_date = info.get('upload_date') or '20230101'

        audio, _ = Audio.objects.get_or_create(
            owner=queue_item.owner,
            youtube_id=info['id'],
            defaults={
                'title': info.get('title') or 'Unknown',
                'description': info.get('description') or '',
                'channel_id': info.get('channel_id') or '',
                'channel_name': info.get('channel') or 'Unknown',
                'duration': info.get('duration') or 0,
                'file_path': file_path,
                'file_size': file_size,
                'thumbnail_url': info.get('thumbnail') or '',
                'published_date': datetime.strptime(upload_date, '%Y%m%d'),
                'view_count': info.get('view_count', 0) or 0,
                'like_count': info.get('like_count', 0) or 0,
            }
        )

        # Playlist linking is queued as its own task so the expensive
        # playlist lookups do not block this download task.
        link_audio_to_playlists.delay(audio.id, queue_item.owner.id)

        queue_item.status = 'completed'
        queue_item.completed_date = timezone.now()
        queue_item.youtube_id = info['id']
        queue_item.title = info.get('title', '')
        queue_item.save()

        return f"Downloaded: {info.get('title', 'Unknown')}"

    except Exception as e:
        # Record the failure on the queue row, then let Celery see the error.
        queue_item.status = 'failed'
        queue_item.error_message = str(e)
        queue_item.save()
        raise
|
|
|
|
|
|
@shared_task
def download_channel_task(channel_id):
    """Smart sync: queue downloads only for channel videos not yet downloaded.

    Fetches a flat listing (at most 50 entries) of the channel's videos with
    yt-dlp, skips every video whose ID the channel owner already has as an
    ``Audio`` row, and creates a ``DownloadQueue`` entry plus a
    ``download_audio_task`` job for each remaining video.

    Args:
        channel_id: Primary key of the ``Channel`` row to sync (not the
            YouTube channel ID).

    Returns:
        A status string summarising how many items were queued or skipped,
        or a failure message if the listing could not be fetched.

    Raises:
        Channel.DoesNotExist: If no channel row has ``channel_id``.
        Exception: Any other error is stored on the channel
            (``sync_status='failed'``, ``error_message``) and re-raised.
    """
    # Fetch outside the try block so a missing row surfaces as DoesNotExist
    # instead of an unbound-variable error inside the failure handler.
    channel = Channel.objects.get(id=channel_id)

    try:
        channel.sync_status = 'syncing'
        channel.error_message = ''
        channel.save()

        url = f"https://www.youtube.com/channel/{channel.channel_id}/videos"

        # Extract flat to get list quickly
        ydl_opts = {
            'quiet': True,
            'no_warnings': True,
            'extract_flat': True,
            'playlistend': 50,  # Limit to last 50 videos per sync
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)

        if not info or 'entries' not in info:
            channel.sync_status = 'failed'
            channel.error_message = 'Failed to fetch channel videos'
            channel.save()
            return "Failed to fetch channel videos"

        # IDs of everything this owner has already downloaded (any channel);
        # used only to decide which listed videos can be skipped.
        existing_ids = set(Audio.objects.filter(
            owner=channel.owner
        ).values_list('youtube_id', flat=True))

        new_videos = 0
        skipped = 0

        for entry in info['entries']:
            if not entry:
                continue

            video_id = entry.get('id')
            if not video_id:
                continue

            # SMART SYNC: Skip if already downloaded
            if video_id in existing_ids:
                skipped += 1
                continue

            # New content: create the queue row once per owner/URL.
            queue_item, created = DownloadQueue.objects.get_or_create(
                owner=channel.owner,
                url=f"https://www.youtube.com/watch?v={video_id}",
                defaults={
                    'youtube_id': video_id,
                    'title': entry.get('title', 'Unknown'),
                    'status': 'pending',
                    'auto_start': True
                }
            )

            # Only start a download when the row was newly created, so an
            # already queued URL is not scheduled twice.
            if created:
                new_videos += 1
                download_audio_task.delay(queue_item.id)

        channel.sync_status = 'success'
        # Count only this channel's audio. The previous
        # ``len(existing_ids)`` covered every audio the owner has, across
        # all channels and playlists.
        channel.downloaded_count = Audio.objects.filter(
            owner=channel.owner,
            channel_id=channel.channel_id,
        ).count()
        channel.save()

        if new_videos == 0:
            return f"Channel '{channel.channel_name}' up to date ({skipped} already downloaded)"

        return f"Channel '{channel.channel_name}': {new_videos} new audio(s) queued, {skipped} already downloaded"

    except Exception as e:
        # Persist the failure for the UI/API, then propagate to Celery.
        channel.sync_status = 'failed'
        channel.error_message = str(e)
        channel.save()
        raise
|
|
|
|
|
|
@shared_task(bind=True, name="subscribe_to_playlist")
def subscribe_to_playlist(self, user_id, playlist_url):
    """Subscribe a user to a YouTube playlist and kick off its audio sync.

    TubeArchivist pattern: called from the API, creates the subscription and
    then triggers the audio (not video) download task.

    Args:
        self: Bound Celery task instance (unused).
        user_id: Primary key of the subscribing user.
        playlist_url: A playlist URL containing ``list=<id>``, or a bare
            playlist ID of at least 13 characters starting with ``PL``,
            ``UU``, ``LL`` or ``RD``.

    Returns:
        A status string: either "already subscribed" or the subscribed title.

    Raises:
        ValueError: If no playlist ID can be derived from ``playlist_url`` or
            the metadata lookup returns nothing.
    """
    from django.contrib.auth import get_user_model
    from playlist.models import Playlist
    from common.src.youtube_metadata import get_playlist_metadata
    import re

    subscriber = get_user_model().objects.get(id=user_id)

    # Try each URL pattern in order and keep the first hit.
    id_patterns = (
        r'[?&]list=([a-zA-Z0-9_-]+)',
        r'playlist\?list=([a-zA-Z0-9_-]+)',
    )
    attempts = (re.search(candidate, playlist_url) for candidate in id_patterns)
    first_hit = next((hit for hit in attempts if hit), None)

    if first_hit is not None:
        playlist_id = first_hit.group(1)
    elif len(playlist_url) >= 13 and playlist_url.startswith(('PL', 'UU', 'LL', 'RD')):
        # Not a URL: accept the argument itself as a bare playlist ID.
        playlist_id = playlist_url
    else:
        raise ValueError("Invalid playlist URL")

    # Nothing to do when this user already has the playlist.
    already_there = Playlist.objects.filter(owner=subscriber, playlist_id=playlist_id).exists()
    if already_there:
        return f"Already subscribed to playlist {playlist_id}"

    metadata = get_playlist_metadata(playlist_id)
    if not metadata:
        raise ValueError("Failed to fetch playlist metadata")

    # These model fields share their names with the metadata keys.
    copied_fields = {
        key: metadata[key]
        for key in (
            'title',
            'description',
            'channel_name',
            'channel_id',
            'thumbnail_url',
            'item_count',
        )
    }
    subscription = Playlist.objects.create(
        owner=subscriber,
        playlist_id=playlist_id,
        playlist_type='youtube',
        subscribed=True,
        auto_download=True,
        sync_status='pending',
        **copied_fields,
    )

    # Hand the actual downloading over to the playlist sync task.
    download_playlist_task.delay(subscription.id)

    return f"Subscribed to playlist: {metadata['title']}"
|
|
|
|
|
|
@shared_task(bind=True, name="subscribe_to_channel")
def subscribe_to_channel(self, user_id, channel_url):
    """Subscribe a user to a YouTube channel and kick off its audio sync.

    TubeArchivist pattern: called from the API, creates the subscription and
    then triggers the audio (not video) download task.

    Args:
        self: Bound Celery task instance (unused).
        user_id: Primary key of the subscribing user.
        channel_url: A channel URL (``/channel/UC…``, ``/@handle``, ``/c/…``
            or ``/user/…``). Anything that matches none of these is handed to
            the metadata lookup unchanged.

    Returns:
        A status string: either "already subscribed" or the subscribed name.

    Raises:
        ValueError: If the metadata lookup returns nothing.
    """
    from django.contrib.auth import get_user_model
    from channel.models import Channel
    from common.src.youtube_metadata import get_channel_metadata
    import re

    subscriber = get_user_model().objects.get(id=user_id)

    url_patterns = (
        r'youtube\.com/channel/(UC[\w-]+)',
        r'youtube\.com/@([\w-]+)',
        r'youtube\.com/c/([\w-]+)',
        r'youtube\.com/user/([\w-]+)',
    )

    # First matching pattern wins; with no match the raw argument is used
    # as the lookup key (this also covers a bare "UC…" channel ID).
    for candidate in url_patterns:
        found = re.search(candidate, channel_url)
        if found:
            lookup_key = found.group(1)
            break
    else:
        lookup_key = channel_url

    # The metadata lookup resolves handles to actual channel IDs.
    metadata = get_channel_metadata(lookup_key)
    if not metadata:
        raise ValueError("Failed to fetch channel metadata")

    actual_channel_id = metadata['channel_id']

    # Nothing to do when this user already follows the resolved channel.
    already_there = Channel.objects.filter(owner=subscriber, channel_id=actual_channel_id).exists()
    if already_there:
        return f"Already subscribed to channel {actual_channel_id}"

    # These model fields share their names with the metadata keys.
    copied_fields = {
        key: metadata[key]
        for key in (
            'channel_name',
            'channel_description',
            'channel_thumbnail',
            'subscriber_count',
            'video_count',
        )
    }
    subscription = Channel.objects.create(
        owner=subscriber,
        channel_id=actual_channel_id,
        subscribed=True,
        auto_download=True,
        sync_status='pending',
        **copied_fields,
    )

    # Hand the actual downloading over to the channel sync task.
    download_channel_task.delay(subscription.id)

    return f"Subscribed to channel: {metadata['channel_name']}"
|
|
|
|
|
|
@shared_task(name="update_subscriptions")
def update_subscriptions_task():
    """Queue a sync job for every subscribed, auto-downloading playlist and channel.

    TubeArchivist pattern: periodic check of ALL subscriptions for NEW audio.
    The schedule is configured outside this function (the original note says
    every 2 hours via Celery Beat).

    Returns:
        A status string with the number of playlists and channels queued.
    """
    from playlist.models import Playlist

    # Each pair is (subscriptions to sync, task that syncs one of them);
    # playlists are handled first, then channels.
    sync_plan = (
        (Playlist.objects.filter(subscribed=True, auto_download=True), download_playlist_task),
        (Channel.objects.filter(subscribed=True, auto_download=True), download_channel_task),
    )

    totals = []
    for subscriptions, sync_task in sync_plan:
        for subscription in subscriptions:
            sync_task.delay(subscription.id)
        totals.append(subscriptions.count())

    playlist_total, channel_total = totals
    return f"Syncing {playlist_total} playlists and {channel_total} channels"
|
|
|
|
|
|
@shared_task
def download_playlist_task(playlist_id):
    """Smart sync: queue downloads only for playlist videos not yet downloaded.

    Fetches a flat listing of the playlist with yt-dlp. For every entry the
    owner already has as ``Audio`` it makes sure a ``PlaylistItem`` link
    exists; for every other entry it creates a ``DownloadQueue`` row and, if
    that row is new, schedules ``download_audio_task``. Playlist links for
    newly queued items are not created here because the ``Audio`` row does
    not exist until the download finishes.

    Args:
        playlist_id: Primary key of the ``Playlist`` row to sync (not the
            YouTube playlist ID).

    Returns:
        A status string summarising how many items were queued or skipped,
        or a failure message if the listing could not be fetched.

    Raises:
        Playlist.DoesNotExist: If no playlist row has ``playlist_id``.
        Exception: Any other error is stored on the playlist
            (``sync_status='failed'``, ``error_message``) and re-raised.
    """
    from playlist.models import Playlist, PlaylistItem

    # Fetch outside the try block so a missing row surfaces as DoesNotExist
    # instead of an unbound-variable error inside the failure handler.
    playlist = Playlist.objects.get(id=playlist_id)

    try:
        playlist.sync_status = 'syncing'
        playlist.error_message = ''
        playlist.save()

        url = f"https://www.youtube.com/playlist?list={playlist.playlist_id}"

        # Extract flat to get list quickly without downloading
        ydl_opts = {
            'quiet': True,
            'no_warnings': True,
            'extract_flat': True,
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)

        if not info or 'entries' not in info:
            playlist.sync_status = 'failed'
            playlist.error_message = 'Failed to fetch playlist items'
            playlist.save()
            return "Failed to fetch playlist items"

        # Materialise once: the entries are walked several times below.
        entries = list(info['entries'])
        all_playlist_video_ids = [e.get('id') for e in entries if e and e.get('id')]

        # Update item count (falsy placeholder entries are not counted).
        playlist.item_count = sum(1 for e in entries if e)

        # Load every already-downloaded audio of this playlist in a single
        # query instead of one query per entry. Ordering by pk and keeping
        # the first row per ID mirrors what a per-entry ``.first()`` returns.
        downloaded = {}
        for audio_obj in Audio.objects.filter(
            owner=playlist.owner,
            youtube_id__in=all_playlist_video_ids,
        ).order_by('pk'):
            downloaded.setdefault(audio_obj.youtube_id, audio_obj)

        new_videos = 0
        skipped = 0

        for idx, entry in enumerate(entries):
            if not entry:
                continue

            video_id = entry.get('id')
            if not video_id:
                continue

            audio_obj = downloaded.get(video_id)
            if audio_obj is not None:
                # Already downloaded: just make sure it is linked to this
                # playlist, using the entry index as the position.
                PlaylistItem.objects.get_or_create(
                    playlist=playlist,
                    audio=audio_obj,
                    defaults={'position': idx}
                )
                skipped += 1
                continue

            # This is NEW content - add to download queue
            queue_item, created = DownloadQueue.objects.get_or_create(
                owner=playlist.owner,
                url=f"https://www.youtube.com/watch?v={video_id}",
                defaults={
                    'youtube_id': video_id,
                    'title': entry.get('title', 'Unknown'),
                    'status': 'pending',
                    'auto_start': True
                }
            )

            # Only start a download when the row was newly created, so an
            # already queued URL is not scheduled twice.
            if created:
                new_videos += 1
                download_audio_task.delay(queue_item.id)

        # Update playlist status
        playlist.sync_status = 'success'
        playlist.last_refresh = timezone.now()
        # Count only audios from THIS playlist (matched by video ID).
        playlist.downloaded_count = Audio.objects.filter(
            owner=playlist.owner,
            youtube_id__in=all_playlist_video_ids
        ).count()
        playlist.save()

        if new_videos == 0:
            return f"Playlist '{playlist.title}' up to date ({skipped} already downloaded)"

        return f"Playlist '{playlist.title}': {new_videos} new audio(s) queued, {skipped} already downloaded"

    except Exception as e:
        # Persist the failure for the UI/API, then propagate to Celery.
        playlist.sync_status = 'failed'
        playlist.error_message = str(e)
        playlist.save()
        raise
|
|
|
|
|
|
@shared_task
def link_audio_to_playlists(audio_id, user_id):
    """Link a downloaded audio to each of the user's YouTube playlists containing it.

    Best-effort: for every ``youtube``-type playlist of the user that is not
    linked to the audio yet, the playlist listing is fetched with yt-dlp and,
    if the audio's video ID appears in it, a ``PlaylistItem`` is created and
    the playlist's ``downloaded_count`` is refreshed. Failures are logged and
    never raised.

    Args:
        audio_id: Primary key of the ``Audio`` row to link.
        user_id: Primary key of the user who owns the playlists.

    Returns:
        A status string; on an unexpected error it contains the error text
        instead of raising.
    """
    import logging

    from playlist.models import Playlist, PlaylistItem
    from django.contrib.auth import get_user_model

    logger = logging.getLogger(__name__)

    # Identical for every playlist, so build it once.
    ydl_opts = {
        'quiet': True,
        'no_warnings': True,
        'extract_flat': True,
    }

    try:
        User = get_user_model()
        user = User.objects.get(id=user_id)
        audio = Audio.objects.get(id=audio_id)

        # Get all playlists for this user
        playlists = Playlist.objects.filter(owner=user, playlist_type='youtube')

        for playlist in playlists:
            # Already linked: no need to fetch the listing again.
            if PlaylistItem.objects.filter(playlist=playlist, audio=audio).exists():
                continue

            try:
                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    playlist_info = ydl.extract_info(
                        f"https://www.youtube.com/playlist?list={playlist.playlist_id}",
                        download=False
                    )

                if not playlist_info or 'entries' not in playlist_info:
                    continue

                # Materialise once: walked here and again for the count.
                entries = list(playlist_info['entries'])

                for idx, entry in enumerate(entries):
                    if entry and entry.get('id') == audio.youtube_id:
                        # Found it: link at the entry's index.
                        PlaylistItem.objects.get_or_create(
                            playlist=playlist,
                            audio=audio,
                            defaults={'position': idx}
                        )
                        # Refresh how many of this playlist's videos the
                        # user has downloaded.
                        all_video_ids = [e.get('id') for e in entries if e and e.get('id')]
                        playlist.downloaded_count = Audio.objects.filter(
                            owner=user,
                            youtube_id__in=all_video_ids
                        ).count()
                        playlist.save(update_fields=['downloaded_count'])
                        break
            except Exception:
                # One broken playlist must not stop the others, but the
                # failure is logged instead of being silently discarded.
                logger.warning(
                    "Could not link audio %s to playlist %s",
                    audio.youtube_id,
                    playlist.playlist_id,
                    exc_info=True,
                )

        return f"Linked audio {audio.youtube_id} to playlists"

    except Exception as e:
        # Don't fail - this is a best-effort operation; log for diagnosis.
        logger.warning(
            "Linking audio %s for user %s failed",
            audio_id,
            user_id,
            exc_info=True,
        )
        return f"Failed to link audio: {str(e)}"
|
|
|
|
|
|
@shared_task
def cleanup_task():
    """Delete completed download-queue entries that finished more than 7 days ago.

    Returns:
        A status string containing the total reported by the delete call.
    """
    from datetime import timedelta

    # Everything completed before this instant is considered stale.
    one_week_ago = timezone.now() - timedelta(days=7)

    stale_entries = DownloadQueue.objects.filter(
        status='completed',
        completed_date__lt=one_week_ago,
    )
    # delete() reports (total rows removed, per-model breakdown).
    removed_total, _per_model = stale_entries.delete()

    return f"Cleaned up {removed_total} items"
|