diff --git a/app.py b/app.py index be63c51..dd93c3e 100644 --- a/app.py +++ b/app.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Flask server for YouTube Concert Splitter +Flask server for YouTube Concert Splitter with async job processing Downloads YouTube videos and splits them into tracks based on a setlist. """ from flask import Flask, request, render_template, jsonify, Response @@ -12,6 +12,9 @@ from pydub import AudioSegment from mutagen.easyid3 import EasyID3 import json import sys +import threading +import uuid +from datetime import datetime app = Flask(__name__) app.secret_key = os.environ.get('SECRET_KEY', 'your_secret_key_here_change_this') @@ -20,6 +23,9 @@ app.secret_key = os.environ.get('SECRET_KEY', 'your_secret_key_here_change_this' DOWNLOAD_FOLDER = os.environ.get('DOWNLOAD_FOLDER', 'youtube') os.makedirs(DOWNLOAD_FOLDER, exist_ok=True) +# Job storage (in production, use Redis or database) +jobs = {} + def sanitize(s: str) -> str: """Sanitize string for use as filename.""" @@ -45,19 +51,14 @@ def parse_timestamp(ts: str) -> int: raise ValueError(f"Cannot parse timestamp '{ts}': {e}") -def download_youtube_audio(url: str, output_folder: str): - """Download YouTube video and convert to MP3 at 320kbps with enhanced anti-blocking measures.""" +def download_youtube_audio(url: str, output_folder: str, job_id: str): + """Download YouTube video and convert to MP3 at 320kbps.""" + jobs[job_id]['status'] = 'downloading' + jobs[job_id]['progress'] = 'Downloading video from YouTube...' - # Check if cookies file exists cookies_file = 'cookies.txt' has_cookies = os.path.isfile(cookies_file) - if has_cookies: - print(f"✓ Using cookies from {cookies_file}", flush=True) - else: - print("⚠️ No cookies.txt found - download may fail for some videos", flush=True) - print(" See COOKIES_INSTRUCTIONS.txt for how to add cookies", flush=True) - # Enhanced options to bypass YouTube restrictions ydl_opts = { 'format': 'bestaudio/best', 'outtmpl': f'{output_folder}/%(title)s.%(ext)s', @@ -66,16 +67,10 @@ def download_youtube_audio(url: str, output_folder: str): 'no_warnings': False, 'extract_flat': False, 'ignoreerrors': False, - - # Use cookies if available 'cookiefile': cookies_file if has_cookies else None, - - # Anti-blocking measures 'nocheckcertificate': True, 'geo_bypass': True, 'age_limit': None, - - # Better headers to mimic a real browser 'http_headers': { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', @@ -84,23 +79,17 @@ def download_youtube_audio(url: str, output_folder: str): 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Connection': 'keep-alive', }, - - # Extractor specific arguments for YouTube 'extractor_args': { 'youtube': { 'player_client': ['android', 'web'], 'player_skip': ['webpage', 'configs'], } }, - - # Post-processing 'postprocessors': [{ 'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3', 'preferredquality': '320', }], - - # Retry options 'retries': 10, 'fragment_retries': 10, 'skip_unavailable_fragments': True, @@ -110,12 +99,12 @@ def download_youtube_audio(url: str, output_folder: str): info = ydl.extract_info(url, download=True) base_name = ydl.prepare_filename(info) - # Determine the actual MP3 filename mp3_path = os.path.splitext(base_name)[0] + '.mp3' if not os.path.isfile(mp3_path): raise FileNotFoundError(f"Downloaded MP3 not found: {mp3_path}") + jobs[job_id]['progress'] = 'Download complete. Processing audio...' return mp3_path, info @@ -129,7 +118,6 @@ def parse_setlist(setlist_text: str): if not line: continue - # Match pattern: "TIMESTAMP TITLE" m = re.match(r"(\d+:\d+(?::\d+)?)\s+(.+)", line) if not m: raise ValueError(f"Invalid setlist line format: '{line}'") @@ -141,18 +129,15 @@ def parse_setlist(setlist_text: str): except ValueError as e: raise ValueError(f"Error parsing line '{line}': {e}") - # Sort by timestamp entries.sort(key=lambda x: x[0]) return entries -def split_audio(mp3_path: str, entries: list, album: str, artist: str, output_dir: str): - """ - Split audio file into tracks based on setlist entries. - Skip tracks that already exist. - Returns list of track info with status (created or skipped). - """ - print(f"Loading audio file: {mp3_path}", flush=True) +def split_audio(mp3_path: str, entries: list, album: str, artist: str, output_dir: str, job_id: str): + """Split audio file into tracks based on setlist entries.""" + jobs[job_id]['status'] = 'splitting' + jobs[job_id]['progress'] = 'Loading audio file...' + audio = AudioSegment.from_file(mp3_path) total_ms = len(audio) @@ -161,31 +146,20 @@ def split_audio(mp3_path: str, entries: list, album: str, artist: str, output_di skipped_count = 0 for idx, (start_ms, title) in enumerate(entries, start=1): - # Determine end time - end_ms = entries[idx][0] if idx < len(entries) else total_ms + jobs[job_id]['progress'] = f'Processing track {idx}/{len(entries)}: {title}' - # Create filename + end_ms = entries[idx][0] if idx < len(entries) else total_ms filename = f"{idx:02d} - {sanitize(title)}.mp3" filepath = os.path.join(output_dir, filename) - # Check if file already exists if os.path.isfile(filepath): - print(f"Skipping track {idx}/{len(entries)}: {filename} (already exists)", flush=True) - track_results.append({ - 'filename': filename, - 'status': 'skipped' - }) + track_results.append({'filename': filename, 'status': 'skipped'}) skipped_count += 1 continue - # Extract and export segment - print(f"Creating track {idx}/{len(entries)}: {filename}", flush=True) segment = audio[start_ms:end_ms] - - # Export with 320kbps segment.export(filepath, format='mp3', bitrate='320k') - # Add ID3 tags try: tags = EasyID3(filepath) tags['title'] = title @@ -196,10 +170,7 @@ def split_audio(mp3_path: str, entries: list, album: str, artist: str, output_di except Exception as e: print(f"Warning: Could not add ID3 tags to {filename}: {e}", flush=True) - track_results.append({ - 'filename': filename, - 'status': 'created' - }) + track_results.append({'filename': filename, 'status': 'created'}) created_count += 1 return track_results, created_count, skipped_count @@ -209,96 +180,40 @@ def set_permissions(directory: str): """Set directory permissions to 775 recursively.""" try: subprocess.run(['chmod', '-R', '775', directory], check=True) - print(f"Set permissions 775 on {directory}", flush=True) - except subprocess.CalledProcessError as e: - print(f"Warning: Could not set permissions: {e}", flush=True) - except FileNotFoundError: - print("Warning: chmod command not found (might be on Windows)", flush=True) + except (subprocess.CalledProcessError, FileNotFoundError): + pass -@app.route('/') -def index(): - """Render the main page.""" - return render_template('index.html') - - -@app.route('/split', methods=['POST']) -def split_concert(): - """Handle the split request.""" - # Force flush stdout immediately - sys.stdout.flush() - +def process_job(job_id: str, url: str, artist: str, album: str, setlist_text: str): + """Background job processor.""" try: - # Get form data - url = request.form.get('youtube_url', '').strip() - artist = request.form.get('artist', '').strip() - album = request.form.get('album', '').strip() - setlist_text = request.form.get('setlist', '').strip() - - print(f"=== Processing Request ===", flush=True) - print(f"URL: {url}", flush=True) - print(f"Artist: {artist}", flush=True) - print(f"Album: {album}", flush=True) - - # Validate required inputs - if not url: - return jsonify({'error': 'No YouTube URL provided'}), 400 - - if not artist: - return jsonify({'error': 'No artist name provided'}), 400 - - if not album: - return jsonify({'error': 'No album name provided'}), 400 - - # Sanitize album for directory name album_sanitized = sanitize(album) - - # Create output directory output_dir = os.path.join(DOWNLOAD_FOLDER, album_sanitized) if not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True) - print(f"✓ Created album directory: {output_dir}", flush=True) - else: - print(f"✓ Album directory already exists: {output_dir}", flush=True) # Download audio - print(f"Downloading audio from: {url}", flush=True) - sys.stdout.flush() + mp3_path, info = download_youtube_audio(url, DOWNLOAD_FOLDER, job_id) - mp3_path, info = download_youtube_audio(url, DOWNLOAD_FOLDER) - - print(f"✓ Download complete: {mp3_path}", flush=True) - sys.stdout.flush() - - # DECISION POINT: Empty setlist = Single song mode + # Single song mode or split mode if not setlist_text: - print("📀 Single song mode: No setlist provided", flush=True) + jobs[job_id]['status'] = 'processing' + jobs[job_id]['progress'] = 'Creating single track...' - # Use video title as track name track_title = info.get('title', 'Unknown Track') filename = f"01 - {sanitize(track_title)}.mp3" filepath = os.path.join(output_dir, filename) - # Check if file already exists if os.path.isfile(filepath): - print(f"⊘ Track already exists: {filename}", flush=True) os.remove(mp3_path) - - track_results = [{ - 'filename': filename, - 'status': 'skipped' - }] + track_results = [{'filename': filename, 'status': 'skipped'}] created_count = 0 skipped_count = 1 else: - print(f"✓ Creating single track: {filename}", flush=True) - - # Load audio to re-export with proper tags audio = AudioSegment.from_file(mp3_path) audio.export(filepath, format='mp3', bitrate='320k') - # Add ID3 tags try: tags = EasyID3(filepath) tags['title'] = track_title @@ -306,56 +221,31 @@ def split_concert(): tags['artist'] = artist tags['tracknumber'] = '1' tags.save() - print(f"✓ Added ID3 tags", flush=True) except Exception as e: - print(f"⚠️ Warning: Could not add ID3 tags: {e}", flush=True) + print(f"Warning: Could not add ID3 tags: {e}", flush=True) - # Clean up original os.remove(mp3_path) - - track_results = [{ - 'filename': filename, - 'status': 'created' - }] + track_results = [{'filename': filename, 'status': 'created'}] created_count = 1 skipped_count = 0 - else: - # SPLIT MODE - print(f"✂️ Split mode: Processing setlist with {len(setlist_text.splitlines())} lines", flush=True) - - # Parse setlist - try: - entries = parse_setlist(setlist_text) - except ValueError as e: - os.remove(mp3_path) - return jsonify({'error': f'Setlist parsing error: {str(e)}'}), 400 - + # Split mode + entries = parse_setlist(setlist_text) if not entries: - os.remove(mp3_path) - return jsonify({'error': 'No valid tracks found in setlist'}), 400 - - # Split audio - print(f"Splitting into {len(entries)} tracks...", flush=True) - sys.stdout.flush() + raise ValueError('No valid tracks found in setlist') track_results, created_count, skipped_count = split_audio( - mp3_path, entries, album, artist, output_dir + mp3_path, entries, album, artist, output_dir, job_id ) - - # Clean up original - print(f"Removing original file: {mp3_path}", flush=True) os.remove(mp3_path) # Set permissions set_permissions(output_dir) - print(f"✓ Processing complete!", flush=True) - print(f" Created: {created_count}, Skipped: {skipped_count}", flush=True) - sys.stdout.flush() - - # Build response - response_data = { + # Update job with results + jobs[job_id]['status'] = 'completed' + jobs[job_id]['progress'] = 'Processing complete!' + jobs[job_id]['result'] = { 'success': True, 'album': album, 'artist': artist, @@ -366,32 +256,89 @@ def split_concert(): 'output_dir': output_dir } - print(f"Sending response: {json.dumps(response_data, indent=2)}", flush=True) - - # Return JSON response with explicit content type - return Response( - json.dumps(response_data), - status=200, - mimetype='application/json' - ) - except Exception as e: - print(f"❌ Error: {str(e)}", flush=True) + print(f"Job {job_id} failed: {str(e)}", flush=True) import traceback traceback.print_exc() - sys.stdout.flush() - # Return error as JSON - error_response = {'error': str(e)} - return Response( - json.dumps(error_response), - status=500, - mimetype='application/json' + jobs[job_id]['status'] = 'failed' + jobs[job_id]['error'] = str(e) + + +@app.route('/') +def index(): + """Render the main page.""" + return render_template('index.html') + + +@app.route('/split', methods=['POST']) +def split_concert(): + """Submit a split job and return job ID immediately.""" + try: + url = request.form.get('youtube_url', '').strip() + artist = request.form.get('artist', '').strip() + album = request.form.get('album', '').strip() + setlist_text = request.form.get('setlist', '').strip() + + if not url: + return jsonify({'error': 'No YouTube URL provided'}), 400 + if not artist: + return jsonify({'error': 'No artist name provided'}), 400 + if not album: + return jsonify({'error': 'No album name provided'}), 400 + + # Create job + job_id = str(uuid.uuid4()) + jobs[job_id] = { + 'status': 'queued', + 'progress': 'Job queued...', + 'created_at': datetime.now().isoformat(), + 'url': url, + 'artist': artist, + 'album': album + } + + # Start background thread + thread = threading.Thread( + target=process_job, + args=(job_id, url, artist, album, setlist_text) ) + thread.daemon = True + thread.start() + + print(f"✓ Job {job_id} created and started", flush=True) + + return jsonify({ + 'success': True, + 'job_id': job_id + }) + + except Exception as e: + print(f"Error creating job: {str(e)}", flush=True) + return jsonify({'error': str(e)}), 500 + + +@app.route('/status/', methods=['GET']) +def get_status(job_id): + """Get job status and results.""" + if job_id not in jobs: + return jsonify({'error': 'Job not found'}), 404 + + job = jobs[job_id] + response = { + 'status': job['status'], + 'progress': job.get('progress', '') + } + + if job['status'] == 'completed': + response['result'] = job.get('result', {}) + elif job['status'] == 'failed': + response['error'] = job.get('error', 'Unknown error') + + return jsonify(response) if __name__ == '__main__': - # Get configuration from environment variables host = os.environ.get('FLASK_HOST', '0.0.0.0') port = int(os.environ.get('FLASK_PORT', 5000)) debug = os.environ.get('FLASK_DEBUG', 'False').lower() == 'true' @@ -399,5 +346,4 @@ if __name__ == '__main__': print(f"Starting YouTube Concert Splitter on {host}:{port}", flush=True) print(f"Music directory: {DOWNLOAD_FOLDER}", flush=True) - # Run with threaded=True for better handling of long requests app.run(host="0.0.0.0", port=port, debug=debug, threaded=True) \ No newline at end of file diff --git a/templates/index.html b/templates/index.html index 2c682cf..569d449 100644 --- a/templates/index.html +++ b/templates/index.html @@ -131,6 +131,17 @@ .loading-text { color: #666; font-size: 14px; + margin-bottom: 10px; + } + + .progress-text { + color: #667eea; + font-size: 13px; + font-weight: 600; + margin-top: 15px; + padding: 10px; + background: #f0f3ff; + border-radius: 6px; } .results { @@ -282,8 +293,8 @@
-
Processing... This may take a few minutes.
-
Please be patient, downloading and converting can take time.
+
Processing your request...
+
Initializing...
@@ -323,6 +334,76 @@