Fix log streaming: run splits as background jobs polled via /status

2026-01-02 20:13:56 +01:00
parent 492d336ee3
commit 629f2adf58
2 changed files with 215 additions and 218 deletions
--- a/app.py
+++ b/app.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-Flask server for YouTube Concert Splitter
+Flask server for YouTube Concert Splitter with async job processing
 Downloads YouTube videos and splits them into tracks based on a setlist.
 """
 from flask import Flask, request, render_template, jsonify, Response
@@ -12,6 +12,9 @@ from pydub import AudioSegment
 from mutagen.easyid3 import EasyID3
 import json
 import sys
+import threading
+import uuid
+from datetime import datetime

 app = Flask(__name__)
 app.secret_key = os.environ.get('SECRET_KEY', 'your_secret_key_here_change_this')
@@ -20,6 +23,9 @@ app.secret_key = os.environ.get('SECRET_KEY', 'your_secret_key_here_change_this'
 DOWNLOAD_FOLDER = os.environ.get('DOWNLOAD_FOLDER', 'youtube')
 os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)

+# In-memory job registry keyed by job ID; lost on restart (in production, use Redis or a database)
+jobs = {}
+

 def sanitize(s: str) -> str:
    """Sanitize string for use as filename."""
@@ -45,19 +51,14 @@ def parse_timestamp(ts: str) -> int:
        raise ValueError(f"Cannot parse timestamp '{ts}': {e}")


-def download_youtube_audio(url: str, output_folder: str):
-    """Download YouTube video and convert to MP3 at 320kbps with enhanced anti-blocking measures."""
+def download_youtube_audio(url: str, output_folder: str, job_id: str):
+    """Download YouTube video and convert to MP3 at 320kbps."""
+    jobs[job_id]['status'] = 'downloading'
+    jobs[job_id]['progress'] = 'Downloading video from YouTube...'

-    # Check if cookies file exists
    cookies_file = 'cookies.txt'
    has_cookies = os.path.isfile(cookies_file)
-    if has_cookies:
-        print(f"✓ Using cookies from {cookies_file}", flush=True)
-    else:
-        print("⚠️  No cookies.txt found - download may fail for some videos", flush=True)
-        print("   See COOKIES_INSTRUCTIONS.txt for how to add cookies", flush=True)

-    # Enhanced options to bypass YouTube restrictions
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': f'{output_folder}/%(title)s.%(ext)s',
@@ -66,16 +67,10 @@ def download_youtube_audio(url: str, output_folder: str):
        'no_warnings': False,
        'extract_flat': False,
        'ignoreerrors': False,
-
-        # Use cookies if available
        'cookiefile': cookies_file if has_cookies else None,
-
-        # Anti-blocking measures
        'nocheckcertificate': True,
        'geo_bypass': True,
        'age_limit': None,
-
-        # Better headers to mimic a real browser
        'http_headers': {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
@@ -84,23 +79,17 @@ def download_youtube_audio(url: str, output_folder: str):
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
            'Connection': 'keep-alive',
        },
-
-        # Extractor specific arguments for YouTube
        'extractor_args': {
            'youtube': {
                'player_client': ['android', 'web'],
                'player_skip': ['webpage', 'configs'],
            }
        },
-
-        # Post-processing
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '320',
        }],
-
-        # Retry options
        'retries': 10,
        'fragment_retries': 10,
        'skip_unavailable_fragments': True,
@@ -110,12 +99,12 @@ def download_youtube_audio(url: str, output_folder: str):
        info = ydl.extract_info(url, download=True)
        base_name = ydl.prepare_filename(info)

-    # Determine the actual MP3 filename
    mp3_path = os.path.splitext(base_name)[0] + '.mp3'

    if not os.path.isfile(mp3_path):
        raise FileNotFoundError(f"Downloaded MP3 not found: {mp3_path}")

+    jobs[job_id]['progress'] = 'Download complete. Processing audio...'
    return mp3_path, info


@@ -129,7 +118,6 @@ def parse_setlist(setlist_text: str):
        if not line:
            continue

-        # Match pattern: "TIMESTAMP TITLE"
        m = re.match(r"(\d+:\d+(?::\d+)?)\s+(.+)", line)
        if not m:
            raise ValueError(f"Invalid setlist line format: '{line}'")
@@ -141,18 +129,15 @@ def parse_setlist(setlist_text: str):
        except ValueError as e:
            raise ValueError(f"Error parsing line '{line}': {e}")

-    # Sort by timestamp
    entries.sort(key=lambda x: x[0])
    return entries


-def split_audio(mp3_path: str, entries: list, album: str, artist: str, output_dir: str):
-    """
-    Split audio file into tracks based on setlist entries.
-    Skip tracks that already exist.
-    Returns list of track info with status (created or skipped).
-    """
-    print(f"Loading audio file: {mp3_path}", flush=True)
+def split_audio(mp3_path: str, entries: list, album: str, artist: str, output_dir: str, job_id: str):
+    """Split audio file into tracks based on setlist entries."""
+    jobs[job_id]['status'] = 'splitting'
+    jobs[job_id]['progress'] = 'Loading audio file...'
+
    audio = AudioSegment.from_file(mp3_path)
    total_ms = len(audio)

@@ -161,31 +146,20 @@ def split_audio(mp3_path: str, entries: list, album: str, artist: str, output_di
    skipped_count = 0

    for idx, (start_ms, title) in enumerate(entries, start=1):
-        # Determine end time
-        end_ms = entries[idx][0] if idx < len(entries) else total_ms
+        jobs[job_id]['progress'] = f'Processing track {idx}/{len(entries)}: {title}'

-        # Create filename
+        end_ms = entries[idx][0] if idx < len(entries) else total_ms
        filename = f"{idx:02d} - {sanitize(title)}.mp3"
        filepath = os.path.join(output_dir, filename)

-        # Check if file already exists
        if os.path.isfile(filepath):
-            print(f"Skipping track {idx}/{len(entries)}: {filename} (already exists)", flush=True)
-            track_results.append({
-                'filename': filename,
-                'status': 'skipped'
-            })
+            track_results.append({'filename': filename, 'status': 'skipped'})
            skipped_count += 1
            continue

-        # Extract and export segment
-        print(f"Creating track {idx}/{len(entries)}: {filename}", flush=True)
        segment = audio[start_ms:end_ms]
-
-        # Export with 320kbps
        segment.export(filepath, format='mp3', bitrate='320k')

-        # Add ID3 tags
        try:
            tags = EasyID3(filepath)
            tags['title'] = title
@@ -196,10 +170,7 @@ def split_audio(mp3_path: str, entries: list, album: str, artist: str, output_di
        except Exception as e:
            print(f"Warning: Could not add ID3 tags to {filename}: {e}", flush=True)

-        track_results.append({
-            'filename': filename,
-            'status': 'created'
-        })
+        track_results.append({'filename': filename, 'status': 'created'})
        created_count += 1

    return track_results, created_count, skipped_count
@@ -209,96 +180,40 @@ def set_permissions(directory: str):
    """Set directory permissions to 775 recursively."""
    try:
        subprocess.run(['chmod', '-R', '775', directory], check=True)
-        print(f"Set permissions 775 on {directory}", flush=True)
-    except subprocess.CalledProcessError as e:
-        print(f"Warning: Could not set permissions: {e}", flush=True)
-    except FileNotFoundError:
-        print("Warning: chmod command not found (might be on Windows)", flush=True)
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        pass


-@app.route('/')
-def index():
-    """Render the main page."""
-    return render_template('index.html')
-
-
-@app.route('/split', methods=['POST'])
-def split_concert():
-    """Handle the split request."""
-    # Force flush stdout immediately
-    sys.stdout.flush()
-
+def process_job(job_id: str, url: str, artist: str, album: str, setlist_text: str):
+    """Background job processor."""
    try:
-        # Get form data
-        url = request.form.get('youtube_url', '').strip()
-        artist = request.form.get('artist', '').strip()
-        album = request.form.get('album', '').strip()
-        setlist_text = request.form.get('setlist', '').strip()
-
-        print(f"=== Processing Request ===", flush=True)
-        print(f"URL: {url}", flush=True)
-        print(f"Artist: {artist}", flush=True)
-        print(f"Album: {album}", flush=True)
-
-        # Validate required inputs
-        if not url:
-            return jsonify({'error': 'No YouTube URL provided'}), 400
-
-        if not artist:
-            return jsonify({'error': 'No artist name provided'}), 400
-
-        if not album:
-            return jsonify({'error': 'No album name provided'}), 400
-
-        # Sanitize album for directory name
        album_sanitized = sanitize(album)
-
-        # Create output directory
        output_dir = os.path.join(DOWNLOAD_FOLDER, album_sanitized)

        if not os.path.exists(output_dir):
            os.makedirs(output_dir, exist_ok=True)
-            print(f"✓ Created album directory: {output_dir}", flush=True)
-        else:
-            print(f"✓ Album directory already exists: {output_dir}", flush=True)

        # Download audio
-        print(f"Downloading audio from: {url}", flush=True)
-        sys.stdout.flush()
+        mp3_path, info = download_youtube_audio(url, DOWNLOAD_FOLDER, job_id)

-        mp3_path, info = download_youtube_audio(url, DOWNLOAD_FOLDER)
-
-        print(f"✓ Download complete: {mp3_path}", flush=True)
-        sys.stdout.flush()
-
-        # DECISION POINT: Empty setlist = Single song mode
+        # Single song mode or split mode
        if not setlist_text:
-            print("📀 Single song mode: No setlist provided", flush=True)
+            jobs[job_id]['status'] = 'processing'
+            jobs[job_id]['progress'] = 'Creating single track...'

-            # Use video title as track name
            track_title = info.get('title', 'Unknown Track')
            filename = f"01 - {sanitize(track_title)}.mp3"
            filepath = os.path.join(output_dir, filename)

-            # Check if file already exists
            if os.path.isfile(filepath):
-                print(f"⊘ Track already exists: {filename}", flush=True)
                os.remove(mp3_path)
-
-                track_results = [{
-                    'filename': filename,
-                    'status': 'skipped'
-                }]
+                track_results = [{'filename': filename, 'status': 'skipped'}]
                created_count = 0
                skipped_count = 1
            else:
-                print(f"✓ Creating single track: {filename}", flush=True)
-
-                # Load audio to re-export with proper tags
                audio = AudioSegment.from_file(mp3_path)
                audio.export(filepath, format='mp3', bitrate='320k')

-                # Add ID3 tags
                try:
                    tags = EasyID3(filepath)
                    tags['title'] = track_title
@@ -306,56 +221,31 @@ def split_concert():
                    tags['artist'] = artist
                    tags['tracknumber'] = '1'
                    tags.save()
-                    print(f"✓ Added ID3 tags", flush=True)
                except Exception as e:
-                    print(f"⚠️  Warning: Could not add ID3 tags: {e}", flush=True)
+                    print(f"Warning: Could not add ID3 tags: {e}", flush=True)

-                # Clean up original
                os.remove(mp3_path)
-
-                track_results = [{
-                    'filename': filename,
-                    'status': 'created'
-                }]
+                track_results = [{'filename': filename, 'status': 'created'}]
                created_count = 1
                skipped_count = 0
-
        else:
-            # SPLIT MODE
-            print(f"✂️  Split mode: Processing setlist with {len(setlist_text.splitlines())} lines", flush=True)
-
-            # Parse setlist
-            try:
-                entries = parse_setlist(setlist_text)
-            except ValueError as e:
-                os.remove(mp3_path)
-                return jsonify({'error': f'Setlist parsing error: {str(e)}'}), 400
-
+            # Split mode
+            entries = parse_setlist(setlist_text)
            if not entries:
-                os.remove(mp3_path)
-                return jsonify({'error': 'No valid tracks found in setlist'}), 400
-
-            # Split audio
-            print(f"Splitting into {len(entries)} tracks...", flush=True)
-            sys.stdout.flush()
+                raise ValueError('No valid tracks found in setlist')

            track_results, created_count, skipped_count = split_audio(
-                mp3_path, entries, album, artist, output_dir
+                mp3_path, entries, album, artist, output_dir, job_id
            )
-
-            # Clean up original
-            print(f"Removing original file: {mp3_path}", flush=True)
            os.remove(mp3_path)

        # Set permissions
        set_permissions(output_dir)

-        print(f"✓ Processing complete!", flush=True)
-        print(f"  Created: {created_count}, Skipped: {skipped_count}", flush=True)
-        sys.stdout.flush()
-
-        # Build response
-        response_data = {
+        # Update job with results
+        jobs[job_id]['status'] = 'completed'
+        jobs[job_id]['progress'] = 'Processing complete!'
+        jobs[job_id]['result'] = {
            'success': True,
            'album': album,
            'artist': artist,
@@ -366,32 +256,89 @@ def split_concert():
            'output_dir': output_dir
        }

-        print(f"Sending response: {json.dumps(response_data, indent=2)}", flush=True)
-
-        # Return JSON response with explicit content type
-        return Response(
-            json.dumps(response_data),
-            status=200,
-            mimetype='application/json'
-        )
-
    except Exception as e:
-        print(f"❌ Error: {str(e)}", flush=True)
+        print(f"Job {job_id} failed: {str(e)}", flush=True)
        import traceback
        traceback.print_exc()
-        sys.stdout.flush()

-        # Return error as JSON
-        error_response = {'error': str(e)}
-        return Response(
-            json.dumps(error_response),
-            status=500,
-            mimetype='application/json'
+        jobs[job_id]['status'] = 'failed'
+        jobs[job_id]['error'] = str(e)
+
+
+@app.route('/')
+def index():
+    """Render the main page."""
+    return render_template('index.html')
+
+
+@app.route('/split', methods=['POST'])
+def split_concert():
+    """Validate the form, queue a background split job, and return its job ID without waiting for it."""
+    try:
+        url = request.form.get('youtube_url', '').strip()
+        artist = request.form.get('artist', '').strip()
+        album = request.form.get('album', '').strip()
+        setlist_text = request.form.get('setlist', '').strip()  # may be empty; not validated below
+
+        if not url:
+            return jsonify({'error': 'No YouTube URL provided'}), 400
+        if not artist:
+            return jsonify({'error': 'No artist name provided'}), 400
+        if not album:
+            return jsonify({'error': 'No album name provided'}), 400
+
+        # Register the job in the module-level jobs dict under a random UUID so /status can look it up
+        job_id = str(uuid.uuid4())
+        jobs[job_id] = {
+            'status': 'queued',
+            'progress': 'Job queued...',
+            'created_at': datetime.now().isoformat(),
+            'url': url,
+            'artist': artist,
+            'album': album
+        }
+
+        # Run process_job on its own thread so this request can return right away
+        thread = threading.Thread(
+            target=process_job,
+            args=(job_id, url, artist, album, setlist_text)
         )
+        thread.daemon = True  # daemon thread: does not keep the interpreter alive at shutdown
+        thread.start()
+
+        print(f"✓ Job {job_id} created and started", flush=True)
+
+        return jsonify({
+            'success': True,
+            'job_id': job_id
+        })
+
+    except Exception as e:  # boundary handler: report any unexpected failure as a JSON 500
+        print(f"Error creating job: {str(e)}", flush=True)
+        return jsonify({'error': str(e)}), 500
+
+
+@app.route('/status/<job_id>', methods=['GET'])
+def get_status(job_id):
+    """Return a job's status and progress as JSON, plus its result or error once finished; 404 if unknown."""
+    if job_id not in jobs:
+        return jsonify({'error': 'Job not found'}), 404
+
+    job = jobs[job_id]
+    response = {
+        'status': job['status'],
+        'progress': job.get('progress', '')
+    }
+
+    if job['status'] == 'completed':  # 'result' is only exposed once the job has completed
+        response['result'] = job.get('result', {})
+    elif job['status'] == 'failed':  # 'error' is only exposed for failed jobs
+        response['error'] = job.get('error', 'Unknown error')
+
+    return jsonify(response)


 if __name__ == '__main__':
-    # Get configuration from environment variables
    host = os.environ.get('FLASK_HOST', '0.0.0.0')
    port = int(os.environ.get('FLASK_PORT', 5000))
    debug = os.environ.get('FLASK_DEBUG', 'False').lower() == 'true'
@@ -399,5 +346,4 @@ if __name__ == '__main__':
    print(f"Starting YouTube Concert Splitter on {host}:{port}", flush=True)
    print(f"Music directory: {DOWNLOAD_FOLDER}", flush=True)

-    # Run with threaded=True for better handling of long requests
    app.run(host="0.0.0.0", port=port, debug=debug, threaded=True)