fix streaming of the logs

This commit is contained in:
2026-01-02 20:13:56 +01:00
parent 492d336ee3
commit 629f2adf58
2 changed files with 215 additions and 218 deletions

284
app.py
View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3
"""
Flask server for YouTube Concert Splitter
Flask server for YouTube Concert Splitter with async job processing
Downloads YouTube videos and splits them into tracks based on a setlist.
"""
from flask import Flask, request, render_template, jsonify, Response
@@ -12,6 +12,9 @@ from pydub import AudioSegment
from mutagen.easyid3 import EasyID3
import json
import sys
import threading
import uuid
from datetime import datetime
app = Flask(__name__)
app.secret_key = os.environ.get('SECRET_KEY', 'your_secret_key_here_change_this')
@@ -20,6 +23,9 @@ app.secret_key = os.environ.get('SECRET_KEY', 'your_secret_key_here_change_this'
DOWNLOAD_FOLDER = os.environ.get('DOWNLOAD_FOLDER', 'youtube')
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
# Job storage (in production, use Redis or a database)
jobs = {}
def sanitize(s: str) -> str:
"""Sanitize string for use as filename."""
@@ -45,19 +51,14 @@ def parse_timestamp(ts: str) -> int:
raise ValueError(f"Cannot parse timestamp '{ts}': {e}")
def download_youtube_audio(url: str, output_folder: str):
"""Download YouTube video and convert to MP3 at 320kbps with enhanced anti-blocking measures."""
def download_youtube_audio(url: str, output_folder: str, job_id: str):
"""Download YouTube video and convert to MP3 at 320kbps."""
jobs[job_id]['status'] = 'downloading'
jobs[job_id]['progress'] = 'Downloading video from YouTube...'
# Check if cookies file exists
cookies_file = 'cookies.txt'
has_cookies = os.path.isfile(cookies_file)
if has_cookies:
print(f"✓ Using cookies from {cookies_file}", flush=True)
else:
print("⚠️ No cookies.txt found - download may fail for some videos", flush=True)
print(" See COOKIES_INSTRUCTIONS.txt for how to add cookies", flush=True)
# Enhanced options to bypass YouTube restrictions
ydl_opts = {
'format': 'bestaudio/best',
'outtmpl': f'{output_folder}/%(title)s.%(ext)s',
@@ -66,16 +67,10 @@ def download_youtube_audio(url: str, output_folder: str):
'no_warnings': False,
'extract_flat': False,
'ignoreerrors': False,
# Use cookies if available
'cookiefile': cookies_file if has_cookies else None,
# Anti-blocking measures
'nocheckcertificate': True,
'geo_bypass': True,
'age_limit': None,
# Better headers to mimic a real browser
'http_headers': {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
@@ -84,23 +79,17 @@ def download_youtube_audio(url: str, output_folder: str):
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Connection': 'keep-alive',
},
# Extractor specific arguments for YouTube
'extractor_args': {
'youtube': {
'player_client': ['android', 'web'],
'player_skip': ['webpage', 'configs'],
}
},
# Post-processing
'postprocessors': [{
'key': 'FFmpegExtractAudio',
'preferredcodec': 'mp3',
'preferredquality': '320',
}],
# Retry options
'retries': 10,
'fragment_retries': 10,
'skip_unavailable_fragments': True,
@@ -110,12 +99,12 @@ def download_youtube_audio(url: str, output_folder: str):
info = ydl.extract_info(url, download=True)
base_name = ydl.prepare_filename(info)
# Determine the actual MP3 filename
mp3_path = os.path.splitext(base_name)[0] + '.mp3'
if not os.path.isfile(mp3_path):
raise FileNotFoundError(f"Downloaded MP3 not found: {mp3_path}")
jobs[job_id]['progress'] = 'Download complete. Processing audio...'
return mp3_path, info
@@ -129,7 +118,6 @@ def parse_setlist(setlist_text: str):
if not line:
continue
# Match pattern: "TIMESTAMP TITLE"
m = re.match(r"(\d+:\d+(?::\d+)?)\s+(.+)", line)
if not m:
raise ValueError(f"Invalid setlist line format: '{line}'")
@@ -141,18 +129,15 @@ def parse_setlist(setlist_text: str):
except ValueError as e:
raise ValueError(f"Error parsing line '{line}': {e}")
# Sort by timestamp
entries.sort(key=lambda x: x[0])
return entries
def split_audio(mp3_path: str, entries: list, album: str, artist: str, output_dir: str):
"""
Split audio file into tracks based on setlist entries.
Skip tracks that already exist.
Returns list of track info with status (created or skipped).
"""
print(f"Loading audio file: {mp3_path}", flush=True)
def split_audio(mp3_path: str, entries: list, album: str, artist: str, output_dir: str, job_id: str):
"""Split audio file into tracks based on setlist entries."""
jobs[job_id]['status'] = 'splitting'
jobs[job_id]['progress'] = 'Loading audio file...'
audio = AudioSegment.from_file(mp3_path)
total_ms = len(audio)
@@ -161,31 +146,20 @@ def split_audio(mp3_path: str, entries: list, album: str, artist: str, output_di
skipped_count = 0
for idx, (start_ms, title) in enumerate(entries, start=1):
# Determine end time
end_ms = entries[idx][0] if idx < len(entries) else total_ms
jobs[job_id]['progress'] = f'Processing track {idx}/{len(entries)}: {title}'
# Create filename
end_ms = entries[idx][0] if idx < len(entries) else total_ms
filename = f"{idx:02d} - {sanitize(title)}.mp3"
filepath = os.path.join(output_dir, filename)
# Check if file already exists
if os.path.isfile(filepath):
print(f"Skipping track {idx}/{len(entries)}: {filename} (already exists)", flush=True)
track_results.append({
'filename': filename,
'status': 'skipped'
})
track_results.append({'filename': filename, 'status': 'skipped'})
skipped_count += 1
continue
# Extract and export segment
print(f"Creating track {idx}/{len(entries)}: {filename}", flush=True)
segment = audio[start_ms:end_ms]
# Export with 320kbps
segment.export(filepath, format='mp3', bitrate='320k')
# Add ID3 tags
try:
tags = EasyID3(filepath)
tags['title'] = title
@@ -196,10 +170,7 @@ def split_audio(mp3_path: str, entries: list, album: str, artist: str, output_di
except Exception as e:
print(f"Warning: Could not add ID3 tags to {filename}: {e}", flush=True)
track_results.append({
'filename': filename,
'status': 'created'
})
track_results.append({'filename': filename, 'status': 'created'})
created_count += 1
return track_results, created_count, skipped_count
@@ -209,96 +180,40 @@ def set_permissions(directory: str):
"""Set directory permissions to 775 recursively."""
try:
subprocess.run(['chmod', '-R', '775', directory], check=True)
print(f"Set permissions 775 on {directory}", flush=True)
except subprocess.CalledProcessError as e:
print(f"Warning: Could not set permissions: {e}", flush=True)
except FileNotFoundError:
print("Warning: chmod command not found (might be on Windows)", flush=True)
except (subprocess.CalledProcessError, FileNotFoundError):
pass
@app.route('/')
def index():
"""Render the main page."""
return render_template('index.html')
@app.route('/split', methods=['POST'])
def split_concert():
"""Handle the split request."""
# Force flush stdout immediately
sys.stdout.flush()
def process_job(job_id: str, url: str, artist: str, album: str, setlist_text: str):
"""Background job processor."""
try:
# Get form data
url = request.form.get('youtube_url', '').strip()
artist = request.form.get('artist', '').strip()
album = request.form.get('album', '').strip()
setlist_text = request.form.get('setlist', '').strip()
print(f"=== Processing Request ===", flush=True)
print(f"URL: {url}", flush=True)
print(f"Artist: {artist}", flush=True)
print(f"Album: {album}", flush=True)
# Validate required inputs
if not url:
return jsonify({'error': 'No YouTube URL provided'}), 400
if not artist:
return jsonify({'error': 'No artist name provided'}), 400
if not album:
return jsonify({'error': 'No album name provided'}), 400
# Sanitize album for directory name
album_sanitized = sanitize(album)
# Create output directory
output_dir = os.path.join(DOWNLOAD_FOLDER, album_sanitized)
if not os.path.exists(output_dir):
os.makedirs(output_dir, exist_ok=True)
print(f"✓ Created album directory: {output_dir}", flush=True)
else:
print(f"✓ Album directory already exists: {output_dir}", flush=True)
# Download audio
print(f"Downloading audio from: {url}", flush=True)
sys.stdout.flush()
mp3_path, info = download_youtube_audio(url, DOWNLOAD_FOLDER, job_id)
mp3_path, info = download_youtube_audio(url, DOWNLOAD_FOLDER)
print(f"✓ Download complete: {mp3_path}", flush=True)
sys.stdout.flush()
# DECISION POINT: Empty setlist = Single song mode
# Single song mode or split mode
if not setlist_text:
print("📀 Single song mode: No setlist provided", flush=True)
jobs[job_id]['status'] = 'processing'
jobs[job_id]['progress'] = 'Creating single track...'
# Use video title as track name
track_title = info.get('title', 'Unknown Track')
filename = f"01 - {sanitize(track_title)}.mp3"
filepath = os.path.join(output_dir, filename)
# Check if file already exists
if os.path.isfile(filepath):
print(f"⊘ Track already exists: {filename}", flush=True)
os.remove(mp3_path)
track_results = [{
'filename': filename,
'status': 'skipped'
}]
track_results = [{'filename': filename, 'status': 'skipped'}]
created_count = 0
skipped_count = 1
else:
print(f"✓ Creating single track: {filename}", flush=True)
# Load audio to re-export with proper tags
audio = AudioSegment.from_file(mp3_path)
audio.export(filepath, format='mp3', bitrate='320k')
# Add ID3 tags
try:
tags = EasyID3(filepath)
tags['title'] = track_title
@@ -306,56 +221,31 @@ def split_concert():
tags['artist'] = artist
tags['tracknumber'] = '1'
tags.save()
print(f"✓ Added ID3 tags", flush=True)
except Exception as e:
print(f"⚠️ Warning: Could not add ID3 tags: {e}", flush=True)
print(f"Warning: Could not add ID3 tags: {e}", flush=True)
# Clean up original
os.remove(mp3_path)
track_results = [{
'filename': filename,
'status': 'created'
}]
track_results = [{'filename': filename, 'status': 'created'}]
created_count = 1
skipped_count = 0
else:
# SPLIT MODE
print(f"✂️ Split mode: Processing setlist with {len(setlist_text.splitlines())} lines", flush=True)
# Parse setlist
try:
entries = parse_setlist(setlist_text)
except ValueError as e:
os.remove(mp3_path)
return jsonify({'error': f'Setlist parsing error: {str(e)}'}), 400
# Split mode
entries = parse_setlist(setlist_text)
if not entries:
os.remove(mp3_path)
return jsonify({'error': 'No valid tracks found in setlist'}), 400
# Split audio
print(f"Splitting into {len(entries)} tracks...", flush=True)
sys.stdout.flush()
raise ValueError('No valid tracks found in setlist')
track_results, created_count, skipped_count = split_audio(
mp3_path, entries, album, artist, output_dir
mp3_path, entries, album, artist, output_dir, job_id
)
# Clean up original
print(f"Removing original file: {mp3_path}", flush=True)
os.remove(mp3_path)
# Set permissions
set_permissions(output_dir)
print(f"✓ Processing complete!", flush=True)
print(f" Created: {created_count}, Skipped: {skipped_count}", flush=True)
sys.stdout.flush()
# Build response
response_data = {
# Update job with results
jobs[job_id]['status'] = 'completed'
jobs[job_id]['progress'] = 'Processing complete!'
jobs[job_id]['result'] = {
'success': True,
'album': album,
'artist': artist,
@@ -366,32 +256,89 @@ def split_concert():
'output_dir': output_dir
}
print(f"Sending response: {json.dumps(response_data, indent=2)}", flush=True)
# Return JSON response with explicit content type
return Response(
json.dumps(response_data),
status=200,
mimetype='application/json'
)
except Exception as e:
print(f"❌ Error: {str(e)}", flush=True)
print(f"Job {job_id} failed: {str(e)}", flush=True)
import traceback
traceback.print_exc()
sys.stdout.flush()
# Return error as JSON
error_response = {'error': str(e)}
return Response(
json.dumps(error_response),
status=500,
mimetype='application/json'
jobs[job_id]['status'] = 'failed'
jobs[job_id]['error'] = str(e)
@app.route('/')
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page
@app.route('/split', methods=['POST'])
def split_concert():
    """Register a split job from the posted form and answer right away.

    Reads ``youtube_url``, ``artist``, ``album`` and ``setlist`` from the
    form. The first three are mandatory; a missing one yields a JSON error
    with HTTP 400. Otherwise a new entry is stored in ``jobs`` under a fresh
    UUID, ``process_job`` is launched on a daemon thread, and the job id is
    returned as JSON. Any unexpected exception is logged and reported as a
    JSON error with HTTP 500.
    """
    try:
        form = request.form
        url = form.get('youtube_url', '').strip()
        artist = form.get('artist', '').strip()
        album = form.get('album', '').strip()
        setlist_text = form.get('setlist', '').strip()

        # Checked in this order so the first missing field decides the message.
        required_fields = (
            (url, 'No YouTube URL provided'),
            (artist, 'No artist name provided'),
            (album, 'No album name provided'),
        )
        for value, message in required_fields:
            if not value:
                return jsonify({'error': message}), 400

        # Register the job before the worker starts so it can update the entry.
        job_id = str(uuid.uuid4())
        jobs[job_id] = {
            'status': 'queued',
            'progress': 'Job queued...',
            'created_at': datetime.now().isoformat(),
            'url': url,
            'artist': artist,
            'album': album
        }

        # Daemon thread: the worker does not keep the process alive on exit.
        worker = threading.Thread(
            target=process_job,
            args=(job_id, url, artist, album, setlist_text),
            daemon=True,
        )
        worker.start()

        print(f"✓ Job {job_id} created and started", flush=True)

        payload = {
            'success': True,
            'job_id': job_id
        }
        return jsonify(payload)

    except Exception as e:
        print(f"Error creating job: {str(e)}", flush=True)
        return jsonify({'error': str(e)}), 500
@app.route('/status/<job_id>', methods=['GET'])
def get_status(job_id):
    """Report the state of a job, plus its result or error once finished.

    Returns a JSON object with ``status`` and ``progress``. A completed job
    additionally carries ``result``; a failed job carries ``error``. An
    unknown ``job_id`` yields a JSON error with HTTP 404.
    """
    try:
        job = jobs[job_id]
    except KeyError:
        return jsonify({'error': 'Job not found'}), 404

    # The worker thread mutates this dict while we read it, so take the
    # status once and base every decision below on that single snapshot.
    state = job['status']
    response = {
        'status': state,
        'progress': job.get('progress', '')
    }

    if state == 'completed':
        result = job.get('result')
        if result is None:
            # The worker sets status to 'completed' before it stores the
            # result. Until the result is visible, report the job as still
            # running so the client polls again instead of receiving an
            # empty result.
            response['status'] = 'processing'
        else:
            response['result'] = result
    elif state == 'failed':
        # The worker sets 'failed' before it stores the message, hence the
        # fallback text.
        response['error'] = job.get('error', 'Unknown error')

    return jsonify(response)
if __name__ == '__main__':
# Get configuration from environment variables
host = os.environ.get('FLASK_HOST', '0.0.0.0')
port = int(os.environ.get('FLASK_PORT', 5000))
debug = os.environ.get('FLASK_DEBUG', 'False').lower() == 'true'
@@ -399,5 +346,4 @@ if __name__ == '__main__':
print(f"Starting YouTube Concert Splitter on {host}:{port}", flush=True)
print(f"Music directory: {DOWNLOAD_FOLDER}", flush=True)
# Bind to the host read from FLASK_HOST (default '0.0.0.0') so the address
# logged above is the one actually served; threaded=True lets status polls be
# answered while other requests are in flight.
app.run(host=host, port=port, debug=debug, threaded=True)