"""Gradio front end that fetches or generates a transcript for a YouTube video."""

from typing import Iterator, Tuple

import gradio as gr

from logging_config import logger, log_buffer
from ffmpeg_setup import ensure_ffmpeg_in_path
from youtube_utils import fetch_youtube_transcript
from transcription import fallback_whisper_transcription


def get_transcript(
    youtube_url: str, skip_official_transcript: bool
) -> Iterator[Tuple[str, str]]:
    """Stream a transcript for a YouTube video together with the running logs.

    The official YouTube transcript is tried first unless the caller opts
    out; if it is skipped or unavailable, the work is delegated to the local
    Whisper fallback.

    Args:
        youtube_url: URL of the video to transcribe.
        skip_official_transcript: When true, bypass the official transcript
            lookup and go straight to the Whisper fallback.

    Yields:
        ``(partial_transcript, logs_so_far)`` pairs. The transcript element is
        an empty string until transcript text is available; the logs element
        is the current content of the shared ``log_buffer``.
    """
    # Reset the shared log buffer so each request starts with empty logs.
    log_buffer.seek(0)
    log_buffer.truncate()

    logger.info(f"Received YouTube URL: {youtube_url}")
    logger.info(f"Skip official transcript check? {skip_official_transcript}")
    logger.info("")
    yield "", log_buffer.getvalue()

    # If the user wants to skip the official transcript, go straight to fallback.
    if skip_official_transcript:
        logger.info("User selected to skip official transcript.")
        yield "", log_buffer.getvalue()
        yield from fallback_whisper_transcription(youtube_url)
        return

    # Otherwise, try the official transcript first.
    transcript = fetch_youtube_transcript(youtube_url)
    logger.info("")
    yield "", log_buffer.getvalue()

    # A result starting with "Error" signals that no official transcript was
    # obtained. A missing or empty result is treated the same way so that it
    # neither crashes on ``.startswith`` nor is reported as a success.
    if not transcript or transcript.startswith("Error"):
        logger.info(
            "Transcript not found. Falling back to local Whisper transcription..."
        )
        logger.info("")
        yield "", log_buffer.getvalue()
        yield from fallback_whisper_transcription(youtube_url)
        return

    # Otherwise, we succeeded with the official transcript.
    logger.info("Official transcript found successfully.")
    logger.info("")
    yield transcript, log_buffer.getvalue()


def run_demo():
    """Build the Gradio interface around ``get_transcript`` and launch it."""
    # Presumably makes ffmpeg discoverable before any audio conversion is
    # attempted; see ffmpeg_setup for the details.
    ensure_ffmpeg_in_path()

    demo = gr.Interface(
        fn=get_transcript,
        inputs=[
            gr.Textbox(label="YouTube URL"),
            gr.Checkbox(label="Skip official transcript check?", value=False),
        ],
        outputs=[
            gr.Textbox(label="Transcript"),
            gr.Textbox(label="Logs (Streaming)"),
        ],
        title="YouTube Transcript Tool",
        # Adjacent literals are concatenated as-is, so every fragment except
        # the last needs its own trailing space.
        description=(
            "Enter a YouTube link to retrieve its official transcript. "
            "If that fails (or if 'Skip' is selected), we'll download the best "
            "audio track with yt-dlp, convert it to WAV (via ffmpeg), and "
            "then run Whisper to transcribe. Logs are displayed as it runs. "
            "Currently does NOT run on Hugging Face Spaces. Download and run locally."
        ),
    )
    demo.launch()


if __name__ == "__main__":
    run_demo()