Spaces:
Running
Running
import gradio as gr | |
from logging_config import logger, log_buffer | |
from ffmpeg_setup import ensure_ffmpeg_in_path | |
from youtube_utils import fetch_youtube_transcript | |
from transcription import fallback_whisper_transcription | |
def get_transcript(youtube_url: str, skip_official_transcript: bool):
    """
    Stream a transcript for a YouTube video together with the running logs.

    This is a generator. Every item it yields is a
    ``(transcript_text, log_text)`` pair, where ``log_text`` is the current
    content of the shared ``log_buffer``. Progress-only updates carry an
    empty transcript string.

    Flow:
      1. The shared log buffer is emptied and the inputs are logged.
      2. Unless ``skip_official_transcript`` is true, the official
         transcript is requested via ``fetch_youtube_transcript``. A result
         that does not begin with "Error" is yielded as the final item.
      3. Otherwise (skipped, or the result begins with "Error"), every pair
         produced by ``fallback_whisper_transcription`` is relayed.
    """
    # Start every request with an empty log buffer.
    log_buffer.seek(0)
    log_buffer.truncate()

    opening_messages = (
        f"Received YouTube URL: {youtube_url}",
        f"Skip official transcript check? {skip_official_transcript}",
        "",
    )
    for message in opening_messages:
        logger.info(message)
    yield "", log_buffer.getvalue()

    if skip_official_transcript:
        # The caller asked to bypass the official transcript lookup.
        logger.info("User selected to skip official transcript.")
        yield "", log_buffer.getvalue()
    else:
        official_text = fetch_youtube_transcript(youtube_url)
        logger.info("")
        yield "", log_buffer.getvalue()

        if not official_text.startswith("Error"):
            # The official transcript is available: emit it and stop.
            logger.info("Official transcript found successfully.")
            logger.info("")
            yield official_text, log_buffer.getvalue()
            return

        # A result beginning with "Error" is treated as "not found".
        logger.info("Transcript not found. Falling back to local Whisper transcription...")
        logger.info("")
        yield "", log_buffer.getvalue()

    # Shared fallback path: relay each partial result from Whisper.
    for text_so_far, logs_so_far in fallback_whisper_transcription(youtube_url):
        yield text_so_far, logs_so_far
def run_demo():
    """
    Build the Gradio interface for the transcript tool and launch it.

    ``ensure_ffmpeg_in_path()`` is called before the UI is created. The
    interface wires ``get_transcript`` to two inputs (a URL textbox and a
    "skip official transcript" checkbox, unchecked by default) and two
    textbox outputs (the transcript and the streamed logs), then calls
    ``demo.launch()``.
    """
    ensure_ffmpeg_in_path()
    demo = gr.Interface(
        fn=get_transcript,
        inputs=[
            gr.Textbox(label="YouTube URL"),
            gr.Checkbox(label="Skip official transcript check?", value=False),
        ],
        outputs=[
            gr.Textbox(label="Transcript"),
            gr.Textbox(label="Logs (Streaming)"),
        ],
        title="YouTube Transcript Tool",
        # Adjacent string literals are concatenated verbatim, so every
        # fragment except the last must end with an explicit space.
        description=(
            "Enter a YouTube link to retrieve its official transcript. "
            "If that fails (or if 'Skip' is selected), we'll download the best "
            "audio track with yt-dlp, convert it to WAV (via ffmpeg), and "
            "then run Whisper to transcribe. Logs are displayed as it runs. "
            "Currently does NOT run on Hugging Face Spaces. Download and run locally."
        ),
    )
    demo.launch()
# Launch the demo only when this file is executed as a script, so that
# importing the module does not start the UI.
if __name__ == "__main__":
    run_demo()