"""Gradio app that summarizes a YouTube video from its English subtitles.

Subtitles are fetched with the ``yt-dlp`` command-line tool, flattened to
plain text, and sent to the Gemini API together with a user-editable prompt.
"""

import os
import shutil
import subprocess
import sys
import uuid

import gradio as gr
from google import genai


def _run_yt_dlp(subs_flag, target_dir, video_url):
    """Run one ``yt-dlp`` subtitle-only download into ``target_dir``.

    Args:
        subs_flag: Either ``"--write-auto-subs"`` or ``"--write-subs"``.
        target_dir: Directory passed to ``yt-dlp`` as its ``home`` path.
        video_url: URL (or video id) to fetch subtitles for.

    Raises:
        subprocess.CalledProcessError: If ``yt-dlp`` exits with a non-zero
            status.
    """
    subprocess.run(
        [
            "yt-dlp",
            subs_flag,
            "--sub-lang", "en",
            "--convert-subs", "srt",
            "--skip-download",
            "-P", f"home:{target_dir}",
            # End of options: the URL is typed by the user in the web UI, so
            # a value starting with "-" must not be parsed as a yt-dlp flag.
            "--",
            video_url,
        ],
        check=True,
    )


def download_subtitles(video_url):
    """Download the English subtitles of a video as a single ``.srt`` file.

    ``yt-dlp`` is run twice into a freshly named directory (a random UUID,
    relative to the current working directory): first for auto-generated
    subtitles, then for regular ones.

    Args:
        video_url: URL of the video to fetch subtitles for.

    Returns:
        Path of the ``.srt`` file, located inside the new directory. The
        caller is responsible for removing that directory afterwards.

    Raises:
        subprocess.CalledProcessError: If a ``yt-dlp`` invocation fails.
        FileNotFoundError: If the directory or an ``.srt`` file is missing
            after the downloads.
        RuntimeError: If more than one ``.srt`` file was produced.
    """
    uuid_dir = str(uuid.uuid4())

    try:
        _run_yt_dlp("--write-auto-subs", uuid_dir, video_url)
        _run_yt_dlp("--write-subs", uuid_dir, video_url)

        if not os.path.isdir(uuid_dir):
            raise FileNotFoundError(f"Directory {uuid_dir} does not exist")

        dir_entries = os.listdir(uuid_dir)
        print(dir_entries)

        srt_files = [name for name in dir_entries if name.endswith(".srt")]
        if not srt_files:
            raise FileNotFoundError(f"No .srt file found in {uuid_dir}")
        if len(srt_files) > 1:
            raise RuntimeError(f"Multiple .srt files found in {uuid_dir}")

        return os.path.join(uuid_dir, srt_files[0])
    except BaseException:
        # On any failure the caller never learns the directory name, so it
        # has to be removed here or it would be left behind.
        shutil.rmtree(uuid_dir, ignore_errors=True)
        raise


def cleanup_directory(folder_path):
    """Delete ``folder_path`` and everything inside it.

    Raises:
        FileNotFoundError: If ``folder_path`` does not exist.
    """
    if not os.path.exists(folder_path):
        raise FileNotFoundError(f"The directory {folder_path} does not exist")

    shutil.rmtree(folder_path)


def srt_to_text(input_file):
    """Flatten an SRT subtitle file into plain text.

    Entries are separated by blank lines. In each entry the first two lines
    (index and timestamp) are dropped; the remaining non-empty lines are kept
    unless they repeat the previously kept line.

    Args:
        input_file: Path of a UTF-8 encoded ``.srt`` file.

    Returns:
        The subtitle text, one caption line per line.

    Raises:
        FileNotFoundError: If ``input_file`` does not exist.
    """
    try:
        with open(input_file, "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError as err:
        # Raise instead of exiting: this runs inside the web app, where
        # SystemExit would not be reported as an ordinary error.
        raise FileNotFoundError(
            f"Input file '{input_file}' not found"
        ) from err

    output_lines = []
    for entry in content.strip().split("\n\n"):
        lines = entry.strip().split("\n")
        if len(lines) < 3:
            # Needs at least index, timestamp and one text line.
            continue

        for line in lines[2:]:
            stripped_line = line.strip()
            if not stripped_line:
                continue
            # Skip a line identical to the one just emitted.
            if output_lines and stripped_line == output_lines[-1]:
                continue
            output_lines.append(stripped_line)

    return "\n".join(output_lines)


def get_transcript_text(url):
    """Return the plain-text English transcript of the video at ``url``.

    Downloads the subtitles, converts them to text, and removes the download
    directory, including when the conversion fails.

    Raises:
        Whatever ``download_subtitles``, ``srt_to_text`` or
        ``cleanup_directory`` raise.
    """
    print("Downloading subtitles...")
    subtitlesfile = download_subtitles(url)

    try:
        print("Extracting text from subtitles...")
        return srt_to_text(subtitlesfile)
    finally:
        print("Cleaning up...")
        cleanup_directory(os.path.dirname(subtitlesfile))


def summarize_video(url, prompt):
    """Summarize a video's transcript with Gemini.

    Args:
        url: Video URL entered by the user.
        prompt: Instruction text; the transcript is appended after a newline.

    Returns:
        The text of the model response.

    Raises:
        gr.Error: If ``url`` is empty or only whitespace.
    """
    if not url or not url.strip():
        raise gr.Error("Please enter a YouTube URL.")

    video_text = get_transcript_text(url.strip())

    client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
    final_prompt = prompt + "\n" + video_text

    print("Generating summary...")
    response = client.models.generate_content(
        model='gemini-2.0-flash',
        contents=final_prompt,
    )
    return response.text


with gr.Blocks() as app:
    gr.Markdown("# YouTube Video Summarizer")

    with gr.Row():
        with gr.Column(scale=5):
            url_input = gr.Textbox(
                label="YouTube URL",
                placeholder="Enter YouTube URL here...",
            )
        with gr.Column(scale=1):
            summarize_btn = gr.Button("Summarize", variant="primary")

    default_prompt = """Summarize the following text chronologically, make it long, use markdown:"""
    prompt_input = gr.Textbox(label="Prompt", value=default_prompt, lines=4)
    output = gr.Markdown()

    summarize_btn.click(
        fn=summarize_video,
        inputs=[url_input, prompt_input],
        outputs=output,
    )


if __name__ == "__main__":
    app.launch()