Spaces:

hermanda
/

yt-summarize

Build error

File size: 4,202 Bytes

b2be5ce

import sys
from google import genai
import subprocess
import os
import shutil
import gradio as gr

def download_subtitles(url):
    """Run ``download_subtitles.sh`` for *url* and return the path to its .srt file.

    The script's last line of stdout is taken as the name of the directory
    it wrote to; that directory must hold exactly one ``.srt`` file.

    Raises:
        subprocess.CalledProcessError: the script exited with a non-zero status.
        FileNotFoundError: the reported directory is missing, or it contains
            no ``.srt`` file.
        RuntimeError: the directory contains more than one ``.srt`` file.
    """
    completed = subprocess.run(
        ['bash', 'download_subtitles.sh', url],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=True,
    )

    # Everything before the final stdout line is treated as log noise.
    directory = completed.stdout.strip().split('\n')[-1].strip()

    if not os.path.isdir(directory):
        raise FileNotFoundError(f"Directory {directory} does not exist")

    subtitle_names = []
    for entry in os.listdir(directory):
        if entry.endswith('.srt'):
            subtitle_names.append(entry)

    if not subtitle_names:
        raise FileNotFoundError(f"No .srt file found in {directory}")
    if len(subtitle_names) > 1:
        raise RuntimeError(f"Multiple .srt files found in {directory}")

    return os.path.join(directory, subtitle_names[0])

def cleanup_directory(folder_path):
    """Delete *folder_path* together with everything inside it.

    Raises:
        FileNotFoundError: if nothing exists at *folder_path*.
    """
    if os.path.exists(folder_path):
        # Recursive removal: the folder and all of its contents go away.
        shutil.rmtree(folder_path)
        return
    raise FileNotFoundError(f"The directory {folder_path} does not exist")
    
    
def srt_to_text(input_file):
    """Extract the subtitle text from an SRT file as plain text.

    The file is split into entries on blank lines. In each entry the first
    two lines (the counter and the timestamp range in a standard SRT entry)
    are dropped and the remaining lines are kept. A line identical to the
    previously kept line is skipped, which collapses consecutive repeats.

    Args:
        input_file: Path to a UTF-8 encoded ``.srt`` file.

    Returns:
        The kept text lines joined with newlines; an empty string when the
        file holds no usable entries.

    Raises:
        FileNotFoundError: if *input_file* does not exist.
    """
    try:
        with open(input_file, "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError as err:
        # Raise rather than sys.exit(): SystemExit is not an Exception
        # subclass, so it would slip past ``except Exception`` handlers and
        # could bring down the process hosting this function.
        raise FileNotFoundError(
            f"Error: Input file '{input_file}' not found"
        ) from err

    output_lines = []
    for entry in content.strip().split("\n\n"):
        lines = entry.strip().split("\n")
        # Entries with fewer than three lines have no text after the
        # counter and timestamp lines.
        if len(lines) < 3:
            continue
        for line in lines[2:]:
            stripped_line = line.strip()
            if not stripped_line:
                continue
            # Skip a line that merely repeats the one just emitted.
            if output_lines and stripped_line == output_lines[-1]:
                continue
            output_lines.append(stripped_line)

    return "\n".join(output_lines)

# url = "https://www.youtube.com/watch?v=B1dWbiXnz_s"
# subtitlesfile = download_subtitles(url)
# video_text = srt_to_text(subtitlesfile)
# cleanup_directory(os.path.dirname(subtitlesfile))

# GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# client = genai.Client(api_key=GEMINI_API_KEY)
# response = client.models.generate_content(
#     model='gemini-2.0-flash',
#     contents=f"Summarize the following text chronologically, make it long, use markdown: \n{video_text}",
# )

# print(response.text)

def get_transcript_text(url):
    """Download the subtitles for *url* and return them as plain text.

    The subtitle file's directory is removed once the text has been
    extracted, and also when extraction fails, so a failed run does not
    leave the download behind.

    Args:
        url: Video URL handed to ``download_subtitles``.

    Returns:
        The transcript text produced by ``srt_to_text``.

    Raises:
        gr.Error: if downloading, extracting, or cleaning up fails; the
            original exception is attached as ``__cause__``.
    """
    try:
        print("Downloading subtitles...")
        subtitlesfile = download_subtitles(url)
        try:
            print("Extracting text from subtitles...")
            video_text = srt_to_text(subtitlesfile)
        finally:
            # Runs on success and on failure alike, so the downloaded
            # directory is never leaked when extraction goes wrong.
            print("Cleaning up...")
            cleanup_directory(os.path.dirname(subtitlesfile))
        return video_text
    except Exception as e:
        raise gr.Error(f"Error retrieving transcript: {e}") from e

def summarize_video(url, prompt):
    """Summarize the video at *url* with Gemini, steered by *prompt*.

    The transcript is appended to *prompt* on a new line and the result is
    sent to the ``gemini-2.0-flash`` model, using the API key read from the
    ``GEMINI_API_KEY`` environment variable.

    Returns:
        The model's response text, or a string starting with
        "An error occurred: " if any step raised an exception.
    """
    try:
        transcript = get_transcript_text(url)

        api_key = os.getenv("GEMINI_API_KEY")
        gemini = genai.Client(api_key=api_key)

        request_text = prompt + "\n" + transcript
        result = gemini.models.generate_content(
            model='gemini-2.0-flash',
            contents=request_text,
        )
        return result.text
    except Exception as e:
        # Failures are reported in the output area instead of being raised.
        return f"An error occurred: {e!s}"

with gr.Blocks() as app:
    # Text pre-filled into the editable "Prompt" box below.
    default_prompt = "Summarize the following text chronologically, make it long, use markdown:"

    gr.Markdown("# YouTube Video Summarizer")

    # Top row: URL field and trigger button in two columns of equal scale.
    with gr.Row():
        with gr.Column(scale=5):
            url_input = gr.Textbox(
                label="YouTube URL",
                placeholder="Enter YouTube URL here...",
            )
        with gr.Column(scale=5):
            summarize_btn = gr.Button("Summarize", variant="primary")

    prompt_input = gr.Textbox(
        label="Prompt",
        value=default_prompt,
        lines=4,
    )

    # Markdown component that receives the value returned by summarize_video.
    output = gr.Markdown()

    # Clicking the button sends the URL and prompt to summarize_video.
    summarize_btn.click(fn=summarize_video, inputs=[url_input, prompt_input], outputs=output)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()