File size: 4,202 Bytes
b2be5ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import sys
from google import genai
import subprocess
import os
import shutil
import gradio as gr

def download_subtitles(url):
    """Download the subtitles for ``url`` and return the path of the .srt file.

    Runs ``bash download_subtitles.sh <url>`` (the script path is relative to
    the current working directory) and takes the last line the script writes
    to stdout as the name of the directory that holds the subtitles.

    Args:
        url: Video URL passed to the script as its only argument.

    Returns:
        ``os.path.join(directory, name)`` for the single ``.srt`` file found
        in the directory reported by the script.

    Raises:
        subprocess.CalledProcessError: The script exited with a non-zero
            status (``check=True``).
        FileNotFoundError: The script printed nothing, the reported directory
            does not exist, or it contains no ``.srt`` file.
        RuntimeError: The directory contains more than one ``.srt`` file.
    """
    # Capture both streams: stdout carries the directory name, stderr is
    # only used for diagnostics.
    result = subprocess.run(
        ['bash', 'download_subtitles.sh', url],
        check=True,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # The last stdout line is the directory name; earlier lines are ignored.
    directory = result.stdout.strip().split('\n')[-1].strip()

    if not directory:
        # Without this guard an empty name falls through to the isdir check
        # and produces the misleading message "Directory  does not exist".
        details = (result.stderr or "").strip()
        message = "download_subtitles.sh produced no output, so the subtitle directory is unknown"
        if details:
            message = f"{message}: {details}"
        raise FileNotFoundError(message)

    # Verify the directory exists
    if not os.path.isdir(directory):
        raise FileNotFoundError(f"Directory {directory} does not exist")

    # Exactly one .srt file is expected in the directory
    srt_files = [f for f in os.listdir(directory) if f.endswith('.srt')]
    if not srt_files:
        raise FileNotFoundError(f"No .srt file found in {directory}")
    if len(srt_files) > 1:
        raise RuntimeError(f"Multiple .srt files found in {directory}")

    return os.path.join(directory, srt_files[0])

def cleanup_directory(folder_path):
    """Delete ``folder_path`` together with everything inside it.

    Args:
        folder_path: Path of the directory to remove.

    Raises:
        FileNotFoundError: If ``folder_path`` does not exist.
    """
    if os.path.exists(folder_path):
        # Recursive removal of the directory tree
        shutil.rmtree(folder_path)
        return
    raise FileNotFoundError(f"The directory {folder_path} does not exist")
    
    
def srt_to_text(input_file):
    """Return the subtitle text of an SRT file as newline-joined lines.

    The file is read as UTF-8 and split into entries on blank lines. The
    first two lines of each entry are discarded (presumably the cue index and
    the timestamp line); entries with fewer than three lines are skipped.
    The remaining lines are stripped, empty ones are dropped, and a line that
    equals the previously kept line is dropped too, so text repeated across
    consecutive cues appears only once.

    Args:
        input_file: Path of the ``.srt`` file to read.

    Returns:
        The kept text lines joined with ``"\\n"``.

    Raises:
        FileNotFoundError: If ``input_file`` does not exist. The error is
            propagated instead of terminating the interpreter, so callers
            that catch ``Exception`` can report it.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        content = f.read()

    output_lines = []
    for entry in content.strip().split("\n\n"):
        lines = entry.strip().split("\n")
        if len(lines) < 3:
            # Nothing follows the two header lines, so there is no text
            continue
        for line in lines[2:]:
            stripped_line = line.strip()
            # Skip blanks and immediate repeats of the last kept line
            if stripped_line and (not output_lines or stripped_line != output_lines[-1]):
                output_lines.append(stripped_line)

    return "\n".join(output_lines)

# url = "https://www.youtube.com/watch?v=B1dWbiXnz_s"
# subtitlesfile = download_subtitles(url)
# video_text = srt_to_text(subtitlesfile)
# cleanup_directory(os.path.dirname(subtitlesfile))

# GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# client = genai.Client(api_key=GEMINI_API_KEY)
# response = client.models.generate_content(
#     model='gemini-2.0-flash',
#     contents=f"Summarize following text chronologically, make it long, use markdown: \n{video_text}",
# )

# print(response.text)

def get_transcript_text(url):
    """Download the subtitles for ``url`` and return their plain text.

    Calls ``download_subtitles``, converts the resulting file with
    ``srt_to_text`` and then removes the directory containing the file.

    Args:
        url: Video URL to fetch subtitles for.

    Returns:
        The text produced by ``srt_to_text``.

    Raises:
        gr.Error: If any step raises an ``Exception``; the original exception
            is chained as the cause.
    """
    try:
        print("Downloading subtitles...")
        subtitlesfile = download_subtitles(url)
        try:
            print("Extracting text from subtitles...")
            video_text = srt_to_text(subtitlesfile)
        finally:
            # Remove the download directory even when extraction fails, so a
            # failed request does not leave files behind.
            print("Cleaning up...")
            cleanup_directory(os.path.dirname(subtitlesfile))
        return video_text
    except Exception as e:
        raise gr.Error(f"Error retrieving transcript: {e}") from e

def summarize_video(url, prompt):
    """Summarize the transcript of the video at ``url`` with Gemini.

    The transcript is fetched with ``get_transcript_text``, appended to
    ``prompt`` after a newline and sent to the ``gemini-2.0-flash`` model.
    The API key is read from the ``GEMINI_API_KEY`` environment variable.

    Args:
        url: Video URL; surrounding whitespace is ignored.
        prompt: Instruction text placed before the transcript.

    Returns:
        The ``text`` of the model response, or a string starting with
        ``"An error occurred: "`` when the URL is blank or any step raises an
        ``Exception``.
    """
    try:
        # Reject a blank URL up front instead of starting the download
        # script with an empty argument and surfacing its raw failure.
        if not url or not url.strip():
            return "An error occurred: please enter a YouTube URL"

        video_text = get_transcript_text(url.strip())

        client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
        final_prompt = prompt + "\n" + video_text
        response = client.models.generate_content(
            model='gemini-2.0-flash',
            contents=final_prompt,
        )
        return response.text
    except Exception as e:
        # Errors are returned as text so they show up in the output component
        return f"An error occurred: {str(e)}"

# Build the Gradio interface. Components appear in the order they are created
# here, so the statement order below defines the page layout.
with gr.Blocks() as app:
    gr.Markdown("# YouTube Video Summarizer")
    
    # Top row: URL field and the submit button, each in its own column
    # (both columns use scale=5).
    with gr.Row():
        with gr.Column(scale=5):
            url_input = gr.Textbox(label="YouTube URL", placeholder="Enter YouTube URL here...")
        with gr.Column(scale=5):
            summarize_btn = gr.Button("Summarize", variant="primary")
    
    # Editable instruction; summarize_video puts it in front of the transcript.
    default_prompt = """Summarize the following text chronologically, make it long, use markdown:"""
    prompt_input = gr.Textbox(label="Prompt", value=default_prompt, lines=4)
    
    # Receives the string returned by summarize_video (summary or error text).
    output = gr.Markdown()

    # Clicking the button calls summarize_video(url, prompt).
    summarize_btn.click(
        fn=summarize_video,
        inputs=[url_input, prompt_input],
        outputs=output
    )

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()