File size: 4,865 Bytes
b2be5ce
 
 
 
 
 
77bc72b
 
b2be5ce
77bc72b
b2be5ce
77bc72b
 
 
 
 
 
 
b2be5ce
77bc72b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2be5ce
 
 
77bc72b
b2be5ce
77bc72b
b2be5ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77bc72b
 
 
 
 
 
 
 
 
 
b2be5ce
 
77bc72b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2be5ce
 
 
 
 
 
 
77bc72b
b2be5ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import sys
from google import genai
import subprocess
import os
import shutil
import gradio as gr
import uuid
import subprocess

def download_subtitles(video_url):
    """Download English subtitles for a video as SRT and return the file path.

    yt-dlp is invoked twice with the same output directory: once for
    auto-generated subtitles and once for uploaded subtitles. The files are
    written into a fresh directory named after a random UUID (relative to the
    current working directory). On success the caller owns that directory and
    is responsible for removing it.

    Args:
        video_url: URL of the video whose subtitles should be fetched.

    Returns:
        Path to the single ``.srt`` file inside the newly created directory.

    Raises:
        subprocess.CalledProcessError: If a yt-dlp invocation exits non-zero.
        FileNotFoundError: If the output directory or an ``.srt`` file is
            missing after the downloads.
        RuntimeError: If more than one ``.srt`` file was produced.
    """
    directory = str(uuid.uuid4())

    try:
        # Auto-generated subtitles first, then regular subtitles, exactly as
        # two separate yt-dlp runs sharing the same output directory.
        for subs_flag in ("--write-auto-subs", "--write-subs"):
            subprocess.run([
                "yt-dlp",
                subs_flag,
                "--sub-lang", "en",
                "--convert-subs", "srt",
                "--skip-download",
                "-P", f"home:{directory}",
                # "--" ends option parsing so a user-supplied URL that starts
                # with "-" cannot be interpreted as a yt-dlp option.
                "--",
                video_url,
            ], check=True)

        if not os.path.isdir(directory):
            raise FileNotFoundError(f"Directory {directory} does not exist")

        srt_files = [f for f in os.listdir(directory) if f.endswith('.srt')]
        if not srt_files:
            raise FileNotFoundError(f"No .srt file found in {directory}")
        if len(srt_files) > 1:
            raise RuntimeError(f"Multiple .srt files found in {directory}")

        return os.path.join(directory, srt_files[0])
    except BaseException:
        # Nothing is returned on failure, so the caller has no handle to clean
        # up with; remove the partially populated directory here and re-raise.
        shutil.rmtree(directory, ignore_errors=True)
        raise

def cleanup_directory(folder_path):
    """Recursively delete ``folder_path`` and everything inside it.

    Args:
        folder_path: Path of the directory to remove.

    Raises:
        FileNotFoundError: If nothing exists at ``folder_path``.
    """
    if os.path.exists(folder_path):
        shutil.rmtree(folder_path)
        return
    raise FileNotFoundError(f"The directory {folder_path} does not exist")
    
    
def srt_to_text(input_file):
    """Extract the spoken text from an SRT subtitle file.

    The file content is split into entries on blank lines. In each entry the
    first two lines (the cue number and the timing line in SRT) are skipped
    and the remaining lines are collected. A line that is identical to the
    previously collected line is dropped, which removes the repetition found
    in rolling captions.

    Args:
        input_file: Path to a UTF-8 encoded ``.srt`` file.

    Returns:
        The subtitle text with one caption line per output line.

    Raises:
        FileNotFoundError: If ``input_file`` does not exist. The error is
            propagated to the caller instead of terminating the process, so a
            long-running app can report it and still run its own cleanup.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        content = f.read()

    output_lines = []
    for entry in content.strip().split("\n\n"):
        lines = entry.strip().split("\n")
        # Entries with fewer than three lines carry no text; the slice is
        # simply empty for them.
        for line in lines[2:]:
            text = line.strip()
            # Skip blank lines and immediate repeats of the last kept line.
            if text and (not output_lines or text != output_lines[-1]):
                output_lines.append(text)

    return "\n".join(output_lines)

# url = "https://www.youtube.com/watch?v=B1dWbiXnz_s"
# subtitlesfile = download_subtitles(url)
# video_text = srt_to_text(subtitlesfile)
# cleanup_directory(os.path.dirname(subtitlesfile))

# GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# client = genai.Client(api_key=GEMINI_API_KEY)
# response = client.models.generate_content(
#     model='gemini-2.0-flash',
#     contents=f"Summarize the following text chronologically, make it long, use markdown: \n{video_text}",
# )

# print(response.text)

def get_transcript_text(url):
    """Download a video's subtitles and return them as plain text.

    Args:
        url: URL of the video to fetch subtitles for.

    Returns:
        The transcript text produced by ``srt_to_text``.

    Raises:
        Whatever ``download_subtitles``, ``srt_to_text`` or
        ``cleanup_directory`` raise; errors are not translated here.
    """
    print("Downloading subtitles...")
    subtitlesfile = download_subtitles(url)
    try:
        print("Extracting text from subtitles...")
        return srt_to_text(subtitlesfile)
    finally:
        # Remove the download directory even when text extraction fails, so a
        # bad subtitle file does not leave a stray directory behind.
        print("Cleaning up...")
        cleanup_directory(os.path.dirname(subtitlesfile))

def summarize_video(url, prompt):
    """Summarize a video's transcript with the Gemini API.

    The transcript is fetched first, appended to ``prompt`` after a newline,
    and the combined text is sent to the ``gemini-2.0-flash`` model. The API
    key is read from the ``GEMINI_API_KEY`` environment variable.

    Args:
        url: URL of the video to summarize.
        prompt: Instruction text placed in front of the transcript.

    Returns:
        The ``text`` attribute of the model response.
    """
    transcript = get_transcript_text(url)

    api_key = os.getenv("GEMINI_API_KEY")
    gemini = genai.Client(api_key=api_key)
    request_text = prompt + "\n" + transcript

    print("Generating summary...")
    reply = gemini.models.generate_content(
        model='gemini-2.0-flash',
        contents=request_text,
    )
    return reply.text

# Build the Gradio UI at import time; `app` is the Blocks object launched below.
with gr.Blocks() as app:
    gr.Markdown("# YouTube Video Summarizer")
    
    # Top row: URL textbox and the action button, in columns scaled 5 and 1.
    with gr.Row():
        with gr.Column(scale=5):
            url_input = gr.Textbox(label="YouTube URL", placeholder="Enter YouTube URL here...")
        with gr.Column(scale=1):
            summarize_btn = gr.Button("Summarize", variant="primary")
    
    # Editable prompt, pre-filled with the default instruction; summarize_video
    # places this text in front of the transcript.
    default_prompt = """Summarize the following text chronologically, make it long, use markdown:"""
    prompt_input = gr.Textbox(label="Prompt", value=default_prompt, lines=4)
    
    # Markdown component that receives the value returned by summarize_video.
    output = gr.Markdown()

    # Clicking the button calls summarize_video(url, prompt) and writes the
    # result to `output`.
    summarize_btn.click(
        fn=summarize_video,
        inputs=[url_input, prompt_input],
        outputs=output
    )

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    app.launch()