hermanda committed on
Commit
b2be5ce
·
verified ·
1 Parent(s): 20222eb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -0
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from google import genai
3
+ import subprocess
4
+ import os
5
+ import shutil
6
+ import gradio as gr
7
+
8
def download_subtitles(url):
    """Download subtitles for ``url`` and return the path of the ``.srt`` file.

    Runs ``bash download_subtitles.sh <url>`` and treats the last line the
    script prints on stdout as the name of the directory holding the result.

    Args:
        url: Video URL, handed to the script as its only argument.

    Returns:
        Path of the single ``.srt`` file found inside the reported directory.

    Raises:
        subprocess.CalledProcessError: The script exited with a non-zero
            status (``check=True``).
        FileNotFoundError: The script printed nothing, the reported directory
            does not exist, or it contains no ``.srt`` file.
        RuntimeError: The directory contains more than one ``.srt`` file.
    """
    # The URL is passed as its own argv entry rather than being interpolated
    # into a command string.
    result = subprocess.run(
        ['bash', 'download_subtitles.sh', url],
        check=True,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # The script's last stdout line is the directory name; anything printed
    # before it is ignored.
    stdout_lines = result.stdout.strip().splitlines()
    if not stdout_lines:
        raise FileNotFoundError(
            "download_subtitles.sh printed no directory name"
        )
    directory = stdout_lines[-1].strip()

    if not os.path.isdir(directory):
        raise FileNotFoundError(f"Directory {directory} does not exist")

    # Exactly one subtitle file is expected; anything else is ambiguous.
    srt_files = [f for f in os.listdir(directory) if f.endswith('.srt')]
    if not srt_files:
        raise FileNotFoundError(f"No .srt file found in {directory}")
    if len(srt_files) > 1:
        raise RuntimeError(f"Multiple .srt files found in {directory}")

    return os.path.join(directory, srt_files[0])
35
+
36
def cleanup_directory(folder_path):
    """Delete ``folder_path`` and everything inside it.

    Args:
        folder_path: Path of the directory to remove.

    Raises:
        FileNotFoundError: ``folder_path`` does not exist.
    """
    # Fail loudly on a missing path instead of letting rmtree report it.
    if not os.path.exists(folder_path):
        raise FileNotFoundError(f"The directory {folder_path} does not exist")
    shutil.rmtree(folder_path)
42
+
43
+
44
def srt_to_text(input_file):
    """Extract the spoken text from an SRT subtitle file.

    The file is split into entries on blank lines. In each entry the first
    two lines (taken to be the cue index and the timestamp line) are dropped
    and the remaining lines are kept as text. A line identical to the one
    kept immediately before it is skipped, which removes the repetition of
    rolling captions.

    Args:
        input_file: Path of the UTF-8 encoded ``.srt`` file.

    Returns:
        The kept text lines joined with ``"\\n"``.

    Raises:
        FileNotFoundError: ``input_file`` does not exist. The error is
            propagated rather than terminating the process, so a caller such
            as a web handler can report it.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        content = f.read()

    output_lines = []
    for entry in content.strip().split("\n\n"):
        lines = entry.strip().split("\n")
        # An entry needs index + timestamp + at least one text line.
        if len(lines) < 3:
            continue
        for line in lines[2:]:
            stripped_line = line.strip()
            if not stripped_line:
                continue
            # Only consecutive duplicates are collapsed.
            if not output_lines or stripped_line != output_lines[-1]:
                output_lines.append(stripped_line)

    return "\n".join(output_lines)
67
+
68
+ # url = "https://www.youtube.com/watch?v=B1dWbiXnz_s"
69
+ # subtitlesfile = download_subtitles(url)
70
+ # video_text = srt_to_text(subtitlesfile)
71
+ # cleanup_directory(os.path.dirname(subtitlesfile))
72
+
73
+ # GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
74
+ # client = genai.Client(api_key=GEMINI_API_KEY)
75
+ # response = client.models.generate_content(
76
+ # model='gemini-2.0-flash',
77
+ # contents=f"Summarize the following text chronologically, make it long, use markdown: \n{video_text}",
78
+ # )
79
+
80
+ # print(response.text)
81
+
82
def get_transcript_text(url):
    """Return the plain-text transcript of the video at ``url``.

    Downloads the subtitles, converts them to text, and removes the download
    directory afterwards.

    Args:
        url: Video URL to fetch subtitles for.

    Returns:
        The transcript text produced by ``srt_to_text``.

    Raises:
        gr.Error: Any failure while downloading, converting or cleaning up,
            with the original exception chained as the cause.
    """
    try:
        print("Downloading subtitles...")
        subtitlesfile = download_subtitles(url)
        try:
            print("Extracting text from subtitles...")
            return srt_to_text(subtitlesfile)
        finally:
            # Runs even when extraction fails, so the download directory is
            # not left behind.
            print("Cleaning up...")
            cleanup_directory(os.path.dirname(subtitlesfile))
    except Exception as e:
        raise gr.Error(f"Error retrieving transcript: {e}") from e
93
+
94
def summarize_video(url, prompt):
    """Summarize a video's transcript with Gemini.

    Fetches the transcript for ``url``, appends it to ``prompt`` and sends the
    result to the ``gemini-2.0-flash`` model. The API key is read from the
    ``GEMINI_API_KEY`` environment variable.

    Args:
        url: Video URL entered by the user.
        prompt: Instruction text placed before the transcript.

    Returns:
        The model's text answer, or a string starting with
        ``"An error occurred: "`` describing what went wrong. Errors are
        returned rather than raised so they show up in the output component.
    """
    # Reject an empty URL up front instead of running the download script
    # with an empty argument.
    if not url or not url.strip():
        return "An error occurred: no YouTube URL provided"

    try:
        video_text = get_transcript_text(url.strip())

        client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
        final_prompt = (prompt or "") + "\n" + video_text
        response = client.models.generate_content(
            model='gemini-2.0-flash',
            contents=final_prompt,
        )
        summary = response.text
        # NOTE(review): presumably `text` can be empty/None when the model
        # returns no text parts -- confirm against the google-genai docs.
        if not summary:
            return "An error occurred: the model returned no text"
        return summary
    except Exception as e:
        return f"An error occurred: {str(e)}"
109
+
110
# Gradio UI: a URL box and a "Summarize" button side by side, an editable
# prompt, and a Markdown area that shows the result of summarize_video.
with gr.Blocks() as app:
    gr.Markdown("# YouTube Video Summarizer")

    with gr.Row():
        with gr.Column(scale=5):
            url_input = gr.Textbox(label="YouTube URL", placeholder="Enter YouTube URL here...")
        with gr.Column(scale=5):
            summarize_btn = gr.Button("Summarize", variant="primary")

    default_prompt = """Summarize the following text chronologically, make it long, use markdown:"""
    prompt_input = gr.Textbox(label="Prompt", value=default_prompt, lines=4)

    output = gr.Markdown()

    # Clicking the button sends the URL and prompt to summarize_video and
    # renders the returned string as Markdown.
    summarize_btn.click(
        fn=summarize_video,
        inputs=[url_input, prompt_input],
        outputs=output
    )

if __name__ == "__main__":
    app.launch()
132
+