yt-summarize / app.py
hermanda's picture
Update app.py
77bc72b verified
raw
history blame
4.87 kB
import sys
from google import genai
import subprocess
import os
import shutil
import gradio as gr
import uuid
import subprocess
def download_subtitles(video_url):
    """Download English subtitles for a video and return the .srt file path.

    Runs ``yt-dlp`` twice against ``video_url`` -- once requesting
    auto-generated subtitles, once requesting regular ones -- converting
    the result to SRT and writing into a fresh UUID-named directory
    (a relative path, so it is created under the current working directory).

    Args:
        video_url: URL of the video, passed to yt-dlp as its positional
            argument.

    Returns:
        Path of the single ``.srt`` file inside the download directory.
        On success the directory is left in place; the caller removes it.

    Raises:
        subprocess.CalledProcessError: If a yt-dlp run exits non-zero.
        FileNotFoundError: If the download directory or an ``.srt`` file
            is missing afterwards.
        RuntimeError: If more than one ``.srt`` file was produced.
    """
    directory = str(uuid.uuid4())
    shared_args = [
        "--sub-lang", "en",
        "--convert-subs", "srt",
        "--skip-download",
        "-P", f"home:{directory}",
        # End-of-options marker: the URL originates from a UI textbox, so a
        # value starting with "-" must not be interpreted as a yt-dlp flag.
        "--",
        video_url,
    ]

    try:
        # Auto-generated subtitles first, then regular subtitles.
        for subs_flag in ("--write-auto-subs", "--write-subs"):
            subprocess.run(["yt-dlp", subs_flag, *shared_args], check=True)

        if not os.path.isdir(directory):
            raise FileNotFoundError(f"Directory {directory} does not exist")

        entries = os.listdir(directory)
        print(entries)
        srt_files = [name for name in entries if name.endswith('.srt')]
        if not srt_files:
            raise FileNotFoundError(f"No .srt file found in {directory}")
        if len(srt_files) > 1:
            raise RuntimeError(f"Multiple .srt files found in {directory}")
    except Exception:
        # Nothing is returned on failure, so nobody else could clean this up.
        shutil.rmtree(directory, ignore_errors=True)
        raise

    return os.path.join(directory, srt_files[0])
def cleanup_directory(folder_path):
    """Recursively delete ``folder_path`` and everything inside it.

    Args:
        folder_path: Path of the directory to remove.

    Raises:
        FileNotFoundError: If ``folder_path`` does not exist.
    """
    if os.path.exists(folder_path):
        shutil.rmtree(folder_path)
        return
    raise FileNotFoundError(f"The directory {folder_path} does not exist")
def srt_to_text(input_file):
    """Extract the spoken text from an SRT subtitle file.

    The file is split into entries on blank lines. In each entry the first
    two lines (the cue index and the timestamp line in SRT) are skipped and
    the remaining lines are kept. Empty lines are dropped, and a line equal
    to the previously kept line is skipped, which collapses the repeated
    lines of rolling captions.

    Args:
        input_file: Path to a UTF-8 encoded ``.srt`` file.

    Returns:
        The kept text lines joined with newlines.

    Raises:
        FileNotFoundError: If ``input_file`` does not exist. This is raised
            rather than terminating the process, so a caller such as a web
            request handler can report the failure.
    """
    try:
        with open(input_file, "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"Error: Input file '{input_file}' not found"
        ) from err

    output_lines = []
    for entry in content.strip().split("\n\n"):
        lines = entry.strip().split("\n")
        if len(lines) < 3:
            # Index + timestamp only: the entry carries no text.
            continue
        for line in lines[2:]:
            stripped_line = line.strip()
            if not stripped_line:
                continue
            # Only consecutive repeats are dropped, not all duplicates.
            if not output_lines or stripped_line != output_lines[-1]:
                output_lines.append(stripped_line)
    return "\n".join(output_lines)
# url = "https://www.youtube.com/watch?v=B1dWbiXnz_s"
# subtitlesfile = download_subtitles(url)
# video_text = srt_to_text(subtitlesfile)
# cleanup_directory(os.path.dirname(subtitlesfile))
# GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# client = genai.Client(api_key=GEMINI_API_KEY)
# response = client.models.generate_content(
# model='gemini-2.0-flash',
# contents=f"Summarize following text chronologically, make it long, use markdown: \n{video_text}",
# )
# print(response.text)
def get_transcript_text(url):
    """Return the plain-text transcript of the video at ``url``.

    Downloads the subtitles with ``download_subtitles``, converts the
    resulting ``.srt`` file to text with ``srt_to_text``, and removes the
    directory containing the subtitle file.

    Args:
        url: Video URL to fetch subtitles for.

    Returns:
        The transcript text produced by ``srt_to_text``.

    Raises:
        Whatever ``download_subtitles``, ``srt_to_text`` or
        ``cleanup_directory`` raise; nothing is caught here.
    """
    print("Downloading subtitles...")
    subtitlesfile = download_subtitles(url)
    try:
        print("Extracting text from subtitles...")
        return srt_to_text(subtitlesfile)
    finally:
        # Runs even when extraction fails, so the download directory is
        # never left behind.
        print("Cleaning up...")
        cleanup_directory(os.path.dirname(subtitlesfile))
def summarize_video(url, prompt):
    """Generate a summary of a video's transcript with a Gemini model.

    Args:
        url: Video URL; its transcript is obtained via
            ``get_transcript_text``.
        prompt: Instruction text. The transcript is appended to it after a
            single newline to form the model input.

    Returns:
        The ``text`` attribute of the ``generate_content`` response.
    """
    transcript = get_transcript_text(url)
    # The API key is read from the GEMINI_API_KEY environment variable.
    gemini = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
    request_text = "\n".join((prompt, transcript))
    print("Generating summary...")
    return gemini.models.generate_content(
        model='gemini-2.0-flash',
        contents=request_text,
    ).text
# Initial value of the editable prompt box; the transcript is appended to it.
default_prompt = "Summarize the following text chronologically, make it long, use markdown:"

# UI definition: a title, a row holding the URL box and the Summarize button,
# an editable prompt, and a Markdown area that receives the summary.
# NOTE(review): the prompt box and output are assumed to sit below the row
# rather than inside it -- confirm against the intended layout.
with gr.Blocks() as app:
    gr.Markdown("# YouTube Video Summarizer")
    with gr.Row():
        with gr.Column(scale=5):
            url_input = gr.Textbox(
                label="YouTube URL",
                placeholder="Enter YouTube URL here...",
            )
        with gr.Column(scale=1):
            summarize_btn = gr.Button("Summarize", variant="primary")
    prompt_input = gr.Textbox(label="Prompt", value=default_prompt, lines=4)
    output = gr.Markdown()
    # Clicking the button calls summarize_video(url, prompt) and renders the
    # returned value in the Markdown output.
    summarize_btn.click(
        fn=summarize_video,
        inputs=[url_input, prompt_input],
        outputs=output,
    )

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()