Spaces:
Running
Running
File size: 4,202 Bytes
b2be5ce |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import sys
from google import genai
import subprocess
import os
import shutil
import gradio as gr
def download_subtitles(url):
# Execute the bash script and capture the output
result = subprocess.run(
['bash', 'download_subtitles.sh', url],
check=True,
text=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)
# Extract the last line from stdout which is the directory name
stdout_lines = result.stdout.strip().split('\n')
directory = stdout_lines[-1].strip()
# Verify the directory exists
if not os.path.isdir(directory):
raise FileNotFoundError(f"Directory {directory} does not exist")
# Find the .srt file in the directory
srt_files = [f for f in os.listdir(directory) if f.endswith('.srt')]
if not srt_files:
raise FileNotFoundError(f"No .srt file found in {directory}")
if len(srt_files) > 1:
raise RuntimeError(f"Multiple .srt files found in {directory}")
srt_path = os.path.join(directory, srt_files[0])
return srt_path
def cleanup_directory(folder_path):
# Check if the folder exists
if not os.path.exists(folder_path):
raise FileNotFoundError(f"The directory {folder_path} does not exist")
# Remove the directory and all its contents
shutil.rmtree(folder_path)
def srt_to_text(input_file):
try:
with open(input_file, "r", encoding="utf-8") as f:
content = f.read()
except FileNotFoundError:
print(f"Error: Input file '{input_file}' not found")
sys.exit(1)
entries = content.strip().split("\n\n")
output_lines = []
for entry in entries:
lines = entry.strip().split("\n")
if len(lines) < 3:
continue
text_lines = lines[2:]
for line in text_lines:
stripped_line = line.strip()
if stripped_line:
if not output_lines or stripped_line != output_lines[-1]:
output_lines.append(stripped_line)
return "\n".join(output_lines)
# url = "https://www.youtube.com/watch?v=B1dWbiXnz_s"
# subtitlesfile = download_subtitles(url)
# video_text = srt_to_text(subtitlesfile)
# cleanup_directory(os.path.dirname(subtitlesfile))
# GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# client = genai.Client(api_key=GEMINI_API_KEY)
# response = client.models.generate_content(
# model='gemini-2.0-flash',
# contents=f"Summarize following text chronollogically, make it long, use markdown: \n{video_text}",
# )
# print(response.text)
def get_transcript_text(url):
try:
print("Downloading subtitles...")
subtitlesfile = download_subtitles(url)
print("Extracting text from subtitles...")
video_text = srt_to_text(subtitlesfile)
print("Cleaning up...")
cleanup_directory(os.path.dirname(subtitlesfile))
return video_text
except Exception as e:
raise gr.Error(f"Error retrieving transcript: {e}")
def summarize_video(url, prompt):
try:
video_text = get_transcript_text(url)
client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
final_prompt = prompt + "\n" + video_text
response = client.models.generate_content(
model='gemini-2.0-flash',
contents=final_prompt,
)
summary = response.text
return summary
except Exception as e:
return f"An error occurred: {str(e)}"
with gr.Blocks() as app:
gr.Markdown("# YouTube Video Summarizer")
with gr.Row():
with gr.Column(scale=5):
url_input = gr.Textbox(label="YouTube URL", placeholder="Enter YouTube URL here...")
with gr.Column(scale=5):
summarize_btn = gr.Button("Summarize", variant="primary")
default_prompt = """Summarize the following text chronologically, make it long, use markdown:"""
prompt_input = gr.Textbox(label="Prompt", value=default_prompt, lines=4)
output = gr.Markdown()
summarize_btn.click(
fn=summarize_video,
inputs=[url_input, prompt_input],
outputs=output
)
if __name__ == "__main__":
app.launch()
|