Spaces:
Sleeping
Sleeping
File size: 4,865 Bytes
b2be5ce 77bc72b b2be5ce 77bc72b b2be5ce 77bc72b b2be5ce 77bc72b b2be5ce 77bc72b b2be5ce 77bc72b b2be5ce 77bc72b b2be5ce 77bc72b b2be5ce 77bc72b b2be5ce |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
import sys
from google import genai
import subprocess
import os
import shutil
import gradio as gr
import uuid
import subprocess
def download_subtitles(video_url):
    """Download English subtitles for a video with yt-dlp and return the .srt path.

    yt-dlp is invoked twice against a freshly named output directory (a random
    UUID, relative to the current working directory): first requesting
    auto-generated subtitles, then regular ones. Both runs skip the media
    download and ask for conversion to SRT.

    Args:
        video_url: URL of the video; passed to yt-dlp as a positional argument.

    Returns:
        Path of the single ``.srt`` file found in the output directory. The
        caller is responsible for removing that directory afterwards.

    Raises:
        subprocess.CalledProcessError: If a yt-dlp invocation exits non-zero.
        FileNotFoundError: If the output directory was not created or it
            contains no ``.srt`` file.
        RuntimeError: If more than one ``.srt`` file was produced.
    """
    directory = str(uuid.uuid4())
    try:
        for subs_flag in ("--write-auto-subs", "--write-subs"):
            subprocess.run(
                [
                    "yt-dlp",
                    subs_flag,
                    "--sub-lang", "en",
                    "--convert-subs", "srt",
                    "--skip-download",
                    "-P", f"home:{directory}",
                    # End of options: the URL is user-supplied, so a value
                    # starting with "-" must not be parsed as a yt-dlp option.
                    "--",
                    video_url,
                ],
                check=True,
            )

        if not os.path.isdir(directory):
            raise FileNotFoundError(f"Directory {directory} does not exist")

        entries = os.listdir(directory)
        print(entries)
        srt_files = [name for name in entries if name.endswith(".srt")]
        if not srt_files:
            raise FileNotFoundError(f"No .srt file found in {directory}")
        if len(srt_files) > 1:
            raise RuntimeError(f"Multiple .srt files found in {directory}")
    except Exception:
        # Nothing is returned on failure, so the caller has no path to clean
        # up; remove whatever was written before propagating the error.
        shutil.rmtree(directory, ignore_errors=True)
        raise

    return os.path.join(directory, srt_files[0])
def cleanup_directory(folder_path):
    """Recursively delete ``folder_path`` and everything inside it.

    Args:
        folder_path: Path of the directory to remove.

    Raises:
        FileNotFoundError: If ``folder_path`` does not exist.
    """
    if os.path.exists(folder_path):
        shutil.rmtree(folder_path)
        return
    raise FileNotFoundError(f"The directory {folder_path} does not exist")
def srt_to_text(input_file):
    """Extract the caption text from an SRT file as plain text.

    The file is split into entries on blank lines. The first two lines of each
    entry (the index and the timestamp line in a standard SRT cue) are
    dropped; the remaining lines are stripped and collected. Empty lines are
    skipped, and a line equal to the previously collected line is skipped so
    that captions repeated across consecutive cues appear only once.

    Args:
        input_file: Path to a UTF-8 encoded ``.srt`` file.

    Returns:
        The collected caption lines joined with newlines.

    Raises:
        FileNotFoundError: If ``input_file`` does not exist. This is raised
            rather than exiting the interpreter so that a long-running caller
            (such as a web app handler) can report the error and keep running.
    """
    try:
        with open(input_file, "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError as exc:
        raise FileNotFoundError(f"Input file '{input_file}' not found") from exc

    output_lines = []
    for entry in content.strip().split("\n\n"):
        lines = entry.strip().split("\n")
        if len(lines) < 3:
            # Not enough lines for index + timestamp + at least one text line.
            continue
        for line in lines[2:]:
            stripped_line = line.strip()
            if not stripped_line:
                continue
            if output_lines and stripped_line == output_lines[-1]:
                continue
            output_lines.append(stripped_line)
    return "\n".join(output_lines)
# url = "https://www.youtube.com/watch?v=B1dWbiXnz_s"
# subtitlesfile = download_subtitles(url)
# video_text = srt_to_text(subtitlesfile)
# cleanup_directory(os.path.dirname(subtitlesfile))
# GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# client = genai.Client(api_key=GEMINI_API_KEY)
# response = client.models.generate_content(
# model='gemini-2.0-flash',
# contents=f"Summarize the following text chronologically, make it long, use markdown: \n{video_text}",
# )
# print(response.text)
def get_transcript_text(url):
    """Return the plain-text transcript of the video at ``url``.

    Downloads the subtitles, converts the resulting SRT file to text, and
    removes the directory the subtitles were downloaded into.

    Args:
        url: URL of the video whose subtitles should be fetched.

    Returns:
        The transcript text produced by ``srt_to_text``.

    Raises:
        Whatever ``download_subtitles``, ``srt_to_text`` or
        ``cleanup_directory`` raise; no exception is handled here.
    """
    print("Downloading subtitles...")
    subtitlesfile = download_subtitles(url)
    try:
        print("Extracting text from subtitles...")
        return srt_to_text(subtitlesfile)
    finally:
        # Runs even when extraction fails, so the download directory is not
        # left behind on disk.
        print("Cleaning up...")
        cleanup_directory(os.path.dirname(subtitlesfile))
def summarize_video(url, prompt):
    """Summarize a video's transcript with Gemini.

    The transcript for ``url`` is fetched first, then appended to ``prompt``
    (separated by a newline) and sent to the ``gemini-2.0-flash`` model. The
    API key is read from the ``GEMINI_API_KEY`` environment variable.

    Args:
        url: URL of the video to summarize.
        prompt: Instruction text placed before the transcript.

    Returns:
        The ``text`` attribute of the model response.
    """
    transcript = get_transcript_text(url)
    gemini = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
    request_text = prompt + "\n" + transcript

    print("Generating summary...")
    return gemini.models.generate_content(
        model='gemini-2.0-flash',
        contents=request_text,
    ).text
# Gradio UI: a URL box with a "Summarize" button beside it, an editable prompt,
# and a Markdown area that displays the value returned by summarize_video.
with gr.Blocks() as app:
    gr.Markdown("# YouTube Video Summarizer")
    with gr.Row():
        # The two columns share the row in a 5:1 scale (URL field : button).
        with gr.Column(scale=5):
            url_input = gr.Textbox(label="YouTube URL", placeholder="Enter YouTube URL here...")
        with gr.Column(scale=1):
            summarize_btn = gr.Button("Summarize", variant="primary")
    # Initial value of the prompt box; summarize_video places this text before
    # the transcript when building the model request.
    default_prompt = """Summarize the following text chronologically, make it long, use markdown:"""
    prompt_input = gr.Textbox(label="Prompt", value=default_prompt, lines=4)
    output = gr.Markdown()
    # Clicking the button calls summarize_video(url, prompt) and writes its
    # return value into the Markdown output component.
    summarize_btn.click(
        fn=summarize_video,
        inputs=[url_input, prompt_input],
        outputs=output
    )

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    app.launch()
|