Sayiqa7's picture
Update app.py
515a8de verified
import subprocess
import sys

# Install the runtime dependencies before the third-party imports below run.
#
# * ``sys.executable -m pip`` guarantees the packages land in the interpreter
#   that is executing this script; a bare ``pip`` on PATH may belong to a
#   different Python installation.
# * All requirements go through ONE pip invocation so the resolver can satisfy
#   the constraints together (``transformers==4.35.2`` restricts which
#   ``tokenizers`` / ``huggingface_hub`` releases are acceptable).
# * ``pathlib`` is intentionally not installed: it is part of the standard
#   library, and the PyPI package of the same name is an obsolete backport.
subprocess.check_call(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "transformers==4.35.2",
        "torch>=1.7.1",
        "huggingface_hub>=0.19.0",
        "tokenizers>=0.15.0",
        "pytube",
        "youtube_transcript_api>=0.6.3",
    ]
)
import transformers
import torch
import os
import pathlib
from huggingface_hub import login
import pytube
def install_missing_packages(required_packages=None):
    """Install every required package that cannot currently be imported.

    Args:
        required_packages: Optional mapping of importable package name to a
            pip version specifier (for example ``">=1.11.0"``) or ``None``
            when any version is acceptable. When omitted, the application's
            default dependency set is used.

    Raises:
        subprocess.CalledProcessError: If a pip installation fails.
    """
    # Local imports keep this function self-contained.
    import importlib
    import sys

    if required_packages is None:
        required_packages = {
            "torch": ">=1.11.0",
            "transformers": ">=4.35.2",
            "pytube": None,
            "huggingface_hub": ">=0.19.0",
        }

    for package, version in required_packages.items():
        try:
            importlib.import_module(package)
        except ImportError:
            requirement = f"{package}{version}" if version else package
            # Use the running interpreter's pip so the package is installed
            # into the environment this process actually imports from.
            subprocess.check_call(
                [sys.executable, "-m", "pip", "install", requirement]
            )
# Make sure the optional dependencies are importable before going further.
install_missing_packages()

# Authenticate against the Hugging Face Hub; the token must be supplied
# through the HF_TOKEN environment variable, otherwise start-up is aborted.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN environment variable not set.")
login(hf_token)
# from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
# import gradio as gr
# # Load the model and tokenizer
# tokenizer = AutoTokenizer.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
# model = AutoModelForSeq2SeqLM.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
# # Define a function for summarization
# def summarize_youtube_content(input_text):
# # Use the pipeline for summarization
# summarizer = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
# summary = summarizer(input_text, max_length=150, min_length=30, do_sample=False)
# return summary[0]['generated_text']
# # Create a Gradio interface
# interface = gr.Interface(
# fn=summarize_youtube_content,
# inputs=gr.Textbox(lines=10, placeholder="Paste YouTube transcript here..."),
# outputs=gr.Textbox(lines=5, label="Summarized Content"),
# title="YouTube Content Summarizer",
# description="Paste the transcript of a YouTube video to generate a concise summary.",
# )
# # Launch the Gradio app
# if __name__ == "__main__":
# interface.launch()
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
from urllib.parse import urlparse, parse_qs
def extract_video_id(url):
    """Extract the video ID from a YouTube URL.

    Recognised forms:
      * ``https://youtu.be/<id>``
      * ``https://(www.|m.|music.)youtube.com/watch?v=<id>``
      * ``https://(www.|m.|music.)youtube.com/(embed|shorts|live|v)/<id>``

    A missing scheme (``youtube.com/watch?v=<id>``) is tolerated.

    Args:
        url: The URL text entered by the user.

    Returns:
        The video ID as a string, or ``None`` when the input is not a string,
        is empty, is not a recognised YouTube URL, or carries no video ID.
    """
    if not isinstance(url, str) or not url.strip():
        return None
    candidate = url.strip()

    try:
        parsed_url = urlparse(candidate)
        if not parsed_url.scheme and not parsed_url.netloc:
            # Users often paste URLs without "https://"; without a scheme
            # urlparse puts the host name into the path, so re-parse.
            parsed_url = urlparse("https://" + candidate)
        hostname = parsed_url.hostname or ""
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. broken IPv6 hosts).
        return None

    path_segments = [segment for segment in parsed_url.path.split("/") if segment]

    if hostname in ("youtu.be", "www.youtu.be"):
        return path_segments[0] if path_segments else None

    if hostname in (
        "www.youtube.com",
        "youtube.com",
        "m.youtube.com",
        "music.youtube.com",
    ):
        if path_segments == ["watch"]:
            # .get() avoids a KeyError when the "v" parameter is absent.
            video_ids = parse_qs(parsed_url.query).get("v")
            return video_ids[0] if video_ids else None
        if len(path_segments) >= 2 and path_segments[0] in (
            "embed",
            "shorts",
            "live",
            "v",
        ):
            return path_segments[1]

    return None
def get_transcript(video_id):
    """Fetch the transcript of a YouTube video as one string.

    Works with both generations of ``youtube_transcript_api``: releases that
    expose the instance method ``YouTubeTranscriptApi().fetch()`` and older
    releases that only provide the static ``get_transcript()``. The file
    installs the library unpinned, so either may be present at runtime.

    Args:
        video_id: The YouTube video ID.

    Returns:
        The transcript segments joined with single spaces. On any failure the
        function does not raise; it returns a message starting with
        ``"Error getting transcript: "`` instead.
    """
    try:
        if hasattr(YouTubeTranscriptApi, "fetch"):
            # Newer API: fetch() returns a transcript object whose
            # to_raw_data() yields the same list-of-dicts shape as before.
            segments = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
        else:
            # Legacy API (static method).
            segments = YouTubeTranscriptApi.get_transcript(video_id)
        return ' '.join(segment['text'] for segment in segments)
    except Exception as e:
        # Best-effort boundary: the caller expects an in-band error string.
        return f"Error getting transcript: {str(e)}"
_SUMMARIZATION_MODEL = "machinelearningzuu/youtube-content-summarization-bart"
_summarizer_cache = {}


def _get_summarizer():
    """Return the summarization pipeline, building it on first use only."""
    if "pipeline" not in _summarizer_cache:
        tokenizer = AutoTokenizer.from_pretrained(_SUMMARIZATION_MODEL)
        model = AutoModelForSeq2SeqLM.from_pretrained(_SUMMARIZATION_MODEL)
        _summarizer_cache["pipeline"] = pipeline(
            "summarization", model=model, tokenizer=tokenizer
        )
    return _summarizer_cache["pipeline"]


def summarize_youtube_video(video_url):
    """Summarize the spoken content of a YouTube video.

    Args:
        video_url: URL of the YouTube video, as entered by the user.

    Returns:
        The generated summary, or a human-readable message when the URL is
        invalid, the transcript cannot be obtained or is empty, or any other
        error occurs. This function never raises; it is used directly as the
        UI callback.
    """
    try:
        if isinstance(video_url, str):
            # Pasted URLs frequently carry stray surrounding whitespace.
            video_url = video_url.strip()

        # Extract video ID
        video_id = extract_video_id(video_url)
        if not video_id:
            return "Invalid YouTube URL"

        # Get transcript
        transcript = get_transcript(video_id)
        # Match the exact prefix get_transcript() uses for failures so that a
        # genuine transcript that merely begins with "Error" is not rejected.
        if transcript.startswith("Error getting transcript:"):
            return transcript
        if not transcript.strip():
            return "No transcript text is available for this video."

        # Reuse the cached pipeline instead of reloading the model and
        # tokenizer on every request.
        summarizer = _get_summarizer()

        # truncation=True clips transcripts that exceed the model's maximum
        # input length; only the leading part of a long transcript is then
        # summarized, but generation no longer fails on long videos.
        summary = summarizer(
            transcript,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        return f"An error occurred: {str(e)}"
# Build the Gradio UI: a single-line URL box feeds summarize_youtube_video,
# and its return value is shown in a five-line output box.
interface = gr.Interface(
    title="YouTube Video Summarizer",
    description="Enter a YouTube video URL to generate a concise summary of its content.",
    fn=summarize_youtube_video,
    inputs=gr.Textbox(lines=1, placeholder="Enter YouTube video URL here..."),
    outputs=gr.Textbox(lines=5, label="Video Summary"),
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()
##########################