Spaces:
Running
Running
# Bootstrap: install the runtime dependencies, then import them.
# The installs must run before the third-party imports below, so the order of
# this section matters.
import subprocess
import sys

# Requirement specifiers, installed one at a time in this order.
# NOTE(review): "pathlib" has been part of the standard library since
# Python 3.4; the PyPI package of that name is an old backport and is probably
# unnecessary here. It is kept so the set of installed packages is unchanged.
_BOOTSTRAP_REQUIREMENTS = (
    "transformers==4.35.2",
    "torch>=1.7.1",
    "huggingface_hub>=0.19.0",
    "tokenizers>=0.15.0",
    "pytube",
    "pathlib",
    "youtube_transcript_api>=0.6.3",
)

for _requirement in _BOOTSTRAP_REQUIREMENTS:
    # Run pip through the current interpreter so packages land in the same
    # environment that performs the imports below; a bare "pip" on PATH may
    # belong to a different Python installation.
    subprocess.check_call([sys.executable, "-m", "pip", "install", _requirement])

import transformers
import torch
import os
import pathlib
from huggingface_hub import login
import pytube
def install_missing_packages(required_packages=None):
    """Install every required package that cannot currently be imported.

    Args:
        required_packages: Optional mapping of importable module name to a
            pip version specifier (for example ``">=1.11.0"``) or ``None`` for
            "any version". When omitted, the application's default
            requirements are used.

    Returns:
        A list of the requirement strings that were passed to pip; empty when
        every package was already importable.

    Raises:
        subprocess.CalledProcessError: If a pip invocation exits non-zero.
    """
    import importlib
    import sys

    if required_packages is None:
        required_packages = {
            "torch": ">=1.11.0",
            "transformers": ">=4.35.2",
            "pytube": None,
            "huggingface_hub": ">=0.19.0",
        }

    installed = []
    for package, version in required_packages.items():
        try:
            # Only importability is checked; the version specifier is applied
            # solely when the package is missing altogether.
            importlib.import_module(package)
        except ImportError:
            requirement = f"{package}{version}" if version else package
            # Use the running interpreter's pip so the package is installed
            # into the environment that will import it.
            subprocess.check_call(
                [sys.executable, "-m", "pip", "install", requirement]
            )
            installed.append(requirement)

    if installed:
        # Let the import system notice packages added during this process.
        importlib.invalidate_caches()
    return installed
# Make sure the runtime dependencies are importable before anything else runs.
install_missing_packages()

# Authenticate against the Hugging Face Hub; the token must be supplied through
# the HF_TOKEN environment variable, otherwise start-up is aborted.
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN environment variable not set.")
login(hf_token)
# from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM | |
# import gradio as gr | |
# # Load the model and tokenizer | |
# tokenizer = AutoTokenizer.from_pretrained("machinelearningzuu/youtube-content-summarization-bart") | |
# model = AutoModelForSeq2SeqLM.from_pretrained("machinelearningzuu/youtube-content-summarization-bart") | |
# # Define a function for summarization | |
# def summarize_youtube_content(input_text): | |
# # Use the pipeline for summarization | |
# summarizer = pipeline("text2text-generation", model=model, tokenizer=tokenizer) | |
# summary = summarizer(input_text, max_length=150, min_length=30, do_sample=False) | |
# return summary[0]['generated_text'] | |
# # Create a Gradio interface | |
# interface = gr.Interface( | |
# fn=summarize_youtube_content, | |
# inputs=gr.Textbox(lines=10, placeholder="Paste YouTube transcript here..."), | |
# outputs=gr.Textbox(lines=5, label="Summarized Content"), | |
# title="YouTube Content Summarizer", | |
# description="Paste the transcript of a YouTube video to generate a concise summary.", | |
# ) | |
# # Launch the Gradio app | |
# if __name__ == "__main__": | |
# interface.launch() | |
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM | |
import gradio as gr | |
from youtube_transcript_api import YouTubeTranscriptApi | |
from urllib.parse import urlparse, parse_qs | |
def extract_video_id(url):
    """
    Extract video ID from YouTube URL

    Recognised forms (the scheme may be omitted):
        youtu.be/<id>
        [www.|m.|music.]youtube.com/watch?v=<id>
        [www.|m.|music.]youtube.com/embed/<id>, /shorts/<id>, /live/<id>, /v/<id>

    Args:
        url: The URL text entered by the user.

    Returns:
        The video ID string, or None when the input is empty, malformed, not
        a YouTube URL, or carries no video ID.
    """
    if not url or not isinstance(url, str):
        return None

    url = url.strip()
    if "://" not in url:
        # Pasted links often lack a scheme; without one urlparse() puts the
        # host into the path and no hostname would be detected.
        url = f"https://{url}"

    try:
        parsed_url = urlparse(url)
        hostname = parsed_url.hostname
    except ValueError:
        # urlparse rejects some malformed hosts (e.g. unbalanced brackets).
        return None

    segments = [part for part in parsed_url.path.split('/') if part]

    if hostname == 'youtu.be':
        # Short links carry the ID as the first path segment.
        return segments[0] if segments else None

    if hostname in ('www.youtube.com', 'youtube.com', 'm.youtube.com', 'music.youtube.com'):
        if segments == ['watch']:
            # A missing "v" parameter yields None instead of a KeyError.
            values = parse_qs(parsed_url.query).get('v')
            return values[0] if values else None
        if len(segments) >= 2 and segments[0] in ('embed', 'shorts', 'live', 'v'):
            return segments[1]

    return None
def get_transcript(video_id):
    """
    Get transcript from YouTube video

    Args:
        video_id: The YouTube video ID to fetch the transcript for.

    Returns:
        The transcript segments joined into one space-separated string. On
        any failure, a string of the form "Error getting transcript: <reason>"
        is returned instead of raising.
    """
    try:
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Older releases expose a static helper returning a list of dicts.
            segments = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            # Newer releases replace the static helper with an instance-based
            # fetch(); to_raw_data() yields the same list-of-dicts shape.
            segments = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
        return ' '.join(segment['text'] for segment in segments)
    except Exception as e:
        # Errors are reported in-band as text; the "Error getting transcript:"
        # prefix is part of this function's contract.
        return f"Error getting transcript: {str(e)}"
# Hugging Face model used for summarization.
_SUMMARIZATION_MODEL = "machinelearningzuu/youtube-content-summarization-bart"
# Prefix that get_transcript() puts in front of its in-band error messages.
_TRANSCRIPT_ERROR_PREFIX = "Error getting transcript:"
# Summarization pipeline, created on first use and then reused.
_summarizer = None


def _get_summarizer():
    """Return the shared summarization pipeline, building it on first use."""
    global _summarizer
    if _summarizer is None:
        tokenizer = AutoTokenizer.from_pretrained(_SUMMARIZATION_MODEL)
        model = AutoModelForSeq2SeqLM.from_pretrained(_SUMMARIZATION_MODEL)
        _summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)
    return _summarizer


def summarize_youtube_video(video_url):
    """
    Main function to summarize YouTube video content

    Args:
        video_url: The YouTube URL entered by the user.

    Returns:
        The generated summary text, or a human-readable error message
        ("Invalid YouTube URL", "Error getting transcript: ...", or
        "An error occurred: ..."). This function does not raise; every
        failure is turned into a message for display in the UI.
    """
    try:
        # Extract video ID
        video_id = extract_video_id(video_url)
        if not video_id:
            return "Invalid YouTube URL"

        # Get transcript
        transcript = get_transcript(video_id)
        # Match the full error prefix: a genuine transcript may well begin
        # with the word "Error" and must not be mistaken for a failure.
        if transcript.startswith(_TRANSCRIPT_ERROR_PREFIX):
            return transcript
        if not transcript.strip():
            return "Error getting transcript: transcript is empty"

        # Reuse the cached pipeline instead of reloading the model and
        # tokenizer on every request.
        summarizer = _get_summarizer()

        # Generate summary; truncation keeps transcripts longer than the
        # model's maximum input length from failing.
        summary = summarizer(
            transcript,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        return f"An error occurred: {str(e)}"
# Build the Gradio UI: a single-line URL box in, a five-line summary box out.
_url_input = gr.Textbox(
    lines=1,
    placeholder="Enter YouTube video URL here...",
)
_summary_output = gr.Textbox(
    lines=5,
    label="Video Summary",
)

interface = gr.Interface(
    fn=summarize_youtube_video,
    inputs=_url_input,
    outputs=_summary_output,
    title="YouTube Video Summarizer",
    description="Enter a YouTube video URL to generate a concise summary of its content.",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()
########################## | |