"""Gradio app that summarizes a YouTube video from its transcript.

At start-up the script installs its Python dependencies with pip, logs in to
the Hugging Face Hub using the ``HF_TOKEN`` environment variable, and builds a
Gradio interface. The interface is launched only when the file is run as a
script.
"""

import functools
import os
import pathlib
import subprocess
import sys
from typing import Optional
from urllib.parse import parse_qs, urlparse


def _pip_install(requirement: str) -> None:
    """Install *requirement* into the interpreter that is running this script."""
    # "python -m pip" always targets the active environment, whereas a bare
    # "pip" found on PATH may belong to a different interpreter.
    subprocess.check_call([sys.executable, "-m", "pip", "install", requirement])


# Installed unconditionally at start-up, one requirement per pip call and in
# this order. "pathlib" is intentionally not listed: the PyPI distribution of
# that name is an obsolete backport of the standard-library module.
STARTUP_REQUIREMENTS = (
    "transformers==4.35.2",
    "torch>=1.7.1",
    "huggingface_hub>=0.19.0",
    "tokenizers>=0.15.0",
    "pytube",
    "youtube_transcript_api>=0.6.3",
)

for _requirement in STARTUP_REQUIREMENTS:
    _pip_install(_requirement)

# Third-party imports have to come after the installation step above.
import gradio as gr  # noqa: E402
import pytube  # noqa: E402,F401
import torch  # noqa: E402,F401
import transformers  # noqa: E402,F401
from huggingface_hub import login  # noqa: E402
from transformers import (  # noqa: E402
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    pipeline,
)
from youtube_transcript_api import YouTubeTranscriptApi  # noqa: E402

MODEL_NAME = "machinelearningzuu/youtube-content-summarization-bart"

# Hosts and leading path segments recognised by extract_video_id().
_YOUTUBE_HOSTS = frozenset(
    {"www.youtube.com", "youtube.com", "m.youtube.com", "music.youtube.com"}
)
_ID_PATH_PREFIXES = frozenset({"embed", "shorts", "live", "v"})


def install_missing_packages():
    """Install any required package that cannot be imported.

    Each package is imported by name; on ``ImportError`` it is installed with
    pip, using the version specifier from the table below when one is given.
    """
    required_packages = {
        "torch": ">=1.11.0",
        "transformers": ">=4.35.2",
        "pytube": None,
        "huggingface_hub": ">=0.19.0",
    }
    for package, version in required_packages.items():
        try:
            __import__(package)
        except ImportError:
            _pip_install(f"{package}{version}" if version else package)


install_missing_packages()

hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(hf_token)
else:
    raise ValueError("HF_TOKEN environment variable not set.")


def extract_video_id(url: str) -> Optional[str]:
    """Extract the video ID from a YouTube URL.

    Recognised forms are ``youtu.be/<id>``, ``<host>/watch?v=<id>`` and
    ``<host>/(embed|shorts|live|v)/<id>`` where ``<host>`` is one of the
    YouTube host names in ``_YOUTUBE_HOSTS``.

    Returns:
        The video ID, or ``None`` when the URL is empty, is not a recognised
        YouTube URL, or carries no video ID.
    """
    if not url:
        return None

    parsed_url = urlparse(url.strip())
    host = parsed_url.hostname
    segments = [part for part in parsed_url.path.split("/") if part]

    if host == "youtu.be":
        return segments[0] if segments else None

    if host in _YOUTUBE_HOSTS:
        if segments == ["watch"]:
            # .get() instead of indexing: a /watch URL without a "v"
            # parameter is simply invalid, not an error.
            candidates = parse_qs(parsed_url.query).get("v")
            return candidates[0] if candidates else None
        if len(segments) >= 2 and segments[0] in _ID_PATH_PREFIXES:
            return segments[1]

    return None


def _fetch_transcript(video_id: str) -> str:
    """Return the transcript text for *video_id*, raising on failure."""
    # Older youtube_transcript_api releases expose a static get_transcript()
    # returning dicts; when it is absent, fall back to the instance-based
    # fetch() API, whose items carry the text as an attribute.
    legacy_fetch = getattr(YouTubeTranscriptApi, "get_transcript", None)
    if legacy_fetch is not None:
        return " ".join(entry["text"] for entry in legacy_fetch(video_id))
    fetched = YouTubeTranscriptApi().fetch(video_id)
    return " ".join(snippet.text for snippet in fetched)


def get_transcript(video_id):
    """Get the transcript of a YouTube video as a single string.

    Returns:
        The transcript text, or a message starting with
        ``"Error getting transcript: "`` when the transcript cannot be
        retrieved.
    """
    try:
        return _fetch_transcript(video_id)
    except Exception as e:
        return f"Error getting transcript: {str(e)}"


@functools.lru_cache(maxsize=1)
def _get_summarizer():
    """Build the summarization pipeline once and reuse it for later requests."""
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
    return pipeline("summarization", model=model, tokenizer=tokenizer)


def summarize_youtube_video(video_url):
    """Summarize the content of a YouTube video.

    Args:
        video_url: URL of the video, as typed into the Gradio textbox.

    Returns:
        The generated summary, or a human-readable message when the URL is
        invalid, the transcript cannot be fetched or is empty, or
        summarization fails. This function never raises, so the message is
        always shown in the UI.
    """
    try:
        video_id = extract_video_id(video_url)
        if not video_id:
            return "Invalid YouTube URL"

        # Failure is signalled by an exception rather than by inspecting the
        # returned text, so a transcript that happens to begin with "Error"
        # is still summarized.
        try:
            transcript = _fetch_transcript(video_id)
        except Exception as e:
            return f"Error getting transcript: {str(e)}"

        if not transcript.strip():
            return "No transcript text was found for this video."

        summarizer = _get_summarizer()
        # truncation=True asks the tokenizer to cut over-long transcripts down
        # to the model's maximum input length.
        summary = summarizer(
            transcript,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summary[0]["summary_text"]
    except Exception as e:
        return f"An error occurred: {str(e)}"


# Create Gradio interface
interface = gr.Interface(
    fn=summarize_youtube_video,
    inputs=gr.Textbox(
        lines=1,
        placeholder="Enter YouTube video URL here...",
    ),
    outputs=gr.Textbox(
        lines=5,
        label="Video Summary",
    ),
    title="YouTube Video Summarizer",
    description="Enter a YouTube video URL to generate a concise summary of its content.",
)

# Launch the interface
if __name__ == "__main__":
    interface.launch()