import subprocess
import sys

# Bootstrap the packages this Space needs. pathlib is part of the Python standard
# library, so it is not installed from PyPI here.
subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers==4.35.2"])
subprocess.check_call([sys.executable, "-m", "pip", "install", "torch>=1.11.0"])
subprocess.check_call([sys.executable, "-m", "pip", "install", "huggingface_hub>=0.19.0"])
subprocess.check_call([sys.executable, "-m", "pip", "install", "tokenizers>=0.15.0"])
subprocess.check_call([sys.executable, "-m", "pip", "install", "pytube"])
subprocess.check_call([sys.executable, "-m", "pip", "install", "youtube_transcript_api>=0.6.3"])

import os
import pathlib

import torch
import transformers
import pytube
from huggingface_hub import login

def install_missing_packages():
    """Install any required package that cannot currently be imported."""
    required_packages = {
        "torch": ">=1.11.0",
        "transformers": ">=4.35.2",
        "pytube": None,
        "huggingface_hub": ">=0.19.0",
    }
    for package, version in required_packages.items():
        try:
            __import__(package)
        except ImportError:
            # Append the version specifier (e.g. "torch>=1.11.0") when one is configured.
            package_name = f"{package}{version}" if version else package
            subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])


install_missing_packages()
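# Illustrative behaviour (assumption, not from the original source): if "pytube" cannot be
# imported, the call above runs "pip install pytube"; for "torch" it runs
# "pip install torch>=1.11.0" because a version specifier is configured.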

# Authenticate with the Hugging Face Hub using the HF_TOKEN environment variable
# (on Spaces this is typically configured as a repository secret).
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(hf_token)
else:
    raise ValueError("HF_TOKEN environment variable not set.")
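# Example for local runs (assumption; the token value is a placeholder, not a real token):
#   HF_TOKEN=hf_your_token_here python app.py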

# --- Earlier version (kept for reference): summarize a transcript pasted by the user ---
# from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
# import gradio as gr
#
# # Load the model and tokenizer
# tokenizer = AutoTokenizer.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
# model = AutoModelForSeq2SeqLM.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
#
# # Define a function for summarization
# def summarize_youtube_content(input_text):
#     # Use the pipeline for summarization
#     summarizer = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
#     summary = summarizer(input_text, max_length=150, min_length=30, do_sample=False)
#     return summary[0]['generated_text']
#
# # Create a Gradio interface
# interface = gr.Interface(
#     fn=summarize_youtube_content,
#     inputs=gr.Textbox(lines=10, placeholder="Paste YouTube transcript here..."),
#     outputs=gr.Textbox(lines=5, label="Summarized Content"),
#     title="YouTube Content Summarizer",
#     description="Paste the transcript of a YouTube video to generate a concise summary.",
# )
#
# # Launch the Gradio app
# if __name__ == "__main__":
#     interface.launch()

from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
from urllib.parse import urlparse, parse_qs

def extract_video_id(url):
    """
    Extract the video ID from a YouTube URL
    """
    parsed_url = urlparse(url)
    if parsed_url.hostname == 'youtu.be':
        return parsed_url.path[1:]
    if parsed_url.hostname in ('www.youtube.com', 'youtube.com'):
        if parsed_url.path == '/watch':
            # .get() avoids a KeyError when the URL has no "v" query parameter.
            return parse_qs(parsed_url.query).get('v', [None])[0]
    return None
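
# Illustrative examples (hypothetical URLs and ID, not from the original source):
#   extract_video_id("https://www.youtube.com/watch?v=abc123XYZ_0")  -> "abc123XYZ_0"
#   extract_video_id("https://youtu.be/abc123XYZ_0")                 -> "abc123XYZ_0"
#   extract_video_id("https://example.com/watch?v=abc123XYZ_0")      -> None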

def get_transcript(video_id):
    """
    Get transcript from YouTube video
    """
    try:
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        transcript = ' '.join([t['text'] for t in transcript_list])
        return transcript
    except Exception as e:
        return f"Error getting transcript: {str(e)}"

def summarize_youtube_video(video_url):
    """
    Main function to summarize YouTube video content
    """
    try:
        # Extract video ID
        video_id = extract_video_id(video_url)
        if not video_id:
            return "Invalid YouTube URL"

        # Get transcript
        transcript = get_transcript(video_id)
        if transcript.startswith("Error"):
            return transcript

        # Load model and tokenizer (reloaded on every call; caching them at module
        # level would make repeated requests faster)
        tokenizer = AutoTokenizer.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
        model = AutoModelForSeq2SeqLM.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")

        # Create summarization pipeline
        summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)

        # Generate summary; truncation=True keeps long transcripts within the model's
        # maximum input length instead of failing on oversized inputs
        summary = summarizer(transcript, max_length=150, min_length=30, do_sample=False, truncation=True)
        return summary[0]['summary_text']
    except Exception as e:
        return f"An error occurred: {str(e)}"

# Create Gradio interface
interface = gr.Interface(
    fn=summarize_youtube_video,
    inputs=gr.Textbox(
        lines=1,
        placeholder="Enter YouTube video URL here..."
    ),
    outputs=gr.Textbox(
        lines=5,
        label="Video Summary"
    ),
    title="YouTube Video Summarizer",
    description="Enter a YouTube video URL to generate a concise summary of its content.",
)

# Launch the interface
if __name__ == "__main__":
    interface.launch()

##########################