# Source: Hugging Face Space "Sayiqa7/youtbe_content_summ" (status at capture: Sleeping).
# File size: 4,801 bytes.
# Revisions shown in the page's blame gutter: 073e2b4, fbee908, 151f648, d9e3ffc,
# 8cb2a5a, 3d331ca, 515a8de, d7aeb92, 5964686, c63913d, f60d1cb.

import subprocess
import sys

# Bootstrap the third-party dependencies before they are imported below.
#
# Each requirement is handed to pip as a single argv element (no shell is
# involved), so the ">=" inside a version specifier is passed through verbatim.
# Invoking pip as "<this interpreter> -m pip" guarantees the packages are
# installed into the environment that is running this script; a bare "pip" on
# PATH may belong to a different Python installation.
#
# NOTE(review): "pathlib" is part of the Python 3 standard library; the PyPI
# distribution of that name is an old backport. It is kept here so the
# install set is unchanged -- confirm whether it is still wanted.
for _requirement in (
    "transformers==4.35.2",
    "torch>=1.7.1",
    "huggingface_hub>=0.19.0",
    "tokenizers>=0.15.0",
    "pytube",
    "pathlib",
    "youtube_transcript_api>=0.6.3",
):
    # check_call raises CalledProcessError if pip exits with a non-zero status.
    subprocess.check_call([sys.executable, "-m", "pip", "install", _requirement])

import transformers
import torch
import os
import pathlib
from huggingface_hub import login
import pytube
def install_missing_packages(required_packages=None):
    """Install every required package that cannot currently be imported.

    Args:
        required_packages: Optional mapping of importable module name to a pip
            version specifier (for example ``">=4.35.2"``), or ``None`` for
            "any version". The module name is also used as the pip
            distribution name. When omitted, the application's default set
            (torch, transformers, pytube, huggingface_hub) is checked.

    Returns:
        None.

    Raises:
        subprocess.CalledProcessError: If a ``pip install`` command exits with
            a non-zero status.

    Note:
        The version specifier is applied only when the import fails. A package
        that is already importable is left untouched, even if it is older than
        the requested version.
    """
    import importlib
    import sys

    if required_packages is None:
        required_packages = {
            "torch": ">=1.11.0",
            "transformers": ">=4.35.2",
            "pytube": None,
            "huggingface_hub": ">=0.19.0",
        }

    for package, version in required_packages.items():
        try:
            importlib.import_module(package)
        except ImportError:
            requirement = f"{package}{version}" if version else package
            # Run pip through the current interpreter so the package is
            # installed into the environment this process is importing from.
            subprocess.check_call(
                [sys.executable, "-m", "pip", "install", requirement]
            )

# Make sure the runtime dependencies are importable before going any further.
install_missing_packages()

# Authenticate against the Hugging Face Hub with the token taken from the
# environment; start-up is aborted when no token is configured.
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN environment variable not set.")
login(hf_token)
    

# from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
# import gradio as gr

# # Load the model and tokenizer
# tokenizer = AutoTokenizer.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
# model = AutoModelForSeq2SeqLM.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")

# # Define a function for summarization
# def summarize_youtube_content(input_text):
#     # Use the pipeline for summarization
#     summarizer = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
#     summary = summarizer(input_text, max_length=150, min_length=30, do_sample=False)
#     return summary[0]['generated_text']

# # Create a Gradio interface
# interface = gr.Interface(
#     fn=summarize_youtube_content,
#     inputs=gr.Textbox(lines=10, placeholder="Paste YouTube transcript here..."),
#     outputs=gr.Textbox(lines=5, label="Summarized Content"),
#     title="YouTube Content Summarizer",
#     description="Paste the transcript of a YouTube video to generate a concise summary.",
# )

# # Launch the Gradio app
# if __name__ == "__main__":
#     interface.launch()

from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
from urllib.parse import urlparse, parse_qs

def extract_video_id(url):
    """
    Extract the video ID from a YouTube URL.

    Recognised forms (with or without a scheme and a leading ``www.``):
      * ``youtu.be/<id>``
      * ``youtube.com/watch?v=<id>`` (also ``m.`` and ``music.`` hosts)
      * ``youtube.com/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>``

    Args:
        url: The URL text entered by the user. Surrounding whitespace is
            ignored.

    Returns:
        The video ID string, or ``None`` when ``url`` is not a string, is
        empty, is not a recognised YouTube URL, or carries no video ID.
    """
    if not isinstance(url, str):
        return None
    candidate = url.strip()
    if not candidate:
        return None

    try:
        parsed_url = urlparse(candidate)
        if not parsed_url.netloc:
            # Input such as "youtube.com/watch?v=..." has no scheme, so the
            # host ends up in the path; re-parse with a scheme prepended.
            parsed_url = urlparse("https://" + candidate.lstrip("/"))
        hostname = (parsed_url.hostname or "").lower()
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. a broken IPv6 host).
        return None

    if hostname.startswith("www."):
        hostname = hostname[len("www."):]

    segments = [part for part in parsed_url.path.split("/") if part]

    if hostname == "youtu.be":
        # Short links carry the ID as the first path segment.
        return segments[0] if segments else None

    if hostname in (
        "youtube.com",
        "m.youtube.com",
        "music.youtube.com",
        "youtube-nocookie.com",
    ):
        if segments == ["watch"]:
            # A missing or blank "v" parameter yields None instead of KeyError.
            values = parse_qs(parsed_url.query).get("v")
            return values[0] if values else None
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]

    return None

def get_transcript(video_id):
    """
    Get the transcript text of a YouTube video.

    Works with both generations of the youtube-transcript-api package: the
    instance-based ``YouTubeTranscriptApi().fetch()`` API (1.x) and the older
    static ``YouTubeTranscriptApi.get_transcript()`` API (0.6.x).

    Args:
        video_id: The YouTube video ID to fetch the transcript for.

    Returns:
        The transcript segments joined with single spaces. On any failure the
        function does not raise; it returns a string of the form
        ``"Error getting transcript: <reason>"`` instead.
    """
    try:
        if hasattr(YouTubeTranscriptApi, "fetch"):
            # 1.x API: fetch() returns an iterable of snippet objects whose
            # text is exposed as the ``text`` attribute.
            fetched = YouTubeTranscriptApi().fetch(video_id)
            texts = [snippet.text for snippet in fetched]
        else:
            # 0.6.x API: a list of dicts, each with a 'text' key.
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
            texts = [t['text'] for t in transcript_list]
        return ' '.join(texts)
    except Exception as e:
        # Errors are reported in-band so the caller can show them to the user.
        return f"Error getting transcript: {str(e)}"

# Hugging Face model used for both the tokenizer and the seq2seq weights.
_SUMMARIZATION_MODEL_NAME = "machinelearningzuu/youtube-content-summarization-bart"

# Holds the lazily-built summarization pipeline so the tokenizer and model are
# loaded once per process instead of once per request.
_SUMMARIZER_CACHE = {}


def _get_summarizer():
    """Return the shared summarization pipeline, building it on first use."""
    if "pipeline" not in _SUMMARIZER_CACHE:
        tokenizer = AutoTokenizer.from_pretrained(_SUMMARIZATION_MODEL_NAME)
        model = AutoModelForSeq2SeqLM.from_pretrained(_SUMMARIZATION_MODEL_NAME)
        _SUMMARIZER_CACHE["pipeline"] = pipeline(
            "summarization", model=model, tokenizer=tokenizer
        )
    return _SUMMARIZER_CACHE["pipeline"]


def summarize_youtube_video(video_url):
    """
    Summarize the content of a YouTube video from its URL.

    Steps: extract the video ID, fetch the transcript, then run the
    summarization pipeline over the transcript text.

    Args:
        video_url: The YouTube URL entered by the user.

    Returns:
        The generated summary text. The function never raises: for an
        unrecognised URL it returns ``"Invalid YouTube URL"``, for a transcript
        failure it returns the transcript error message, and for any other
        failure it returns ``"An error occurred: <reason>"``.
    """
    try:
        # Extract video ID
        video_id = extract_video_id(video_url)
        if not video_id:
            return "Invalid YouTube URL"

        # Get transcript. Failures come back in-band as a message with this
        # exact prefix; matching the full prefix avoids mistaking a transcript
        # that merely begins with the word "Error" for a failure.
        transcript = get_transcript(video_id)
        if transcript.startswith("Error getting transcript:"):
            return transcript
        if not transcript.strip():
            return "Error getting transcript: the transcript is empty"

        # Generate summary. truncation=True clips transcripts longer than the
        # model's maximum input length instead of failing on them.
        summarizer = _get_summarizer()
        summary = summarizer(
            transcript,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']

    except Exception as e:
        # Top-level UI boundary: report the failure as text rather than raising.
        return f"An error occurred: {str(e)}"

# Build the Gradio UI: a single-line URL field in, a multi-line summary box out.
url_input = gr.Textbox(lines=1, placeholder="Enter YouTube video URL here...")
summary_output = gr.Textbox(lines=5, label="Video Summary")

interface = gr.Interface(
    fn=summarize_youtube_video,
    inputs=url_input,
    outputs=summary_output,
    title="YouTube Video Summarizer",
    description="Enter a YouTube video URL to generate a concise summary of its content.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()


##########################