File size: 4,801 Bytes
073e2b4
fbee908
073e2b4
fbee908
 
 
 
151f648
fbee908
 
 
 
d9e3ffc
fbee908
 
 
 
 
 
 
151f648
fbee908
 
 
 
 
 
 
 
 
 
 
 
8cb2a5a
 
 
 
 
 
 
151f648
3d331ca
 
 
 
151f648
 
3d331ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151f648
515a8de
d7aeb92
515a8de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d7aeb92
515a8de
 
 
d7aeb92
515a8de
5964686
515a8de
 
 
 
d7aeb92
515a8de
 
 
 
d7aeb92
515a8de
 
 
 
c63913d
515a8de
 
 
 
 
 
 
 
 
 
 
 
 
d7aeb92
515a8de
d7aeb92
515a8de
 
 
 
 
 
 
 
 
 
 
d7aeb92
f60d1cb
515a8de
 
 
 
 
 
 
f60d1cb
d7aeb92
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import subprocess
import sys

# Runtime dependency bootstrap: install the third-party packages this app
# needs before they are imported below.
#
# pip is invoked as "<current interpreter> -m pip" rather than through a bare
# "pip" executable, so the packages always land in the environment that is
# actually running this script (a bare "pip" on PATH may belong to a
# different Python installation).
#
# NOTE: "pathlib" is deliberately not installed from PyPI. It is part of the
# standard library on Python 3; the PyPI distribution of that name is an
# obsolete backport that can shadow the stdlib module.
for _requirement in (
    "transformers==4.35.2",
    "torch>=1.7.1",
    "huggingface_hub>=0.19.0",
    "tokenizers>=0.15.0",
    "pytube",
    "youtube_transcript_api>=0.6.3",
):
    subprocess.check_call([sys.executable, "-m", "pip", "install", _requirement])

import transformers
import torch
import os
import pathlib
from huggingface_hub import login
import pytube
def install_missing_packages(required_packages=None):
    """
    Install every required package that cannot currently be imported.

    Args:
        required_packages: Optional mapping of importable package name to a
            pip version specifier (for example ">=1.11.0"), or None for "any
            version". When omitted, the app's default requirement set is used.

    Raises:
        subprocess.CalledProcessError: if a pip invocation fails.

    Note:
        Only importability is checked. A package that imports successfully is
        left alone even if its installed version does not satisfy the
        specifier; the specifier is used only when installing.
    """
    import importlib
    import sys

    if required_packages is None:
        required_packages = {
            "torch": ">=1.11.0",
            "transformers": ">=4.35.2",
            "pytube": None,
            "huggingface_hub": ">=0.19.0",
        }

    installed_any = False
    for package, version in required_packages.items():
        try:
            importlib.import_module(package)
        except ImportError:
            package_name = f"{package}{version}" if version else package
            # Run pip through the current interpreter so the package is
            # installed into the environment this process is running in.
            subprocess.check_call(
                [sys.executable, "-m", "pip", "install", package_name]
            )
            installed_any = True

    if installed_any:
        # Let the import system notice packages added while running.
        importlib.invalidate_caches()

# Make sure the runtime dependencies are importable before continuing.
install_missing_packages()

# Authenticate against the Hugging Face Hub. The token is read from the
# environment; start-up is aborted when it is missing or empty.
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN environment variable not set.")
login(hf_token)
    

# from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
# import gradio as gr

# # Load the model and tokenizer
# tokenizer = AutoTokenizer.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
# model = AutoModelForSeq2SeqLM.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")

# # Define a function for summarization
# def summarize_youtube_content(input_text):
#     # Use the pipeline for summarization
#     summarizer = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
#     summary = summarizer(input_text, max_length=150, min_length=30, do_sample=False)
#     return summary[0]['generated_text']

# # Create a Gradio interface
# interface = gr.Interface(
#     fn=summarize_youtube_content,
#     inputs=gr.Textbox(lines=10, placeholder="Paste YouTube transcript here..."),
#     outputs=gr.Textbox(lines=5, label="Summarized Content"),
#     title="YouTube Content Summarizer",
#     description="Paste the transcript of a YouTube video to generate a concise summary.",
# )

# # Launch the Gradio app
# if __name__ == "__main__":
#     interface.launch()

from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
from urllib.parse import urlparse, parse_qs

def extract_video_id(url):
    """
    Extract video ID from YouTube URL

    Recognised forms:
      * https://youtu.be/<id>
      * https://(www.|m.|music.)youtube.com/watch?v=<id>
      * https://(www.|m.|music.)youtube.com/(embed|shorts|live|v)/<id>

    Args:
        url: The URL text entered by the user.

    Returns:
        The video ID as a string, or None when the input is not a string,
        cannot be parsed, or is not a recognised YouTube video URL.
    """
    if not isinstance(url, str):
        return None

    try:
        parsed_url = urlparse(url.strip())
        hostname = parsed_url.hostname or ''
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. a broken IPv6 host).
        return None

    # Non-empty path components, e.g. "/shorts/abc/" -> ["shorts", "abc"].
    segments = [part for part in parsed_url.path.split('/') if part]

    if hostname in ('youtu.be', 'www.youtu.be'):
        # Short links carry the ID as the first path component.
        return segments[0] if segments else None

    if hostname in ('www.youtube.com', 'youtube.com',
                    'm.youtube.com', 'music.youtube.com'):
        if parsed_url.path.rstrip('/') == '/watch':
            # A missing or blank "v" parameter means there is no video ID;
            # report that as None instead of raising KeyError.
            values = parse_qs(parsed_url.query).get('v')
            return values[0] if values else None
        if len(segments) >= 2 and segments[0] in ('embed', 'shorts', 'live', 'v'):
            return segments[1]

    return None

def get_transcript(video_id):
    """
    Get transcript from YouTube video

    Works with both interfaces of youtube_transcript_api: newer releases
    expose an instance method ``fetch()``, older ones only the static
    ``get_transcript()``.

    Args:
        video_id: The YouTube video ID.

    Returns:
        The transcript segments joined by single spaces. On any failure a
        string starting with "Error getting transcript: " is returned instead
        of raising, which is the contract callers rely on.
    """
    try:
        if hasattr(YouTubeTranscriptApi, 'fetch'):
            # Newer API: fetch() returns a transcript object; to_raw_data()
            # converts it to the same list-of-dicts shape as the old API.
            transcript_list = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
        else:
            # Older API: static call returning a list of dicts.
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        return ' '.join(entry['text'] for entry in transcript_list)
    except Exception as e:
        return f"Error getting transcript: {str(e)}"

# Checkpoint used for summarization.
_SUMMARIZATION_MODEL = "machinelearningzuu/youtube-content-summarization-bart"

# Summarization pipeline, built on first use and then reused so the tokenizer
# and model are not reloaded for every request.
_SUMMARIZER = None


def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first call."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        tokenizer = AutoTokenizer.from_pretrained(_SUMMARIZATION_MODEL)
        model = AutoModelForSeq2SeqLM.from_pretrained(_SUMMARIZATION_MODEL)
        _SUMMARIZER = pipeline("summarization", model=model, tokenizer=tokenizer)
    return _SUMMARIZER


def summarize_youtube_video(video_url):
    """
    Main function to summarize YouTube video content

    Args:
        video_url: URL of the YouTube video, as typed by the user.

    Returns:
        The generated summary, or a human-readable message when the URL is
        invalid, the transcript cannot be fetched or is empty, or any other
        error occurs. This function never raises; every failure is reported
        as text so it can be shown directly in the UI.
    """
    try:
        # Extract video ID
        video_id = extract_video_id(video_url)
        if not video_id:
            return "Invalid YouTube URL"

        # Get transcript
        transcript = get_transcript(video_id)
        # get_transcript reports failures as text with this exact prefix.
        # Matching the full prefix (not just "Error") avoids mistaking a real
        # transcript that happens to begin with the word "Error" for a failure.
        if transcript.startswith("Error getting transcript:"):
            return transcript
        if not transcript.strip():
            return "No transcript text available for this video."

        # Generate summary. truncation=True cuts the input to the model's
        # maximum length, so a long transcript is summarized from its
        # beginning rather than failing on over-long input.
        summary = _get_summarizer()(
            transcript,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']

    except Exception as e:
        return f"An error occurred: {str(e)}"

# Build the web UI: one single-line text box for the video URL, and a
# five-line text box that displays the summary returned by the handler.
interface = gr.Interface(
    fn=summarize_youtube_video,
    inputs=gr.Textbox(lines=1, placeholder="Enter YouTube video URL here..."),
    outputs=gr.Textbox(lines=5, label="Video Summary"),
    description="Enter a YouTube video URL to generate a concise summary of its content.",
    title="YouTube Video Summarizer",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()


##########################