Sayiqa7 committed on
Commit
151f648
·
verified ·
1 Parent(s): 3d331ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -19
app.py CHANGED
@@ -5,6 +5,7 @@ subprocess.check_call(["pip", "install", "huggingface_hub>=0.19.0"])
5
  subprocess.check_call(["pip", "install", "tokenizers>=0.15.0"])
6
  subprocess.check_call(["pip", "install", "pytube"])
7
  subprocess.check_call(["pip", "install", "pathlib"])
 
8
  import transformers
9
  import torch
10
  import os
@@ -17,6 +18,7 @@ def install_missing_packages():
17
  "transformers":">=4.35.2",
18
  "pytube":None,
19
  "huggingface_hub": ">=0.19.0"
 
20
 
21
  }
22
 
@@ -36,12 +38,13 @@ if hf_token:
36
  else:
37
  raise ValueError("HF_TOKEN environment variable not set.")
38
 
 
39
  # from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
40
  # import gradio as gr
41
 
42
  # # Load the model and tokenizer
43
- # tokenizer = AutoTokenizer.from_pretrained("machinelearningzuu/youtube-content-summarization")
44
- # model = AutoModelForSeq2SeqLM.from_pretrained("machinelearningzuu/youtube-content-summarization")
45
 
46
  # # Define a function for summarization
47
  # def summarize_youtube_content(input_text):
@@ -62,29 +65,79 @@ else:
62
  # # Launch the Gradio app
63
  # if __name__ == "__main__":
64
  # interface.launch()
 
65
  from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
66
  import gradio as gr
 
 
67
 
68
- # Load the model and tokenizer
69
- tokenizer = AutoTokenizer.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
70
- model = AutoModelForSeq2SeqLM.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
 
 
 
 
 
 
 
 
71
 
72
- # Define a function for summarization
73
- def summarize_youtube_content(input_text):
74
- # Use the pipeline for summarization
75
- summarizer = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
76
- summary = summarizer(input_text, max_length=150, min_length=30, do_sample=False)
77
- return summary[0]['generated_text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
- # Create a Gradio interface
80
  interface = gr.Interface(
81
- fn=summarize_youtube_content,
82
- inputs=gr.Textbox(lines=10, placeholder="Paste YouTube transcript here..."),
83
- outputs=gr.Textbox(lines=5, label="Summarized Content"),
84
- title="YouTube Content Summarizer",
85
- description="Paste the transcript of a YouTube video to generate a concise summary.",
 
 
 
 
 
 
86
  )
87
 
88
- # Launch the Gradio app
89
  if __name__ == "__main__":
90
- interface.launch()
 
5
  subprocess.check_call(["pip", "install", "tokenizers>=0.15.0"])
6
  subprocess.check_call(["pip", "install", "pytube"])
7
  subprocess.check_call(["pip", "install", "pathlib"])
8
+ subprocess.check_call(["pip", "install", "youtube_transcript_api>=0.6.3"])
9
  import transformers
10
  import torch
11
  import os
 
18
  "transformers":">=4.35.2",
19
  "pytube":None,
20
  "huggingface_hub": ">=0.19.0"
21
+
22
 
23
  }
24
 
 
38
  else:
39
  raise ValueError("HF_TOKEN environment variable not set.")
40
 
41
+
42
  # from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
43
  # import gradio as gr
44
 
45
  # # Load the model and tokenizer
46
+ # tokenizer = AutoTokenizer.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
47
+ # model = AutoModelForSeq2SeqLM.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
48
 
49
  # # Define a function for summarization
50
  # def summarize_youtube_content(input_text):
 
65
  # # Launch the Gradio app
66
  # if __name__ == "__main__":
67
  # interface.launch()
68
+
69
  from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
70
  import gradio as gr
71
+ from youtube_transcript_api import YouTubeTranscriptApi
72
+ from urllib.parse import urlparse, parse_qs
73
 
74
def extract_video_id(url):
    """
    Extract the video ID from a YouTube URL.

    Recognised forms:
      * ``https://youtu.be/<id>``
      * ``https://www.youtube.com/watch?v=<id>`` (also ``youtube.com``,
        ``m.youtube.com`` and ``music.youtube.com``)
      * ``https://www.youtube.com/embed/<id>``, ``/shorts/<id>``,
        ``/live/<id>`` and ``/v/<id>``

    A URL typed without a scheme (e.g. ``youtu.be/<id>``) is accepted too.

    Args:
        url: The URL text entered by the user.

    Returns:
        The video ID as a string, or None when ``url`` is empty, is not a
        string, is not a recognised YouTube URL, or carries no video ID.
    """
    if not url or not isinstance(url, str):
        return None

    parsed_url = urlparse(url.strip())
    # urlparse only populates the hostname when a scheme (or leading "//")
    # is present, so retry bare "youtu.be/..." style input with a scheme.
    if not parsed_url.scheme and not parsed_url.netloc:
        parsed_url = urlparse("https://" + url.strip())

    hostname = parsed_url.hostname or ""
    segments = [part for part in parsed_url.path.split("/") if part]

    if hostname == "youtu.be":
        # Short links carry the ID as the first path segment.
        return segments[0] if segments else None

    if hostname in ("www.youtube.com", "youtube.com",
                    "m.youtube.com", "music.youtube.com"):
        if segments == ["watch"]:
            # .get() avoids a KeyError when the "v" parameter is missing.
            values = parse_qs(parsed_url.query).get("v")
            return values[0] if values else None
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]

    return None
85
 
86
def get_transcript(video_id):
    """
    Fetch the transcript of a YouTube video as one space-joined string.

    Args:
        video_id: The YouTube video ID to look up.

    Returns:
        The text of every transcript entry joined with single spaces. On any
        failure no exception is raised; instead a string beginning with
        "Error getting transcript: " followed by the exception text is
        returned.
    """
    try:
        entries = YouTubeTranscriptApi.get_transcript(video_id)
        return ' '.join(entry['text'] for entry in entries)
    except Exception as exc:
        # Failures are reported in-band as text rather than raised.
        return f"Error getting transcript: {exc}"
96
+
97
# Hugging Face model used for summarization.
_SUMMARIZATION_MODEL = "machinelearningzuu/youtube-content-summarization-bart"

# Lazily created summarization pipeline, reused across requests.
_summarizer = None


def _get_summarizer():
    """Create the summarization pipeline on first use and reuse it afterwards."""
    global _summarizer
    if _summarizer is None:
        tokenizer = AutoTokenizer.from_pretrained(_SUMMARIZATION_MODEL)
        model = AutoModelForSeq2SeqLM.from_pretrained(_SUMMARIZATION_MODEL)
        _summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)
    return _summarizer


def summarize_youtube_video(video_url):
    """
    Summarize the content of a YouTube video from its URL.

    Steps: extract the video ID, fetch the transcript, then run the
    summarization pipeline on the transcript text.

    Args:
        video_url: The YouTube video URL entered by the user.

    Returns:
        The generated summary text. On failure a human-readable message is
        returned instead of raising: "Invalid YouTube URL", the
        "Error getting transcript: ..." message from get_transcript, or
        "An error occurred: ..." for any other exception.
    """
    try:
        # Extract video ID
        video_id = extract_video_id(video_url)
        if not video_id:
            return "Invalid YouTube URL"

        # Get transcript
        transcript = get_transcript(video_id)
        # get_transcript reports failures in-band; match its full prefix so a
        # real transcript that merely begins with "Error" is still summarized.
        if transcript.startswith("Error getting transcript:"):
            return transcript
        if not transcript.strip():
            return "Error getting transcript: transcript is empty"

        # Load (or reuse) the model instead of re-loading it on every request.
        summarizer = _get_summarizer()

        # Generate summary. truncation=True cuts over-long transcripts down to
        # the model's maximum input length instead of failing on them.
        summary = summarizer(
            transcript,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']

    except Exception as e:
        # UI boundary: show the error as text rather than crashing the app.
        return f"An error occurred: {str(e)}"
125
 
126
# Gradio UI: a single-line URL box as input, a five-line summary box as output.
interface = gr.Interface(
    title="YouTube Video Summarizer",
    description="Enter a YouTube video URL to generate a concise summary of its content.",
    fn=summarize_youtube_video,
    inputs=gr.Textbox(lines=1, placeholder="Enter YouTube video URL here..."),
    outputs=gr.Textbox(lines=5, label="Video Summary"),
)

# Launch the interface only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()