Sayiqa7 committed on
Commit
5964686
·
verified ·
1 Parent(s): c63913d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -67
app.py CHANGED
@@ -66,83 +66,148 @@ else:
66
  # if __name__ == "__main__":
67
  # interface.launch()
68
 
69
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  import gradio as gr
71
- from youtube_transcript_api import YouTubeTranscriptApi
72
- from urllib.parse import urlparse, parse_qs
73
-
74
- def extract_video_id(url):
75
- """
76
- Extract video ID from YouTube URL
77
- """
78
- parsed_url = urlparse(url)
79
- if parsed_url.hostname == 'youtu.be':
80
- return parsed_url.path[1:]
81
- if parsed_url.hostname in ('www.youtube.com', 'youtube.com'):
82
- if parsed_url.path == '/watch':
83
- return parse_qs(parsed_url.query)['v'][0]
84
- return None
85
-
86
- def get_transcript(video_id):
87
- """
88
- Get transcript from YouTube video
89
- """
90
  try:
91
- transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
92
- transcript = ' '.join([t['text'] for t in transcript_list])
93
- return transcript
 
 
 
 
 
 
 
 
 
 
 
94
  except Exception as e:
95
- return f"Error getting transcript: {str(e)}"
 
 
 
 
96
 
97
- def summarize_youtube_video(video_url):
98
- """
99
- Main function to summarize YouTube video content
100
- """
101
  try:
102
- # Extract video ID
103
- video_id = extract_video_id(video_url)
104
- if not video_id:
105
- return "Invalid YouTube URL"
106
-
107
- # Get transcript
108
- transcript = get_transcript(video_id)
109
- if transcript.startswith("Error"):
110
- return transcript
111
-
112
- # Load model and tokenizer
113
- tokenizer = AutoTokenizer.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
114
- model = AutoModelForSeq2SeqLM.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
 
 
115
 
116
- # Create summarization pipeline
117
- summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)
118
 
119
- # Generate summary
120
- summary = summarizer(transcript, max_length=150, min_length=30, do_sample=False)
121
- return summary[0]['summary_text']
122
-
123
- except Exception as e:
124
- return f"An error occurred: {str(e)}"
125
 
126
- # Create Gradio interface
127
  interface = gr.Interface(
128
- fn=summarize_youtube_video,
129
- inputs=gr.Textbox(
130
- lines=1,
131
- placeholder="Enter YouTube video URL here..."
132
- ),
133
- outputs=gr.Textbox(
134
- lines=5,
135
- label="Video Summary"
136
- ),
137
- title="YouTube Video Summarizer",
138
- description="Enter a YouTube video URL to generate a concise summary of its content.",
139
  )
140
 
141
- # Launch the interface
142
- if __name__ == "__main__":
143
- interface.launch()
144
-
145
-
146
- ##########################
147
 
148
 
 
66
  # if __name__ == "__main__":
67
  # interface.launch()
68
 
69
+ # from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
70
+ # import gradio as gr
71
+ # from youtube_transcript_api import YouTubeTranscriptApi
72
+ # from urllib.parse import urlparse, parse_qs
73
+
74
+ # def extract_video_id(url):
75
+ # """
76
+ # Extract video ID from YouTube URL
77
+ # """
78
+ # parsed_url = urlparse(url)
79
+ # if parsed_url.hostname == 'youtu.be':
80
+ # return parsed_url.path[1:]
81
+ # if parsed_url.hostname in ('www.youtube.com', 'youtube.com'):
82
+ # if parsed_url.path == '/watch':
83
+ # return parse_qs(parsed_url.query)['v'][0]
84
+ # return None
85
+
86
+ # def get_transcript(video_id):
87
+ # """
88
+ # Get transcript from YouTube video
89
+ # """
90
+ # try:
91
+ # transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
92
+ # transcript = ' '.join([t['text'] for t in transcript_list])
93
+ # return transcript
94
+ # except Exception as e:
95
+ # return f"Error getting transcript: {str(e)}"
96
+
97
+ # def summarize_youtube_video(video_url):
98
+ # """
99
+ # Main function to summarize YouTube video content
100
+ # """
101
+ # try:
102
+ # # Extract video ID
103
+ # video_id = extract_video_id(video_url)
104
+ # if not video_id:
105
+ # return "Invalid YouTube URL"
106
+
107
+ # # Get transcript
108
+ # transcript = get_transcript(video_id)
109
+ # if transcript.startswith("Error"):
110
+ # return transcript
111
+
112
+ # # Load model and tokenizer
113
+ # tokenizer = AutoTokenizer.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
114
+ # model = AutoModelForSeq2SeqLM.from_pretrained("machinelearningzuu/youtube-content-summarization-bart")
115
+
116
+ # # Create summarization pipeline
117
+ # summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)
118
+
119
+ # # Generate summary
120
+ # summary = summarizer(transcript, max_length=150, min_length=30, do_sample=False)
121
+ # return summary[0]['summary_text']
122
+
123
+ # except Exception as e:
124
+ # return f"An error occurred: {str(e)}"
125
+
126
+ # # Create Gradio interface
127
+ # interface = gr.Interface(
128
+ # fn=summarize_youtube_video,
129
+ # inputs=gr.Textbox(
130
+ # lines=1,
131
+ # placeholder="Enter YouTube video URL here..."
132
+ # ),
133
+ # outputs=gr.Textbox(
134
+ # lines=5,
135
+ # label="Video Summary"
136
+ # ),
137
+ # title="YouTube Video Summarizer",
138
+ # description="Enter a YouTube video URL to generate a concise summary of its content.",
139
+ # )
140
+
141
+ # # Launch the interface
142
+ # if __name__ == "__main__":
143
+ # interface.launch()
144
+
145
+
146
+ ##########################
147
+ from pytube import YouTube
148
+ from transformers import pipeline
149
  import gradio as gr
150
+ import os
151
+
152
def download_audio_from_youtube(video_url):
    """Download the first audio-only stream of a YouTube video.

    Args:
        video_url: URL of the YouTube video to fetch.

    Returns:
        Path of the downloaded audio file, or ``None`` when the URL is blank
        or resolving/downloading the stream fails for any reason (the error
        is printed).
    """
    # Function-scope imports keep this block self-contained.
    import tempfile
    import uuid

    # Reject blank input up front instead of attempting a network request.
    if not isinstance(video_url, str) or not video_url.strip():
        print("Error downloading video: no URL provided.")
        return None

    try:
        yt = YouTube(video_url.strip())

        # Take the first available audio-only stream.
        stream = yt.streams.filter(only_audio=True).first()
        if stream is None:
            raise Exception("No audio stream available for this video.")

        # A unique name in the temp directory keeps overlapping requests from
        # overwriting (or deleting) each other's download, which a fixed
        # "audio.mp4" in the working directory would allow.
        audio_filename = f"audio_{uuid.uuid4().hex}.mp4"
        return stream.download(
            output_path=tempfile.gettempdir(),
            filename=audio_filename,
        )
    except Exception as e:
        # Top-level boundary for the UI: report the problem and signal failure.
        print(f"Error downloading video: {e}")
        return None
172
+
173
# Whisper speech-to-text pipeline, built once at module level and reused by
# every call to transcribe_audio.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-large",
)
175
 
176
def transcribe_audio(audio_file_path):
    """Transcribe an audio file with the module-level Whisper pipeline.

    Args:
        audio_file_path: Path of the audio file to transcribe.

    Returns:
        The transcribed text, or the fixed string
        ``"Error transcribing the audio."`` if transcription fails
        (the underlying error is printed).
    """
    try:
        # Process the audio in 30-second chunks so recordings longer than a
        # single Whisper window are transcribed in full rather than being
        # truncated or rejected.
        result = transcriber(audio_file_path, chunk_length_s=30)
        return result["text"]
    except Exception as e:
        # Top-level boundary for the UI: report and return the fallback text.
        print(f"Error during transcription: {e}")
        return "Error transcribing the audio."
185
+
186
def process_youtube_url(video_url):
    """Download a YouTube video's audio, transcribe it, and clean up.

    Args:
        video_url: URL entered in the Gradio textbox.

    Returns:
        The transcription text (or the transcriber's own error text), or a
        fixed failure message when no audio could be downloaded.
    """
    failure_message = "Failed to download or find audio for the provided YouTube video."

    # Blank input can never resolve to a video; skip the download attempt.
    if not isinstance(video_url, str) or not video_url.strip():
        return failure_message

    # Step 1: download audio from the given YouTube video URL.
    audio_file = download_audio_from_youtube(video_url)
    if not audio_file:
        return failure_message

    try:
        # Step 2: transcribe the audio.
        return transcribe_audio(audio_file)
    finally:
        # Always remove the download, even if transcription raises. Cleanup is
        # best-effort: a file that is already gone must not fail the request.
        try:
            os.remove(audio_file)
        except OSError as e:
            print(f"Could not remove temporary audio file: {e}")
 
 
 
201
 
202
# Gradio UI: a single textbox for the YouTube URL, plain-text transcription out.
# `live` is intentionally left at its default: with live=True the interface
# re-runs on every input change, which would start a download and a Whisper
# transcription per keystroke.
interface = gr.Interface(
    fn=process_youtube_url,
    inputs=gr.Textbox(label="Enter YouTube Video URL"),
    outputs="text",
)

# Launch only when executed as a script, so importing this module does not
# start a server.
if __name__ == "__main__":
    interface.launch()
 
 
 
 
212
 
213