basharat8763 committed on
Commit
c21e8e7
·
verified ·
1 Parent(s): 55701f7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -29
app.py CHANGED
@@ -1,53 +1,54 @@
1
- from youtube_transcript_api import YouTubeTranscriptApi
2
  import re
 
 
3
  import torch
4
- from transformers import pipeline
5
  import gradio as gr
 
 
 
6
 
7
- text_summary = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6",
8
- torch_dtype=torch.bfloat16)
9
 
10
 
11
- def summary(input):
12
  output = text_summary(input)
13
  return output[0]['summary_text']
14
 
15
 
16
- def get_video_id(url):
17
- """Extract the video ID from a YouTube URL."""
18
- pattern = r"(?:https?:\/\/)?(?:www\.)?youtube\.com\/watch\?v=([a-zA-Z0-9_-]{11})"
19
- match = re.match(pattern, url)
20
  if match:
21
  return match.group(1)
22
- else:
23
- raise ValueError("Invalid YouTube URL")
24
 
25
 
26
- def fetch_transcript_and_summary(url):
27
- """Fetch the full transcript of a YouTube video and its summary."""
28
- video_id = get_video_id(url)
29
- transcript = YouTubeTranscriptApi.get_transcript(video_id)
30
- full_transcript = " ".join([item["text"] for item in transcript])
31
- summarized_transcript = summary(full_transcript)
32
- return summarized_transcript
33
 
34
-
35
- # Example usage
36
- def get_video_summary(url):
37
- """Main function to fetch and display transcript and summary."""
38
  try:
39
- summarized_transcript = fetch_transcript_and_summary(url)
40
- print(summarized_transcript)
 
 
 
 
 
 
 
41
  except Exception as e:
42
- print(f"Error: {e}")
43
 
44
 
45
- # Call the function with your desired URL command line based
46
- # video_url = input("Enter the YouTube video URL: ")
47
- # get_video_summary(video_url)
48
 
 
49
 
50
- # Web app Gradio based
51
  gr.close_all()
52
 
53
  demo = gr.Interface(
 
 
1
  import re
2
+ from youtube_transcript_api import YouTubeTranscriptApi
3
+ from youtube_transcript_api.formatters import TextFormatter
4
  import torch
 
5
  import gradio as gr
6
+ from transformers import pipeline
7
+
8
+ text_summary = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", torch_dtype=torch.bfloat16)
9
 
 
 
10
 
11
 
12
+ def summary (input):
13
  output = text_summary(input)
14
  return output[0]['summary_text']
15
 
16
 
17
+ def extract_video_id(url):
18
+ # Regex to extract the video ID from various YouTube URL formats
19
+ regex = r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
20
+ match = re.search(regex, url)
21
  if match:
22
  return match.group(1)
23
+ return None
 
24
 
25
 
26
+ def get_youtube_transcript(video_url):
27
+ video_id = extract_video_id(video_url)
28
+ if not video_id:
29
+ return "Video ID could not be extracted."
 
 
 
30
 
 
 
 
 
31
  try:
32
+ # Fetch the transcript
33
+ transcript = YouTubeTranscriptApi.get_transcript(video_id)
34
+
35
+ # Format the transcript into plain text
36
+ formatter = TextFormatter()
37
+ text_transcript = formatter.format_transcript(transcript)
38
+ summary_text = summary(text_transcript)
39
+
40
+ return summary_text
41
  except Exception as e:
42
+ return f"An error occurred: {e}"
43
 
44
 
45
+ # Example URL (Replace this with the actual URL when using the script)
46
+ # video_url = "https://youtu.be/5PibknhIsTc"
47
+ # print(get_youtube_transcript(video_url))
48
 
49
+ gr.close_all()
50
 
51
+ # demo = gr.Interface(fn=summary, inputs="text",outputs="text")
52
  gr.close_all()
53
 
54
  demo = gr.Interface(