prashantsv committed on
Commit
8d0feca
·
verified ·
1 Parent(s): 1fb59a5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -0
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from youtube_transcript_api import YouTubeTranscriptApi
2
+ import re
3
+ import torch
4
+ import gradio as gr
5
+
6
+ # Use a pipeline as a high-level helper
7
+ from transformers import pipeline
8
+
9
# Summarization pipeline shared by the whole module. It is constructed once,
# at import time, so every call to `text_summary(...)` reuses the same model.
text_summary = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-6-6",
)

# Alternative kept for reference: load the same checkpoint from a local
# snapshot directory instead of by model id.
#   model_path = "../Models/models--sshleifer--distilbart-cnn-6-6/snapshots/d2fde4ca965ba893255479612e4b801aa6500029"
#   text_summary = pipeline("summarization", model=model_path,
#                           torch_dtype=torch.bfloat16)
14
+
15
+
16
def split_text_to_chunks(text, chunk_size=1024):
    """Split *text* into consecutive pieces of at most *chunk_size* characters.

    The split is purely positional: pieces are taken by character offset, so
    a boundary may fall in the middle of a word. Concatenating the returned
    pieces in order reproduces *text* exactly.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per piece. Must be positive.

    Returns:
        A list of substrings in their original order. Empty input yields an
        empty list; only the final piece may be shorter than *chunk_size*.

    Raises:
        ValueError: If *chunk_size* is zero or negative.
    """
    if chunk_size <= 0:
        # range() fails with an unrelated-looking message for a zero step and
        # silently produces nothing for a negative one, which would discard
        # the entire text without any error.
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )
    return [
        text[start:start + chunk_size]
        for start in range(0, len(text), chunk_size)
    ]
18
+
19
def summary(input):
    """Summarize *input* piece by piece and return the joined summaries.

    The text is cut into pieces with ``split_text_to_chunks`` (default piece
    size), each piece is passed to the module-level ``text_summary``
    pipeline, and the resulting ``summary_text`` values are concatenated in
    order with no separator.

    Args:
        input: The text to summarize. The parameter name shadows the builtin
            but is kept so existing callers are unaffected.

    Returns:
        The concatenated summaries as one string; an empty string when
        *input* is empty (no pieces, so the pipeline is never called).
    """
    # Pieces are already capped by split_text_to_chunks' default size of
    # 1024 characters, so no further truncation is needed before the model.
    pieces = split_text_to_chunks(input)
    return "".join(
        text_summary(piece)[0]["summary_text"] for piece in pieces
    )
25
+
26
def get_video_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    Recognized forms (scheme and ``www.`` are optional):

    * ``.../watch?v=<id>`` or any URL carrying a ``v=<id>`` query parameter
    * ``.../embed/<id>``, ``.../shorts/<id>``, ``.../live/<id>``, ``.../v/<id>``
    * ``youtu.be/<id>``
    * a bare 11-character ID

    Args:
        url: The URL (or bare ID) to inspect.

    Returns:
        The video ID string, or ``None`` when *url* is not a string or no ID
        can be found.
    """
    if not isinstance(url, str):
        return None

    candidate = url.strip()
    id_chars = r"[0-9A-Za-z_-]"

    # Accept an ID pasted on its own.
    if re.fullmatch(id_chars + r"{11}", candidate):
        return candidate

    # The ID must follow a marker that actually introduces a video ID.
    # Matching after any "/" (as a looser pattern would) mistakes host names
    # such as "youtube-nocookie.com" and channel IDs for video IDs. The
    # trailing lookahead rejects longer tokens instead of truncating them.
    pattern = (
        r"(?:[?&]v=|/(?:embed|shorts|live|v)/|\byoutu\.be/)"
        r"(" + id_chars + r"{11})(?!" + id_chars + r")"
    )
    match = re.search(pattern, candidate)
    return match.group(1) if match else None
31
+
32
def get_transcript(video_url):
    """Fetch a YouTube video's transcript and return a summary of it.

    Despite the name, the return value is the *summarized* transcript, not
    the raw transcript text.

    Args:
        video_url: A YouTube URL, passed to ``get_video_id``.

    Returns:
        The summary string on success. On failure a human-readable message is
        returned instead of raising: ``"Invalid YouTube URL!"`` when no video
        ID can be extracted, ``"Error fetching transcript: ..."`` when the
        transcript cannot be retrieved, or ``"Error summarizing transcript:
        ..."`` when summarization fails.
    """
    video_id = get_video_id(video_url)
    if not video_id:
        return "Invalid YouTube URL!"

    # Fetching and summarizing are guarded separately so the message names
    # the step that actually failed. Catching Exception is deliberate: this
    # function reports failures to its caller as text rather than raising.
    try:
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Older youtube-transcript-api: static call returning dicts.
            entries = YouTubeTranscriptApi.get_transcript(video_id)
            caption_lines = [entry["text"] for entry in entries]
        else:
            # Newer releases dropped the static helper in favor of an
            # instance whose fetch() yields snippet objects with a `.text`
            # attribute. NOTE(review): confirm against the installed version.
            fetched = YouTubeTranscriptApi().fetch(video_id)
            caption_lines = [snippet.text for snippet in fetched]
    except Exception as e:
        return f"Error fetching transcript: {e}"

    try:
        return summary("\n".join(caption_lines))
    except Exception as e:
        return f"Error summarizing transcript: {e}"
45
+
46
+ # youtube_url = input("Enter YouTube URL: ")
47
+ # transcript = get_transcript(youtube_url)
48
+ # output = summary(transcript)
49
+ # print("\n--- Video Transcript ---\n")
50
+ # print(output)
51
+
52
# Shut down any Gradio apps still running in this process before starting a
# new one (useful when the script is re-run in the same interpreter).
gr.close_all()

# The handler is get_transcript, which requires a YouTube URL and returns a
# summary of that video's transcript. The labels therefore ask for a URL;
# asking for "text to summarize" would lead users to paste plain text, which
# the handler rejects as an invalid URL.
demo = gr.Interface(
    fn=get_transcript,
    inputs=[gr.Textbox(label="YouTube video URL", lines=1)],
    outputs=[gr.Textbox(label="Summarized text", lines=4)],
    title="YouTube Video Summarizer",
    description="Paste a YouTube video URL to get a summary of its transcript.",
)
demo.launch()