Silemo committed on
Commit
ec608d5
·
1 Parent(s): 484f772

Added app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -0
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ from pytube import YouTube
3
+ import gradio as gr
4
+ import requests
5
+
6
# Speech-recognition pipeline backed by the fine-tuned Italian Whisper
# checkpoint. The task is named explicitly rather than left to be inferred
# from the model's Hub metadata. chunk_length_s makes the pipeline process
# long recordings in 30-second chunks, so inputs longer than Whisper's
# 30-second window (typical for the YouTube tab) are not cut short.
pipe = pipeline(
    "automatic-speech-recognition",
    model="Silemo/whisper-it",
    chunk_length_s=30,
)
7
+
8
def download_audio(audio_url, filename):
    """Download the file at ``audio_url`` and store it as ``audio/<filename>``.

    Args:
        audio_url: URL of the audio file to fetch.
        filename: Name of the file to create inside the ``audio`` directory.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    import os  # local import: the module header does not import os

    # Fail loudly on an error status instead of saving an error page as audio;
    # the timeout keeps start-up from hanging on an unresponsive server.
    response = requests.get(audio_url, timeout=60)
    response.raise_for_status()

    # The target directory may not exist yet.
    os.makedirs("audio", exist_ok=True)

    # open() needs a path; the original wrapped it in a list, which raises
    # TypeError before anything is written.
    with open(os.path.join("audio", filename), "wb") as f:
        f.write(response.content)
23
+
24
def transcribe(audio):
    """Return the text the speech-recognition pipeline produces for ``audio``.

    Args:
        audio: Input handed to ``pipe``; the callers in this file pass a path
            to an audio file. ``None`` or an empty path yields ``""``.

    Returns:
        The ``"text"`` entry of the pipeline result, or an empty string when
        there is nothing to transcribe.
    """
    # The form can be submitted without any recording or upload, in which case
    # there is no file to hand to the pipeline.
    if audio is None or (isinstance(audio, str) and not audio):
        return ""

    return pipe(audio)["text"]
27
+
28
def transcribe_video(url):
    """Transcribe the audio track of the YouTube video at ``url``.

    Args:
        url: Link to a YouTube video. ``None``, empty or whitespace-only
            input yields ``""`` without contacting YouTube.

    Returns:
        The transcription returned by ``transcribe`` for the downloaded audio.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    import tempfile  # local import: the module header does not import tempfile

    # Nothing to do when the textbox was submitted empty.
    if not url or not url.strip():
        return ""

    yt = YouTube(url.strip())
    stream = yt.streams.get_audio_only()
    if stream is None:
        raise ValueError(f"No audio-only stream available for {url!r}")

    # Download into a throw-away directory so repeated requests neither pile up
    # files on disk nor touch the example clips stored under audio/.
    with tempfile.TemporaryDirectory() as workdir:
        audio = stream.download(output_path=workdir)
        # Transcribe while the file still exists, i.e. inside the with-block.
        return transcribe(audio)
38
+
39
# Example clips: their names are reused by the example list of the
# microphone/upload interface, so they are downloaded up front.
audio1_filename = "offer.mp3"
audio1_url = "https://github.com/Silemo/sml-lab2-2023-manfredi-meneghin/raw/main/task1/audio/offer.mp3"

audio2_filename = "fantozzi.mp3"
audio2_url = "https://github.com/Silemo/sml-lab2-2023-manfredi-meneghin/raw/main/task1/audio/fantozzi.mp3"

# Fetch the clips in the same order as before: first clip, then second.
for clip_url, clip_name in (
    (audio1_url, audio1_filename),
    (audio2_url, audio2_filename),
):
    download_audio(clip_url, clip_name)
46
+
47
+ # Multiple interfaces using tabs -> https://github.com/gradio-app/gradio/issues/450
48
+
49
# Tab 1: transcribe speech recorded from the microphone or uploaded as a file.
io1 = gr.Interface(
    fn=transcribe,
    # The list of accepted input methods goes in `sources` (plural); the
    # singular `source` keyword does not accept a list.
    # NOTE(review): assumes the Space runs Gradio 4.x - confirm the pinned
    # sdk_version. type="filepath" hands `transcribe` a path on disk.
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs="text",
    # Clips fetched at start-up into the audio/ directory.
    examples=[
        ["audio/" + audio1_filename],
        ["audio/" + audio2_filename],
    ],
    title="Whisper Small - Italian - Microphone or Audio file",
    description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model. It uses the computer microphone as audio input",
)
62
+
63
# Tab 2: transcribe the audio track of a YouTube video given by its link.
youtube_url_box = gr.Textbox(
    label="YouTube URL",
    placeholder="https://youtu.be/9DImRZERJNs?si=1Lme7o_KH2oCxU7y",
)

youtube_examples = [
    # Video titled "Per me è la cipolla"
    ["https://youtu.be/QbwZlURClSA?si=DKMtIiKE-nO2mfcV"],
    # Breaking Italy - video titled "Lollobrigida ferma il treno"
    ["https://youtu.be/9MPBN0tnA_E?si=G9Sgn1AsXSkxfCxV"],
]

io2 = gr.Interface(
    title="Whisper Small - Italian - YouTube link",
    description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model. It uses a YouTube link as audio input",
    fn=transcribe_video,
    inputs=youtube_url_box,
    outputs="text",
    examples=youtube_examples,
)
79
+
80
# Present both interfaces as tabs. The tab names must be an ordered sequence
# that lines up with the interface list; a set has no defined order, so the
# labels could end up attached to the wrong tab.
demo = gr.TabbedInterface(
    [io1, io2],
    ["Microphone or audio file", "YouTube"],
)
demo.launch()