Silemo committed on
Commit
bb506f1
·
1 Parent(s): 044567e

Updated app.py, adding tag-generation functionality

Browse files
Files changed (1) hide show
  1. app.py +71 -33
app.py CHANGED
@@ -1,9 +1,42 @@
 
 
 
1
  from transformers import pipeline
2
  from pytube import YouTube
3
  import gradio as gr
4
  import requests
 
5
 
6
- pipe = pipeline(model="Silemo/whisper-it") # change to "your-username/the-name-you-picked"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  def download_audio(audio_url, filename):
9
 
@@ -12,7 +45,7 @@ def download_audio(audio_url, filename):
12
 
13
  # send a HTTP request to the server and save
14
  # the HTTP response in a response object called r
15
- with open(filename,'wb') as f:
16
 
17
  # Saving received content as a mp3 file in
18
  # binary format
@@ -21,21 +54,26 @@ def download_audio(audio_url, filename):
21
  # to a new file in binary mode.
22
  f.write(r.content)
23
 
24
- def transcribe(audio):
25
- text = pipe(audio)["text"]
26
- return text
27
-
28
- def transcribe_video(url):
29
- yt = YouTube(url)
30
- stream = yt.streams.get_audio_only()
31
-
32
- # Saves the audio in the /audio folder
33
- audio = stream.download()
34
-
35
- text = transcribe(audio)
36
-
37
- return text
38
-
 
 
 
 
 
39
  audio1_url = "https://github.com/Silemo/sml-lab2-2023-manfredi-meneghin/raw/main/task1/audio/offer.mp3"
40
  audio1_filename = "offer.mp3"
41
  download_audio(audio1_url, audio1_filename)
@@ -44,36 +82,36 @@ audio2_url = "https://github.com/Silemo/sml-lab2-2023-manfredi-meneghin/raw/main
44
  audio2_filename = "fantozzi.mp3"
45
  download_audio(audio2_url, audio2_filename)
46
 
47
- # Multiple interfaces using tabs -> https://github.com/gradio-app/gradio/issues/450
 
 
 
 
48
 
 
49
  io1 = gr.Interface(
50
- fn = transcribe,
51
- inputs = gr.Audio(sources=["microphone", "upload"], type="filepath"),
52
- outputs = "text",
53
-
54
- examples=[
55
  [audio1_filename],
56
  [audio2_filename],
57
  ],
58
-
59
  title = "Whisper Small - Italian - Microphone or Audio file",
60
- description = "Realtime demo for Italian speech recognition using a fine-tuned Whisper small model. It uses the computer microphone or an audio file as audio input",
61
  )
62
 
63
  io2 = gr.Interface(
64
  fn = transcribe_video,
65
  inputs = gr.Textbox(label = "YouTube URL", placeholder = "https://youtu.be/9DImRZERJNs?si=1Lme7o_KH2oCxU7y"),
66
- outputs = "text",
67
 
68
  examples=[
69
- # Per me è la cipolla
70
- ["https://youtu.be/QbwZlURClSA?si=DKMtIiKE-nO2mfcV"],
71
 
72
- # Breaking Italy - Lollobrigida ferma il treno
73
- ["https://youtu.be/9MPBN0tnA_E?si=8-hqkJS05LNkWprX&t=2"],
74
-
75
- # Mussolini discorso
76
- ["https://youtu.be/UmnxcjRk37Q?si=uxt8oqnMDJ3vFzIB&t=77"],
77
  ],
78
 
79
  title = "Whisper Small - Italian - YouTube link",
 
1
+ """
2
+ Imports
3
+ """
4
  from transformers import pipeline
5
  from pytube import YouTube
6
  import gradio as gr
7
  import requests
8
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
9
 
10
+ """
11
+ Pipeline and models
12
+ """
13
+ transcribe_pipe = pipeline(model="Silemo/whisper-it") # change to "your-username/the-name-you-picked"
14
+
15
+ tags_model = AutoModelForSeq2SeqLM.from_pretrained("efederici/text2tags")
16
+ tags_tokenizer = AutoTokenizer.from_pretrained("efederici/text2tags")
17
+
18
+ """
19
+ Methods
20
+ """
21
+ def transcribe(audio):
22
+ text = transcribe_pipe(audio)["text"]
23
+ return text
24
+
25
+ def transcribe_video(url):
26
+ yt = YouTube(url)
27
+ stream = yt.streams.get_audio_only()
28
+
29
+ # Saves the audio in the /audio folder
30
+ audio = stream.download() #output_path = "audio/"
31
+
32
+ text = transcribe_and_tag(audio)
33
+
34
+ return text
35
+
36
+ def transcribe_and_tag(audio):
37
+ text = transcribe(audio)
38
+ tags = tag(text=text)
39
+ return text, tags
40
 
41
  def download_audio(audio_url, filename):
42
 
 
45
 
46
  # send a HTTP request to the server and save
47
  # the HTTP response in a response object called r
48
+ with open(filename,'wb') as f: #"audio/" +
49
 
50
  # Saving received content as a mp3 file in
51
  # binary format
 
54
  # to a new file in binary mode.
55
  f.write(r.content)
56
 
57
+ def tag(text: str):
58
+ """
59
+ Generates tags from given text
60
+ """
61
+ text = text.strip().replace('\n', '')
62
+ text = 'summarize: ' + text
63
+ tokenized_text = tags_tokenizer.encode(text, return_tensors="pt")
64
+
65
+ tags_ids = tags_model.generate(tokenized_text,
66
+ num_beams=4,
67
+ no_repeat_ngram_size=2,
68
+ max_length=20,
69
+ early_stopping=True)
70
+
71
+ output = tags_tokenizer.decode(tags_ids[0], skip_special_tokens=True)
72
+ return output.split(', ')
73
+
74
+ """
75
+ Downloading audio files
76
+ """
77
  audio1_url = "https://github.com/Silemo/sml-lab2-2023-manfredi-meneghin/raw/main/task1/audio/offer.mp3"
78
  audio1_filename = "offer.mp3"
79
  download_audio(audio1_url, audio1_filename)
 
82
  audio2_filename = "fantozzi.mp3"
83
  download_audio(audio2_url, audio2_filename)
84
 
85
+ """
86
+ Interfaces
87
+ """
88
+ transcription = gr.Textbox(label="Transcription")
89
+ tags = gr.Textbox(label="Tags")
90
 
91
+ # Multiple interfaces using tabs -> https://github.com/gradio-app/gradio/issues/450
92
  io1 = gr.Interface(
93
+ fn = transcribe_and_tag,
94
+ inputs = gr.Audio(source=["microphone", "upload"], type="filepath"),
95
+ outputs = [transcription, tags],
96
+ examples = [
 
97
  [audio1_filename],
98
  [audio2_filename],
99
  ],
 
100
  title = "Whisper Small - Italian - Microphone or Audio file",
101
+ description = "Realtime demo for Italian speech recognition using a fine-tuned Whisper small model. It uses the computer microphone as audio input. It outputs a transcription and the tags of the text",
102
  )
103
 
104
  io2 = gr.Interface(
105
  fn = transcribe_video,
106
  inputs = gr.Textbox(label = "YouTube URL", placeholder = "https://youtu.be/9DImRZERJNs?si=1Lme7o_KH2oCxU7y"),
107
+ outputs=[transcription, tags],
108
 
109
  examples=[
110
+ # Meloni - Confindustria
111
+ ["https://www.youtube.com/watch?v=qMslwA7RCcc"],
112
 
113
+ # Montemagno - Ripartire da zero
114
+ ["https://www.youtube.com/watch?v=WlT3dCAGjRo"],
 
 
 
115
  ],
116
 
117
  title = "Whisper Small - Italian - YouTube link",