pere committed on
Commit
143ef7b
·
verified ·
1 Parent(s): 5e7755f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -30
app.py CHANGED
@@ -18,7 +18,9 @@ except ImportError:
18
  import yt_dlp # Added import for yt-dlp
19
 
20
  MODEL_NAME = "NbAiLab/nb-whisper-large"
21
- max_audio_length = 30 * 60
 
 
22
 
23
  share = (os.environ.get("SHARE", "False")[0].lower() in "ty1") or None
24
  auth_token = os.environ.get("AUTH_TOKEN") or True
@@ -26,7 +28,7 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
26
  print(f"Bruker enhet: {device}")
27
 
28
  @spaces.GPU(duration=60 * 2)
29
- def pipe(file, return_timestamps=False, lang="no"):
30
  asr = pipeline(
31
  task="automatic-speech-recognition",
32
  model=MODEL_NAME,
@@ -44,20 +46,24 @@ def pipe(file, return_timestamps=False, lang="no"):
44
  return asr(file, return_timestamps=return_timestamps, batch_size=24, generate_kwargs={'task': 'transcribe', 'language': lang})
45
 
46
  def format_output(text):
 
 
 
47
  text = re.sub(r'(\.{3,}|[.!:?])', lambda m: m.group() + '<br>', text)
48
  return text
49
 
50
- def transcribe(file, return_timestamps=False, lang_nn=False):
51
 
52
  waveform, sample_rate = torchaudio.load(file)
53
  audio_duration = waveform.size(1) / sample_rate
54
- warning_message = None
55
 
56
  if audio_duration > max_audio_length:
57
  warning_message = (
58
  "<b style='color:red;'>⚠️ Advarsel:</b> "
59
  "Lydfilen er lengre enn 30 minutter. Kun de første 30 minuttene vil bli transkribert."
60
  )
 
61
  waveform = waveform[:, :int(max_audio_length * sample_rate)]
62
  truncated_file = "truncated_audio.wav"
63
  torchaudio.save(truncated_file, waveform, sample_rate)
@@ -67,6 +73,7 @@ def transcribe(file, return_timestamps=False, lang_nn=False):
67
  file_to_transcribe = file
68
  truncated = False
69
 
 
70
  if not lang_nn:
71
  if not return_timestamps:
72
  text = pipe(file_to_transcribe)["text"]
@@ -82,10 +89,10 @@ def transcribe(file, return_timestamps=False, lang_nn=False):
82
  formatted_text = "<br>".join(text)
83
  else:
84
  if not return_timestamps:
85
- text = pipe(file_to_transcribe, lang="nn")["text"]
86
  formatted_text = format_output(text)
87
  else:
88
- chunks = pipe(file_to_transcribe, return_timestamps=True, lang="nn")["chunks"]
89
  text = []
90
  for chunk in chunks:
91
  start_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][0])) if chunk["timestamp"][0] is not None else "??:??:??"
@@ -146,47 +153,57 @@ def yt_transcribe(yt_url, return_timestamps=False):
146
  demo = gr.Blocks(theme=gr.themes.Default(primary_hue=gr.themes.colors.red, secondary_hue=gr.themes.colors.red))
147
 
148
  with demo:
149
- with gr.Row():
150
- # Scale up the logo and align the title to the right of the logo
151
- with gr.Column(scale=1, min_width=150):
152
- gr.HTML(f"<img src='file/Logonew.png' style='width:250px;'>") # Increased logo size
153
- with gr.Column(scale=4, min_width=300):
154
  gr.Markdown(
155
  """
156
- <h1 style="font-size: 3em; color: #FF0000; text-align:right;">NB-Whisper Demo</h1>
157
- """ # Title aligned to right and changed to red
158
  )
159
 
160
- # Description moved to the bottom
161
- with gr.Row():
162
- gr.Markdown(
163
- """
164
- Transkriber lange lydopptak fra mikrofon eller lydfiler med et enkelt klikk!
165
- Demoen bruker den fintunede modellen NbAiLab/nb-whisper-large og 🤗 Transformers til å transkribere lydfiler opp til 30 minutter.
166
- """
167
- )
168
-
169
  mf_transcribe = gr.Interface(
170
  fn=transcribe,
171
  inputs=[
172
  gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
173
- gr.components.Checkbox(label="Inkluder tidskoder"), # Text changed here, functionality unchanged
174
  gr.components.Checkbox(label="Nynorsk"),
175
  ],
176
 
177
  outputs=[
178
  gr.HTML(label="Varsel"),
179
  gr.HTML(label="text"),
180
- gr.File(label="Last ned transkripsjon"), # Removed 'style' argument
181
  ],
 
 
182
  description=(
183
- "Transkriber lange lydopptak fra mikrofon eller lydfiler med et enkelt klikk! "
184
- "Demoen bruker den fintunede modellen NbAiLab/nb-whisper-large og 🤗 Transformers til å transkribere lydfiler opp til 30 minutter."
185
  ),
186
  allow_flagging="never",
 
187
  )
188
 
189
- # Bottom description and built-with Gradio message
190
- gr.Markdown("<br><br><center><i>Bygget med Gradio</i></center>") # Added description near bottom
191
-
192
- demo.launch(share=share, show_api=False, allowed_paths=["Logonew.png"]).queue()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  import yt_dlp # Added import for yt-dlp
19
 
20
  MODEL_NAME = "NbAiLab/nb-whisper-large"
21
+ #lang = "no"
22
+
23
+ max_audio_length= 30 * 60
24
 
25
  share = (os.environ.get("SHARE", "False")[0].lower() in "ty1") or None
26
  auth_token = os.environ.get("AUTH_TOKEN") or True
 
28
  print(f"Bruker enhet: {device}")
29
 
30
  @spaces.GPU(duration=60 * 2)
31
+ def pipe(file, return_timestamps=False,lang="no"):
32
  asr = pipeline(
33
  task="automatic-speech-recognition",
34
  model=MODEL_NAME,
 
46
  return asr(file, return_timestamps=return_timestamps, batch_size=24, generate_kwargs={'task': 'transcribe', 'language': lang})
47
 
48
  def format_output(text):
49
+ # Add a line break after ".", "!", ":", or "?" unless part of sequences like "..."
50
+ #text = re.sub(r'(?<!\.)[.!:?](?!\.)', lambda m: m.group() + '<br>', text)
51
+ # Ensure line break after sequences like "..." or other punctuation patterns
52
  text = re.sub(r'(\.{3,}|[.!:?])', lambda m: m.group() + '<br>', text)
53
  return text
54
 
55
+ def transcribe(file, return_timestamps=False,lang_nn=False):
56
 
57
  waveform, sample_rate = torchaudio.load(file)
58
  audio_duration = waveform.size(1) / sample_rate
59
+ warning_message=None
60
 
61
  if audio_duration > max_audio_length:
62
  warning_message = (
63
  "<b style='color:red;'>⚠️ Advarsel:</b> "
64
  "Lydfilen er lengre enn 30 minutter. Kun de første 30 minuttene vil bli transkribert."
65
  )
66
+ # Trim the waveform to the first 30 minutes
67
  waveform = waveform[:, :int(max_audio_length * sample_rate)]
68
  truncated_file = "truncated_audio.wav"
69
  torchaudio.save(truncated_file, waveform, sample_rate)
 
73
  file_to_transcribe = file
74
  truncated = False
75
 
76
+
77
  if not lang_nn:
78
  if not return_timestamps:
79
  text = pipe(file_to_transcribe)["text"]
 
89
  formatted_text = "<br>".join(text)
90
  else:
91
  if not return_timestamps:
92
+ text = pipe(file_to_transcribe,lang="nn")["text"]
93
  formatted_text = format_output(text)
94
  else:
95
+ chunks = pipe(file_to_transcribe, return_timestamps=True,lang="nn")["chunks"]
96
  text = []
97
  for chunk in chunks:
98
  start_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][0])) if chunk["timestamp"][0] is not None else "??:??:??"
 
153
  demo = gr.Blocks(theme=gr.themes.Default(primary_hue=gr.themes.colors.red, secondary_hue=gr.themes.colors.red))
154
 
155
  with demo:
156
+
157
+ with gr.Column():
158
+ gr.HTML(f"<img src='file/Logonew.png' style='width:200px;'>")
159
+ with gr.Column(scale=8):
160
+ # Use Markdown for title and description
161
  gr.Markdown(
162
  """
163
+ <h1 style="font-size: 3em;">NB-Whisper Demo</h1>
164
+ """
165
  )
166
 
 
 
 
 
 
 
 
 
 
167
  mf_transcribe = gr.Interface(
168
  fn=transcribe,
169
  inputs=[
170
  gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
171
+ gr.components.Checkbox(label="Inkluder tidskoder"),
172
  gr.components.Checkbox(label="Nynorsk"),
173
  ],
174
 
175
  outputs=[
176
  gr.HTML(label="Varsel"),
177
  gr.HTML(label="text"),
178
+ gr.File(label="Last ned transkripsjon")
179
  ],
180
+ #outputs="text",
181
+
182
  description=(
183
+ "Demoen bruker"
184
+ f" modellen [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) til å transkribere lydfiler opp til 30 minutter."
185
  ),
186
  allow_flagging="never",
187
+ #show_submit_button=False,
188
  )
189
 
190
+ # Uncomment to add the YouTube transcription interface if needed
191
+ # yt_transcribe_interface = gr.Interface(
192
+ # fn=yt_transcribe,
193
+ # inputs=[
194
+ # gr.components.Textbox(lines=1, placeholder="Lim inn URL til en YouTube-video her", label="YouTube URL"),
195
+ # gr.components.Checkbox(label="Inkluder tidsstempler"),
196
+ # ],
197
+ # examples=[["https://www.youtube.com/watch?v=mukeSSa5GKo"]],
198
+ # outputs=["html", "text"],
199
+ # title="Whisper Demo: Transkriber YouTube",
200
+ # description=(
201
+ # "Transkriber lange YouTube-videoer med et enkelt klikk! Demoen bruker den fintunede modellen:"
202
+ # f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) og 🤗 Transformers til å transkribere lydfiler av"
203
+ # " vilkårlig lengde."
204
+ # ),
205
+ # allow_flagging="never",
206
+ # )
207
+
208
+ # Start demoen uten faner
209
+ demo.launch(share=share, show_api=False,allowed_paths=["Logonew.png"]).queue()