AngelinaZanardi committed on
Commit
3b6c548
·
verified ·
1 Parent(s): 4553dcb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -7
app.py CHANGED
@@ -28,7 +28,7 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
28
  print(f"Bruker enhet: {device}")
29
 
30
  @spaces.GPU(duration=60 * 2)
31
- def pipe(file, return_timestamps=False,lang):
32
  asr = pipeline(
33
  task="automatic-speech-recognition",
34
  model=MODEL_NAME,
@@ -71,10 +71,10 @@ def transcribe(file, return_timestamps=False,lang_nn=False):
71
 
72
  if not lang_nn:
73
  if not return_timestamps:
74
- text = pipe(file_to_transcribe, return_timestams=False, lang="no")["text"]
75
  formatted_text = format_output(text)
76
  else:
77
- chunks = pipe(file_to_transcribe, return_timestamps=True, lang="no")["chunks"]
78
  text = []
79
  for chunk in chunks:
80
  start_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][0])) if chunk["timestamp"][0] is not None else "??:??:??"
@@ -84,7 +84,7 @@ def transcribe(file, return_timestamps=False,lang_nn=False):
84
  formatted_text = "<br>".join(text)
85
  else:
86
  if not return_timestamps:
87
- text = pipe(file_to_transcribe, return_timestams=False,lang="nn")["text"]
88
  formatted_text = format_output(text)
89
  else:
90
  chunks = pipe(file_to_transcribe, return_timestamps=True,lang="nn")["chunks"]
@@ -95,7 +95,11 @@ def transcribe(file, return_timestamps=False,lang_nn=False):
95
  line = f"[{start_time} -> {end_time}] {chunk['text']}"
96
  text.append(line)
97
  formatted_text = "<br>".join(text)
98
-
 
 
 
 
99
  if truncated:
100
  link="https://github.com/NbAiLab/nostram/blob/main/leverandorer.md"
101
  disclaimer = (
@@ -109,7 +113,7 @@ def transcribe(file, return_timestamps=False,lang_nn=False):
109
  formatted_text += "<br><br><i>Transkribert med NB-Whisper demo</i>"
110
 
111
 
112
- return formatted_text
113
 
114
  def _return_yt_html_embed(yt_url):
115
  video_id = yt_url.split("?v=")[-1]
@@ -162,7 +166,11 @@ with demo:
162
  gr.components.Checkbox(label="Inkluder tidsstempler"),
163
  gr.components.Checkbox(label="Nynorsk"),
164
  ],
165
- outputs=gr.HTML(label="text"),
 
 
 
 
166
  #outputs="text",
167
 
168
  description=(
 
28
  print(f"Bruker enhet: {device}")
29
 
30
  @spaces.GPU(duration=60 * 2)
31
+ def pipe(file, return_timestamps=False,lang="no"):
32
  asr = pipeline(
33
  task="automatic-speech-recognition",
34
  model=MODEL_NAME,
 
71
 
72
  if not lang_nn:
73
  if not return_timestamps:
74
+ text = pipe(file_to_transcribe)["text"]
75
  formatted_text = format_output(text)
76
  else:
77
+ chunks = pipe(file_to_transcribe, return_timestamps=True)["chunks"]
78
  text = []
79
  for chunk in chunks:
80
  start_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][0])) if chunk["timestamp"][0] is not None else "??:??:??"
 
84
  formatted_text = "<br>".join(text)
85
  else:
86
  if not return_timestamps:
87
+ text = pipe(file_to_transcribe,lang="nn")["text"]
88
  formatted_text = format_output(text)
89
  else:
90
  chunks = pipe(file_to_transcribe, return_timestamps=True,lang="nn")["chunks"]
 
95
  line = f"[{start_time} -> {end_time}] {chunk['text']}"
96
  text.append(line)
97
  formatted_text = "<br>".join(text)
98
+
99
+ output_file = "transcription.txt"
100
+ with open(output_file, "w") as f:
101
+ f.write(re.sub('<br>', '\n', formatted_text))
102
+
103
  if truncated:
104
  link="https://github.com/NbAiLab/nostram/blob/main/leverandorer.md"
105
  disclaimer = (
 
113
  formatted_text += "<br><br><i>Transkribert med NB-Whisper demo</i>"
114
 
115
 
116
+ return formatted_text, output_file
117
 
118
  def _return_yt_html_embed(yt_url):
119
  video_id = yt_url.split("?v=")[-1]
 
166
  gr.components.Checkbox(label="Inkluder tidsstempler"),
167
  gr.components.Checkbox(label="Nynorsk"),
168
  ],
169
+
170
+ outputs=[
171
+ gr.HTML(label="text"),
172
+ gr.File(label="Last ned transkripsjon")
173
+ ],
174
  #outputs="text",
175
 
176
  description=(