nb-whisper-demo

Running on T4

App Files Files Community

AngelinaZanardi committed on Oct 14, 2024

Commit

dc1ede9

verified ·

1 Parent(s): b4f4567

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -15

app.py CHANGED Viewed

@@ -28,7 +28,7 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 print(f"Bruker enhet: {device}")
 @spaces.GPU(duration=60 * 2)
-def pipe(file, return_timestamps=False):
     asr = pipeline(
         task="automatic-speech-recognition",
         model=MODEL_NAME,
@@ -52,7 +52,8 @@ def format_output(text):
     text = re.sub(r'(\.{3,}|[.!:?])', lambda m: m.group() + '<br>', text)
     return text
-def transcribe(file, return_timestamps=False):
     waveform, sample_rate = torchaudio.load(file)
     audio_duration = waveform.size(1) / sample_rate
@@ -66,19 +67,34 @@ def transcribe(file, return_timestamps=False):
     else:
         file_to_transcribe = file
         truncated = False
-    if not return_timestamps:
-        text = pipe(file_to_transcribe)["text"]
-        formatted_text = format_output(text)
     else:
-        chunks = pipe(file_to_transcribe, return_timestamps=True)["chunks"]
-        text = []
-        for chunk in chunks:
-            start_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][0])) if chunk["timestamp"][0] is not None else "??:??:??"
-            end_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][1])) if chunk["timestamp"][1] is not None else "??:??:??"
-            line = f"[{start_time} -> {end_time}] {chunk['text']}"
-            text.append(line)
-        formatted_text = "<br>".join(text)
     if truncated:
         link="https://github.com/NbAiLab/nostram/blob/main/leverandorer.md"
@@ -144,13 +160,14 @@ with demo:
         inputs=[
             gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
             gr.components.Checkbox(label="Inkluder tidsstempler"),
         ],
         outputs=gr.HTML(label="text"),
         #outputs="text",
         description=(
-            "Transkriber lange lydopptak fra mikrofon eller lydfiler med et enkelt klikk! Demoen bruker den fintunede"
-            f" modellen <br> [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) og 🤗 Transformers til å transkribere lydfiler opp til 30 minutter."
         ),
         allow_flagging="never",
         #show_submit_button=False,

 print(f"Bruker enhet: {device}")
 @spaces.GPU(duration=60 * 2)
+def pipe(file, return_timestamps=False,lang="no"):
     asr = pipeline(
         task="automatic-speech-recognition",
         model=MODEL_NAME,
     text = re.sub(r'(\.{3,}|[.!:?])', lambda m: m.group() + '<br>', text)
     return text
+def transcribe(file, return_timestamps=False,lang="no"):
     waveform, sample_rate = torchaudio.load(file)
     audio_duration = waveform.size(1) / sample_rate
     else:
         file_to_transcribe = file
         truncated = False
+    # Resolve the language code once so both output modes share one code path.
+    # Any value other than "no" (the default) is transcribed with lang="nn".
+    # NOTE(review): the "Nynorsk" checkbox yields a bool, not "no"/"nn" - confirm
+    # the caller maps it to a language code before it reaches `lang`.
+    lang_code = "no" if lang == "no" else "nn"
+    if not return_timestamps:
+        # Plain transcript: a single string run through format_output().
+        text = pipe(file_to_transcribe, return_timestamps=False, lang=lang_code)["text"]
+        formatted_text = format_output(text)
     else:
+        # Timestamped transcript: one "[start -> end] text" line per chunk.
+        chunks = pipe(file_to_transcribe, return_timestamps=True, lang=lang_code)["chunks"]
+        text = []
+        for chunk in chunks:
+            # Either timestamp bound may be None; show a placeholder instead of
+            # passing None to time.gmtime().
+            start_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][0])) if chunk["timestamp"][0] is not None else "??:??:??"
+            end_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][1])) if chunk["timestamp"][1] is not None else "??:??:??"
+            line = f"[{start_time} -> {end_time}] {chunk['text']}"
+            text.append(line)
+        formatted_text = "<br>".join(text)
     if truncated:
         link="https://github.com/NbAiLab/nostram/blob/main/leverandorer.md"
         inputs=[
             gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
             gr.components.Checkbox(label="Inkluder tidsstempler"),
+            gr.components.Checkbox(label="Nynorsk"),
         ],
         outputs=gr.HTML(label="text"),
         #outputs="text",
         description=(
+            "Transkriber lange lydopptak fra mikrofon eller lydfiler med et enkelt klikk! <br> Demoen bruker den fintunede"
+            f" modellen [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) og 🤗 Transformers til å transkribere lydfiler opp til 30 minutter."
         ),
         allow_flagging="never",
         #show_submit_button=False,