nb-whisper-demo

Running on T4

App Files Community

pere committed on Oct 8, 2024

Commit

9edeaa8

1 Parent(s): 9f35793

update test

Browse files

Files changed (1) hide show

app.py +23 -16

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import time
 import os
 import torch
@@ -21,7 +22,7 @@ lang = "no"
 share = (os.environ.get("SHARE", "False")[0].lower() in "ty1") or None
 auth_token = os.environ.get("AUTH_TOKEN") or True
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-print(f"Using device: {device}")
 @spaces.GPU(duration=60 * 2)
 def pipe(file, return_timestamps=False):
@@ -41,9 +42,15 @@ def pipe(file, return_timestamps=False):
     )
     return asr(file, return_timestamps=return_timestamps, batch_size=24)
 def transcribe(file, return_timestamps=False):
     if not return_timestamps:
         text = pipe(file)["text"]
     else:
         chunks = pipe(file, return_timestamps=True)["chunks"]
         text = []
@@ -52,8 +59,8 @@ def transcribe(file, return_timestamps=False):
             end_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][1])) if chunk["timestamp"][1] is not None else "??:??:??"
             line = f"[{start_time} -> {end_time}] {chunk['text']}"
             text.append(line)
-        text = "\n".join(text)
-    return text
 def _return_yt_html_embed(yt_url):
     video_id = yt_url.split("?v=")[-1]
@@ -83,7 +90,7 @@ def yt_transcribe(yt_url, return_timestamps=False):
     return html_embed_str, text
-# Create the Gradio app without tabs
 demo = gr.Blocks()
@@ -92,35 +99,35 @@ with demo:
         fn=transcribe,
         inputs=[
             gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
-            gr.components.Checkbox(label="Return timestamps"),
         ],
         outputs="text",
         title="NB-Whisper",
         description=(
-            "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the fine-tuned"
-            f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
-            " of arbitrary length."
         ),
         allow_flagging="never",
     )
-    # Optional: Uncomment to add the YouTube transcription interface if needed
     # yt_transcribe_interface = gr.Interface(
     #     fn=yt_transcribe,
     #     inputs=[
-    #         gr.components.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
-    #         gr.components.Checkbox(label="Return timestamps"),
     #     ],
     #     examples=[["https://www.youtube.com/watch?v=mukeSSa5GKo"]],
     #     outputs=["html", "text"],
-    #     title="Whisper Demo: Transcribe YouTube",
     #     description=(
-    #         "Transcribe long-form YouTube videos with the click of a button! Demo uses the fine-tuned checkpoint:"
-    #         f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of"
-    #         " arbitrary length."
     #     ),
     #     allow_flagging="never",
     # )
-# Launch the demo without tabs
 demo.launch(share=share).queue()

 import time
 import os
+import re
 import torch
 share = (os.environ.get("SHARE", "False")[0].lower() in "ty1") or None
 auth_token = os.environ.get("AUTH_TOKEN") or True
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+print(f"Bruker enhet: {device}")
 @spaces.GPU(duration=60 * 2)
 def pipe(file, return_timestamps=False):
     )
     return asr(file, return_timestamps=return_timestamps, batch_size=24)
+def format_output(text):
+    # Insert a newline after each ".", "!", ":" or "?" that is neither preceded nor followed by "." (so runs like "..." are left intact); whitespace after the punctuation is kept.
+    text = re.sub(r'(?<!\.)[.!:?](?!\.)', lambda m: m.group() + '\n', text)
+    return text
 def transcribe(file, return_timestamps=False):
     if not return_timestamps:
         text = pipe(file)["text"]
+        formatted_text = format_output(text)
     else:
         chunks = pipe(file, return_timestamps=True)["chunks"]
         text = []
             end_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][1])) if chunk["timestamp"][1] is not None else "??:??:??"
             line = f"[{start_time} -> {end_time}] {chunk['text']}"
             text.append(line)
+        formatted_text = "\n".join(text)
+    return formatted_text
 def _return_yt_html_embed(yt_url):
     video_id = yt_url.split("?v=")[-1]
     return html_embed_str, text
+# Lag Gradio-appen uten faner
 demo = gr.Blocks()
         fn=transcribe,
         inputs=[
             gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
+            gr.components.Checkbox(label="Inkluder tidsstempler"),
         ],
         outputs="text",
         title="NB-Whisper",
         description=(
+            "Transkriber lange lydopptak fra mikrofon eller lydfiler med et enkelt klikk! Demoen bruker den fintunede"
+            f" modellen [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) og 🤗 Transformers til å transkribere lydfiler"
+            " av vilkårlig lengde."
         ),
         allow_flagging="never",
     )
+    # Uncomment to add the YouTube transcription interface if needed
     # yt_transcribe_interface = gr.Interface(
     #     fn=yt_transcribe,
     #     inputs=[
+    #         gr.components.Textbox(lines=1, placeholder="Lim inn URL til en YouTube-video her", label="YouTube URL"),
+    #         gr.components.Checkbox(label="Inkluder tidsstempler"),
     #     ],
     #     examples=[["https://www.youtube.com/watch?v=mukeSSa5GKo"]],
     #     outputs=["html", "text"],
+    #     title="Whisper Demo: Transkriber YouTube",
     #     description=(
+    #         "Transkriber lange YouTube-videoer med et enkelt klikk! Demoen bruker den fintunede modellen:"
+    #         f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) og 🤗 Transformers til å transkribere lydfiler av"
+    #         " vilkårlig lengde."
     #     ),
     #     allow_flagging="never",
     # )
+# Start demoen uten faner
 demo.launch(share=share).queue()