nb-whisper-demo

Sleeping

App Files Files Community

pere committed on Oct 15, 2024

Commit

143ef7b

verified ·

1 Parent(s): 5e7755f

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -30

app.py CHANGED Viewed

@@ -18,7 +18,9 @@ except ImportError:
 import yt_dlp  # Added import for yt-dlp
 MODEL_NAME = "NbAiLab/nb-whisper-large"
-max_audio_length = 30 * 60
 share = (os.environ.get("SHARE", "False")[0].lower() in "ty1") or None
 auth_token = os.environ.get("AUTH_TOKEN") or True
@@ -26,7 +28,7 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 print(f"Bruker enhet: {device}")
 @spaces.GPU(duration=60 * 2)
-def pipe(file, return_timestamps=False, lang="no"):
     asr = pipeline(
         task="automatic-speech-recognition",
         model=MODEL_NAME,
@@ -44,20 +46,24 @@ def pipe(file, return_timestamps=False, lang="no"):
     return asr(file, return_timestamps=return_timestamps, batch_size=24, generate_kwargs={'task': 'transcribe', 'language': lang})
 def format_output(text):
     """Return *text* with an HTML line break after each punctuation mark.

     A run of three or more dots is treated as one unit; otherwise every
     single ".", "!", ":" or "?" is followed by "<br>".
     """
     # \g<0> re-emits the entire match, so the punctuation itself is kept
     # and the break tag is appended directly after it.
     return re.sub(r'(\.{3,}|[.!:?])', r'\g<0><br>', text)
-def transcribe(file, return_timestamps=False, lang_nn=False):
     waveform, sample_rate = torchaudio.load(file)
     audio_duration = waveform.size(1) / sample_rate
-    warning_message = None
     if audio_duration > max_audio_length:
         warning_message = (
             "<b style='color:red;'>⚠️ Advarsel:</b> "
             "Lydfilen er lengre enn 30 minutter. Kun de første 30 minuttene vil bli transkribert."
         )
         waveform = waveform[:, :int(max_audio_length * sample_rate)]
         truncated_file = "truncated_audio.wav"
         torchaudio.save(truncated_file, waveform, sample_rate)
@@ -67,6 +73,7 @@ def transcribe(file, return_timestamps=False, lang_nn=False):
         file_to_transcribe = file
         truncated = False
     if not lang_nn:
         if not return_timestamps:
             text = pipe(file_to_transcribe)["text"]
@@ -82,10 +89,10 @@ def transcribe(file, return_timestamps=False, lang_nn=False):
             formatted_text = "<br>".join(text)
     else:
         if not return_timestamps:
-            text = pipe(file_to_transcribe, lang="nn")["text"]
             formatted_text = format_output(text)
         else:
-            chunks = pipe(file_to_transcribe, return_timestamps=True, lang="nn")["chunks"]
             text = []
             for chunk in chunks:
                 start_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][0])) if chunk["timestamp"][0] is not None else "??:??:??"
@@ -146,47 +153,57 @@ def yt_transcribe(yt_url, return_timestamps=False):
 demo = gr.Blocks(theme=gr.themes.Default(primary_hue=gr.themes.colors.red, secondary_hue=gr.themes.colors.red))
 with demo:
-    with gr.Row():
-        # Scale up the logo and align the title to the right of the logo
-        with gr.Column(scale=1, min_width=150):
-            gr.HTML(f"<img src='file/Logonew.png' style='width:250px;'>")  # Increased logo size
-        with gr.Column(scale=4, min_width=300):
             gr.Markdown(
                 """
-                <h1 style="font-size: 3em; color: #FF0000; text-align:right;">NB-Whisper Demo</h1>
-                """  # Title aligned to right and changed to red
             )
-    # Description moved to the bottom
-    with gr.Row():
-        gr.Markdown(
-            """
-            Transkriber lange lydopptak fra mikrofon eller lydfiler med et enkelt klikk!
-            Demoen bruker den fintunede modellen NbAiLab/nb-whisper-large og 🤗 Transformers til å transkribere lydfiler opp til 30 minutter.
-            """
-        )
     mf_transcribe = gr.Interface(
         fn=transcribe,
         inputs=[
             gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
-            gr.components.Checkbox(label="Inkluder tidskoder"),  # Text changed here, functionality unchanged
             gr.components.Checkbox(label="Nynorsk"),
         ],
         outputs=[
             gr.HTML(label="Varsel"),
             gr.HTML(label="text"),
-            gr.File(label="Last ned transkripsjon"),  # Removed 'style' argument
         ],
         description=(
-            "Transkriber lange lydopptak fra mikrofon eller lydfiler med et enkelt klikk! "
-            "Demoen bruker den fintunede modellen NbAiLab/nb-whisper-large og 🤗 Transformers til å transkribere lydfiler opp til 30 minutter."
         ),
         allow_flagging="never",
     )
-    # Bottom description and built-with Gradio message
-    gr.Markdown("<br><br><center><i>Bygget med Gradio</i></center>")  # Added description near bottom
-demo.launch(share=share, show_api=False, allowed_paths=["Logonew.png"]).queue()

 import yt_dlp  # Added import for yt-dlp
 MODEL_NAME = "NbAiLab/nb-whisper-large"
+#lang = "no"
+max_audio_length= 30 * 60
 share = (os.environ.get("SHARE", "False")[0].lower() in "ty1") or None
 auth_token = os.environ.get("AUTH_TOKEN") or True
 print(f"Bruker enhet: {device}")
 @spaces.GPU(duration=60 * 2)
+def pipe(file, return_timestamps=False,lang="no"):
     asr = pipeline(
         task="automatic-speech-recognition",
         model=MODEL_NAME,
     return asr(file, return_timestamps=return_timestamps, batch_size=24, generate_kwargs={'task': 'transcribe', 'language': lang})
 def format_output(text):
+    # Add a line break after ".", "!", ":", or "?" unless part of sequences like "..."
+    #text = re.sub(r'(?<!\.)[.!:?](?!\.)', lambda m: m.group() + '<br>', text)
+    # Ensure line break after sequences like "..." or other punctuation patterns
     text = re.sub(r'(\.{3,}|[.!:?])', lambda m: m.group() + '<br>', text)
     return text
+def transcribe(file, return_timestamps=False,lang_nn=False):
     waveform, sample_rate = torchaudio.load(file)
     audio_duration = waveform.size(1) / sample_rate
+    warning_message=None
     if audio_duration > max_audio_length:
         warning_message = (
             "<b style='color:red;'>⚠️ Advarsel:</b> "
             "Lydfilen er lengre enn 30 minutter. Kun de første 30 minuttene vil bli transkribert."
         )
+        # Trim the waveform to the first 30 minutes
         waveform = waveform[:, :int(max_audio_length * sample_rate)]
         truncated_file = "truncated_audio.wav"
         torchaudio.save(truncated_file, waveform, sample_rate)
         file_to_transcribe = file
         truncated = False
     if not lang_nn:
         if not return_timestamps:
             text = pipe(file_to_transcribe)["text"]
             formatted_text = "<br>".join(text)
     else:
         if not return_timestamps:
+            text = pipe(file_to_transcribe,lang="nn")["text"]
             formatted_text = format_output(text)
         else:
+            chunks = pipe(file_to_transcribe, return_timestamps=True,lang="nn")["chunks"]
             text = []
             for chunk in chunks:
                 start_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][0])) if chunk["timestamp"][0] is not None else "??:??:??"
 demo = gr.Blocks(theme=gr.themes.Default(primary_hue=gr.themes.colors.red, secondary_hue=gr.themes.colors.red))
 with demo:
+    with gr.Column():
+        gr.HTML(f"<img src='file/Logonew.png' style='width:200px;'>")
+        with gr.Column(scale=8):
+            # Use Markdown for title and description
             gr.Markdown(
                 """
+                <h1 style="font-size: 3em;">NB-Whisper Demo</h1>
+                """
             )
     mf_transcribe = gr.Interface(
         fn=transcribe,
         inputs=[
             gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
+            gr.components.Checkbox(label="Inkluder tidskoder"),
             gr.components.Checkbox(label="Nynorsk"),
         ],
         outputs=[
             gr.HTML(label="Varsel"),
             gr.HTML(label="text"),
+            gr.File(label="Last ned transkripsjon")
         ],
+        #outputs="text",
         description=(
+            "Demoen bruker"
+            f" modellen [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) til å transkribere lydfiler opp til 30 minutter."
         ),
         allow_flagging="never",
+        #show_submit_button=False,
     )
+    # Uncomment to add the YouTube transcription interface if needed
+    # yt_transcribe_interface = gr.Interface(
+    #     fn=yt_transcribe,
+    #     inputs=[
+    #         gr.components.Textbox(lines=1, placeholder="Lim inn URL til en YouTube-video her", label="YouTube URL"),
+    #         gr.components.Checkbox(label="Inkluder tidsstempler"),
+    #     ],
+    #     examples=[["https://www.youtube.com/watch?v=mukeSSa5GKo"]],
+    #     outputs=["html", "text"],
+    #     title="Whisper Demo: Transkriber YouTube",
+    #     description=(
+    #         "Transkriber lange YouTube-videoer med et enkelt klikk! Demoen bruker den fintunede modellen:"
+    #         f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) og 🤗 Transformers til å transkribere lydfiler av"
+    #         " vilkårlig lengde."
+    #     ),
+    #     allow_flagging="never",
+    # )
+# Start demoen uten faner
+demo.launch(share=share, show_api=False,allowed_paths=["Logonew.png"]).queue()