Spaces:

NeuraFusionAI
/

WhisperFast

Running

App Files Community

NeuraFusionAI committed on Aug 23, 2024

Commit

8694a76

1 Parent(s): ad392ba

Fix app.py: 'article' is not defined

Browse files

Files changed (1) hide show

app.py +19 -28

app.py CHANGED Viewed

@@ -8,12 +8,11 @@ from transformers.utils import is_flash_attn_2_available
 from languages import get_language_names
 from subtitle_manager import Subtitle
 logging.basicConfig(level=logging.INFO)
 last_model = None
 pipe = None
-def write_file(output_file,subtitle):
     with open(output_file, 'w', encoding='utf-8') as f:
         f.write(subtitle)
@@ -33,10 +32,6 @@ def create_pipe(model, flash):
         low_cpu_mem_usage=True,
         use_safetensors=True,
         attn_implementation="flash_attention_2" if flash and is_flash_attn_2_available() else "sdpa",
-        # eager (manual attention implementation)
-        # flash_attention_2 (implementation using flash attention 2)
-        # sdpa (implementation using torch.nn.functional.scaled_dot_product_attention)
-        # PyTorch SDPA requirements in Transformers are not met. Please install torch>=2.1.1.
     )
     model.to(device)
@@ -47,9 +42,6 @@ def create_pipe(model, flash):
         model=model,
         tokenizer=processor.tokenizer,
         feature_extractor=processor.feature_extractor,
-        # max_new_tokens=128,
-        # chunk_length_s=15,
-        # batch_size=16,
         torch_dtype=torch_dtype,
         device=device,
     )
@@ -88,7 +80,7 @@ def transcribe_webui_simple_progress(modelName, languageName, urlData, multipleF
     files = []
     if multipleFiles:
-        files+=multipleFiles
     if urlData:
         files.append(urlData)
     if microphoneData:
@@ -107,28 +99,27 @@ def transcribe_webui_simple_progress(modelName, languageName, urlData, multipleF
         logging.info(file)
         outputs = pipe(
             file,
-            chunk_length_s=chunk_length_s,#30
-            batch_size=batch_size,#24
             generate_kwargs=generate_kwargs,
             return_timestamps=True,
         )
         logging.debug(outputs)
-        logging.info(print(f"transcribe: {time.time() - start_time} sec."))
         file_out = file.split('/')[-1]
         srt = srt_sub.get_subtitle(outputs["chunks"])
         vtt = vtt_sub.get_subtitle(outputs["chunks"])
         txt = txt_sub.get_subtitle(outputs["chunks"])
-        write_file(file_out+".srt",srt)
-        write_file(file_out+".vtt",vtt)
-        write_file(file_out+".txt",txt)
-        files_out += [file_out+".srt", file_out+".vtt", file_out+".txt"]
     progress(1, desc="Completed!")
     return files_out, vtt, txt
 with gr.Blocks(title="Insanely Fast Whisper") as demo:
     description = "An opinionated CLI to transcribe Audio files w/ Whisper on-device! Powered by 🤗 Transformers, Optimum & flash-attn"
@@ -142,7 +133,7 @@ with gr.Blocks(title="Insanely Fast Whisper") as demo:
         "openai/whisper-large-v2", "distil-whisper/distil-large-v2",
         "openai/whisper-large-v3", "distil-whisper/distil-large-v3", "xaviviro/whisper-large-v3-catalan-finetuned-v2",
     ]
-    waveform_options=gr.WaveformOptions(
         waveform_color="#01C6FF",
         waveform_progress_color="#0066B4",
         skip_length=2,
@@ -151,17 +142,17 @@ with gr.Blocks(title="Insanely Fast Whisper") as demo:
     simple_transcribe = gr.Interface(fn=transcribe_webui_simple_progress,
         description=description,
-        article=article,
         inputs=[
-            gr.Dropdown(choices=whisper_models, value="distil-whisper/distil-large-v2", label="Model", info="Select whisper model", interactive = True,),
-            gr.Dropdown(choices=["Automatic Detection"] + sorted(get_language_names()), value="Automatic Detection", label="Language", info="Select audio voice language", interactive = True,),
-            gr.Text(label="URL", info="(YouTube, etc.)", interactive = True),
             gr.File(label="Upload Files", file_count="multiple"),
-            gr.Audio(sources=["upload", "microphone",], type="filepath", label="Input", waveform_options = waveform_options),
-            gr.Dropdown(choices=["transcribe", "translate"], label="Task", value="transcribe", interactive = True),
-            gr.Checkbox(label='Flash',info='Use Flash Attention 2'),
-            gr.Number(label='chunk_length_s',value=30, interactive = True),
-            gr.Number(label='batch_size',value=24, interactive = True)
         ], outputs=[
             gr.File(label="Download"),
             gr.Text(label="Transcription"),

 from languages import get_language_names
 from subtitle_manager import Subtitle
 logging.basicConfig(level=logging.INFO)
 last_model = None
 pipe = None
+def write_file(output_file, subtitle):
     with open(output_file, 'w', encoding='utf-8') as f:
         f.write(subtitle)
         low_cpu_mem_usage=True,
         use_safetensors=True,
         attn_implementation="flash_attention_2" if flash and is_flash_attn_2_available() else "sdpa",
     )
     model.to(device)
         model=model,
         tokenizer=processor.tokenizer,
         feature_extractor=processor.feature_extractor,
         torch_dtype=torch_dtype,
         device=device,
     )
     files = []
     if multipleFiles:
+        files += multipleFiles
     if urlData:
         files.append(urlData)
     if microphoneData:
         logging.info(file)
         outputs = pipe(
             file,
+            chunk_length_s=chunk_length_s,
+            batch_size=batch_size,
             generate_kwargs=generate_kwargs,
             return_timestamps=True,
         )
         logging.debug(outputs)
+        logging.info(f"transcribe: {time.time() - start_time} sec.")
         file_out = file.split('/')[-1]
         srt = srt_sub.get_subtitle(outputs["chunks"])
         vtt = vtt_sub.get_subtitle(outputs["chunks"])
         txt = txt_sub.get_subtitle(outputs["chunks"])
+        write_file(file_out + ".srt", srt)
+        write_file(file_out + ".vtt", vtt)
+        write_file(file_out + ".txt", txt)
+        files_out += [file_out + ".srt", file_out + ".vtt", file_out + ".txt"]
     progress(1, desc="Completed!")
     return files_out, vtt, txt
 with gr.Blocks(title="Insanely Fast Whisper") as demo:
     description = "An opinionated CLI to transcribe Audio files w/ Whisper on-device! Powered by 🤗 Transformers, Optimum & flash-attn"
         "openai/whisper-large-v2", "distil-whisper/distil-large-v2",
         "openai/whisper-large-v3", "distil-whisper/distil-large-v3", "xaviviro/whisper-large-v3-catalan-finetuned-v2",
     ]
+    waveform_options = gr.WaveformOptions(
         waveform_color="#01C6FF",
         waveform_progress_color="#0066B4",
         skip_length=2,
     simple_transcribe = gr.Interface(fn=transcribe_webui_simple_progress,
         description=description,
         inputs=[
+            gr.Dropdown(choices=whisper_models, value="distil-whisper/distil-large-v2", label="Model", info="Select whisper model", interactive=True),
+            gr.Dropdown(choices=["Automatic Detection"] + sorted(get_language_names()), value="Automatic Detection", label="Language", info="Select audio voice language", interactive=True),
+            gr.Text(label="URL", info="(YouTube, etc.)", interactive=True),
             gr.File(label="Upload Files", file_count="multiple"),
+            gr.Audio(sources=["upload", "microphone"], type="filepath", label="Input", waveform_options=waveform_options),
+            gr.Dropdown(choices=["transcribe", "translate"], label="Task", value="transcribe", interactive=True),
+            gr.Checkbox(label='Flash', info='Use Flash Attention 2'),
+            gr.Number(label='chunk_length_s', value=30, interactive=True),
+            gr.Number(label='batch_size', value=24, interactive=True)
         ], outputs=[
             gr.File(label="Download"),
             gr.Text(label="Transcription"),