NeuraFusionAI committed on
Commit 8694a76 · 1 Parent(s): ad392ba

Fix app.py: 'article' is not defined

Files changed (1):
  app.py +19 -28
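The crash this commit fixes is a plain Python NameError: the gr.Interface(...) call at the bottom of app.py passed article=article, but no variable named article was ever assigned, so the module failed the moment it was imported. A minimal sketch of the failure mode and the two possible fixes (the commit takes the second); the greet function and its labels are hypothetical stand-ins, only the article keyword comes from app.py:

    import gradio as gr

    def greet(name):  # hypothetical stand-in for the real transcribe function
        return f"Hello, {name}!"

    demo = gr.Interface(
        fn=greet,
        inputs=gr.Text(label="Name"),
        outputs=gr.Text(label="Greeting"),
        # article=article,  # raises NameError: name 'article' is not defined
        # Fix 1: assign the variable first, e.g. article = "Footer text".
        # Fix 2 (what this commit does): drop the argument entirely.
    )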
app.py CHANGED
@@ -8,12 +8,11 @@ from transformers.utils import is_flash_attn_2_available
 from languages import get_language_names
 from subtitle_manager import Subtitle
 
-
 logging.basicConfig(level=logging.INFO)
 last_model = None
 pipe = None
 
-def write_file(output_file,subtitle):
+def write_file(output_file, subtitle):
     with open(output_file, 'w', encoding='utf-8') as f:
         f.write(subtitle)
 
@@ -33,10 +32,6 @@ def create_pipe(model, flash):
         low_cpu_mem_usage=True,
         use_safetensors=True,
         attn_implementation="flash_attention_2" if flash and is_flash_attn_2_available() else "sdpa",
-        # eager (manual attention implementation)
-        # flash_attention_2 (implementation using flash attention 2)
-        # sdpa (implementation using torch.nn.functional.scaled_dot_product_attention)
-        # PyTorch SDPA requirements in Transformers are not met. Please install torch>=2.1.1.
     )
     model.to(device)
 
@@ -47,9 +42,6 @@ def create_pipe(model, flash):
         model=model,
         tokenizer=processor.tokenizer,
         feature_extractor=processor.feature_extractor,
-        # max_new_tokens=128,
-        # chunk_length_s=15,
-        # batch_size=16,
         torch_dtype=torch_dtype,
         device=device,
     )
@@ -88,7 +80,7 @@ def transcribe_webui_simple_progress(modelName, languageName, urlData, multipleF
 
     files = []
     if multipleFiles:
-        files+=multipleFiles
+        files += multipleFiles
     if urlData:
         files.append(urlData)
     if microphoneData:
@@ -107,28 +99,27 @@ def transcribe_webui_simple_progress(modelName, languageName, urlData, multipleF
         logging.info(file)
         outputs = pipe(
             file,
-            chunk_length_s=chunk_length_s,#30
-            batch_size=batch_size,#24
+            chunk_length_s=chunk_length_s,
+            batch_size=batch_size,
             generate_kwargs=generate_kwargs,
             return_timestamps=True,
         )
         logging.debug(outputs)
-        logging.info(print(f"transcribe: {time.time() - start_time} sec."))
+        logging.info(f"transcribe: {time.time() - start_time} sec.")
 
         file_out = file.split('/')[-1]
         srt = srt_sub.get_subtitle(outputs["chunks"])
         vtt = vtt_sub.get_subtitle(outputs["chunks"])
         txt = txt_sub.get_subtitle(outputs["chunks"])
-        write_file(file_out+".srt",srt)
-        write_file(file_out+".vtt",vtt)
-        write_file(file_out+".txt",txt)
-        files_out += [file_out+".srt", file_out+".vtt", file_out+".txt"]
+        write_file(file_out + ".srt", srt)
+        write_file(file_out + ".vtt", vtt)
+        write_file(file_out + ".txt", txt)
+        files_out += [file_out + ".srt", file_out + ".vtt", file_out + ".txt"]
 
     progress(1, desc="Completed!")
 
     return files_out, vtt, txt
 
-
 with gr.Blocks(title="Insanely Fast Whisper") as demo:
     description = "An opinionated CLI to transcribe Audio files w/ Whisper on-device! Powered by 🤗 Transformers, Optimum & flash-attn"
 
@@ -142,7 +133,7 @@ with gr.Blocks(title="Insanely Fast Whisper") as demo:
         "openai/whisper-large-v2", "distil-whisper/distil-large-v2",
         "openai/whisper-large-v3", "distil-whisper/distil-large-v3", "xaviviro/whisper-large-v3-catalan-finetuned-v2",
     ]
-    waveform_options=gr.WaveformOptions(
+    waveform_options = gr.WaveformOptions(
        waveform_color="#01C6FF",
        waveform_progress_color="#0066B4",
        skip_length=2,
@@ -151,17 +142,17 @@ with gr.Blocks(title="Insanely Fast Whisper") as demo:
 
    simple_transcribe = gr.Interface(fn=transcribe_webui_simple_progress,
        description=description,
-       article=article,
+
        inputs=[
-            gr.Dropdown(choices=whisper_models, value="distil-whisper/distil-large-v2", label="Model", info="Select whisper model", interactive = True,),
-            gr.Dropdown(choices=["Automatic Detection"] + sorted(get_language_names()), value="Automatic Detection", label="Language", info="Select audio voice language", interactive = True,),
-            gr.Text(label="URL", info="(YouTube, etc.)", interactive = True),
+            gr.Dropdown(choices=whisper_models, value="distil-whisper/distil-large-v2", label="Model", info="Select whisper model", interactive=True),
+            gr.Dropdown(choices=["Automatic Detection"] + sorted(get_language_names()), value="Automatic Detection", label="Language", info="Select audio voice language", interactive=True),
+            gr.Text(label="URL", info="(YouTube, etc.)", interactive=True),
             gr.File(label="Upload Files", file_count="multiple"),
-            gr.Audio(sources=["upload", "microphone",], type="filepath", label="Input", waveform_options = waveform_options),
-            gr.Dropdown(choices=["transcribe", "translate"], label="Task", value="transcribe", interactive = True),
-            gr.Checkbox(label='Flash',info='Use Flash Attention 2'),
-            gr.Number(label='chunk_length_s',value=30, interactive = True),
-            gr.Number(label='batch_size',value=24, interactive = True)
+            gr.Audio(sources=["upload", "microphone"], type="filepath", label="Input", waveform_options=waveform_options),
+            gr.Dropdown(choices=["transcribe", "translate"], label="Task", value="transcribe", interactive=True),
+            gr.Checkbox(label='Flash', info='Use Flash Attention 2'),
+            gr.Number(label='chunk_length_s', value=30, interactive=True),
+            gr.Number(label='batch_size', value=24, interactive=True)
         ], outputs=[
            gr.File(label="Download"),
            gr.Text(label="Transcription"),
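For context on the attn_implementation expression that survives in create_pipe: it selects Flash Attention 2 only when the user ticks the Flash checkbox and the kernel is actually installed, and otherwise falls back to PyTorch's SDPA backend (which needs torch>=2.1.1, per the comment the commit deletes). A standalone sketch of that selection, assuming the model is loaded with AutoModelForSpeechSeq2Seq as is typical for Whisper checkpoints (app.py's exact loader is not shown in this diff):

    import torch
    from transformers import AutoModelForSpeechSeq2Seq
    from transformers.utils import is_flash_attn_2_available

    flash = True  # mirrors the "Flash" checkbox in the UI

    # Same fallback expression as create_pipe: Flash Attention 2 if requested
    # and available, else torch.nn.functional.scaled_dot_product_attention.
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        "distil-whisper/distil-large-v2",  # the UI's default model
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
        use_safetensors=True,
        attn_implementation="flash_attention_2" if flash and is_flash_attn_2_available() else "sdpa",
    )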
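The two gr.Number inputs feed straight through to the pipeline call, so the now-uncommented chunk_length_s and batch_size keywords are what enable chunked, batched long-form transcription. A self-contained sketch of that call path with the UI defaults (30-second chunks, batch size 24); the audio path is hypothetical:

    import torch
    from transformers import pipeline

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    pipe = pipeline(
        "automatic-speech-recognition",
        model="distil-whisper/distil-large-v2",  # the UI's default model
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device=device,
    )
    outputs = pipe(
        "audio.mp3",        # hypothetical input file
        chunk_length_s=30,  # UI default
        batch_size=24,      # UI default
        return_timestamps=True,
    )
    # outputs["chunks"] is the structure get_subtitle() consumes in app.py.
    for chunk in outputs["chunks"]:
        print(chunk["timestamp"], chunk["text"])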