insanely-fast-whisper-webui-zero

Running on Zero

App Files Files Community

reedmayhew committed on Aug 9, 2024

Commit

97bdbba

verified ·

1 Parent(s): 5a0e518

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -52

app.py CHANGED Viewed

@@ -42,13 +42,20 @@ def create_pipe(model, flash):
     )
     return pipe, model  # Return both pipe and model for later GPU switch
-@spaces.GPU(duration=120)
 def move_to_gpu(model):
-    device = "cuda:0"
-    torch_dtype = torch.float16  # Use float16 precision on GPU
-    model.to(device, dtype=torch_dtype)
     return device
 def transcribe_webui_simple_progress(modelName, languageName, urlData, multipleFiles, microphoneData, task, flash,
                                      chunk_length_s, batch_size, progress=gr.Progress()):
     global last_model
@@ -77,54 +84,55 @@ def transcribe_webui_simple_progress(modelName, languageName, urlData, multipleF
     last_model = modelName
-    # Now move the model to GPU after the pipe is created
-    device = move_to_gpu(pipe.model)
-    # Update pipe's device
-    pipe.device = torch.device(device)
-    pipe.model.to(pipe.device)
-    srt_sub = Subtitle("srt")
-    vtt_sub = Subtitle("vtt")
-    txt_sub = Subtitle("txt")
-    files = []
-    if multipleFiles:
-        files += multipleFiles
-    if urlData:
-        files.append(urlData)
-    if microphoneData:
-        files.append(microphoneData)
-    logging.info(files)
-    generate_kwargs = {}
-    if languageName != "Automatic Detection" and modelName.endswith(".en") == False:
-        generate_kwargs["language"] = languageName
-    if modelName.endswith(".en") == False:
-        generate_kwargs["task"] = task
-    files_out = []
-    for file in progress.tqdm(files, desc="Working..."):
-        start_time = time.time()
-        logging.info(file)
-        outputs = pipe(
-            file,
-            chunk_length_s=chunk_length_s,  # 30
-            batch_size=batch_size,  # 24
-            generate_kwargs=generate_kwargs,
-            return_timestamps=True,
-        )
-        logging.debug(outputs)
-        logging.info(print(f"transcribe: {time.time() - start_time} sec."))
-        file_out = file.split('/')[-1]
-        srt = srt_sub.get_subtitle(outputs["chunks"])
-        vtt = vtt_sub.get_subtitle(outputs["chunks"])
-        txt = txt_sub.get_subtitle(outputs["chunks"])
-        write_file(file_out + ".srt", srt)
-        write_file(file_out + ".vtt", vtt)
-        write_file(file_out + ".txt", txt)
-        files_out += [file_out + ".srt", file_out + ".vtt", file_out + ".txt"]
     progress(1, desc="Completed!")

     )
     return pipe, model  # Return both pipe and model for later GPU switch
 def move_to_gpu(model):
+    if torch.cuda.is_available():
+        device = "cuda:0"
+        torch_dtype = torch.float16  # Use float16 precision on GPU
+        model.to(device, dtype=torch_dtype)
+    elif platform == "darwin":
+        device = "mps"
+        model.to(device)
+    else:
+        device = "cpu"
     return device
+@spaces.GPU
 def transcribe_webui_simple_progress(modelName, languageName, urlData, multipleFiles, microphoneData, task, flash,
                                      chunk_length_s, batch_size, progress=gr.Progress()):
     global last_model
     last_model = modelName
+    # Now move the model to GPU after the pipe is created, within the function's context
+    with torch.inference_mode():
+        device = move_to_gpu(pipe.model)
+        # Update pipe's device
+        pipe.device = torch.device(device)
+        pipe.model.to(pipe.device)
+        srt_sub = Subtitle("srt")
+        vtt_sub = Subtitle("vtt")
+        txt_sub = Subtitle("txt")
+        files = []
+        if multipleFiles:
+            files += multipleFiles
+        if urlData:
+            files.append(urlData)
+        if microphoneData:
+            files.append(microphoneData)
+        logging.info(files)
+        generate_kwargs = {}
+        if languageName != "Automatic Detection" and modelName.endswith(".en") == False:
+            generate_kwargs["language"] = languageName
+        if modelName.endswith(".en") == False:
+            generate_kwargs["task"] = task
+        files_out = []
+        for file in progress.tqdm(files, desc="Working..."):
+            start_time = time.time()
+            logging.info(file)
+            outputs = pipe(
+                file,
+                chunk_length_s=chunk_length_s,  # 30
+                batch_size=batch_size,  # 24
+                generate_kwargs=generate_kwargs,
+                return_timestamps=True,
+            )
+            logging.debug(outputs)
+            logging.info(print(f"transcribe: {time.time() - start_time} sec."))
+            file_out = file.split('/')[-1]
+            srt = srt_sub.get_subtitle(outputs["chunks"])
+            vtt = vtt_sub.get_subtitle(outputs["chunks"])
+            txt = txt_sub.get_subtitle(outputs["chunks"])
+            write_file(file_out + ".srt", srt)
+            write_file(file_out + ".vtt", vtt)
+            write_file(file_out + ".txt", txt)
+            files_out += [file_out + ".srt", file_out + ".vtt", file_out + ".txt"]
     progress(1, desc="Completed!")