Spaces:

LAP-DEV
/

Demo

Running

App Files Community

LAP-DEV committed on Nov 18, 2024

Commit

fe7eaa5

verified ·

1 Parent(s): 8e71727

Update modules/whisper/whisper_base.py

Browse files

Files changed (1) hide show

modules/whisper/whisper_base.py +87 -87

modules/whisper/whisper_base.py CHANGED Viewed

@@ -208,97 +208,97 @@ class WhisperBase(ABC):
         result_file_path:
             Output file path to return to gr.Files()
         """
-        try:
-            if input_folder_path:
-                files = get_media_files(input_folder_path)
-            if isinstance(files, str):
-                files = [files]
-            if files and isinstance(files[0], gr.utils.NamedString):
-                files = [file.name for file in files]
-            ## Load model to detect language
-            model = whisper.load_model("base")
-            files_info = {}
-            files_to_download = {}
-            time_start = datetime.now()
-            for file in files:
-                ## Detect language
-                #params = WhisperParameters.as_value(*whisper_params)
-                #model = whisper.load_model(params.model_size)
-                mel = whisper.log_mel_spectrogram(whisper.pad_or_trim(whisper.load_audio(file))).to(model.device)
-                _, probs = model.detect_language(mel)
-                file_language = ""
-                file_lang_probs = ""
-                for key,value in whisper.tokenizer.LANGUAGES.items():
-                    if key == str(max(probs, key=probs.get)):
-                        file_language = value.capitalize()
-                        max_k, max_v = max(probs.items(), key=lambda x: x[1])
-                        file_lang_probs = str(round(max_v,2))
-                        break
-                transcribed_segments, time_for_task = self.run(
-                    file,
-                    progress,
-                    add_timestamp,
-                    *whisper_params,
-                )
-                file_name, file_ext = os.path.splitext(os.path.basename(file))
-                subtitle, file_path = self.generate_and_write_file(
-                    file_name=file_name,
-                    transcribed_segments=transcribed_segments,
-                    add_timestamp=add_timestamp,
-                    file_format=file_format,
-                    output_dir=self.output_dir
-                )
-                files_info[file_name] = {"subtitle": subtitle, "time_for_task": time_for_task, "path": file_path, "lang": file_language, "lang_prob": file_lang_probs, "input_source_file": (file_name+file_ext)}
-                ## Add output file as txt
-                file_name, file_ext = os.path.splitext(os.path.basename(file))
-                subtitle, file_path = self.generate_and_write_file(
-                    file_name=file_name,
-                    transcribed_segments=transcribed_segments,
-                    add_timestamp=add_timestamp,
-                    file_format="txt",
-                    output_dir=self.output_dir
-                )
-                files_to_download[file_name+"_txt"] = {"path": file_path}
-                ## Add output file as srt
-                file_name, file_ext = os.path.splitext(os.path.basename(file))
-                subtitle, file_path = self.generate_and_write_file(
-                    file_name=file_name,
-                    transcribed_segments=transcribed_segments,
-                    add_timestamp=add_timestamp,
-                    file_format="srt",
-                    output_dir=self.output_dir
-                )
-                files_to_download[file_name+"_srt"] = {"path": file_path}
-            total_result = ''
-            total_info = ''
-            total_time = 0
-            for file_name, info in files_info.items():
-                total_result += f'{info["subtitle"]}'
-                total_time += info["time_for_task"]
-                total_info += f'Input file: {info["input_source_file"]}\nLanguage prediction: {info["lang"]} with probability {info["lang_prob"]}\n'
-            #total_info += f"\nTranscription duration: {self.format_time(total_time)}"
-            time_end = datetime.now()
-            total_info += f"\nTranscription duration: {self.format_time((time_end-time_start).total_seconds())}"
-            result_str = total_result
-            result_file_path = [info['path'] for info in files_to_download.values()]
-            return [result_str,result_file_path,total_info]
-        except Exception as e:
-            print(f"Error transcribing file: {e}")
-        finally:
-            self.release_cuda_memory()
     def transcribe_mic(self,
                        mic_audio: str,

         result_file_path:
             Output file path to return to gr.Files()
         """
+        #try:
+        if input_folder_path:
+            files = get_media_files(input_folder_path)
+        if isinstance(files, str):
+            files = [files]
+        if files and isinstance(files[0], gr.utils.NamedString):
+            files = [file.name for file in files]
+        ## Load model to detect language
+        model = whisper.load_model("base")
+        files_info = {}
+        files_to_download = {}
+        time_start = datetime.now()
+        for file in files:
+            ## Detect language
+            #params = WhisperParameters.as_value(*whisper_params)
+            #model = whisper.load_model(params.model_size)
+            mel = whisper.log_mel_spectrogram(whisper.pad_or_trim(whisper.load_audio(file))).to(model.device)
+            _, probs = model.detect_language(mel)
+            file_language = ""
+            file_lang_probs = ""
+            for key,value in whisper.tokenizer.LANGUAGES.items():
+                if key == str(max(probs, key=probs.get)):
+                    file_language = value.capitalize()
+                    max_k, max_v = max(probs.items(), key=lambda x: x[1])
+                    file_lang_probs = str(round(max_v,2))
+                    break
+            transcribed_segments, time_for_task = self.run(
+                file,
+                progress,
+                add_timestamp,
+                *whisper_params,
+            )
+            file_name, file_ext = os.path.splitext(os.path.basename(file))
+            subtitle, file_path = self.generate_and_write_file(
+                file_name=file_name,
+                transcribed_segments=transcribed_segments,
+                add_timestamp=add_timestamp,
+                file_format=file_format,
+                output_dir=self.output_dir
+            )
+            files_info[file_name] = {"subtitle": subtitle, "time_for_task": time_for_task, "path": file_path, "lang": file_language, "lang_prob": file_lang_probs, "input_source_file": (file_name+file_ext)}
+            ## Add output file as txt
+            file_name, file_ext = os.path.splitext(os.path.basename(file))
+            subtitle, file_path = self.generate_and_write_file(
+                file_name=file_name,
+                transcribed_segments=transcribed_segments,
+                add_timestamp=add_timestamp,
+                file_format="txt",
+                output_dir=self.output_dir
+            )
+            files_to_download[file_name+"_txt"] = {"path": file_path}
+            ## Add output file as srt
+            file_name, file_ext = os.path.splitext(os.path.basename(file))
+            subtitle, file_path = self.generate_and_write_file(
+                file_name=file_name,
+                transcribed_segments=transcribed_segments,
+                add_timestamp=add_timestamp,
+                file_format="srt",
+                output_dir=self.output_dir
+            )
+            files_to_download[file_name+"_srt"] = {"path": file_path}
+        total_result = ''
+        total_info = ''
+        total_time = 0
+        for file_name, info in files_info.items():
+            total_result += f'{info["subtitle"]}'
+            total_time += info["time_for_task"]
+            total_info += f'Input file: {info["input_source_file"]}\nLanguage prediction: {info["lang"]} with probability {info["lang_prob"]}\n'
+        #total_info += f"\nTranscription duration: {self.format_time(total_time)}"
+        time_end = datetime.now()
+        total_info += f"\nTranscription duration: {self.format_time((time_end-time_start).total_seconds())}"
+        result_str = total_result
+        result_file_path = [info['path'] for info in files_to_download.values()]
+        return [result_str,result_file_path,total_info]
+        #except Exception as e:
+        #    print(f"Error transcribing file: {e}")
+        #finally:
+        #    self.release_cuda_memory()
     def transcribe_mic(self,
                        mic_audio: str,