Spaces:

LAP-DEV
/

Demo

Running

App Files Community

LAP-DEV committed on Nov 18, 2024

Commit

af50baa

verified ·

1 Parent(s): fe7eaa5

Update modules/whisper/whisper_base.py

Browse files

Files changed (1) hide show

modules/whisper/whisper_base.py +88 -88

modules/whisper/whisper_base.py CHANGED Viewed

@@ -208,97 +208,97 @@ class WhisperBase(ABC):
         result_file_path:
             Output file path to return to gr.Files()
         """
-        #try:
-        if input_folder_path:
-            files = get_media_files(input_folder_path)
-        if isinstance(files, str):
-            files = [files]
-        if files and isinstance(files[0], gr.utils.NamedString):
-            files = [file.name for file in files]
-        ## Load model to detect language
-        model = whisper.load_model("base")
-        files_info = {}
-        files_to_download = {}
-        time_start = datetime.now()
-        for file in files:
-            ## Detect language
-            #params = WhisperParameters.as_value(*whisper_params)
-            #model = whisper.load_model(params.model_size)
-            mel = whisper.log_mel_spectrogram(whisper.pad_or_trim(whisper.load_audio(file))).to(model.device)
-            _, probs = model.detect_language(mel)
-            file_language = ""
-            file_lang_probs = ""
-            for key,value in whisper.tokenizer.LANGUAGES.items():
-                if key == str(max(probs, key=probs.get)):
-                    file_language = value.capitalize()
-                    max_k, max_v = max(probs.items(), key=lambda x: x[1])
-                    file_lang_probs = str(round(max_v,2))
-                    break
-            transcribed_segments, time_for_task = self.run(
-                file,
-                progress,
-                add_timestamp,
-                *whisper_params,
-            )
-            file_name, file_ext = os.path.splitext(os.path.basename(file))
-            subtitle, file_path = self.generate_and_write_file(
-                file_name=file_name,
-                transcribed_segments=transcribed_segments,
-                add_timestamp=add_timestamp,
-                file_format=file_format,
-                output_dir=self.output_dir
-            )
-            files_info[file_name] = {"subtitle": subtitle, "time_for_task": time_for_task, "path": file_path, "lang": file_language, "lang_prob": file_lang_probs, "input_source_file": (file_name+file_ext)}
-            ## Add output file as txt
-            file_name, file_ext = os.path.splitext(os.path.basename(file))
-            subtitle, file_path = self.generate_and_write_file(
-                file_name=file_name,
-                transcribed_segments=transcribed_segments,
-                add_timestamp=add_timestamp,
-                file_format="txt",
-                output_dir=self.output_dir
-            )
-            files_to_download[file_name+"_txt"] = {"path": file_path}
-            ## Add output file as srt
-            file_name, file_ext = os.path.splitext(os.path.basename(file))
-            subtitle, file_path = self.generate_and_write_file(
-                file_name=file_name,
-                transcribed_segments=transcribed_segments,
-                add_timestamp=add_timestamp,
-                file_format="srt",
-                output_dir=self.output_dir
-            )
-            files_to_download[file_name+"_srt"] = {"path": file_path}
-        total_result = ''
-        total_info = ''
-        total_time = 0
-        for file_name, info in files_info.items():
-            total_result += f'{info["subtitle"]}'
-            total_time += info["time_for_task"]
-            total_info += f'Input file: {info["input_source_file"]}\nLanguage prediction: {info["lang"]} with probability {info["lang_prob"]}\n'
-        #total_info += f"\nTranscription duration: {self.format_time(total_time)}"
-        time_end = datetime.now()
-        total_info += f"\nTranscription duration: {self.format_time((time_end-time_start).total_seconds())}"
-        result_str = total_result
-        result_file_path = [info['path'] for info in files_to_download.values()]
-        return [result_str,result_file_path,total_info]
-        #except Exception as e:
-        #    print(f"Error transcribing file: {e}")
-        #finally:
-        #    self.release_cuda_memory()
     def transcribe_mic(self,
                        mic_audio: str,

         result_file_path:
             Output file path to return to gr.Files()
         """
+        try:
+            if input_folder_path:
+                files = get_media_files(input_folder_path)
+            if isinstance(files, str):
+                files = [files]
+            if files and isinstance(files[0], gr.utils.NamedString):
+                files = [file.name for file in files]
+            ## Load model to detect language
+            model = whisper.load_model("base")
+            files_info = {}
+            files_to_download = {}
+            time_start = datetime.now()
+            for file in files:
+                ## Detect language
+                #params = WhisperParameters.as_value(*whisper_params)
+                #model = whisper.load_model(params.model_size)
+                mel = whisper.log_mel_spectrogram(whisper.pad_or_trim(whisper.load_audio(file))).to(model.device)
+                _, probs = model.detect_language(mel)
+                file_language = ""
+                file_lang_probs = ""
+                for key,value in whisper.tokenizer.LANGUAGES.items():
+                    if key == str(max(probs, key=probs.get)):
+                        file_language = value.capitalize()
+                        max_k, max_v = max(probs.items(), key=lambda x: x[1])
+                        file_lang_probs = str(round(max_v,2))
+                        break
+                transcribed_segments, time_for_task = self.run(
+                    file,
+                    progress,
+                    add_timestamp,
+                    *whisper_params,
+                )
+                file_name, file_ext = os.path.splitext(os.path.basename(file))
+                subtitle, file_path = self.generate_and_write_file(
+                    file_name=file_name,
+                    transcribed_segments=transcribed_segments,
+                    add_timestamp=add_timestamp,
+                    file_format=file_format,
+                    output_dir=self.output_dir
+                )
+                files_info[file_name] = {"subtitle": subtitle, "time_for_task": time_for_task, "path": file_path, "lang": file_language, "lang_prob": file_lang_probs, "input_source_file": (file_name+file_ext)}
+                ## Add output file as txt
+                file_name, file_ext = os.path.splitext(os.path.basename(file))
+                subtitle, file_path = self.generate_and_write_file(
+                    file_name=file_name,
+                    transcribed_segments=transcribed_segments,
+                    add_timestamp=add_timestamp,
+                    file_format="txt",
+                    output_dir=self.output_dir
+                )
+                files_to_download[file_name+"_txt"] = {"path": file_path}
+                ## Add output file as srt
+                file_name, file_ext = os.path.splitext(os.path.basename(file))
+                subtitle, file_path = self.generate_and_write_file(
+                    file_name=file_name,
+                    transcribed_segments=transcribed_segments,
+                    add_timestamp=add_timestamp,
+                    file_format="srt",
+                    output_dir=self.output_dir
+                )
+                files_to_download[file_name+"_srt"] = {"path": file_path}
+            total_result = ''
+            total_info = ''
+            total_time = 0
+            for file_name, info in files_info.items():
+                total_result += f'{info["subtitle"]}'
+                total_time += info["time_for_task"]
+                total_info += f'Input file: {info["input_source_file"]}\nLanguage prediction: {info["lang"]} with probability {info["lang_prob"]}\n'
+            #total_info += f"\nTranscription duration: {self.format_time(total_time)}"
+            time_end = datetime.now()
+            total_info += f"\nTranscription duration: {self.format_time((time_end-time_start).total_seconds())}"
+            result_str = total_result
+            result_file_path = [info['path'] for info in files_to_download.values()]
+            return [result_str,result_file_path,total_info]
+        except Exception as e:
+            print(f"Error transcribing file: {e}")
+        finally:
+            self.release_cuda_memory()
     def transcribe_mic(self,
                        mic_audio: str,