Spaces:

mrfakename
/

E2-F5-TTS

Running on Zero

App Files Files Community

mrfakename committed on Oct 30, 2024

Commit

af4e5fa

verified ·

1 Parent(s): c6eda07

Sync from GitHub repo

Browse files

This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.

Files changed (1) hide show

src/f5_tts/train/finetune_gradio.py +34 -13

src/f5_tts/train/finetune_gradio.py CHANGED Viewed

@@ -737,19 +737,36 @@ def format_seconds_to_hms(seconds):
     return "{:02d}:{:02d}:{:02d}".format(hours, minutes, int(seconds))
-def get_correct_audio_path(audio_input, base_path="wavs"):
-    # Case 1: If it's a full path, use it directly
-    if os.path.isabs(audio_input):
-        file_audio = audio_input
-    # Case 2: If it has .wav but is not a full path
-    elif audio_input.endswith(".wav") and not os.path.isabs(audio_input):
-        file_audio = os.path.join(base_path, audio_input)
-    # Case 3: If only the name (no .wav and not a full path)
-    elif not audio_input.endswith(".wav") and not os.path.isabs(audio_input):
-        file_audio = os.path.join(base_path, audio_input + ".wav")
     return file_audio
@@ -795,8 +812,8 @@ def create_metadata(name_project, ch_tokenizer, progress=gr.Progress()):
             print(f"Error processing {file_audio}: {e}")
             continue
-        if duration < 1 and duration > 25:
-            error_files.append([file_audio, "duration < 1 and > 25 "])
             continue
         if len(text) < 4:
             error_files.append([file_audio, "very small text len 3"])
@@ -1463,7 +1480,11 @@ Skip this step if you have your dataset, raw.arrow , duraction.json and vocab.tx
             gr.Markdown(
                 """```plaintext
-     place all your wavs folder and your metadata.csv file in {your name project}
      my_speak/
      │
      ├── wavs/

     return "{:02d}:{:02d}:{:02d}".format(hours, minutes, int(seconds))
def get_correct_audio_path(
    audio_input,
    base_path="wavs",
    supported_formats=("wav", "mp3", "aac", "flac", "m4a", "alac", "ogg", "aiff", "wma", "amr"),
):
    """Resolve a metadata audio entry to a concrete file path.

    Args:
        audio_input: Audio reference; either an absolute path or a name
            relative to ``base_path``, with or without a file extension.
        base_path: Directory that relative entries are joined onto.
            Ignored when ``audio_input`` is absolute.
        supported_formats: Extensions (without the leading dot) that are
            recognised, listed in probing priority order.

    Returns:
        The resolved path. When ``audio_input`` already ends with a supported
        extension (compared case-insensitively) it is used unchanged.
        Otherwise each supported extension is appended in turn and the first
        candidate that exists on disk is returned; if none exists, the
        candidate built from the first supported extension is returned so
        the caller can report a meaningful missing-file path.
    """
    is_absolute = os.path.isabs(audio_input)

    def resolve(name):
        # Absolute entries are used as given; everything else lives under base_path.
        return name if is_absolute else os.path.join(base_path, name)

    # str.endswith accepts a tuple, so one call covers every extension.
    # Lower-casing makes "clip.WAV" count as already having an extension.
    suffixes = tuple(f".{ext.lower()}" for ext in supported_formats)
    if audio_input.lower().endswith(suffixes):
        return resolve(audio_input)

    # No recognised extension: probe the disk in priority order.
    for ext in supported_formats:
        candidate = resolve(f"{audio_input}.{ext}")
        if os.path.exists(candidate):
            return candidate

    # Nothing found: fall back to the preferred format rather than returning
    # None, so callers always receive a path-like string.
    return resolve(f"{audio_input}.{supported_formats[0]}")
             print(f"Error processing {file_audio}: {e}")
             continue
+        if duration < 1 or duration > 25:
+            error_files.append([file_audio, "duration < 1 or > 25 "])
             continue
         if len(text) < 4:
             error_files.append([file_audio, "very small text len 3"])
             gr.Markdown(
                 """```plaintext
+     place all your wavs folder and your metadata.csv file in {your name project}
+     suport format for audio "wav", "mp3", "aac", "flac", "m4a", "alac", "ogg", "aiff", "wma", "amr"
+     example wav format
      my_speak/
      │
      ├── wavs/