Spaces:
Running
Running
Sync from GitHub repo
Browse files
This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.
src/f5_tts/train/finetune_gradio.py
CHANGED
|
@@ -147,6 +147,8 @@ def load_settings(project_name):
|
|
| 147 |
|
| 148 |
with open(file_setting, "r") as f:
|
| 149 |
settings = json.load(f)
|
|
|
|
|
|
|
| 150 |
return (
|
| 151 |
settings["exp_name"],
|
| 152 |
settings["learning_rate"],
|
|
@@ -735,6 +737,22 @@ def format_seconds_to_hms(seconds):
|
|
| 735 |
return "{:02d}:{:02d}:{:02d}".format(hours, minutes, int(seconds))
|
| 736 |
|
| 737 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 738 |
def create_metadata(name_project, ch_tokenizer, progress=gr.Progress()):
|
| 739 |
path_project = os.path.join(path_data, name_project)
|
| 740 |
path_project_wavs = os.path.join(path_project, "wavs")
|
|
@@ -764,7 +782,7 @@ def create_metadata(name_project, ch_tokenizer, progress=gr.Progress()):
|
|
| 764 |
continue
|
| 765 |
name_audio, text = sp_line[:2]
|
| 766 |
|
| 767 |
-
file_audio =
|
| 768 |
|
| 769 |
if not os.path.isfile(file_audio):
|
| 770 |
error_files.append([file_audio, "error path"])
|
|
@@ -1363,6 +1381,10 @@ for tutorial and updates check here (https://github.com/SWivid/F5-TTS/discussion
|
|
| 1363 |
|
| 1364 |
with gr.Tabs():
|
| 1365 |
with gr.TabItem("transcribe Data"):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1366 |
ch_manual = gr.Checkbox(label="audio from path", value=False)
|
| 1367 |
|
| 1368 |
mark_info_transcribe = gr.Markdown(
|
|
@@ -1435,6 +1457,10 @@ Using the extended model, you can fine-tune to a new language that is missing sy
|
|
| 1435 |
)
|
| 1436 |
|
| 1437 |
with gr.TabItem("prepare Data"):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1438 |
gr.Markdown(
|
| 1439 |
"""```plaintext
|
| 1440 |
place all your wavs folder and your metadata.csv file in {your name project}
|
|
@@ -1447,10 +1473,10 @@ Using the extended model, you can fine-tune to a new language that is missing sy
|
|
| 1447 |
│
|
| 1448 |
└── metadata.csv
|
| 1449 |
|
| 1450 |
-
file format metadata.csv
|
| 1451 |
|
| 1452 |
-
audio1|text1
|
| 1453 |
-
audio2|text1
|
| 1454 |
...
|
| 1455 |
|
| 1456 |
```"""
|
|
|
|
| 147 |
|
| 148 |
with open(file_setting, "r") as f:
|
| 149 |
settings = json.load(f)
|
| 150 |
+
if "logger" not in settings:
|
| 151 |
+
settings["logger"] = "wandb"
|
| 152 |
return (
|
| 153 |
settings["exp_name"],
|
| 154 |
settings["learning_rate"],
|
|
|
|
| 737 |
return "{:02d}:{:02d}:{:02d}".format(hours, minutes, int(seconds))
|
| 738 |
|
| 739 |
|
| 740 |
+
def get_correct_audio_path(audio_input, base_path="wavs"):
    """Resolve an audio reference to the path of its wav file.

    Args:
        audio_input: Audio reference in one of three forms: an absolute
            path, a relative file name that already ends in ".wav", or a
            bare name without the extension.
        base_path: Directory that non-absolute references are joined onto.

    Returns:
        ``audio_input`` unchanged when it is an absolute path; otherwise
        ``base_path`` joined with the reference, with ".wav" appended when
        the reference does not already end in it. The extension check is
        case-insensitive.
    """
    # Case 1: an absolute path is used directly, whatever its extension.
    if os.path.isabs(audio_input):
        return audio_input

    # Case 2: a relative name that already carries the extension.
    # Compared case-insensitively so "clip.WAV" is not turned into
    # "clip.WAV.wav".
    if audio_input.lower().endswith(".wav"):
        return os.path.join(base_path, audio_input)

    # Case 3: a bare name; add the extension before joining.
    return os.path.join(base_path, audio_input + ".wav")
|
| 754 |
+
|
| 755 |
+
|
| 756 |
def create_metadata(name_project, ch_tokenizer, progress=gr.Progress()):
|
| 757 |
path_project = os.path.join(path_data, name_project)
|
| 758 |
path_project_wavs = os.path.join(path_project, "wavs")
|
|
|
|
| 782 |
continue
|
| 783 |
name_audio, text = sp_line[:2]
|
| 784 |
|
| 785 |
+
file_audio = get_correct_audio_path(name_audio, path_project_wavs)
|
| 786 |
|
| 787 |
if not os.path.isfile(file_audio):
|
| 788 |
error_files.append([file_audio, "error path"])
|
|
|
|
| 1381 |
|
| 1382 |
with gr.Tabs():
|
| 1383 |
with gr.TabItem("transcribe Data"):
|
| 1384 |
+
gr.Markdown("""```plaintext
|
| 1385 |
+
Skip this step if you have your dataset, metadata.csv, and a folder wavs with all the audio files.
|
| 1386 |
+
```""")
|
| 1387 |
+
|
| 1388 |
ch_manual = gr.Checkbox(label="audio from path", value=False)
|
| 1389 |
|
| 1390 |
mark_info_transcribe = gr.Markdown(
|
|
|
|
| 1457 |
)
|
| 1458 |
|
| 1459 |
with gr.TabItem("prepare Data"):
|
| 1460 |
+
gr.Markdown("""```plaintext
|
| 1461 |
+
Skip this step if you have your dataset, raw.arrow , duraction.json and vocab.txt
|
| 1462 |
+
```""")
|
| 1463 |
+
|
| 1464 |
gr.Markdown(
|
| 1465 |
"""```plaintext
|
| 1466 |
place all your wavs folder and your metadata.csv file in {your name project}
|
|
|
|
| 1473 |
│
|
| 1474 |
└── metadata.csv
|
| 1475 |
|
| 1476 |
+
file format metadata.csv
|
| 1477 |
|
| 1478 |
+
audio1|text1 or audio1.wav|text1 or your_path/audio1.wav|text1
|
| 1479 |
+
audio2|text1 or audio2.wav|text1 or your_path/audio1.wav|text1
|
| 1480 |
...
|
| 1481 |
|
| 1482 |
```"""
|