Spaces:
Running
on
Zero
Running
on
Zero
Sync from GitHub repo
Browse files · This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.
- pyproject.toml +1 -1
- src/f5_tts/infer/SHARED.md +1 -0
- src/f5_tts/train/README.md +2 -0
- src/f5_tts/train/finetune_gradio.py +14 -10
pyproject.toml
CHANGED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4 |
|
5 |
[project]
|
6 |
name = "f5-tts"
|
7 |
-
version = "1.0.
|
8 |
description = "F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching"
|
9 |
readme = "README.md"
|
10 |
license = {text = "MIT License"}
|
|
|
4 |
|
5 |
[project]
|
6 |
name = "f5-tts"
|
7 |
+
version = "1.0.7"
|
8 |
description = "F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching"
|
9 |
readme = "README.md"
|
10 |
license = {text = "MIT License"}
|
src/f5_tts/infer/SHARED.md
CHANGED
@@ -44,6 +44,7 @@
|
|
44 |
|
45 |
```bash
|
46 |
Model: hf://SWivid/F5-TTS/F5TTS_v1_Base/model_1250000.safetensors
|
|
|
47 |
Vocab: hf://SWivid/F5-TTS/F5TTS_v1_Base/vocab.txt
|
48 |
Config: {"dim": 1024, "depth": 22, "heads": 16, "ff_mult": 2, "text_dim": 512, "conv_layers": 4}
|
49 |
```
|
|
|
44 |
|
45 |
```bash
|
46 |
Model: hf://SWivid/F5-TTS/F5TTS_v1_Base/model_1250000.safetensors
|
47 |
+
# A Variant Model: hf://SWivid/F5-TTS/F5TTS_v1_Base_no_zero_init/model_1250000.safetensors
|
48 |
Vocab: hf://SWivid/F5-TTS/F5TTS_v1_Base/vocab.txt
|
49 |
Config: {"dim": 1024, "depth": 22, "heads": 16, "ff_mult": 2, "text_dim": 512, "conv_layers": 4}
|
50 |
```
|
src/f5_tts/train/README.md
CHANGED
@@ -51,6 +51,8 @@ Discussion board for Finetuning [#57](https://github.com/SWivid/F5-TTS/discussio
|
|
51 |
|
52 |
Gradio UI training/finetuning with `src/f5_tts/train/finetune_gradio.py` see [#143](https://github.com/SWivid/F5-TTS/discussions/143).
|
53 |
|
|
|
|
|
54 |
If you use TensorBoard as the logger, install it first with `pip install tensorboard`.
|
55 |
|
56 |
<ins>Setting `use_ema = True` might be harmful for early-stage finetuned checkpoints</ins> (which have gone through only a few updates, so the EMA weights are still dominated by the pretrained ones). Try turning it off with the finetune Gradio option or `load_model(..., use_ema=False)` and see if that gives better results.
|
|
|
51 |
|
52 |
Gradio UI training/finetuning with `src/f5_tts/train/finetune_gradio.py` see [#143](https://github.com/SWivid/F5-TTS/discussions/143).
|
53 |
|
54 |
+
If you want to finetune with a variant version, e.g. *F5TTS_v1_Base_no_zero_init*, manually download the pretrained checkpoint from the model weight repository and fill in its path accordingly in the web interface.
|
55 |
+
|
56 |
If you use TensorBoard as the logger, install it first with `pip install tensorboard`.
|
57 |
|
58 |
<ins>Setting `use_ema = True` might be harmful for early-stage finetuned checkpoints</ins> (which have gone through only a few updates, so the EMA weights are still dominated by the pretrained ones). Try turning it off with the finetune Gradio option or `load_model(..., use_ema=False)` and see if that gives better results.
|
src/f5_tts/train/finetune_gradio.py
CHANGED
@@ -965,21 +965,23 @@ def calculate_train(
|
|
965 |
)
|
966 |
|
967 |
|
968 |
-
def
|
969 |
try:
|
970 |
checkpoint = torch.load(checkpoint_path, weights_only=True)
|
971 |
print("Original Checkpoint Keys:", checkpoint.keys())
|
972 |
|
973 |
-
|
974 |
-
|
975 |
-
|
|
|
|
|
976 |
|
977 |
if safetensors:
|
978 |
new_checkpoint_path = new_checkpoint_path.replace(".pt", ".safetensors")
|
979 |
-
save_file(
|
980 |
else:
|
981 |
new_checkpoint_path = new_checkpoint_path.replace(".safetensors", ".pt")
|
982 |
-
new_checkpoint = {"ema_model_state_dict":
|
983 |
torch.save(new_checkpoint, new_checkpoint_path)
|
984 |
|
985 |
return f"New checkpoint saved at: {new_checkpoint_path}"
|
@@ -1849,12 +1851,14 @@ Reduce the Base model size from 5GB to 1.3GB. The new checkpoint file prunes out
|
|
1849 |
```""")
|
1850 |
txt_path_checkpoint = gr.Textbox(label="Path to Checkpoint:")
|
1851 |
txt_path_checkpoint_small = gr.Textbox(label="Path to Output:")
|
1852 |
-
|
|
|
|
|
1853 |
txt_info_reduse = gr.Textbox(label="Info", value="")
|
1854 |
-
reduse_button = gr.Button("
|
1855 |
reduse_button.click(
|
1856 |
-
fn=
|
1857 |
-
inputs=[txt_path_checkpoint, txt_path_checkpoint_small, ch_safetensors],
|
1858 |
outputs=[txt_info_reduse],
|
1859 |
)
|
1860 |
|
|
|
965 |
)
|
966 |
|
967 |
|
968 |
+
def prune_checkpoint(checkpoint_path: str, new_checkpoint_path: str, save_ema: bool, safetensors: bool) -> str:
|
969 |
try:
|
970 |
checkpoint = torch.load(checkpoint_path, weights_only=True)
|
971 |
print("Original Checkpoint Keys:", checkpoint.keys())
|
972 |
|
973 |
+
to_retain = "ema_model_state_dict" if save_ema else "model_state_dict"
|
974 |
+
try:
|
975 |
+
model_state_dict_to_retain = checkpoint[to_retain]
|
976 |
+
except KeyError:
|
977 |
+
return f"{to_retain} not found in the checkpoint."
|
978 |
|
979 |
if safetensors:
|
980 |
new_checkpoint_path = new_checkpoint_path.replace(".pt", ".safetensors")
|
981 |
+
save_file(model_state_dict_to_retain, new_checkpoint_path)
|
982 |
else:
|
983 |
new_checkpoint_path = new_checkpoint_path.replace(".safetensors", ".pt")
|
984 |
+
new_checkpoint = {"ema_model_state_dict": model_state_dict_to_retain}
|
985 |
torch.save(new_checkpoint, new_checkpoint_path)
|
986 |
|
987 |
return f"New checkpoint saved at: {new_checkpoint_path}"
|
|
|
1851 |
```""")
|
1852 |
txt_path_checkpoint = gr.Textbox(label="Path to Checkpoint:")
|
1853 |
txt_path_checkpoint_small = gr.Textbox(label="Path to Output:")
|
1854 |
+
with gr.Row():
|
1855 |
+
ch_save_ema = gr.Checkbox(label="Save EMA checkpoint", value=True)
|
1856 |
+
ch_safetensors = gr.Checkbox(label="Save with safetensors format", value=True)
|
1857 |
txt_info_reduse = gr.Textbox(label="Info", value="")
|
1858 |
+
reduse_button = gr.Button("Prune")
|
1859 |
reduse_button.click(
|
1860 |
+
fn=prune_checkpoint,
|
1861 |
+
inputs=[txt_path_checkpoint, txt_path_checkpoint_small, ch_save_ema, ch_safetensors],
|
1862 |
outputs=[txt_info_reduse],
|
1863 |
)
|
1864 |
|