soiz1 committed
Commit abc677b · verified · 1 Parent(s): 13eadf4

Update tabs/train/train.py

Files changed (1):
  1. tabs/train/train.py +1008 -1008
tabs/train/train.py CHANGED
@@ -733,8 +733,8 @@ def train_tab():
         terms_checkbox = gr.Checkbox(
             label=i18n("I agree to the terms of use"),
             info=i18n(
                 "Please ensure compliance with the terms and conditions detailed in [this document](https://github.com/IAHispano/Applio/blob/main/TERMS_OF_USE.md) before proceeding with your training."
             ),
-            value=False,
+            value=True,
             interactive=True,
         )