r3gm committed on
Commit
bf05f28
·
verified ·
1 Parent(s): 9621ee3

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. app.py +164 -53
  3. pre-requirements.txt +2 -1
  4. requirements.txt +5 -2
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: ⚡
4
  colorFrom: gray
5
  colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 5.34.2
8
  app_file: app.py
9
  license: mit
10
  pinned: true
 
4
  colorFrom: gray
5
  colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 5.43.1
8
  app_file: app.py
9
  license: mit
10
  pinned: true
app.py CHANGED
@@ -6,7 +6,7 @@ import random
6
  import logging
7
  import time
8
  import soundfile as sf
9
- from infer_rvc_python.main import download_manager
10
  import zipfile
11
  import edge_tts
12
  import asyncio
@@ -21,14 +21,53 @@ import numpy as np
21
  import urllib.request
22
  import shutil
23
  import threading
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
26
 
27
  converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  title = "<center><strong><font size='7'>RVC⚡ZERO</font></strong></center>"
30
- description = "This demo is provided for educational and research purposes only. The authors and contributors of this project do not endorse or encourage any misuse or unethical use of this software. Any use of this software for purposes other than those intended is solely at the user's own risk. The authors and contributors shall not be held responsible for any damages or liabilities arising from the use of this demo inappropriately."
31
- theme = "aliabid94/new-theme"
 
32
 
33
  PITCH_ALGO_OPT = [
34
  "pm",
@@ -39,6 +78,26 @@ PITCH_ALGO_OPT = [
39
  ]
40
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def find_files(directory):
43
  file_paths = []
44
  for filename in os.listdir(directory):
@@ -106,19 +165,23 @@ def find_my_model(a_, b_):
106
  return model, index
107
 
108
 
109
- def get_file_size(url):
110
-
111
  if "huggingface" not in url:
112
  raise ValueError("Only downloads from Hugging Face are allowed")
113
 
114
  try:
115
- with urllib.request.urlopen(url) as response:
116
- info = response.info()
117
- content_length = info.get("Content-Length")
 
 
 
118
 
119
  file_size = int(content_length)
120
- if file_size > 500000000:
121
- raise ValueError("The file is too large. You can only download files up to 500 MB in size.")
 
 
122
 
123
  except Exception as e:
124
  raise e
@@ -130,7 +193,7 @@ def clear_files(directory):
130
  shutil.rmtree(directory)
131
 
132
 
133
- def get_my_model(url_data):
134
 
135
  if not url_data:
136
  return None, None
@@ -147,12 +210,9 @@ def get_my_model(url_data):
147
  os.makedirs(directory, exist_ok=True)
148
 
149
  try:
150
- get_file_size(a_)
151
- if b_:
152
- get_file_size(b_)
153
-
154
  valid_url = [a_] if not b_ else [a_, b_]
155
  for link in valid_url:
 
156
  download_manager(
157
  url=link,
158
  path=directory,
@@ -194,13 +254,13 @@ def get_my_model(url_data):
194
  t.start()
195
 
196
 
197
- def add_audio_effects(audio_list):
198
  print("Audio effects")
199
 
200
  result = []
201
  for audio_path in audio_list:
202
  try:
203
- output_path = f'{os.path.splitext(audio_path)[0]}_effects.wav'
204
 
205
  # Initialize audio effects plugins
206
  board = Pedalboard(
@@ -211,13 +271,23 @@ def add_audio_effects(audio_list):
211
  ]
212
  )
213
 
 
 
 
214
  with AudioFile(audio_path) as f:
215
- with AudioFile(output_path, 'w', f.samplerate, f.num_channels) as o:
216
- # Read one second of audio at a time, until the file is empty:
217
  while f.tell() < f.frames:
218
  chunk = f.read(int(f.samplerate))
219
  effected = board(chunk, f.samplerate, reset=False)
220
  o.write(effected)
 
 
 
 
 
 
 
 
221
  result.append(output_path)
222
  except Exception as e:
223
  traceback.print_exc()
@@ -227,13 +297,13 @@ def add_audio_effects(audio_list):
227
  return result
228
 
229
 
230
- def apply_noisereduce(audio_list):
231
  # https://github.com/sa-if/Audio-Denoiser
232
  print("Noice reduce")
233
 
234
  result = []
235
  for audio_path in audio_list:
236
- out_path = f'{os.path.splitext(audio_path)[0]}_noisereduce.wav'
237
 
238
  try:
239
  # Load audio file
@@ -254,7 +324,7 @@ def apply_noisereduce(audio_list):
254
  )
255
 
256
  # Save reduced audio to file
257
- reduced_audio.export(out_path, format="wav")
258
  result.append(out_path)
259
 
260
  except Exception as e:
@@ -266,13 +336,17 @@ def apply_noisereduce(audio_list):
266
 
267
 
268
  @spaces.GPU()
269
- def convert_now(audio_files, random_tag, converter):
270
- return converter(
271
- audio_files,
272
- random_tag,
273
- overwrite=False,
274
- parallel_workers=8
275
- )
 
 
 
 
276
 
277
 
278
  def run(
@@ -287,6 +361,8 @@ def run(
287
  c_b_p,
288
  active_noise_reduce,
289
  audio_effects,
 
 
290
  ):
291
  if not audio_files:
292
  raise ValueError("The audio pls")
@@ -316,17 +392,17 @@ def run(
316
  respiration_median_filtering=r_m_f,
317
  envelope_ratio=e_r,
318
  consonant_breath_protection=c_b_p,
319
- resample_sr=44100 if audio_files[0].endswith('.mp3') else 0,
320
  )
321
  time.sleep(0.1)
322
 
323
- result = convert_now(audio_files, random_tag, converter)
324
 
325
  if active_noise_reduce:
326
- result = apply_noisereduce(result)
327
 
328
  if audio_effects:
329
- result = add_audio_effects(result)
330
 
331
  return result
332
 
@@ -485,10 +561,30 @@ def sound_gui():
485
  type="filepath",
486
  # format="mp3",
487
  autoplay=True,
488
- visible=False,
 
 
 
 
 
 
 
 
 
 
 
 
 
489
  )
490
 
491
 
 
 
 
 
 
 
 
492
  def denoise_conf():
493
  return gr.Checkbox(
494
  False,
@@ -571,9 +667,18 @@ def show_components_down(value_active):
571
  visible=value_active
572
  )
573
 
 
 
 
 
 
 
 
 
 
574
 
575
  def get_gui(theme):
576
- with gr.Blocks(theme=theme, fill_width=True, fill_height=True, delete_cache=(3200, 3200)) as app:
577
  gr.Markdown(title)
578
  gr.Markdown(description)
579
 
@@ -608,7 +713,7 @@ def get_gui(theme):
608
 
609
  down_active_gui = down_active_conf()
610
  down_info = gr.Markdown(
611
- "Provide a link to a zip file, like this one: `https://huggingface.co/mrmocciai/Models/resolve/main/Genshin%20Impact/ayaka-v2.zip?download=true`, or separate links with a comma for the .pth and .index files, like this: `https://huggingface.co/sail-rvc/ayaka-jp/resolve/main/model.pth?download=true, https://huggingface.co/sail-rvc/ayaka-jp/resolve/main/model.index?download=true`",
612
  visible=False
613
  )
614
  with gr.Row():
@@ -634,17 +739,20 @@ def get_gui(theme):
634
  [model, indx]
635
  )
636
 
637
- algo = pitch_algo_conf()
638
- algo_lvl = pitch_lvl_conf()
639
- indx_inf = index_inf_conf()
640
- res_fc = respiration_filter_conf()
641
- envel_r = envelope_ratio_conf()
642
- const = consonant_protec_conf()
643
- with gr.Row():
644
- with gr.Column():
645
- with gr.Row():
646
- denoise_gui = denoise_conf()
647
- effects_gui = effects_conf()
 
 
 
648
  button_base = button_conf()
649
  output_base = output_conf()
650
 
@@ -662,6 +770,8 @@ def get_gui(theme):
662
  const,
663
  denoise_gui,
664
  effects_gui,
 
 
665
  ],
666
  outputs=[output_base],
667
  )
@@ -723,9 +833,11 @@ def get_gui(theme):
723
 
724
 
725
  if __name__ == "__main__":
726
-
727
- tts_voice_list = asyncio.new_event_loop().run_until_complete(edge_tts.list_voices())
728
- voices = sorted([f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list])
 
 
729
 
730
  app = get_gui(theme)
731
 
@@ -733,10 +845,9 @@ if __name__ == "__main__":
733
 
734
  app.launch(
735
  max_threads=40,
736
- share=False,
737
  show_error=True,
738
  quiet=False,
739
- debug=False,
740
  ssr_mode=False,
741
- allowed_paths=["./downloads/"],
742
  )
 
6
  import logging
7
  import time
8
  import soundfile as sf
9
+ from infer_rvc_python.main import download_manager, load_hu_bert, Config
10
  import zipfile
11
  import edge_tts
12
  import asyncio
 
21
  import urllib.request
22
  import shutil
23
  import threading
24
+ import argparse
25
+ import sys
26
+
27
# Command-line options: an opt-in public share link and the Gradio theme name.
parser = argparse.ArgumentParser(
    description="Run the app with optional sharing",
)
parser.add_argument("--share", action="store_true", help="Enable sharing mode")
parser.add_argument(
    "--theme",
    default="aliabid94/new-theme",
    type=str,
    help="Set the theme (default: aliabid94/new-theme)",
)
args = parser.parse_args()

# True when running inside Google Colab or when --share was passed explicitly.
IS_COLAB = args.share or "google.colab" in sys.modules
# Raw value of the SPACES_ZERO_GPU environment variable (None when unset).
IS_ZERO_GPU = os.environ.get("SPACES_ZERO_GPU")
43
 
44
  logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
45
 
46
  converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
47
+ converter.hu_bert_model = load_hu_bert(Config(only_cpu=False), converter.hubert_path)
48
+
49
# Example RVC model (weights + index). The same string is interpolated into the
# download help text shown in the UI, so both links must point to the same repo.
test_model = "https://huggingface.co/sail-rvc/Aldeano_Minecraft__RVC_V2_-_500_Epochs_/resolve/main/model.pth?download=true, https://huggingface.co/sail-rvc/Aldeano_Minecraft__RVC_V2_-_500_Epochs_/resolve/main/model.index?download=true"
test_names = ["model.pth", "model.index"]

for url, filename in zip(test_model.split(", "), test_names):
    try:
        download_manager(
            url=url,
            path=".",
            extension="",
            overwrite=False,
            progress=True,
        )
        if not os.path.isfile(filename):
            raise FileNotFoundError(filename)
    except Exception:
        # Best effort: a missing example model must not prevent startup,
        # but the failure should be visible in the logs.
        logging.getLogger(__name__).warning(
            "Could not fetch example file %s from %s", filename, url, exc_info=True
        )
        if not os.path.isfile(filename):
            # Create an empty placeholder so the path exists (presumably for
            # later references to these files -- verify against callers).
            # Never truncate a file that is already on disk.
            with open(filename, "wb"):
                pass
66
 
67
  title = "<center><strong><font size='7'>RVC⚡ZERO</font></strong></center>"
68
+ description = "This demo is provided for educational and research purposes only. The authors and contributors of this project do not endorse or encourage any misuse or unethical use of this software. Any use of this software for purposes other than those intended is solely at the user's own risk. The authors and contributors shall not be held responsible for any damages or liabilities arising from the use of this demo inappropriately." if IS_ZERO_GPU else ""
69
+ theme = args.theme
70
+ delete_cache_time = (3200, 3200) if IS_ZERO_GPU else (86400, 86400)
71
 
72
  PITCH_ALGO_OPT = [
73
  "pm",
 
78
  ]
79
 
80
 
81
async def get_voices_list(proxy=None):
    """Return the Edge TTS voices as a list of flat dicts, ordered by ShortName.

    Each entry keeps ``ShortName``, ``Gender`` and ``FriendlyName`` unchanged and
    joins the ``ContentCategories`` / ``VoicePersonalities`` lists found under
    ``VoiceTag`` into comma-separated strings.

    Args:
        proxy: Forwarded unchanged to ``edge_tts.list_voices``.
    """
    from edge_tts import list_voices

    catalogue = await list_voices(proxy=proxy)

    table = []
    for entry in sorted(catalogue, key=lambda item: item["ShortName"]):
        table.append(
            {
                "ShortName": entry["ShortName"],
                "Gender": entry["Gender"],
                "ContentCategories": ", ".join(entry["VoiceTag"]["ContentCategories"]),
                "VoicePersonalities": ", ".join(entry["VoiceTag"]["VoicePersonalities"]),
                "FriendlyName": entry["FriendlyName"],
            }
        )
    return table
99
+
100
+
101
  def find_files(directory):
102
  file_paths = []
103
  for filename in os.listdir(directory):
 
165
  return model, index
166
 
167
 
168
def ensure_valid_file(url):
    """Validate a model download URL and return the remote file size in bytes.

    The URL must use http(s) and its host must be ``huggingface.co`` or one of
    its subdomains. The size is taken from the ``Content-Length`` header of a
    HEAD request; when ``IS_ZERO_GPU`` is set, files above 900 MB are refused.

    Args:
        url: Download link supplied by the user.

    Returns:
        The size reported by the server, as an int.

    Raises:
        ValueError: If the URL is not a Hugging Face http(s) link, the server
            sends no ``Content-Length``, or the file exceeds the size limit.
        urllib.error.URLError: If the HEAD request fails.
    """
    from urllib.parse import urlparse

    # Check the parsed scheme and host instead of a substring of the whole URL:
    # "huggingface" may appear in the path of any site or in a file:// URL.
    parts = urlparse(url)
    host = (parts.hostname or "").lower()
    is_hf_host = host == "huggingface.co" or host.endswith(".huggingface.co")
    if parts.scheme not in ("http", "https") or not is_hf_host:
        raise ValueError("Only downloads from Hugging Face are allowed")

    request = urllib.request.Request(url, method="HEAD")
    # A timeout keeps an unresponsive server from blocking the caller forever.
    with urllib.request.urlopen(request, timeout=30) as response:
        content_length = response.headers.get("Content-Length")

    if content_length is None:
        raise ValueError("No Content-Length header found")

    file_size = int(content_length)
    if IS_ZERO_GPU and file_size > 900000000:
        raise ValueError("The file is too large. Max allowed is 900 MB.")

    return file_size
 
193
  shutil.rmtree(directory)
194
 
195
 
196
+ def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
197
 
198
  if not url_data:
199
  return None, None
 
210
  os.makedirs(directory, exist_ok=True)
211
 
212
  try:
 
 
 
 
213
  valid_url = [a_] if not b_ else [a_, b_]
214
  for link in valid_url:
215
+ ensure_valid_file(link)
216
  download_manager(
217
  url=link,
218
  path=directory,
 
254
  t.start()
255
 
256
 
257
+ def add_audio_effects(audio_list, type_output):
258
  print("Audio effects")
259
 
260
  result = []
261
  for audio_path in audio_list:
262
  try:
263
+ output_path = f'{os.path.splitext(audio_path)[0]}_effects.{type_output}'
264
 
265
  # Initialize audio effects plugins
266
  board = Pedalboard(
 
271
  ]
272
  )
273
 
274
+ # Temporary WAV to hold processed data before exporting
275
+ temp_wav = f'{os.path.splitext(audio_path)[0]}_temp.wav'
276
+
277
  with AudioFile(audio_path) as f:
278
+ with AudioFile(temp_wav, 'w', f.samplerate, f.num_channels) as o:
 
279
  while f.tell() < f.frames:
280
  chunk = f.read(int(f.samplerate))
281
  effected = board(chunk, f.samplerate, reset=False)
282
  o.write(effected)
283
+
284
+ # Convert with pydub to desired output type
285
+ audio_seg = AudioSegment.from_file(temp_wav, format=type_output)
286
+ audio_seg.export(output_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
287
+
288
+ # Clean up temp file
289
+ os.remove(temp_wav)
290
+
291
  result.append(output_path)
292
  except Exception as e:
293
  traceback.print_exc()
 
297
  return result
298
 
299
 
300
+ def apply_noisereduce(audio_list, type_output):
301
  # https://github.com/sa-if/Audio-Denoiser
302
  print("Noice reduce")
303
 
304
  result = []
305
  for audio_path in audio_list:
306
+ out_path = f"{os.path.splitext(audio_path)[0]}_noisereduce.{type_output}"
307
 
308
  try:
309
  # Load audio file
 
324
  )
325
 
326
  # Save reduced audio to file
327
+ reduced_audio.export(out_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
328
  result.append(out_path)
329
 
330
  except Exception as e:
 
336
 
337
 
338
@spaces.GPU()
def convert_now(audio_files, random_tag, converter, type_output, steps):
    """Run ``converter`` ``steps`` times, feeding each pass's output into the next.

    Args:
        audio_files: Input passed to the first converter call.
        random_tag: Tag forwarded unchanged to every converter call.
        converter: Callable performing one conversion pass.
        type_output: Output format forwarded to the converter.
        steps: Number of consecutive passes.

    Returns:
        The result of the last pass (``audio_files`` itself when ``steps`` is 0).
    """
    # Fewer workers when IS_COLAB is set, otherwise eight.
    workers = 2 if IS_COLAB else 8

    outputs = audio_files
    for _ in range(steps):
        outputs = converter(
            outputs,
            random_tag,
            overwrite=False,
            parallel_workers=workers,
            type_output=type_output,
        )
    return outputs
350
 
351
 
352
  def run(
 
361
  c_b_p,
362
  active_noise_reduce,
363
  audio_effects,
364
+ type_output,
365
+ steps,
366
  ):
367
  if not audio_files:
368
  raise ValueError("The audio pls")
 
392
  respiration_median_filtering=r_m_f,
393
  envelope_ratio=e_r,
394
  consonant_breath_protection=c_b_p,
395
+ resample_sr=0,
396
  )
397
  time.sleep(0.1)
398
 
399
+ result = convert_now(audio_files, random_tag, converter, type_output, steps)
400
 
401
  if active_noise_reduce:
402
+ result = apply_noisereduce(result, type_output)
403
 
404
  if audio_effects:
405
+ result = add_audio_effects(result, type_output)
406
 
407
  return result
408
 
 
561
  type="filepath",
562
  # format="mp3",
563
  autoplay=True,
564
+ visible=True,
565
+ interactive=False,
566
+ elem_id="audio_tts",
567
+ )
568
+
569
+
570
def steps_conf():
    """Build the "Steps" slider: integer values from 1 to 3, default 1."""
    slider_options = {
        "label": "Steps",
        "minimum": 1,
        "maximum": 3,
        "step": 1,
        "value": 1,
        "interactive": True,
    }
    return gr.Slider(**slider_options)
579
 
580
 
581
def format_output_gui():
    """Build the output-format dropdown (wav, mp3 or flac; wav preselected)."""
    formats = ["wav", "mp3", "flac"]
    return gr.Dropdown(label="Format output:", choices=formats, value="wav")
587
+
588
  def denoise_conf():
589
  return gr.Checkbox(
590
  False,
 
667
  visible=value_active
668
  )
669
 
670
+ CSS = """
671
+ #audio_tts {
672
+ visibility: hidden; /* invisible but still takes space */
673
+ height: 0px;
674
+ width: 0px;
675
+ max-width: 0px;
676
+ max-height: 0px;
677
+ }
678
+ """
679
 
680
  def get_gui(theme):
681
+ with gr.Blocks(theme=theme, css=CSS, fill_width=True, fill_height=False, delete_cache=delete_cache_time) as app:
682
  gr.Markdown(title)
683
  gr.Markdown(description)
684
 
 
713
 
714
  down_active_gui = down_active_conf()
715
  down_info = gr.Markdown(
716
+ f"Provide a link to a zip file, like this one: `https://huggingface.co/MrDawg/ToothBrushing/resolve/main/ToothBrushing.zip?download=true`, or separate links with a comma for the .pth and .index files, like this: `{test_model}`",
717
  visible=False
718
  )
719
  with gr.Row():
 
739
  [model, indx]
740
  )
741
 
742
+ with gr.Accordion(label="Advanced settings", open=False):
743
+ algo = pitch_algo_conf()
744
+ algo_lvl = pitch_lvl_conf()
745
+ indx_inf = index_inf_conf()
746
+ res_fc = respiration_filter_conf()
747
+ envel_r = envelope_ratio_conf()
748
+ const = consonant_protec_conf()
749
+ steps_gui = steps_conf()
750
+ format_out = format_output_gui()
751
+ with gr.Row():
752
+ with gr.Column():
753
+ with gr.Row():
754
+ denoise_gui = denoise_conf()
755
+ effects_gui = effects_conf()
756
  button_base = button_conf()
757
  output_base = output_conf()
758
 
 
770
  const,
771
  denoise_gui,
772
  effects_gui,
773
+ format_out,
774
+ steps_gui,
775
  ],
776
  outputs=[output_base],
777
  )
 
833
 
834
 
835
  if __name__ == "__main__":
836
+ tts_voice_list = asyncio.new_event_loop().run_until_complete(get_voices_list(proxy=None))
837
+ voices = sorted([
838
+ (" - ".join(reversed(v["FriendlyName"].split("-"))).replace("Microsoft ", "").replace("Online (Natural)", f"({v['Gender']})").strip(), f"{v['ShortName']}-{v['Gender']}")
839
+ for v in tts_voice_list
840
+ ])
841
 
842
  app = get_gui(theme)
843
 
 
845
 
846
  app.launch(
847
  max_threads=40,
848
+ share=IS_COLAB,
849
  show_error=True,
850
  quiet=False,
851
+ debug=IS_COLAB,
852
  ssr_mode=False,
 
853
  )
pre-requirements.txt CHANGED
@@ -1 +1,2 @@
1
- pip==24.0
 
 
1
+ pip==23.0.1
2
+ Setuptools<=80.6.0
requirements.txt CHANGED
@@ -1,8 +1,11 @@
1
- torch==2.2.0
2
- infer-rvc-python==1.1.0
3
  edge-tts
4
  pedalboard
5
  noisereduce
6
  numpy==1.23.5
7
  transformers<=4.48.3
8
  pydantic==2.10.6
 
 
 
 
1
+ torch==2.5.1
2
+ infer-rvc-python
3
  edge-tts
4
  pedalboard
5
  noisereduce
6
  numpy==1.23.5
7
  transformers<=4.48.3
8
  pydantic==2.10.6
9
+ gradio==5.43.1
10
+ spaces
11
+ matplotlib-inline