vtuber_rvc_models

Build error

App Files Files Community

Kit-Lemonfoot committed on Dec 22, 2023

Commit

918ef2d

1 Parent(s): 827b7fd

Added some more indies, moved Dramatubers to their own tab, a few fixes

Browse files

Files changed (1) hide show

app.py +22 -41

app.py CHANGED Viewed

@@ -35,6 +35,9 @@ limitation = os.getenv("SYSTEM") == "spaces"
 #limitation=True
 language_dict = tts_order_voice
 audio_mode = []
 f0method_mode = []
 if limitation is True:
@@ -60,8 +63,10 @@ vcArr.append(VC(48000, config))
 def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_voice, f0_up_key, f0_method, index_rate, filter_radius, resample_sr, rms_mix_rate, protect, record_button):
     try:
         #Setup audio
         if vc_audio_mode == "Input path" or "Youtube" and vc_input != "":
             audio, sr = librosa.load(vc_input, sr=16000, mono=True)
         elif vc_audio_mode == "Upload audio":
             if vc_upload is None:
                 return "Please upload an audio file.", None
@@ -81,7 +86,11 @@ def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_v
             if tts_text is None or tts_voice is None or tts_text=="":
                 return "You need to enter text and select a voice.", None
             voice = language_dict[tts_voice]
-            asyncio.run(edge_tts.Communicate(tts_text, voice).save("tts.mp3"))
             try:
                 audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
             except:
@@ -103,6 +112,13 @@ def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_v
             if sampling_rate != 16000:
                 audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
             tts_text = "Recorded Audio"
         times = [0, 0, 0]
         f0_up_key = int(f0_up_key)
@@ -187,6 +203,8 @@ def load_model():
             model_index = f"weights/{category_folder}/{character_name}/{info['feature_retrieval_library']}"
             if info['feature_retrieval_library'] == "None":
                 model_index = None
             model_path =  f"weights/{category_folder}/{character_name}/{model_name}"
             cpt = torch.load(f"weights/{category_folder}/{character_name}/{model_name}", map_location="cpu")
             model_version = cpt.get("version", "v1")
@@ -282,9 +300,6 @@ def change_audio_mode(vc_audio_mode):
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
-            gr.Slider.update(visible=False),
-            gr.Audio.update(visible=False),
-            gr.Button.update(visible=False),
             # EdgeTTS
             gr.Textbox.update(visible=False),
             gr.Dropdown.update(visible=False),
@@ -304,9 +319,6 @@ def change_audio_mode(vc_audio_mode):
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
-            gr.Slider.update(visible=False),
-            gr.Audio.update(visible=False),
-            gr.Button.update(visible=False),
             # EdgeTTS
             gr.Textbox.update(visible=False),
             gr.Dropdown.update(visible=False),
@@ -326,9 +338,6 @@ def change_audio_mode(vc_audio_mode):
             gr.Audio.update(visible=True),
             gr.Audio.update(visible=True),
             gr.Audio.update(visible=True),
-            gr.Slider.update(visible=True),
-            gr.Audio.update(visible=True),
-            gr.Button.update(visible=True),
             # TTS
             gr.Textbox.update(visible=False),
             gr.Dropdown.update(visible=False),
@@ -348,9 +357,6 @@ def change_audio_mode(vc_audio_mode):
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
-            gr.Slider.update(visible=False),
-            gr.Audio.update(visible=False),
-            gr.Button.update(visible=False),
             # TTS
             gr.Textbox.update(visible=True),
             gr.Dropdown.update(visible=True),
@@ -370,9 +376,6 @@ def change_audio_mode(vc_audio_mode):
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
-            gr.Slider.update(visible=False),
-            gr.Audio.update(visible=False),
-            gr.Button.update(visible=False),
             # TTS
             gr.Textbox.update(visible=False),
             gr.Dropdown.update(visible=False),
@@ -392,9 +395,6 @@ def change_audio_mode(vc_audio_mode):
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
-            gr.Slider.update(visible=False),
-            gr.Audio.update(visible=False),
-            gr.Button.update(visible=False),
             # TTS
             gr.Textbox.update(visible=False, interactive=True),
             gr.Dropdown.update(visible=False, interactive=True),
@@ -535,19 +535,6 @@ if __name__ == '__main__':
                                         ],
                                         outputs=[vc_log, vc_output]
                                     )
-                                    vc_volume = gr.Slider(
-                                        minimum=0,
-                                        maximum=10,
-                                        label="Vocal volume",
-                                        value=4,
-                                        interactive=True,
-                                        step=1,
-                                        info="Adjust vocal volume (Default: 4}",
-                                        visible=False
-                                    )
-                                    vc_combined_output = gr.Audio(label="Output Combined Audio", visible=False)
-                                    vc_combine =  gr.Button("Combine",variant="primary", visible=False)
         with gr.Row():
             with gr.Column():
@@ -582,11 +569,6 @@ if __name__ == '__main__':
             inputs=[vc_link, vc_download_audio, vc_split_model],
             outputs=[vc_vocal_preview, vc_inst_preview, vc_audio_preview, vc_input]
         )
-        vc_combine.click(
-            fn=combine_vocal_and_inst,
-            inputs=[vc_output, vc_volume, vc_split_model],
-            outputs=[vc_combined_output]
-        )
         vc_audio_mode.change(
             fn=change_audio_mode,
             inputs=[vc_audio_mode],
@@ -600,20 +582,19 @@ if __name__ == '__main__':
                 vc_vocal_preview,
                 vc_inst_preview,
                 vc_audio_preview,
-                vc_volume,
-                vc_combined_output,
-                vc_combine,
                 tts_text,
                 tts_voice,
                 record_button
             ]
         )
         gr.Markdown(
             "## <center>Credit to:\n"
             "#### <center>Original devs:\n"
             "<center>the RVC Project, lj1995, zomehwh, sysf\n\n"
             "#### <center>Model creators:\n"
-            "<center>dacoolkid44, Hijack, Maki Ligon, megaaziib, KitLemonfoot, yeey5, Sui, MahdeenSky, Itaxhix, Acato, Kyuubical, Listra92, IshimaIshimsky, ZomballTH, Jotape91, RigidSpinner, RandomAssBettel, Mimizukari, Oida, Shu-Kun, Nhat Minh, Ardha27, Legitdark, TempoHawk, 0x3e9, Kaiaya, Skeetawn, Sonphantrung, Pianissimo, RavenCutie21, HinaBl, Brazurl, PetroOne, Rubinlord, Gloomwastragic, Sunesu, Aimbo, Act8113, Blyxeen\n"
         )
 if limitation is True:
     app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.colab)

 #limitation=True
 language_dict = tts_order_voice
+authors = ["dacoolkid44", "Hijack", "Maki Ligon", "megaaziib", "KitLemonfoot", "yeey5", "Sui", "MahdeenSky"]
+authorskip = ["dacoolkid44 & Hijack", "dacoolkid44 & Hijack & Maki Ligon", "Kit Lemonfoot / NSHFB"]
 audio_mode = []
 f0method_mode = []
 if limitation is True:
 def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_voice, f0_up_key, f0_method, index_rate, filter_radius, resample_sr, rms_mix_rate, protect, record_button):
     try:
         #Setup audio
+        audio=None
         if vc_audio_mode == "Input path" or "Youtube" and vc_input != "":
             audio, sr = librosa.load(vc_input, sr=16000, mono=True)
+            tts_text = "YouTube Audio"
         elif vc_audio_mode == "Upload audio":
             if vc_upload is None:
                 return "Please upload an audio file.", None
             if tts_text is None or tts_voice is None or tts_text=="":
                 return "You need to enter text and select a voice.", None
             voice = language_dict[tts_voice]
+            try:
+                asyncio.run(edge_tts.Communicate(tts_text, voice).save("tts.mp3"))
+            except:
+                print("Failed to get E-TTS handle. A restart may be needed soon.")
+                return "ERROR: Failed to communicate with Edge-TTS. The Edge-TTS service may be down or cannot communicate. Please try another method or try again later.", None
             try:
                 audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
             except:
             if sampling_rate != 16000:
                 audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
             tts_text = "Recorded Audio"
+        if audio is None:
+            if vc_audio_mode == "Edge-TTS":
+                print("Failed to get E-TTS handle. A restart may be needed soon.")
+                return "ERROR: Failed to obtain a correct response from Edge-TTS. The Edge-TTS service may be down or unable to communicate. Please try another method or try again later.", None
+            return "ERROR: Unknown audio error. Please try again.", None
         times = [0, 0, 0]
         f0_up_key = int(f0_up_key)
             model_index = f"weights/{category_folder}/{character_name}/{info['feature_retrieval_library']}"
             if info['feature_retrieval_library'] == "None":
                 model_index = None
+            if not (model_author in authors or model_author in authorskip):
+                authors.append(model_author)
             model_path =  f"weights/{category_folder}/{character_name}/{model_name}"
             cpt = torch.load(f"weights/{category_folder}/{character_name}/{model_name}", map_location="cpu")
             model_version = cpt.get("version", "v1")
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
             # EdgeTTS
             gr.Textbox.update(visible=False),
             gr.Dropdown.update(visible=False),
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
             # EdgeTTS
             gr.Textbox.update(visible=False),
             gr.Dropdown.update(visible=False),
             gr.Audio.update(visible=True),
             gr.Audio.update(visible=True),
             gr.Audio.update(visible=True),
             # TTS
             gr.Textbox.update(visible=False),
             gr.Dropdown.update(visible=False),
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
             # TTS
             gr.Textbox.update(visible=True),
             gr.Dropdown.update(visible=True),
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
             # TTS
             gr.Textbox.update(visible=False),
             gr.Dropdown.update(visible=False),
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
             gr.Audio.update(visible=False),
             # TTS
             gr.Textbox.update(visible=False, interactive=True),
             gr.Dropdown.update(visible=False, interactive=True),
                                         ],
                                         outputs=[vc_log, vc_output]
                                     )
         with gr.Row():
             with gr.Column():
             inputs=[vc_link, vc_download_audio, vc_split_model],
             outputs=[vc_vocal_preview, vc_inst_preview, vc_audio_preview, vc_input]
         )
         vc_audio_mode.change(
             fn=change_audio_mode,
             inputs=[vc_audio_mode],
                 vc_vocal_preview,
                 vc_inst_preview,
                 vc_audio_preview,
                 tts_text,
                 tts_voice,
                 record_button
             ]
         )
+        authStr=", ".join(authors)
         gr.Markdown(
             "## <center>Credit to:\n"
             "#### <center>Original devs:\n"
             "<center>the RVC Project, lj1995, zomehwh, sysf\n\n"
             "#### <center>Model creators:\n"
+            f"<center>{authStr}\n"
         )
 if limitation is True:
     app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.colab)