Spaces:

Jiangxz01
/

Generated_Podcast_Audio

Running

App Files Files

Jiangxz01 committed on Sep 27, 2024

Commit

71490c2

verified ·

1 Parent(s): 5048428

Upload app.py

Browse files

Files changed (1) hide show

app.py +51 -4

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import gradio as gr
 import openai
 import os
 import re
 import uuid
 import io
 import edge_tts
@@ -48,9 +49,9 @@ def generate_response(input_text, language, speaker1, speaker2, api_key):
 - The podcast should be long.
 - The podcast should be interesting, lively, and engaging, and hook the listener from the start.
 - The script must be in JSON format.
-Follow this example structure, MUST be in {language} language:
 ```
-{{
 "topic": "AGI",
 "podcast": [
             {
@@ -75,7 +76,7 @@ Follow this example structure, MUST be in {language} language:
                 "line": "Thank you {speaker2_name} for your professional sharing. Welcome to subscribe to the Wishing Podcast. Thank you and goodbye."
             }
 ]
-}}
 ```
 <podcast_dialogue>
@@ -105,6 +106,51 @@ Follow this example structure, MUST be in {language} language:
             raise gr.Error(f"Failed to generate podcast script: {e}")
     return podcast_match.group(0)
 with gr.Blocks() as iface:
     gr.Markdown("# 🎙️ Generated Podcast Audio. Deployed by 江信宗")
@@ -147,9 +193,10 @@ with gr.Blocks() as iface:
     api_key = gr.Textbox(label="API Key", type="password", placeholder="API authentication key for large language models")
     podcast_script = gr.Textbox(label="生成的結果")
     generate_button = gr.Button("生成")
-    generate_button.click(fn=generate_response, inputs=[input_text, Language, Speaker_1, Speaker_2, api_key], outputs=podcast_script)
 if __name__ == "__main__":
     if "SPACE_ID" in os.environ:

 import openai
 import os
 import re
+from pydub import AudioSegment
 import uuid
 import io
 import edge_tts
 - The podcast should be long.
 - The podcast should be interesting, lively, and engaging, and hook the listener from the start.
 - The script must be in JSON format.
+Follow this JSON example structure, MUST be in {language} language:
 ```
+{
 "topic": "AGI",
 "podcast": [
             {
                 "line": "Thank you {speaker2_name} for your professional sharing. Welcome to subscribe to the Wishing Podcast. Thank you and goodbye."
             }
 ]
+}
 ```
 <podcast_dialogue>
             raise gr.Error(f"Failed to generate podcast script: {e}")
     return podcast_match.group(0)
+async def tts_generate(input_text, speaker1, speaker2):
+    speaker1_name = speaker1.split(' - ')[0]
+    speaker2_name = speaker2.split(' - ')[0]
+    speaker1_voice = speaker1.split(' - ')[1]
+    speaker2_voice = speaker2.split(' - ')[1]
+    podcast_dict = json.loads(input_text)
+    podcast_json = {
+        "topic": podcast_dict["topic"],
+        "podcast": []
+    }
+    speaker_map = {
+        "speaker1": 1,
+        "speaker2": 2
+    }
+    for line in podcast_dict["podcast"]:
+        speaker = line["speaker"]
+        text = line["line"]
+        voice = speaker1_voice if speaker == "speaker1" else speaker2_voice
+        voice_name = speaker1_name if speaker == "speaker1" else speaker2_name
+        communicate = edge_tts.Communicate(text, voice)
+        await communicate.save(f"{voice_name}.mp3")
+        podcast_json["podcast"].append({
+            "speaker": speaker_map.get(speaker, speaker),
+            "line": text
+        })
+    combined = AudioSegment.empty()
+    for line in podcast_json["podcast"]:
+        speaker = line["speaker"]
+        voice_name = speaker1_name if speaker == 1 else speaker2_name
+        audio = AudioSegment.from_mp3(f"{voice_name}.mp3")
+        combined += audio
+    combined.export("combined.mp3", format="mp3")
+    return "combined.mp3"
+async def process_podcast(input_text, language, speaker1, speaker2, api_key):
+    podcast_script = generate_response(input_text, language, speaker1, speaker2, api_key)
+    audio_file = await tts_generate(podcast_script, speaker1, speaker2)
+    return podcast_script, audio_file
 with gr.Blocks() as iface:
     gr.Markdown("# 🎙️ Generated Podcast Audio. Deployed by 江信宗")
     api_key = gr.Textbox(label="API Key", type="password", placeholder="API authentication key for large language models")
     podcast_script = gr.Textbox(label="生成的結果")
+    audio_output = gr.Audio(label="生成的音頻")
     generate_button = gr.Button("生成")
+    generate_button.click(fn=process_podcast, inputs=[input_text, Language, Speaker_1, Speaker_2, api_key], outputs=[podcast_script, audio_output])
 if __name__ == "__main__":
     if "SPACE_ID" in os.environ: