Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import gradio as gr
|
|
| 5 |
import openai
|
| 6 |
import os
|
| 7 |
import re
|
|
|
|
| 8 |
import uuid
|
| 9 |
import io
|
| 10 |
import edge_tts
|
|
@@ -48,9 +49,9 @@ def generate_response(input_text, language, speaker1, speaker2, api_key):
|
|
| 48 |
- The podcast should be long.
|
| 49 |
- The podcast should be interesting, lively, and engaging, and hook the listener from the start.
|
| 50 |
- The script must be in JSON format.
|
| 51 |
-
Follow this example structure, MUST be in {language} language:
|
| 52 |
```
|
| 53 |
-
{
|
| 54 |
"topic": "AGI",
|
| 55 |
"podcast": [
|
| 56 |
{
|
|
@@ -75,7 +76,7 @@ Follow this example structure, MUST be in {language} language:
|
|
| 75 |
"line": "Thank you {speaker2_name} for your professional sharing. Welcome to subscribe to the Wishing Podcast. Thank you and goodbye."
|
| 76 |
}
|
| 77 |
]
|
| 78 |
-
}
|
| 79 |
```
|
| 80 |
|
| 81 |
<podcast_dialogue>
|
|
@@ -105,6 +106,51 @@ Follow this example structure, MUST be in {language} language:
|
|
| 105 |
raise gr.Error(f"Failed to generate podcast script: {e}")
|
| 106 |
return podcast_match.group(0)
|
| 107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
with gr.Blocks() as iface:
|
| 109 |
gr.Markdown("# ๐๏ธ Generated Podcast Audio. Deployed by ๆฑไฟกๅฎ")
|
| 110 |
|
|
@@ -147,9 +193,10 @@ with gr.Blocks() as iface:
|
|
| 147 |
|
| 148 |
api_key = gr.Textbox(label="API Key", type="password", placeholder="API authentication key for large language models")
|
| 149 |
podcast_script = gr.Textbox(label="็ๆ็็ตๆ")
|
|
|
|
| 150 |
|
| 151 |
generate_button = gr.Button("็ๆ")
|
| 152 |
-
generate_button.click(fn=
|
| 153 |
|
| 154 |
if __name__ == "__main__":
|
| 155 |
if "SPACE_ID" in os.environ:
|
|
|
|
| 5 |
import openai
|
| 6 |
import os
|
| 7 |
import re
|
| 8 |
+
from pydub import AudioSegment
|
| 9 |
import uuid
|
| 10 |
import io
|
| 11 |
import edge_tts
|
|
|
|
| 49 |
- The podcast should be long.
|
| 50 |
- The podcast should be interesting, lively, and engaging, and hook the listener from the start.
|
| 51 |
- The script must be in JSON format.
|
| 52 |
+
Follow this JSON example structure, MUST be in {language} language:
|
| 53 |
```
|
| 54 |
+
{
|
| 55 |
"topic": "AGI",
|
| 56 |
"podcast": [
|
| 57 |
{
|
|
|
|
| 76 |
"line": "Thank you {speaker2_name} for your professional sharing. Welcome to subscribe to the Wishing Podcast. Thank you and goodbye."
|
| 77 |
}
|
| 78 |
]
|
| 79 |
+
}
|
| 80 |
```
|
| 81 |
|
| 82 |
<podcast_dialogue>
|
|
|
|
| 106 |
raise gr.Error(f"Failed to generate podcast script: {e}")
|
| 107 |
return podcast_match.group(0)
|
| 108 |
|
| 109 |
+
async def tts_generate(input_text, speaker1, speaker2):
    """Synthesize a podcast script into one MP3 file, line by line, in order.

    Args:
        input_text: JSON string containing a "podcast" list; every item has a
            "speaker" key ("speaker1" selects the first voice, anything else
            the second) and a "line" key holding the text to speak.
        speaker1: Selection string of the form "<name> - <voice>"; the part
            after the first " - " is passed to edge_tts as the voice.
        speaker2: Same format as ``speaker1``, used for every non-"speaker1"
            entry.

    Returns:
        The path of the exported audio file, always "combined.mp3".

    Raises:
        json.JSONDecodeError: ``input_text`` is not valid JSON.
        KeyError: the "podcast" list or an item's "speaker"/"line" is missing.
        IndexError: a speaker string does not contain " - ".
    """
    # Local imports keep the function self-contained regardless of what the
    # module's import block provides.
    import json
    import tempfile

    speaker1_voice = speaker1.split(' - ')[1]
    speaker2_voice = speaker2.split(' - ')[1]

    podcast_dict = json.loads(input_text)

    combined = AudioSegment.empty()
    # Every line gets its own file. Saving to one file per speaker made each
    # new line overwrite the previous one, so the final mix repeated only the
    # last line of each speaker.
    with tempfile.TemporaryDirectory() as segment_dir:
        for index, entry in enumerate(podcast_dict["podcast"]):
            text = entry["line"]
            voice = speaker1_voice if entry["speaker"] == "speaker1" else speaker2_voice

            segment_path = os.path.join(segment_dir, f"{index:04d}.mp3")
            communicate = edge_tts.Communicate(text, voice)
            await communicate.save(segment_path)

            # Append immediately so the segments stay in script order.
            combined += AudioSegment.from_mp3(segment_path)

    combined.export("combined.mp3", format="mp3")
    return "combined.mp3"
|
| 148 |
+
|
| 149 |
+
async def process_podcast(input_text, language, speaker1, speaker2, api_key):
    """Run the full pipeline: write the podcast script, then voice it.

    The arguments are forwarded unchanged to ``generate_response``; the
    resulting script and both speaker selections are then handed to
    ``tts_generate``.

    Returns:
        A ``(script, audio_path)`` tuple: the generated script text followed
        by whatever ``tts_generate`` returns for it.
    """
    script = generate_response(input_text, language, speaker1, speaker2, api_key)
    return script, await tts_generate(script, speaker1, speaker2)
|
| 153 |
+
|
| 154 |
with gr.Blocks() as iface:
|
| 155 |
gr.Markdown("# ๐๏ธ Generated Podcast Audio. Deployed by ๆฑไฟกๅฎ")
|
| 156 |
|
|
|
|
| 193 |
|
| 194 |
api_key = gr.Textbox(label="API Key", type="password", placeholder="API authentication key for large language models")
|
| 195 |
podcast_script = gr.Textbox(label="็ๆ็็ตๆ")
|
| 196 |
+
audio_output = gr.Audio(label="็ๆ็้ณ้ ป")
|
| 197 |
|
| 198 |
generate_button = gr.Button("็ๆ")
|
| 199 |
+
generate_button.click(fn=process_podcast, inputs=[input_text, Language, Speaker_1, Speaker_2, api_key], outputs=[podcast_script, audio_output])
|
| 200 |
|
| 201 |
if __name__ == "__main__":
|
| 202 |
if "SPACE_ID" in os.environ:
|