Spaces:
Running
Running
Upload app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import gradio as gr
|
|
5 |
import openai
|
6 |
import os
|
7 |
import re
|
|
|
8 |
import uuid
|
9 |
import io
|
10 |
import edge_tts
|
@@ -48,9 +49,9 @@ def generate_response(input_text, language, speaker1, speaker2, api_key):
|
|
48 |
- The podcast should be long.
|
49 |
- The podcast should be interesting, lively, and engaging, and hook the listener from the start.
|
50 |
- The script must be in JSON format.
|
51 |
-
Follow this example structure, MUST be in {language} language:
|
52 |
```
|
53 |
-
{
|
54 |
"topic": "AGI",
|
55 |
"podcast": [
|
56 |
{
|
@@ -75,7 +76,7 @@ Follow this example structure, MUST be in {language} language:
|
|
75 |
"line": "Thank you {speaker2_name} for your professional sharing. Welcome to subscribe to the Wishing Podcast. Thank you and goodbye."
|
76 |
}
|
77 |
]
|
78 |
-
}
|
79 |
```
|
80 |
|
81 |
<podcast_dialogue>
|
@@ -105,6 +106,51 @@ Follow this example structure, MUST be in {language} language:
|
|
105 |
raise gr.Error(f"Failed to generate podcast script: {e}")
|
106 |
return podcast_match.group(0)
|
107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
with gr.Blocks() as iface:
|
109 |
gr.Markdown("# ๐๏ธ Generated Podcast Audio. Deployed by ๆฑไฟกๅฎ")
|
110 |
|
@@ -147,9 +193,10 @@ with gr.Blocks() as iface:
|
|
147 |
|
148 |
api_key = gr.Textbox(label="API Key", type="password", placeholder="API authentication key for large language models")
|
149 |
podcast_script = gr.Textbox(label="็ๆ็็ตๆ")
|
|
|
150 |
|
151 |
generate_button = gr.Button("็ๆ")
|
152 |
-
generate_button.click(fn=
|
153 |
|
154 |
if __name__ == "__main__":
|
155 |
if "SPACE_ID" in os.environ:
|
|
|
5 |
import openai
|
6 |
import os
|
7 |
import re
|
8 |
+
from pydub import AudioSegment
|
9 |
import uuid
|
10 |
import io
|
11 |
import edge_tts
|
|
|
49 |
- The podcast should be long.
|
50 |
- The podcast should be interesting, lively, and engaging, and hook the listener from the start.
|
51 |
- The script must be in JSON format.
|
52 |
+
Follow this JSON example structure, MUST be in {language} language:
|
53 |
```
|
54 |
+
{
|
55 |
"topic": "AGI",
|
56 |
"podcast": [
|
57 |
{
|
|
|
76 |
"line": "Thank you {speaker2_name} for your professional sharing. Welcome to subscribe to the Wishing Podcast. Thank you and goodbye."
|
77 |
}
|
78 |
]
|
79 |
+
}
|
80 |
```
|
81 |
|
82 |
<podcast_dialogue>
|
|
|
106 |
raise gr.Error(f"Failed to generate podcast script: {e}")
|
107 |
return podcast_match.group(0)
|
108 |
|
109 |
+
async def tts_generate(input_text, speaker1, speaker2):
    """Synthesize a JSON podcast script into one combined MP3 file.

    Args:
        input_text: JSON string with a "topic" key and a "podcast" list whose
            items each carry a "speaker" ("speaker1"/"speaker2") and a "line".
        speaker1: Speaker descriptor formatted as "<name> - <voice>".
        speaker2: Speaker descriptor formatted as "<name> - <voice>".

    Returns:
        Filesystem path (str) of the exported MP3 containing every line of the
        script, concatenated in script order.

    Raises:
        json.JSONDecodeError: If ``input_text`` is not valid JSON.
        KeyError: If the script lacks "podcast", or an item lacks
            "speaker"/"line".
        IndexError: If a speaker descriptor has no " - <voice>" part.
    """
    # Local imports keep this block self-contained regardless of what the
    # module header (not fully visible here) already imports.
    import json
    import tempfile

    speaker1_voice = speaker1.split(' - ')[1]
    speaker2_voice = speaker2.split(' - ')[1]

    podcast_dict = json.loads(input_text)

    combined = AudioSegment.empty()
    # Every line gets its OWN file. The previous implementation wrote each
    # line to "<speaker name>.mp3", so later lines overwrote earlier ones and
    # the combined audio just repeated each speaker's final line.
    with tempfile.TemporaryDirectory(prefix="podcast_") as work_dir:
        for index, entry in enumerate(podcast_dict["podcast"]):
            text = entry["line"]
            # Anything that is not "speaker1" falls back to the second voice,
            # matching the original selection rule.
            voice = speaker1_voice if entry["speaker"] == "speaker1" else speaker2_voice

            segment_path = os.path.join(work_dir, f"{index:04d}.mp3")
            communicate = edge_tts.Communicate(text, voice)
            await communicate.save(segment_path)

            # Append immediately so segments stay in script order.
            combined += AudioSegment.from_mp3(segment_path)

    # A unique output path avoids concurrent requests clobbering a shared
    # "combined.mp3" in the working directory.
    output_file = tempfile.NamedTemporaryFile(
        prefix="podcast_", suffix=".mp3", delete=False
    )
    output_file.close()
    combined.export(output_file.name, format="mp3")
    return output_file.name
|
148 |
+
|
149 |
+
async def process_podcast(input_text, language, speaker1, speaker2, api_key):
    """Generate a podcast script and its audio for the Gradio click handler.

    Args:
        input_text: Source material passed to ``generate_response``.
        language: Target language passed to ``generate_response``.
        speaker1: Speaker descriptor, forwarded to both helpers.
        speaker2: Speaker descriptor, forwarded to both helpers.
        api_key: API key forwarded to ``generate_response``.

    Returns:
        A ``(podcast_script, audio_file)`` tuple: the generated script text and
        the path of the synthesized audio file.
    """
    import asyncio

    # generate_response is a plain (synchronous) function; calling it directly
    # from this coroutine would stall the event loop for the duration of the
    # request. Run it in the default executor instead. Exceptions it raises
    # (e.g. gr.Error) still propagate through the await.
    loop = asyncio.get_running_loop()
    podcast_script = await loop.run_in_executor(
        None, generate_response, input_text, language, speaker1, speaker2, api_key
    )
    audio_file = await tts_generate(podcast_script, speaker1, speaker2)
    return podcast_script, audio_file
|
153 |
+
|
154 |
with gr.Blocks() as iface:
|
155 |
gr.Markdown("# ๐๏ธ Generated Podcast Audio. Deployed by ๆฑไฟกๅฎ")
|
156 |
|
|
|
193 |
|
194 |
api_key = gr.Textbox(label="API Key", type="password", placeholder="API authentication key for large language models")
|
195 |
podcast_script = gr.Textbox(label="็ๆ็็ตๆ")
|
196 |
+
audio_output = gr.Audio(label="็ๆ็้ณ้ ป")
|
197 |
|
198 |
generate_button = gr.Button("็ๆ")
|
199 |
+
generate_button.click(fn=process_podcast, inputs=[input_text, Language, Speaker_1, Speaker_2, api_key], outputs=[podcast_script, audio_output])
|
200 |
|
201 |
if __name__ == "__main__":
|
202 |
if "SPACE_ID" in os.environ:
|