Jiangxz01 committed on
Commit
71490c2
·
verified ·
1 Parent(s): 5048428

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -4
app.py CHANGED
@@ -5,6 +5,7 @@ import gradio as gr
5
  import openai
6
  import os
7
  import re
 
8
  import uuid
9
  import io
10
  import edge_tts
@@ -48,9 +49,9 @@ def generate_response(input_text, language, speaker1, speaker2, api_key):
48
  - The podcast should be long.
49
  - The podcast should be interesting, lively, and engaging, and hook the listener from the start.
50
  - The script must be in JSON format.
51
- Follow this example structure, MUST be in {language} language:
52
  ```
53
- {{
54
  "topic": "AGI",
55
  "podcast": [
56
  {
@@ -75,7 +76,7 @@ Follow this example structure, MUST be in {language} language:
75
  "line": "Thank you {speaker2_name} for your professional sharing. Welcome to subscribe to the Wishing Podcast. Thank you and goodbye."
76
  }
77
  ]
78
- }}
79
  ```
80
 
81
  <podcast_dialogue>
@@ -105,6 +106,51 @@ Follow this example structure, MUST be in {language} language:
105
  raise gr.Error(f"Failed to generate podcast script: {e}")
106
  return podcast_match.group(0)
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  with gr.Blocks() as iface:
109
  gr.Markdown("# ๐ŸŽ™๏ธ Generated Podcast Audio. Deployed by ๆฑŸไฟกๅฎ—")
110
 
@@ -147,9 +193,10 @@ with gr.Blocks() as iface:
147
 
148
  api_key = gr.Textbox(label="API Key", type="password", placeholder="API authentication key for large language models")
149
  podcast_script = gr.Textbox(label="็”Ÿๆˆ็š„็ตๆžœ")
 
150
 
151
  generate_button = gr.Button("็”Ÿๆˆ")
152
- generate_button.click(fn=generate_response, inputs=[input_text, Language, Speaker_1, Speaker_2, api_key], outputs=podcast_script)
153
 
154
  if __name__ == "__main__":
155
  if "SPACE_ID" in os.environ:
 
5
  import openai
6
  import os
7
  import re
8
+ from pydub import AudioSegment
9
  import uuid
10
  import io
11
  import edge_tts
 
49
  - The podcast should be long.
50
  - The podcast should be interesting, lively, and engaging, and hook the listener from the start.
51
  - The script must be in JSON format.
52
+ Follow this JSON example structure, MUST be in {language} language:
53
  ```
54
+ {
55
  "topic": "AGI",
56
  "podcast": [
57
  {
 
76
  "line": "Thank you {speaker2_name} for your professional sharing. Welcome to subscribe to the Wishing Podcast. Thank you and goodbye."
77
  }
78
  ]
79
+ }
80
  ```
81
 
82
  <podcast_dialogue>
 
106
  raise gr.Error(f"Failed to generate podcast script: {e}")
107
  return podcast_match.group(0)
108
 
109
async def tts_generate(input_text, speaker1, speaker2):
    """Synthesize a podcast script to a single MP3 file.

    Args:
        input_text: JSON string with a ``"podcast"`` list whose items each
            carry a ``"speaker"`` and a ``"line"`` key.
        speaker1: Selection string of the form ``"<name> - <voice>"``; the
            part after ``" - "`` is passed to edge-tts as the voice.
        speaker2: Same format as ``speaker1``, used for every line that is
            not attributed to speaker 1.

    Returns:
        Path of the exported MP3 containing all lines in script order.

    Raises:
        json.JSONDecodeError: If ``input_text`` is not valid JSON.
        KeyError: If the script or one of its entries lacks a required key.
        IndexError: If a speaker string has no ``" - "`` separator.
    """
    # Local imports keep this function self-contained regardless of which
    # modules the top of the file happens to import.
    import json
    import os
    import tempfile

    speaker1_voice = speaker1.split(' - ')[1]
    speaker2_voice = speaker2.split(' - ')[1]

    podcast_dict = json.loads(input_text)

    combined = AudioSegment.empty()
    # Each line gets its own file inside a private scratch directory.
    # Writing every line of a speaker to the same "<name>.mp3" would
    # overwrite earlier lines, so the merged audio would only repeat each
    # speaker's final line.
    with tempfile.TemporaryDirectory() as workdir:
        for index, entry in enumerate(podcast_dict["podcast"]):
            # Accept both the string tag and the numeric id for speaker 1
            # so voice selection is consistent for either script format.
            is_first_speaker = entry["speaker"] in ("speaker1", 1)
            voice = speaker1_voice if is_first_speaker else speaker2_voice

            segment_path = os.path.join(workdir, f"{index:04d}.mp3")
            await edge_tts.Communicate(entry["line"], voice).save(segment_path)
            combined += AudioSegment.from_mp3(segment_path)

    # A unique output path avoids concurrent requests clobbering each
    # other's result; the file must outlive this call so it is not removed.
    fd, output_path = tempfile.mkstemp(prefix="podcast_", suffix=".mp3")
    os.close(fd)
    combined.export(output_path, format="mp3")
    return output_path
148
+
149
async def process_podcast(input_text, language, speaker1, speaker2, api_key):
    """Generate a podcast script and its audio in one step.

    The script is produced first by ``generate_response`` and then handed to
    ``tts_generate`` together with the two speaker selections.

    Returns:
        A ``(script, audio_file)`` tuple: the generated script text and the
        value returned by ``tts_generate`` for it.
    """
    script = generate_response(input_text, language, speaker1, speaker2, api_key)
    return script, await tts_generate(script, speaker1, speaker2)
153
+
154
  with gr.Blocks() as iface:
155
  gr.Markdown("# ๐ŸŽ™๏ธ Generated Podcast Audio. Deployed by ๆฑŸไฟกๅฎ—")
156
 
 
193
 
194
  api_key = gr.Textbox(label="API Key", type="password", placeholder="API authentication key for large language models")
195
  podcast_script = gr.Textbox(label="็”Ÿๆˆ็š„็ตๆžœ")
196
+ audio_output = gr.Audio(label="็”Ÿๆˆ็š„้Ÿณ้ ป")
197
 
198
  generate_button = gr.Button("็”Ÿๆˆ")
199
+ generate_button.click(fn=process_podcast, inputs=[input_text, Language, Speaker_1, Speaker_2, api_key], outputs=[podcast_script, audio_output])
200
 
201
  if __name__ == "__main__":
202
  if "SPACE_ID" in os.environ: