Spaces:
Running
Running
File size: 10,605 Bytes
3f9fc14 8dfb6d1 71490c2 5048428 fe9bc81 5db6f6a 3f9fc14 729fc09 3e01b59 d3dfe8f bac399d 78911dc b505c58 729fc09 3f9fc14 ae24967 3f9fc14 3e01b59 729fc09 3f9fc14 71490c2 78911dc 3f9fc14 3e01b59 3f9fc14 729fc09 3f9fc14 729fc09 3f9fc14 b505c58 5ecccc7 b505c58 3f9fc14 71490c2 5ecccc7 71490c2 5ecccc7 71490c2 5ecccc7 71490c2 5ecccc7 71490c2 5ecccc7 71490c2 5ecccc7 71490c2 0fa7efa 3f9fc14 5db6f6a 77f51bb 058e306 4db864c 80c5b72 4db864c 058e306 77f51bb 3ead5ff 5048428 71490c2 3f9fc14 71490c2 3f9fc14 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
# -*- coding: utf-8 -*-
# 財政部財政資訊中心 江信宗
import gradio as gr
import openai
import os
import re
from pydub import AudioSegment
import uuid
import io
import edge_tts
import asyncio
import aiofiles
import json
def create_client(api_key=None):
    """Return an OpenAI-compatible client bound to the SambaNova endpoint.

    Args:
        api_key: Key to authenticate with. When falsy (``None`` or ``""``),
            the ``YOUR_API_TOKEN`` environment variable is used instead.

    Returns:
        An ``openai.OpenAI`` client whose base URL is
        ``https://api.sambanova.ai/v1``.

    The resolved key is also stored on the module-level ``openai.api_key``.
    """
    resolved_key = api_key or os.getenv("YOUR_API_TOKEN")
    openai.api_key = resolved_key
    return openai.OpenAI(api_key=resolved_key, base_url="https://api.sambanova.ai/v1")
def generate_response(input_text, language, speaker1, speaker2, api_key):
    """Ask the LLM to turn ``input_text`` into a two-speaker podcast script.

    Args:
        input_text: Source material for the podcast; sent as the user message.
        language: Output language label, or ``"Auto Detect"`` to follow the
            language of the input.
        speaker1: Host choice formatted as ``"<name> - <description>"``.
        speaker2: Guest choice in the same format.
        api_key: API key forwarded to ``create_client``.

    Returns:
        The JSON text (``str``) cut out of the model reply, spanning the first
        ``{`` to the last ``}``. Trailing commas are stripped when the raw
        text does not parse, and the result is validated before returning.

    Raises:
        gr.Error: If the API call fails (with dedicated messages for invalid
            keys and rate limits), if the reply contains no JSON object, or
            if the JSON is still invalid after cleanup.
    """
    speaker1_name = speaker1.split(' - ')[0]
    speaker2_name = speaker2.split(' - ')[0]
    if language == "Auto Detect":
        language_instruction = "- The podcast MUST be in the same language as the user input."
        # "Auto Detect" is a UI label, not a language name, so it must not be
        # interpolated into the prompt as if it were one.
        example_language = "the same language as the user input"
    else:
        language_instruction = f"- The podcast Must reply to me in {language} language."
        example_language = f"{language} language"
    example = """
{
"topic": "AGI",
"podcast": [
{
"speaker": 1,
"line": "Welcome to the 財資歐北共 Podcast. I am the host {speaker1_name}. Today we have invited an expert {speaker2_name} to join our program despite his busy schedule."
},
{
"speaker": 2,
"line": "Hello everyone, I am {speaker2_name}, I am honored to come and chat with you."
},
{
"speaker": 1,
"line": "Today we will discuss a very interesting topic..."
},
{
"speaker": 2,
"line": "Yes, this topic is indeed fascinating. Let's start with..."
},
…………,
{
"speaker": 1,
"line": "Thank you {speaker2_name} for your professional sharing. Welcome to subscribe to the Wishing Podcast. Thank you and goodbye."
}
]
}
"""
    # The template is a plain (non-f) string because its JSON braces would
    # clash with f-string syntax, so the name placeholders are filled in
    # explicitly; otherwise the model would see the literal "{speaker1_name}".
    example = (
        example
        .replace("{speaker1_name}", speaker1_name)
        .replace("{speaker2_name}", speaker2_name)
    )
    # NOTE(review): "{{input_text}}" below is an escaped brace pair, so the
    # prompt contains the literal text "{input_text}"; the real input reaches
    # the model only through the user message. Confirm this is intended.
    system_prompt = f"""你的任務是將提供的輸入文字轉換為一個引人入勝、訊息豐富且專業的Podcast對話。輸入文字可能會比較混亂或結構不完整,因為它可能來自不同來源,如PDF檔案或文字檔等。不要擔心格式問題或任何不相關的訊息;你的目標是提取可以在Podcast中討論的關鍵點、識別重要定義,並突出有趣的事實。
以下是你將要處理的輸入文字:
<input_text>
{{input_text}}
</input_text>
首先,仔細閱讀輸入文字,找出主要話題、關鍵點,以及任何有趣的事實或軼事。思考如何將這些訊息以一種有趣且吸引人的方式呈現出來,適合高質量的音訊Podcast。
<scratchpad>
頭腦風暴一些創造性的方法來討論你在輸入文字中識別出的主要話題、關鍵點及任何有趣的事實或軼事。可以考慮使用類比、講故事技巧或假設情境來讓內容對聽眾更加貼近和有趣。
請記住,你的Podcast應當易於普通聽眾理解,所以避免使用過多的專業術語或假設聽眾對該話題已有瞭解。如有必要,請思考如何用簡單的術語簡要解釋任何複雜的概念。
利用你的想像力填補輸入文字中的任何空白,或者想出一些值得探討與發人深省的問題,以供Podcast討論。目標是創造一個訊息豐富且娛樂性強的對話,因此可以在你的方法上大膽自由發揮創意。
將你的頭腦風暴想法和Podcast對話的粗略大綱寫在這裡。確保記錄下你希望在結尾重申的主要見解和要點。
</scratchpad>
現在你已經進行了頭腦風暴並建立了一個粗略的大綱,是時候撰寫實際的Podcast對話了。目標是主持人({speaker1_name})與嘉賓({speaker2_name})之間自然、對話式的交流。融入你在頭腦風暴中得出的最佳想法,並確保將任何複雜話題以易於理解的方式解釋清楚。
{language_instruction}
- The podcast should have 2 speakers: {speaker1_name} and {speaker2_name}.
- The podcast should be long.
- The podcast should be interesting, lively, and engaging, and hook the listener from the start.
- The script must be in JSON format.
Follow this JSON example structure, MUST be in {example_language}:
{example}
<podcast_dialogue>
根據你在頭腦風暴階段提出的關鍵點和創造性想法,撰寫你的引人入勝、訊息豐富的Podcast對話。採用對話式的語氣,並包括任何必要的上下文或解釋,使內容對一般聽眾而言容易理解。使用主持人名字 {speaker1_name} 和嘉賓名字 {speaker2_name},以營造更吸引人和身臨其境的聆聽體驗。不要包括像[主持人]或[嘉賓]這樣的括號預留位置。設計你的輸出內容以供直接朗讀——它將直接轉換為音訊。
確保對話儘可能詳細、完整,同時保持在主題之內並維持吸引人的流暢性。目標是使用你的全部輸出容量,建立儘可能長的Podcast節目,同時以有趣的方式傳遞輸入文字中的關鍵訊息。
在對話結束時,讓主持人和嘉賓自然總結他們討論中的主要見解和要點。這應當是對話的隨機部分,以自然隨意而非明顯的總結——目的是在結束前最後一次以自然流暢的方式強化核心思想。最終以感謝詞結束。
</podcast_dialogue>
"""
    try:
        # Client creation and the request live inside the try block so that
        # authentication and rate-limit failures reach the handlers below.
        client = create_client(api_key)
        response = client.chat.completions.create(
            model="Meta-Llama-3.1-405B-Instruct",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": input_text}
            ],
            temperature=1
        )
        # The content may be None; treat that the same as "no JSON found".
        content = response.choices[0].message.content or ""
        # Greedy match with DOTALL: from the first "{" to the last "}".
        podcast_match = re.search(r'{.*}', content, re.DOTALL)
        if not podcast_match:
            raise gr.Error("Failed to generate podcast script. Please try again.")
        podcast_json = podcast_match.group(0)
        # Try to parse the JSON; clean it up if that fails.
        try:
            json.loads(podcast_json)
        except json.JSONDecodeError:
            # Drop trailing commas before a closing brace or bracket.
            podcast_json = re.sub(r',\s*}', '}', podcast_json)
            podcast_json = re.sub(r',\s*]', ']', podcast_json)
            # Re-validate so a still-broken script fails here with a clear
            # message instead of later during audio generation.
            json.loads(podcast_json)
        return podcast_json
    except gr.Error:
        # Already user-facing; do not wrap it a second time.
        raise
    except Exception as e:
        if "API key not valid" in str(e):
            raise gr.Error("Invalid API key. Please provide a valid API key.") from e
        elif "rate limit" in str(e).lower():
            raise gr.Error("Rate limit exceeded for the API key. Please try again later or provide your own API key.") from e
        else:
            raise gr.Error(f"Failed to generate podcast script: {e}") from e
async def tts_generate(input_text, speaker1, speaker2):
    """Synthesize a podcast script to a single MP3 file.

    Args:
        input_text: JSON text with a ``"podcast"`` list whose items carry a
            ``"speaker"`` number and a ``"line"`` of text.
        speaker1: Host choice formatted as ``"<name> - <voice>"``.
        speaker2: Guest choice in the same format.

    Returns:
        The filename of the combined MP3, written to the current working
        directory as ``combined_<uuid>.mp3``.

    Raises:
        json.JSONDecodeError: If ``input_text`` is still invalid JSON after
            trailing commas have been stripped.
        IndexError: If a speaker choice contains no ``" - "`` separator.
    """
    speaker1_name = speaker1.split(' - ')[0]
    speaker2_name = speaker2.split(' - ')[0]
    # NOTE(review): the voice handed to edge_tts is whatever follows " - " in
    # the dropdown label (e.g. "English (United States)"); confirm that this
    # is an identifier edge_tts accepts.
    speaker1_voice = speaker1.split(' - ')[1]
    speaker2_voice = speaker2.split(' - ')[1]
    try:
        podcast_dict = json.loads(input_text)
    except json.JSONDecodeError:
        # If JSON parsing fails, strip trailing commas and try once more.
        cleaned_input = re.sub(r',\s*}', '}', input_text)
        cleaned_input = re.sub(r',\s*]', ']', cleaned_input)
        podcast_dict = json.loads(cleaned_input)
    combined = AudioSegment.empty()
    for line in podcast_dict.get("podcast", []):
        speaker = line.get("speaker")
        text = line.get("line") or ""
        if not text.strip():
            # Nothing to synthesize for an empty or missing line.
            continue
        # Any speaker value other than 1 is voiced as the guest.
        voice = speaker1_voice if speaker == 1 else speaker2_voice
        voice_name = speaker1_name if speaker == 1 else speaker2_name
        audio_file = f"{voice_name}_{uuid.uuid4()}.mp3"
        try:
            await edge_tts.Communicate(text, voice).save(audio_file)
            combined += AudioSegment.from_mp3(audio_file)
        finally:
            # Remove the temporary clip even when synthesis or decoding fails.
            if os.path.exists(audio_file):
                os.remove(audio_file)
    output_file = f"combined_{uuid.uuid4()}.mp3"
    combined.export(output_file, format="mp3")
    return output_file
async def process_podcast(input_text, language, speaker1, speaker2, api_key):
    """Generate a podcast script and its audio for the Gradio click handler.

    Args:
        input_text: Topic or source text entered by the user.
        language: Output language label selected in the UI.
        speaker1: Host choice formatted as ``"<name> - <voice>"``.
        speaker2: Guest choice in the same format.
        api_key: API key forwarded to ``generate_response``.

    Returns:
        A ``(podcast_script, audio_file)`` tuple: the script JSON text and the
        filename of the combined MP3.
    """
    # generate_response is synchronous and network-bound; running it in the
    # default executor keeps the event loop free while the request is pending.
    loop = asyncio.get_running_loop()
    podcast_script = await loop.run_in_executor(
        None, generate_response, input_text, language, speaker1, speaker2, api_key
    )
    audio_file = await tts_generate(podcast_script, speaker1, speaker2)
    return podcast_script, audio_file
# Build the Gradio UI: topic input, language and speaker selectors, API key
# field, and the script/audio outputs wired to process_podcast.
with gr.Blocks() as iface:
    gr.Markdown("# 🎙️ Generated Podcast Audio. Deployed by 江信宗")
    input_text = gr.Textbox(label="請輸入 Podcast 話題(建議50~500字之間)")
    # NOTE(review): the original indentation was lost; the three dropdowns are
    # assumed to sit inside this Row because only they pass `scale` — confirm.
    with gr.Row():
        Language = gr.Dropdown(
            choices=["繁體中文", "Auto Detect", "English", "日本語", "한국어"],
            value="繁體中文",
            label="語言",
            interactive=True,
            scale=1
        )
        # Each label has the form "<name> - <description>"; generate_response
        # and tts_generate split these strings on " - ".
        speaker_choices = [
            "Andrew - English (United States)",
            "Ava - English (United States)",
            "Brian - English (United States)",
            "Emma - English (United States)",
            "Florian - German (Germany)",
            "Seraphina - German (Germany)",
            "Remy - French (France)",
            "Vivienne - French (France)"
        ]
        # Host voice selector.
        Speaker_1 = gr.Dropdown(
            choices=speaker_choices,
            value="Andrew - English (United States)",
            label="主持人的語音",
            interactive=True,
            scale=2
        )
        # Guest voice selector.
        Speaker_2 = gr.Dropdown(
            choices=speaker_choices,
            value="Ava - English (United States)",
            label="來賓的語音",
            interactive=True,
            scale=2
        )
    # Masked input for the key; passed to process_podcast as `api_key`.
    api_key = gr.Textbox(label="API Key", type="password", placeholder="API authentication key for large language models")
    podcast_script = gr.Textbox(label="生成的結果")
    audio_output = gr.Audio(label="生成的音頻")
    generate_button = gr.Button("生成")
    # The input order must match process_podcast's parameter order.
    generate_button.click(fn=process_podcast, inputs=[input_text, Language, Speaker_1, Speaker_2, api_key], outputs=[podcast_script, audio_output])
if __name__ == "__main__":
    # When the SPACE_ID environment variable is present, launch with defaults;
    # otherwise request a share link and hide the API page.
    running_on_spaces = "SPACE_ID" in os.environ
    launch_options = {} if running_on_spaces else {"share": True, "show_api": False}
    iface.launch(**launch_options)