Spaces:

Jiangxz01
/

Generated_Podcast_Audio

Sleeping

App Files Files

Jiangxz01 committed on Sep 28, 2024

Commit

36dc16f

verified ·

1 Parent(s): 8c30abd

Upload app.py

Browse files

Files changed (1) hide show

app.py +44 -27

app.py CHANGED Viewed

@@ -3,30 +3,33 @@
 import gradio as gr
 import openai
-import os
-import re
 from pydub import AudioSegment
 import uuid
 import edge_tts
 import json
 def create_client(api_key=None):
     if api_key:
         openai.api_key = api_key
     else:
-        openai.api_key = os.getenv("YOUR_API_TOKEN")
     return openai.OpenAI(api_key=openai.api_key, base_url="https://api.sambanova.ai/v1")
 def generate_response(input_text, language, speaker1, speaker2, api_key):
     speaker1_name = speaker1.split(' - ')[0]
     speaker2_name = speaker2.split(' - ')[0]
     if language == "Auto Detect":
         language_instruction = "- The podcast MUST be in the same language as the user input."
     else:
         language_instruction = f"- The podcast Must reply to me in {language} language."
     example = """
     {
-    "topic": "AGI",
     "podcast": [
             {
                 "speaker": 1,
@@ -53,23 +56,23 @@ def generate_response(input_text, language, speaker1, speaker2, api_key):
     }
     """
-    system_prompt = f"""你的任務是將提供的輸入文字轉換為一個引人入勝、訊息豐富且專業的Podcast對話。輸入文字可能會比較混亂或結構不完整，因為它可能來自不同來源，如PDF檔案或文字檔等。不要擔心格式問題或任何不相關的訊息；你的目標是提取可以在Podcast中討論的關鍵點、識別重要定義，並突出有趣的事實。
 以下是你將要處理的輸入文字：
 <input_text>
 {{input_text}}
 </input_text>
-首先，仔細閱讀輸入文字，找出主要話題、關鍵點，以及任何有趣的事實或軼事。思考如何將這些訊息以一種有趣且吸引人的方式呈現出來，適合高質量的音訊Podcast。
 <scratchpad>
-頭腦風暴一些創造性的方法來討論你在輸入文字中識別出的主要話題、關鍵點及任何有趣的事實或軼事。可以考慮使用類比、講故事技巧或假設情境來讓內容對聽眾更加貼近和有趣。
-請記住，你的Podcast應當易於普通聽眾理解，所以避免使用過多的專業術語或假設聽眾對該話題已有瞭解。如有必要，請思考如何用簡單的術語簡要解釋任何複雜的概念。
-利用你的想像力填補輸入文字中的任何空白，或者想出一些值得探討與發人深省的問題，以供Podcast討論。目標是創造一個訊息豐富且娛樂性強的對話，因此可以在你的方法上大膽自由發揮創意。
-將你的頭腦風暴想法和Podcast對話的粗略大綱寫在這裡。確保記錄下你希望在結尾重申的主要見解和要點。
 </scratchpad>
-現在你已經進行了頭腦風暴並建立了一個粗略的大綱，是時候撰寫實際的Podcast對話了。目標是主持人({speaker1_name})與嘉賓({speaker2_name})之間自然、對話式的交流。融入你在頭腦風暴中得出的最佳想法，並確保將任何複雜話題以易於理解的方式解釋清楚。
 {language_instruction}
 - The podcast should have 2 speakers: {speaker1_name} and {speaker2_name}.
 - The podcast should be long.
@@ -79,9 +82,9 @@ Follow this JSON example structure, MUST be in {language} language:
 {example}
 <podcast_dialogue>
-根據你在頭腦風暴階段提出的關鍵點和創造性想法，撰寫你的引人入勝、訊息豐富的Podcast對話。採用對話式的語氣，並包括任何必要的上下文或解釋，使內容對一般聽眾而言容易理解。使用主持人名字 {speaker1_name} 和嘉賓名字 {speaker2_name}，以營造更吸引人和身臨其境的聆聽體驗。不要包括像[主持人]或[嘉賓]這樣的括號預留位置。設計你的輸出內容以供直接朗讀——它將直接轉換為音訊。
-確保對話儘可能詳細、完整，同時保持在主題之內並維持吸引人的流暢性。目標是使用你的全部輸出容量，建立儘可能長的Podcast節目，同時以有趣的方式傳遞輸入文字中的關鍵訊息。
-在對話結束時，讓主持人和嘉賓自然總結他們討論中的主要見解和要點。這應當是對話的隨機部分，以自然隨意而非明顯的總結——目的是在結束前最後一次以自然流暢的方式強化核心思想。最終以感謝詞結束。
 </podcast_dialogue>
 """
     client = create_client(api_key)
@@ -102,28 +105,30 @@ Follow this JSON example structure, MUST be in {language} language:
             except json.JSONDecodeError:
                 podcast_json = re.sub(r',\s*}', '}', podcast_json)
                 podcast_json = re.sub(r',\s*]', ']', podcast_json)
             return podcast_json
         else:
-            raise gr.Error("生成 Podcast 劇本失敗！！請稍後再試。")
     except Exception as e:
         if "API key not valid" in str(e):
             raise gr.Error("無效的 API 金鑰！！請提供有效的 API 金鑰。")
         elif "rate limit" in str(e).lower():
             raise gr.Error("API 金鑰使用額度已超過限制！！請稍後再試或使用其他 API 金鑰。")
         else:
-            raise gr.Error(f"生成 Podcast 劇本失敗！！請稍後再試。")
 async def tts_generate(input_text, speaker1, speaker2):
     voice_names = {
         "家豪 - 中文 (Male)": "zh-TW-YunJheNeural",
         "淑芬 - 中文 (Female)": "zh-TW-HsiaoChenNeural",
         "子晴 - 中文 (Female)": "zh-TW-HsiaoYuNeural",
         "品妍 - 中文 (Female)": "zh-CN-XiaoxiaoNeural",
         "美玲 - 中文 (Female)": "zh-CN-XiaoyiNeural",
         "建宏 - 中文 (Male)": "zh-CN-YunjianNeural",
-        "品睿 - 中文 (Male)": "zh-CN-YunxiNeural",
         "宥廷 - 中文 (Male)": "zh-CN-YunxiaNeural",
-        "志明 - 中文 (Male)": "zh-CN-YunyangNeural",
         "雨霏 - 中文 (Female)": "zh-CN-liaoning-XiaobeiNeural",
         "Andrew - English (Male)": "en-US-AndrewMultilingualNeural",
         "Ava - English (Female)": "en-US-AvaMultilingualNeural",
@@ -137,6 +142,8 @@ async def tts_generate(input_text, speaker1, speaker2):
     speaker1_voice = voice_names[speaker1]
     speaker2_voice = voice_names[speaker2]
     try:
         podcast_dict = json.loads(input_text)
@@ -176,9 +183,13 @@ async def tts_generate(input_text, speaker1, speaker2):
     output_file = f"Jiangxz_{uuid.uuid4()}.mp3"
     combined.export(output_file, format="mp3")
     return output_file
 async def process_podcast(input_text, language, speaker1, speaker2, api_key):
     podcast_script = generate_response(input_text, language, speaker1, speaker2, api_key)
     speaker1_name = speaker1.split(' - ')[0]
     speaker2_name = speaker2.split(' - ')[0]
@@ -196,6 +207,9 @@ async def process_podcast(input_text, language, speaker1, speaker2, api_key):
         podcast_text = "Error: Unable to parse the podcast script."
     audio_file = await tts_generate(podcast_script, speaker1, speaker2)
     return podcast_text, audio_file
 custom_css = """
@@ -287,20 +301,22 @@ body {
 with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as iface:
     gr.Markdown("""
-    # 🎙️ 聲音行銷企劃 - 財資歐北共 Podcast 🎙️
-    > ### **※ 自動生成 Podcast 節目及音檔，系統布署：江信宗，LLM：Llama-3.1-405B-Instruct。**
     """, elem_classes="center-aligned")
     input_text = gr.Textbox(
         label="請輸入 Podcast 話題（建議50至1000字）",
-        placeholder="輸入 Podcast 話題內容越詳細劇本越佳 ......",
         elem_classes="input-background",
         max_lines=20
     )
     def check_input_length(text):
-        if len(text) > 4096:
-            return gr.Warning("輸入內容已超過 max_tokens，請縮短話題內容。")
     input_text.change(fn=check_input_length, inputs=[input_text])
@@ -318,7 +334,7 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as iface:
             "家豪 - 中文 (Male)",
             "淑芬 - 中文 (Female)",
             "子晴 - 中文 (Female)",
-            "品睿 - 中文 (Male)",
             "品妍 - 中文 (Female)",
             "志明 - 中文 (Male)",
             "美玲 - 中文 (Female)",
@@ -337,7 +353,7 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as iface:
         Speaker_1 = gr.Dropdown(
             choices=speaker_choices,
-            value="品睿 - 中文 (Male)",
             label="播客＃1語音",
             interactive=True,
             scale=2,
@@ -360,11 +376,12 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as iface:
         api_key = gr.Textbox(label="請輸入您的 API Key", type="password", placeholder="API authentication key for large language models", scale=1, elem_classes="api-background")
     audio_output = gr.Audio(label="Generated Podcast Audio", elem_classes="audio-background")
-    podcast_script = gr.Textbox(label="Generated Podcast 文案", elem_classes="script-background")
     generate_button.click(fn=process_podcast, inputs=[input_text, Language, Speaker_1, Speaker_2, api_key], outputs=[podcast_script, audio_output])
 if __name__ == "__main__":
     if "SPACE_ID" in os.environ:
         iface.launch()
     else:
-        iface.launch(share=True, show_api=False)

 import gradio as gr
 import openai
 from pydub import AudioSegment
 import uuid
 import edge_tts
 import json
+import os
+import re
+import time
 def create_client(api_key=None):
     if api_key:
         openai.api_key = api_key
     else:
+        openai.api_key = os.getenv("YOUR_API_KEY")
     return openai.OpenAI(api_key=openai.api_key, base_url="https://api.sambanova.ai/v1")
 def generate_response(input_text, language, speaker1, speaker2, api_key):
     speaker1_name = speaker1.split(' - ')[0]
     speaker2_name = speaker2.split(' - ')[0]
+    gr.Info("正在生成 Podcast 文稿中，請稍待片刻......")
+    start_time = time.time()
     if language == "Auto Detect":
         language_instruction = "- The podcast MUST be in the same language as the user input."
     else:
         language_instruction = f"- The podcast Must reply to me in {language} language."
     example = """
     {
+    "topic": "AIF",
     "podcast": [
             {
                 "speaker": 1,
     }
     """
+    system_prompt = f"""你的任務是將提供的輸入文字轉換為一個引人入勝、訊息豐富且專業的播客對話。輸入文字可能會比較混亂或結構不完整，因為它可能來自不同來源，如PDF檔案或網頁文字等。不要擔心格式問題或任何不相關的訊息；你的目標是提取可以在播客中討論的關鍵點、識別重要定義，並突顯有趣的事實。
 以下是你將要處理的輸入文字：
 <input_text>
 {{input_text}}
 </input_text>
+首先，仔細閱讀輸入文字，並積極找出主要話題、關鍵點，以及任何有趣的事實或軼事。Chain-of-Thought思考如何將這些訊息以一種有趣且吸引人的方式呈現出來，適合高品質的播客文稿。
 <scratchpad>
+頭腦風暴一些創造性的方法來討論你在輸入文字中識別出的主要話題、關鍵點及任何有趣的事實或軼事。可以考慮使用類比、例子、講故事技巧或假設情境來讓內容更能與聽眾產生共鳴並吸引他們。
+請記住，你的播客應當易於普通聽眾理解，所以避免使用過多的專業術語或假設聽眾對該話題已有瞭解。如有必要，請思考如何用簡單的術語簡要解釋任何複雜的概念。
+發揮你的想像力填補輸入文字中的任何空白，或頭腦風暴提出一些值得探討與發人深省的問題，以供播客討論。目標是創造一個訊息豐富且娛樂性強的對話，因此可以在你的方法上大膽盡情自由發揮創意。
+將你的頭腦風暴想法和播客對話的粗略大綱寫在這裡，務必讓它有趣且吸引人。確保記錄下你希望在結尾重申的主要見解和要點。
 </scratchpad>
+現在你已經進行頭腦風暴並建立粗略大綱，是時候撰寫實際的播客對話了。目標是主持人({speaker1_name})與嘉賓({speaker2_name})之間自然、對話式的交流，融入你在頭腦風暴中得出的最佳想法，並花費精力確保將任何複雜話題以易於理解的方式解釋清楚。
 {language_instruction}
 - The podcast should have 2 speakers: {speaker1_name} and {speaker2_name}.
 - The podcast should be long.
 {example}
 <podcast_dialogue>
+根據你在頭腦風暴階段提出的關鍵點和創造性想法，撰寫一段引人入勝且訊息豐富的播客對話。採用對話式的語氣，並包括任何必要的上下文或解釋，使內容對一般聽眾容易理解。使用主持人名字 {speaker1_name} 和嘉賓名字 {speaker2_name}，為聽眾營造更吸引人和身臨其境的聆聽體驗。不要包括像[主持人]或[嘉賓]這樣的括號預留位置。設計你的輸出內容必須適合直接朗讀，因為它將直接轉換為音訊。
+確保對話儘可能詳細且完整，同時保持在主題之內並維持吸引人的流暢性。目標是使用你的全部輸出容量，建立儘可能長的播客節目，同時以娛樂性的方式傳達輸入文字中的關鍵訊息。
+在對話結束時，讓主持人和嘉賓自然總結他們討論中的主要見解和要點，這應當是對話的隨機部分，以自然隨意而非明顯的總結 - 目的是在結束前最後一次以自然流暢的方式強化核心思想。最終以感謝詞結束。
 </podcast_dialogue>
 """
     client = create_client(api_key)
             except json.JSONDecodeError:
                 podcast_json = re.sub(r',\s*}', '}', podcast_json)
                 podcast_json = re.sub(r',\s*]', ']', podcast_json)
+            end_time = time.time()
+            gr.Info(f"已成功生成 Podcast 文稿，執行時間： {(end_time - start_time):.2f} 秒。")
             return podcast_json
         else:
+            raise gr.Error("生成 Podcast 文稿失敗！！請稍後再試。")
     except Exception as e:
         if "API key not valid" in str(e):
             raise gr.Error("無效的 API 金鑰！！請提供有效的 API 金鑰。")
         elif "rate limit" in str(e).lower():
             raise gr.Error("API 金鑰使用額度已超過限制！！請稍後再試或使用其他 API 金鑰。")
         else:
+            raise gr.Error(f"生成 Podcast 文稿失敗！！請稍後再試。")
 async def tts_generate(input_text, speaker1, speaker2):
     voice_names = {
         "家豪 - 中文 (Male)": "zh-TW-YunJheNeural",
         "淑芬 - 中文 (Female)": "zh-TW-HsiaoChenNeural",
         "子晴 - 中文 (Female)": "zh-TW-HsiaoYuNeural",
+        "景睿 - 中文 (Male)": "zh-CN-YunxiNeural",
         "品妍 - 中文 (Female)": "zh-CN-XiaoxiaoNeural",
+        "志明 - 中文 (Male)": "zh-CN-YunyangNeural",
         "美玲 - 中文 (Female)": "zh-CN-XiaoyiNeural",
         "建宏 - 中文 (Male)": "zh-CN-YunjianNeural",
         "宥廷 - 中文 (Male)": "zh-CN-YunxiaNeural",
         "雨霏 - 中文 (Female)": "zh-CN-liaoning-XiaobeiNeural",
         "Andrew - English (Male)": "en-US-AndrewMultilingualNeural",
         "Ava - English (Female)": "en-US-AvaMultilingualNeural",
     speaker1_voice = voice_names[speaker1]
     speaker2_voice = voice_names[speaker2]
+    gr.Info("正在生成 Podcast 音檔中，請稍待片刻......")
+    start_time = time.time()
     try:
         podcast_dict = json.loads(input_text)
     output_file = f"Jiangxz_{uuid.uuid4()}.mp3"
     combined.export(output_file, format="mp3")
+    end_time = time.time()
+    gr.Info(f"已成功生成 Podcast 音檔，執行時間： {(end_time - start_time):.2f} 秒。")
     return output_file
 async def process_podcast(input_text, language, speaker1, speaker2, api_key):
+    gr.Info("開始生成 Podcast 節目及音檔，請稍待片刻......")
+    start_time = time.time()
     podcast_script = generate_response(input_text, language, speaker1, speaker2, api_key)
     speaker1_name = speaker1.split(' - ')[0]
     speaker2_name = speaker2.split(' - ')[0]
         podcast_text = "Error: Unable to parse the podcast script."
     audio_file = await tts_generate(podcast_script, speaker1, speaker2)
+    end_time = time.time()
+    gr.Info(f"已成功完成 Podcast 節目及音檔，總執行時間： {(end_time - start_time):.2f} 秒。")
+    gr.Info("請待本訊息自動消失後即可播放或下載 Podcast 音檔！！")
     return podcast_text, audio_file
 custom_css = """
 with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as iface:
     gr.Markdown("""
+    # 🎙️ 聲音經濟 - 財資歐北共 Podcast 🎙️
+    > ### **※ 玩轉聲音魅力，開拓更多可能性，自動生成 Podcast 節目及音檔，系統布署：江信宗，LLM：Llama-3.1-405B-Instruct。**
     """, elem_classes="center-aligned")
     input_text = gr.Textbox(
         label="請輸入 Podcast 話題（建議50至1000字）",
+        placeholder="輸入 Podcast 話題內容，受限 LLM Context Length，建議1000字以內 ......",
         elem_classes="input-background",
         max_lines=20
     )
     def check_input_length(text):
+        if 0 < len(text) < 4:
+            return gr.Warning("輸入內容過短，請提供明確的話題內容。")
+        elif len(text) > 4096:
+            return gr.Warning("輸入內容已超過 max tokens，請縮短話題內容。")
     input_text.change(fn=check_input_length, inputs=[input_text])
             "家豪 - 中文 (Male)",
             "淑芬 - 中文 (Female)",
             "子晴 - 中文 (Female)",
+            "景睿 - 中文 (Male)",
             "品妍 - 中文 (Female)",
             "志明 - 中文 (Male)",
             "美玲 - 中文 (Female)",
         Speaker_1 = gr.Dropdown(
             choices=speaker_choices,
+            value="景睿 - 中文 (Male)",
             label="播客＃1語音",
             interactive=True,
             scale=2,
         api_key = gr.Textbox(label="請輸入您的 API Key", type="password", placeholder="API authentication key for large language models", scale=1, elem_classes="api-background")
     audio_output = gr.Audio(label="Generated Podcast Audio", elem_classes="audio-background")
+    podcast_script = gr.Textbox(label="Generated Podcast 文稿", elem_classes="script-background")
     generate_button.click(fn=process_podcast, inputs=[input_text, Language, Speaker_1, Speaker_2, api_key], outputs=[podcast_script, audio_output])
 if __name__ == "__main__":
     if "SPACE_ID" in os.environ:
         iface.launch()
     else:
+        iface.launch(share=True, show_api=False)