Spaces:

Jiangxz01
/

Generated_Podcast_Audio

Running

App Files

Jiangxz01 committed on Sep 26, 2024

Commit

194b951

verified ·

1 Parent(s): 681ba78

Upload app.py

Browse files

Files changed (1) hide show

app.py +35 -36

app.py CHANGED Viewed

@@ -19,23 +19,23 @@ class PodcastGenerator:
     async def generate_script(self, prompt: str, language: str, api_key: str) -> Dict:
         """
-        非同步生成基於給定提示和語言的Podcast劇本。
-        引數：
-            prompt (str): 用於生成Podcast劇本的使用者輸入文字。
-            language (str): Podcast指劇本所需的語言。
-            api_key (str): 用於訪問 Gemini AI 服務的 API 金鑰。
         返回：
-            Dict: 包含以 JSON 格式生成Podcast劇本的字典。
         異常：
-            gr.Error: 如果 API 金鑰或速率限制出現問題。
-        此方法使用 Gemini AI 模型根據使用者的輸入生成Podcast劇本。
-        它處理語言選擇，使用適當的配置設定 AI 模型，並處理生成的響應。
         """
-        # 定義一個示例JSON結構，用於指導AI生成類似格式的Podcast劇本
         example = """
         {
             "topic": "AGI",
@@ -244,7 +244,7 @@ class PodcastGenerator:
         - The podcast should be interesting, lively, and engaging, and hook the listener from the start.
         - The input text might be disorganized or unformatted, originating from sources like PDFs or text files. Ignore any formatting inconsistencies or irrelevant details; your task is to distill the essential points, identify key definitions, and highlight intriguing facts that would be suitable for discussion in a podcast.
         - The script must be in JSON format.
-        Follow this example structure carefully:
         {example}
         """
@@ -301,8 +301,8 @@ class PodcastGenerator:
         """
         非同步生成文字轉語音音訊檔案。
-        引數：
-            text (str): 要轉換爲語音的文字內容。
             speaker (int): 說話者的編號（1 或 2）。
             speaker1 (str): 第一位說話者的語音設定。
             speaker2 (str): 第二位說話者的語音設定。
@@ -310,7 +310,7 @@ class PodcastGenerator:
         返回：
             str: 生成的臨時音訊檔案的檔名。
-        此方法使用 Edge TTS 將文字轉換爲語音，並將結果儲存爲臨時音訊檔案。
         根據指定的說話者編號選擇相應的語音設定。
         """
         # 根據說話者選擇語音
@@ -334,7 +334,7 @@ class PodcastGenerator:
         """
         非同步合併音訊檔案。
-        引數：
             audio_files (List[str]): 包含音訊檔案路徑的列表。
         返回：
@@ -355,26 +355,26 @@ class PodcastGenerator:
     async def generate_podcast(self, input_text: str, language: str, speaker1: str, speaker2: str, api_key: str) -> str:
         """
-        非同步生成Podcast音訊檔案。
-        引數：
-            input_text (str): 用於生成Podcast指令碼的輸入文字。
-            language (str): Podcast使用的語言。
             speaker1 (str): 第一位說話者的語音設定。
             speaker2 (str): 第二位說話者的語音設定。
-            api_key (str): 用於訪問 Gemini AI 服務的 API 金鑰。
         返回：
-            str: 生成的Podcast音訊檔案的檔名。
         此方法執行以下步驟：
-        1. 使用 generate_script 方法生成Podcast劇本。
-        2. 使用 tts_generate 方法爲每個對話行生成音訊檔案。
-        3. 使用 combine_audio_files 方法將所有音訊檔案合併爲一個完整的Podcast。
         整個過程是非同步的，以提高效率。方法還會記錄並顯示每個步驟的執行時間。
         """
-        # 生成Podcast劇本
         gr.Info("Generating podcast script...")
         start_time = time.time()
         podcast_json = await self.generate_script(input_text, language, api_key)
@@ -420,21 +420,21 @@ class TextExtractor:
 async def process_input(input_text: str, input_file, language: str, speaker1: str, speaker2: str, api_key: str = "") -> str:
     """
-    處理輸入並生成Podcast的非同步函式。
-    引數:
-    input_text (str): 使用者輸入的文字內容。
-    input_file: 使用者上傳的檔案（可以是 PDF 或 TXT）。
     language (str): 選擇的語言。
     speaker1 (str): 第一位說話者的語音選擇。
     speaker2 (str): 第二位說話者的語音選擇。
-    api_key (str): 用於生成 AI 的 API 金鑰，預設爲空字串。
     返回:
-    str: 生成的Podcast音訊檔案路徑。
-    此函式協調整個Podcast生成過程，包括文字提取、指令碼生成和音訊合成。
-    它處理不同的輸入型別（文字或檔案），並使用指定的語音和語言設定來建立最終的Podcast。
     """
     # 開始生成Podcast
     gr.Info("Starting podcast generation...")
@@ -462,7 +462,7 @@ async def process_input(input_text: str, input_file, language: str, speaker1: st
     # 如果沒有提供API金鑰，則使用環境變數中的金鑰
     if not api_key:
-        api_key = os.getenv("Your_API_KEY")
     # 建立PodcastGenerator實例並生成Podcast
     podcast_generator = PodcastGenerator()
@@ -524,7 +524,7 @@ iface = gr.Interface(
     outputs=[
         gr.Audio(label="Generated Podcast Audio")
     ],
-    title="🎙️ PodcastGen 🎙️",
     description="Generate a 2-speaker podcast from text input or documents!",
     allow_flagging="never"
 )
@@ -532,4 +532,3 @@ iface = gr.Interface(
 if __name__ == "__main__":
     iface.launch()

     async def generate_script(self, prompt: str, language: str, api_key: str) -> Dict:
         """
+        異步生成基於給定提示和語言的播客腳本。
+        參數：
+            prompt (str): 用於生成播客腳本的用戶輸入文本。
+            language (str): 播客腳本所需的語言。
+            api_key (str): 用於訪問 Gemini AI 服務的 API 密鑰。
         返回：
+            Dict: 包含以 JSON 格式生成的播客腳本的字典。
         異常：
+            gr.Error: 如果 API 密鑰或速率限制出現問題。
+        此方法使用 Gemini AI 模型根據用戶的輸入生成播客腳本。
+        它處理語言選擇，使用適當的配置設置 AI 模型，並處理生成的響應。
         """
+        # 定義一個示例JSON結構，用於指導AI生成類似格式的Podcast指令碼
         example = """
         {
             "topic": "AGI",
         - The podcast should be interesting, lively, and engaging, and hook the listener from the start.
         - The input text might be disorganized or unformatted, originating from sources like PDFs or text files. Ignore any formatting inconsistencies or irrelevant details; your task is to distill the essential points, identify key definitions, and highlight intriguing facts that would be suitable for discussion in a podcast.
         - The script must be in JSON format.
+        Follow this example structure:
         {example}
         """
         """
         非同步生成文字轉語音音訊檔案。
+        參數：
+            text (str): 要轉換為語音的文字內容。
             speaker (int): 說話者的編號（1 或 2）。
             speaker1 (str): 第一位說話者的語音設定。
             speaker2 (str): 第二位說話者的語音設定。
         返回：
             str: 生成的臨時音訊檔案的檔名。
+        此方法使用 Edge TTS 將文字轉換為語音，並將結果保存為臨時音訊檔案。
         根據指定的說話者編號選擇相應的語音設定。
         """
         # 根據說話者選擇語音
         """
         非同步合併音訊檔案。
+        參數：
             audio_files (List[str]): 包含音訊檔案路徑的列表。
         返回：
     async def generate_podcast(self, input_text: str, language: str, speaker1: str, speaker2: str, api_key: str) -> str:
         """
+        非同步生成播客音訊檔案。
+        參數：
+            input_text (str): 用於生成播客腳本的輸入文本。
+            language (str): 播客使用的語言。
             speaker1 (str): 第一位說話者的語音設定。
             speaker2 (str): 第二位說話者的語音設定。
+            api_key (str): 用於訪問 Gemini AI 服務的 API 密鑰。
         返回：
+            str: 生成的播客音訊檔案的檔名。
         此方法執行以下步驟：
+        1. 使用 generate_script 方法生成播客腳本。
+        2. 使用 tts_generate 方法為每個對話行生成音訊檔案。
+        3. 使用 combine_audio_files 方法將所有音訊檔案合併為一個完整的播客。
         整個過程是非同步的，以提高效率。方法還會記錄並顯示每個步驟的執行時間。
         """
+        # 生成Podcast指令碼
         gr.Info("Generating podcast script...")
         start_time = time.time()
         podcast_json = await self.generate_script(input_text, language, api_key)
 async def process_input(input_text: str, input_file, language: str, speaker1: str, speaker2: str, api_key: str = "") -> str:
     """
+    處理輸入並生成播客的非同步函數。
+    參數:
+    input_text (str): 用戶輸入的文本內容。
+    input_file: 用戶上傳的文件（可以是 PDF 或 TXT）。
     language (str): 選擇的語言。
     speaker1 (str): 第一位說話者的語音選擇。
     speaker2 (str): 第二位說話者的語音選擇。
+    api_key (str): 用於生成 AI 的 API 金鑰，預設為空字串。
     返回:
+    str: 生成的播客音頻文件路徑。
+    此函數協調整個播客生成過程，包括文本提取、腳本生成和音頻合成。
+    它處理不同的輸入類型（文本或文件），並使用指定的語音和語言設置來創建最終的播客。
     """
     # 開始生成Podcast
     gr.Info("Starting podcast generation...")
     # 如果沒有提供API金鑰，則使用環境變數中的金鑰
     if not api_key:
+        api_key = os.getenv("GENAI_API_KEY")
     # 建立PodcastGenerator實例並生成Podcast
     podcast_generator = PodcastGenerator()
     outputs=[
         gr.Audio(label="Generated Podcast Audio")
     ],
+    title="PodcastGen 🎙️",
     description="Generate a 2-speaker podcast from text input or documents!",
     allow_flagging="never"
 )
 if __name__ == "__main__":
     iface.launch()