Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from pydub import AudioSegment
|
|
|
|
|
|
|
| 3 |
import json
|
| 4 |
import uuid
|
| 5 |
import io
|
|
@@ -10,12 +12,6 @@ import pypdf
|
|
| 10 |
import os
|
| 11 |
import time
|
| 12 |
from typing import List, Dict, Tuple
|
| 13 |
-
import openai
|
| 14 |
-
import logging
|
| 15 |
-
|
| 16 |
-
# At the beginning of your script, set up logging
|
| 17 |
-
logging.basicConfig(level=logging.INFO)
|
| 18 |
-
logger = logging.getLogger(__name__)
|
| 19 |
|
| 20 |
class PodcastGenerator:
|
| 21 |
def __init__(self):
|
|
@@ -28,7 +24,7 @@ class PodcastGenerator:
|
|
| 28 |
引數:
|
| 29 |
prompt (str): 用於生成Podcast劇本的使用者輸入文字。
|
| 30 |
language (str): Podcast劇本所需的語言。
|
| 31 |
-
api_key (str): 用於訪問
|
| 32 |
|
| 33 |
返回:
|
| 34 |
Dict: 包含以 JSON 格式生成Podcast劇本的字典。
|
|
@@ -36,7 +32,7 @@ class PodcastGenerator:
|
|
| 36 |
異常:
|
| 37 |
gr.Error: 如果 API 金鑰或速率限制出現問題。
|
| 38 |
|
| 39 |
-
此方法使用
|
| 40 |
它處理語言選擇,使用適當的配置設定 AI 模型,並處理生成的響應。
|
| 41 |
"""
|
| 42 |
# 定義一個示例JSON結構,用於指導AI生成類似格式的Podcast劇本
|
|
@@ -255,51 +251,51 @@ class PodcastGenerator:
|
|
| 255 |
# 設定使用者提示,包含使用者輸入的內容
|
| 256 |
user_prompt = f"Please generate a podcast script based on the following user input:\n{prompt}"
|
| 257 |
|
| 258 |
-
#
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 264 |
)
|
| 265 |
|
| 266 |
# 嘗試生成內容
|
| 267 |
try:
|
| 268 |
-
response =
|
| 269 |
-
model='Meta-Llama-3.1-405B-Instruct',
|
| 270 |
-
messages=[
|
| 271 |
-
{"role": "system", "content": system_prompt},
|
| 272 |
-
{"role": "user", "content": user_prompt}
|
| 273 |
-
],
|
| 274 |
-
temperature=1
|
| 275 |
-
)
|
| 276 |
-
logger.info(f"API Response: {response}")
|
| 277 |
-
|
| 278 |
-
if response.choices and len(response.choices) > 0:
|
| 279 |
-
generated_text = response.choices[0].message.content
|
| 280 |
-
else:
|
| 281 |
-
logger.warning("No content generated from the API")
|
| 282 |
-
raise ValueError("No content generated from the API")
|
| 283 |
-
|
| 284 |
except Exception as e:
|
| 285 |
-
logger.error(f"Error generating script: {str(e)}")
|
| 286 |
# 處理可能的錯誤
|
| 287 |
if "API key not valid" in str(e):
|
| 288 |
-
raise gr.Error("Invalid API key. Please provide a valid
|
| 289 |
elif "rate limit" in str(e).lower():
|
| 290 |
-
raise gr.Error("Rate limit exceeded for the API key. Please try again later or provide your own
|
| 291 |
else:
|
| 292 |
-
raise gr.Error(f"Failed to generate podcast script: {
|
| 293 |
|
| 294 |
# 列印生成的Podcast指令碼
|
| 295 |
-
print(f"Generated podcast script:\n{
|
| 296 |
|
| 297 |
-
#
|
| 298 |
-
|
| 299 |
-
return json.loads(generated_text)
|
| 300 |
-
except json.JSONDecodeError:
|
| 301 |
-
print("Warning: Generated text is not valid JSON. Returning raw text.")
|
| 302 |
-
return {"raw_text": generated_text}
|
| 303 |
|
| 304 |
async def tts_generate(self, text: str, speaker: int, speaker1: str, speaker2: str) -> str:
|
| 305 |
"""
|
|
@@ -446,9 +442,6 @@ async def process_input(input_text: str, input_file, language: str, speaker1: st
|
|
| 446 |
|
| 447 |
# 定義語音名稱對映
|
| 448 |
voice_names = {
|
| 449 |
-
"臺女1 - Chinese Taiwanese (Taiwan)": "zh-TW-HsiaoChenNeural",
|
| 450 |
-
"臺女2 - Chinese Taiwanese (Taiwan)": "zh-TW-HsiaoYuNeural",
|
| 451 |
-
"臺男 - Chinese Taiwanese (Taiwan)": "zh-TW-YunJheNeural",
|
| 452 |
"Andrew - English (United States)": "en-US-AndrewMultilingualNeural",
|
| 453 |
"Ava - English (United States)": "en-US-AvaMultilingualNeural",
|
| 454 |
"Brian - English (United States)": "en-US-BrianMultilingualNeural",
|
|
@@ -489,9 +482,10 @@ iface = gr.Interface(
|
|
| 489 |
gr.File(label="Or Upload a PDF or TXT file"),
|
| 490 |
gr.Dropdown(label="Language", choices=[
|
| 491 |
"Auto Detect",
|
| 492 |
-
"
|
| 493 |
"Bahasa Indonesian", "Bangla", "Basque", "Bengali", "Bosnian", "Bulgarian",
|
| 494 |
-
"Burmese", "Catalan", "Chinese Cantonese", "Chinese Mandarin",
|
|
|
|
| 495 |
"Estonian", "Filipino", "Finnish", "French", "Galician", "Georgian",
|
| 496 |
"German", "Greek", "Hebrew", "Hindi", "Hungarian", "Icelandic", "Irish",
|
| 497 |
"Italian", "Japanese", "Javanese", "Kannada", "Kazakh", "Khmer", "Korean",
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from pydub import AudioSegment
|
| 3 |
+
import google.generativeai as genai
|
| 4 |
+
from google.generativeai.types import HarmCategory, HarmBlockThreshold
|
| 5 |
import json
|
| 6 |
import uuid
|
| 7 |
import io
|
|
|
|
| 12 |
import os
|
| 13 |
import time
|
| 14 |
from typing import List, Dict, Tuple
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
class PodcastGenerator:
|
| 17 |
def __init__(self):
|
|
|
|
| 24 |
引數:
|
| 25 |
prompt (str): 用於生成Podcast劇本的使用者輸入文字。
|
| 26 |
language (str): Podcast劇本所需的語言。
|
| 27 |
+
api_key (str): 用於訪問 Gemini AI 服務的 API 金鑰。
|
| 28 |
|
| 29 |
返回:
|
| 30 |
Dict: 包含以 JSON 格式生成Podcast劇本的字典。
|
|
|
|
| 32 |
異常:
|
| 33 |
gr.Error: 如果 API 金鑰或速率限制出現問題。
|
| 34 |
|
| 35 |
+
此方法使用 Gemini AI 模型根據使用者的輸入生成Podcast劇本。
|
| 36 |
它處理語言選擇,使用適當的配置設定 AI 模型,並處理生成的響應。
|
| 37 |
"""
|
| 38 |
# 定義一個示例JSON結構,用於指導AI生成類似格式的Podcast劇本
|
|
|
|
| 251 |
# 設定使用者提示,包含使用者輸入的內容
|
| 252 |
user_prompt = f"Please generate a podcast script based on the following user input:\n{prompt}"
|
| 253 |
|
| 254 |
+
# 設定訊息列表,包含使用者提示
|
| 255 |
+
messages = [
|
| 256 |
+
{"role": "user", "parts": [user_prompt]}
|
| 257 |
+
]
|
| 258 |
+
|
| 259 |
+
# 配置Google Generative AI
|
| 260 |
+
genai.configure(api_key=api_key)
|
| 261 |
+
|
| 262 |
+
# 設定生成配置
|
| 263 |
+
generation_config = {
|
| 264 |
+
"temperature": 1,
|
| 265 |
+
"max_output_tokens": 8192,
|
| 266 |
+
"response_mime_type": "application/json",
|
| 267 |
+
}
|
| 268 |
+
|
| 269 |
+
# 建立生成模型實例
|
| 270 |
+
model = genai.GenerativeModel(
|
| 271 |
+
model_name="gemini-1.5-flash-002",
|
| 272 |
+
generation_config=generation_config,
|
| 273 |
+
safety_settings={
|
| 274 |
+
HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
|
| 275 |
+
HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
|
| 276 |
+
HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
|
| 277 |
+
HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE
|
| 278 |
+
},
|
| 279 |
+
system_instruction=system_prompt
|
| 280 |
)
|
| 281 |
|
| 282 |
# 嘗試生成內容
|
| 283 |
try:
|
| 284 |
+
response = await model.generate_content_async(messages)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
except Exception as e:
|
|
|
|
| 286 |
# 處理可能的錯誤
|
| 287 |
if "API key not valid" in str(e):
|
| 288 |
+
raise gr.Error("Invalid API key. Please provide a valid Gemini API key.")
|
| 289 |
elif "rate limit" in str(e).lower():
|
| 290 |
+
raise gr.Error("Rate limit exceeded for the API key. Please try again later or provide your own Gemini API key.")
|
| 291 |
else:
|
| 292 |
+
raise gr.Error(f"Failed to generate podcast script: {e}")
|
| 293 |
|
| 294 |
# 列印生成的Podcast指令碼
|
| 295 |
+
print(f"Generated podcast script:\n{response.text}")
|
| 296 |
|
| 297 |
+
# 返回解析後的JSON資料
|
| 298 |
+
return json.loads(response.text)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
|
| 300 |
async def tts_generate(self, text: str, speaker: int, speaker1: str, speaker2: str) -> str:
|
| 301 |
"""
|
|
|
|
| 442 |
|
| 443 |
# 定義語音名稱對映
|
| 444 |
voice_names = {
|
|
|
|
|
|
|
|
|
|
| 445 |
"Andrew - English (United States)": "en-US-AndrewMultilingualNeural",
|
| 446 |
"Ava - English (United States)": "en-US-AvaMultilingualNeural",
|
| 447 |
"Brian - English (United States)": "en-US-BrianMultilingualNeural",
|
|
|
|
| 482 |
gr.File(label="Or Upload a PDF or TXT file"),
|
| 483 |
gr.Dropdown(label="Language", choices=[
|
| 484 |
"Auto Detect",
|
| 485 |
+
"Afrikaans", "Albanian", "Amharic", "Arabic", "Armenian", "Azerbaijani",
|
| 486 |
"Bahasa Indonesian", "Bangla", "Basque", "Bengali", "Bosnian", "Bulgarian",
|
| 487 |
+
"Burmese", "Catalan", "Chinese Cantonese", "Chinese Mandarin",
|
| 488 |
+
"Chinese Taiwanese", "Croatian", "Czech", "Danish", "Dutch", "English",
|
| 489 |
"Estonian", "Filipino", "Finnish", "French", "Galician", "Georgian",
|
| 490 |
"German", "Greek", "Hebrew", "Hindi", "Hungarian", "Icelandic", "Irish",
|
| 491 |
"Italian", "Japanese", "Javanese", "Kannada", "Kazakh", "Khmer", "Korean",
|