Spaces:
Running
Running
Upload app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
from pydub import AudioSegment
|
|
|
|
|
3 |
import json
|
4 |
import uuid
|
5 |
import io
|
@@ -10,12 +12,6 @@ import pypdf
|
|
10 |
import os
|
11 |
import time
|
12 |
from typing import List, Dict, Tuple
|
13 |
-
import openai
|
14 |
-
import logging
|
15 |
-
|
16 |
-
# At the beginning of your script, set up logging
|
17 |
-
logging.basicConfig(level=logging.INFO)
|
18 |
-
logger = logging.getLogger(__name__)
|
19 |
|
20 |
class PodcastGenerator:
|
21 |
def __init__(self):
|
@@ -28,7 +24,7 @@ class PodcastGenerator:
|
|
28 |
引數:
|
29 |
prompt (str): 用於生成Podcast劇本的使用者輸入文字。
|
30 |
language (str): Podcast劇本所需的語言。
|
31 |
-
api_key (str): 用於訪問
|
32 |
|
33 |
返回:
|
34 |
Dict: 包含以 JSON 格式生成Podcast劇本的字典。
|
@@ -36,7 +32,7 @@ class PodcastGenerator:
|
|
36 |
異常:
|
37 |
gr.Error: 如果 API 金鑰或速率限制出現問題。
|
38 |
|
39 |
-
此方法使用
|
40 |
它處理語言選擇,使用適當的配置設定 AI 模型,並處理生成的響應。
|
41 |
"""
|
42 |
# 定義一個示例JSON結構,用於指導AI生成類似格式的Podcast劇本
|
@@ -255,51 +251,51 @@ class PodcastGenerator:
|
|
255 |
# 設定使用者提示,包含使用者輸入的內容
|
256 |
user_prompt = f"Please generate a podcast script based on the following user input:\n{prompt}"
|
257 |
|
258 |
-
#
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
264 |
)
|
265 |
|
266 |
# 嘗試生成內容
|
267 |
try:
|
268 |
-
response =
|
269 |
-
model='Meta-Llama-3.1-405B-Instruct',
|
270 |
-
messages=[
|
271 |
-
{"role": "system", "content": system_prompt},
|
272 |
-
{"role": "user", "content": user_prompt}
|
273 |
-
],
|
274 |
-
temperature=1
|
275 |
-
)
|
276 |
-
logger.info(f"API Response: {response}")
|
277 |
-
|
278 |
-
if response.choices and len(response.choices) > 0:
|
279 |
-
generated_text = response.choices[0].message.content
|
280 |
-
else:
|
281 |
-
logger.warning("No content generated from the API")
|
282 |
-
raise ValueError("No content generated from the API")
|
283 |
-
|
284 |
except Exception as e:
|
285 |
-
logger.error(f"Error generating script: {str(e)}")
|
286 |
# 處理可能的錯誤
|
287 |
if "API key not valid" in str(e):
|
288 |
-
raise gr.Error("Invalid API key. Please provide a valid
|
289 |
elif "rate limit" in str(e).lower():
|
290 |
-
raise gr.Error("Rate limit exceeded for the API key. Please try again later or provide your own
|
291 |
else:
|
292 |
-
raise gr.Error(f"Failed to generate podcast script: {
|
293 |
|
294 |
# 列印生成的Podcast指令碼
|
295 |
-
print(f"Generated podcast script:\n{
|
296 |
|
297 |
-
#
|
298 |
-
|
299 |
-
return json.loads(generated_text)
|
300 |
-
except json.JSONDecodeError:
|
301 |
-
print("Warning: Generated text is not valid JSON. Returning raw text.")
|
302 |
-
return {"raw_text": generated_text}
|
303 |
|
304 |
async def tts_generate(self, text: str, speaker: int, speaker1: str, speaker2: str) -> str:
|
305 |
"""
|
@@ -446,9 +442,6 @@ async def process_input(input_text: str, input_file, language: str, speaker1: st
|
|
446 |
|
447 |
# 定義語音名稱對映
|
448 |
voice_names = {
|
449 |
-
"臺女1 - Chinese Taiwanese (Taiwan)": "zh-TW-HsiaoChenNeural",
|
450 |
-
"臺女2 - Chinese Taiwanese (Taiwan)": "zh-TW-HsiaoYuNeural",
|
451 |
-
"臺男 - Chinese Taiwanese (Taiwan)": "zh-TW-YunJheNeural",
|
452 |
"Andrew - English (United States)": "en-US-AndrewMultilingualNeural",
|
453 |
"Ava - English (United States)": "en-US-AvaMultilingualNeural",
|
454 |
"Brian - English (United States)": "en-US-BrianMultilingualNeural",
|
@@ -489,9 +482,10 @@ iface = gr.Interface(
|
|
489 |
gr.File(label="Or Upload a PDF or TXT file"),
|
490 |
gr.Dropdown(label="Language", choices=[
|
491 |
"Auto Detect",
|
492 |
-
"
|
493 |
"Bahasa Indonesian", "Bangla", "Basque", "Bengali", "Bosnian", "Bulgarian",
|
494 |
-
"Burmese", "Catalan", "Chinese Cantonese", "Chinese Mandarin",
|
|
|
495 |
"Estonian", "Filipino", "Finnish", "French", "Galician", "Georgian",
|
496 |
"German", "Greek", "Hebrew", "Hindi", "Hungarian", "Icelandic", "Irish",
|
497 |
"Italian", "Japanese", "Javanese", "Kannada", "Kazakh", "Khmer", "Korean",
|
|
|
1 |
import gradio as gr
|
2 |
from pydub import AudioSegment
|
3 |
+
import google.generativeai as genai
|
4 |
+
from google.generativeai.types import HarmCategory, HarmBlockThreshold
|
5 |
import json
|
6 |
import uuid
|
7 |
import io
|
|
|
12 |
import os
|
13 |
import time
|
14 |
from typing import List, Dict, Tuple
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
class PodcastGenerator:
|
17 |
def __init__(self):
|
|
|
24 |
引數:
|
25 |
prompt (str): 用於生成Podcast劇本的使用者輸入文字。
|
26 |
language (str): Podcast劇本所需的語言。
|
27 |
+
api_key (str): 用於訪問 Gemini AI 服務的 API 金鑰。
|
28 |
|
29 |
返回:
|
30 |
Dict: 包含以 JSON 格式生成Podcast劇本的字典。
|
|
|
32 |
異常:
|
33 |
gr.Error: 如果 API 金鑰或速率限制出現問題。
|
34 |
|
35 |
+
此方法使用 Gemini AI 模型根據使用者的輸入生成Podcast劇本。
|
36 |
它處理語言選擇,使用適當的配置設定 AI 模型,並處理生成的響應。
|
37 |
"""
|
38 |
# 定義一個示例JSON結構,用於指導AI生成類似格式的Podcast劇本
|
|
|
251 |
# 設定使用者提示,包含使用者輸入的內容
|
252 |
user_prompt = f"Please generate a podcast script based on the following user input:\n{prompt}"
|
253 |
|
254 |
+
# 設定訊息列表,包含使用者提示
|
255 |
+
messages = [
|
256 |
+
{"role": "user", "parts": [user_prompt]}
|
257 |
+
]
|
258 |
+
|
259 |
+
# 配置Google Generative AI
|
260 |
+
genai.configure(api_key=api_key)
|
261 |
+
|
262 |
+
# 設定生成配置
|
263 |
+
generation_config = {
|
264 |
+
"temperature": 1,
|
265 |
+
"max_output_tokens": 8192,
|
266 |
+
"response_mime_type": "application/json",
|
267 |
+
}
|
268 |
+
|
269 |
+
# 建立生成模型實例
|
270 |
+
model = genai.GenerativeModel(
|
271 |
+
model_name="gemini-1.5-flash-002",
|
272 |
+
generation_config=generation_config,
|
273 |
+
safety_settings={
|
274 |
+
HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
|
275 |
+
HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
|
276 |
+
HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
|
277 |
+
HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE
|
278 |
+
},
|
279 |
+
system_instruction=system_prompt
|
280 |
)
|
281 |
|
282 |
# 嘗試生成內容
|
283 |
try:
|
284 |
+
response = await model.generate_content_async(messages)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
285 |
except Exception as e:
|
|
|
286 |
# 處理可能的錯誤
|
287 |
if "API key not valid" in str(e):
|
288 |
+
raise gr.Error("Invalid API key. Please provide a valid Gemini API key.")
|
289 |
elif "rate limit" in str(e).lower():
|
290 |
+
raise gr.Error("Rate limit exceeded for the API key. Please try again later or provide your own Gemini API key.")
|
291 |
else:
|
292 |
+
raise gr.Error(f"Failed to generate podcast script: {e}")
|
293 |
|
294 |
# 列印生成的Podcast指令碼
|
295 |
+
print(f"Generated podcast script:\n{response.text}")
|
296 |
|
297 |
+
# 返回解析後的JSON資料
|
298 |
+
return json.loads(response.text)
|
|
|
|
|
|
|
|
|
299 |
|
300 |
async def tts_generate(self, text: str, speaker: int, speaker1: str, speaker2: str) -> str:
|
301 |
"""
|
|
|
442 |
|
443 |
# 定義語音名稱對映
|
444 |
voice_names = {
|
|
|
|
|
|
|
445 |
"Andrew - English (United States)": "en-US-AndrewMultilingualNeural",
|
446 |
"Ava - English (United States)": "en-US-AvaMultilingualNeural",
|
447 |
"Brian - English (United States)": "en-US-BrianMultilingualNeural",
|
|
|
482 |
gr.File(label="Or Upload a PDF or TXT file"),
|
483 |
gr.Dropdown(label="Language", choices=[
|
484 |
"Auto Detect",
|
485 |
+
"Afrikaans", "Albanian", "Amharic", "Arabic", "Armenian", "Azerbaijani",
|
486 |
"Bahasa Indonesian", "Bangla", "Basque", "Bengali", "Bosnian", "Bulgarian",
|
487 |
+
"Burmese", "Catalan", "Chinese Cantonese", "Chinese Mandarin",
|
488 |
+
"Chinese Taiwanese", "Croatian", "Czech", "Danish", "Dutch", "English",
|
489 |
"Estonian", "Filipino", "Finnish", "French", "Galician", "Georgian",
|
490 |
"German", "Greek", "Hebrew", "Hindi", "Hungarian", "Icelandic", "Irish",
|
491 |
"Italian", "Japanese", "Javanese", "Kannada", "Kazakh", "Khmer", "Korean",
|