gitdeem committed · Commit a9837a2 · verified · 1 Parent(s): 3d96646

Upload 34 files
.env ADDED
@@ -0,0 +1,9 @@
+ APP_NAME=StoryFlicks
+ DEBUG=true
+ VERSION=1.0.0
+
+ text_provider="openai"
+ image_provider="openai"
+
+ text_llm_model=comic-c
+ image_llm_model=comic
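
These keys are consumed by the `Settings` class in app/config.py later in this commit; pydantic-settings matches environment keys case-insensitively by default, so `APP_NAME` fills `app_name`. A minimal sketch of how the values surface at runtime (the printed values assume this exact .env is present):

    # sketch: the .env keys above surface through app/config.py
    from app.config import get_settings

    settings = get_settings()
    print(settings.app_name)        # StoryFlicks (from APP_NAME)
    print(settings.text_provider)   # openai
    print(settings.text_llm_model)  # comic-c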
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ resource/fonts/MicrosoftYaHeiBold.ttc filter=lfs diff=lfs merge=lfs -text
+ resource/fonts/MicrosoftYaHeiNormal.ttc filter=lfs diff=lfs merge=lfs -text
+ resource/fonts/STHeitiLight.ttc filter=lfs diff=lfs merge=lfs -text
+ resource/fonts/STHeitiMedium.ttc filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,21 @@
+ # Use the Python 3.10 slim image
+ FROM python:3.10-slim
+
+ # Set the working directory
+ WORKDIR /app
+
+ # Copy the project's requirements.txt
+ COPY requirements.txt /app/
+
+ # Configure pip to use the Aliyun mirror and install dependencies
+ RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ \
+     && pip install --no-cache-dir -r requirements.txt
+
+ # Copy the entire backend project into the container
+ COPY . /app/
+
+ # Set environment variables
+ ENV PYTHONUNBUFFERED=1
+
+ # Default command: start the Uvicorn server
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
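
For local use the image would typically be built from the repository root and run with the service port published, e.g. `docker build -t storyflicks .` followed by `docker run -p 8000:8000 storyflicks` (the `storyflicks` tag is illustrative; port 8000 matches the Uvicorn command above and the `app_port` set in README.md below).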
README.md CHANGED
@@ -1,10 +1,11 @@
  ---
- title: Mp4
- emoji: 🌖
- colorFrom: yellow
+ title: mp4
+ emoji: 🌍
+ colorFrom: blue
  colorTo: pink
  sdk: docker
  pinned: false
+ license: mit
+ app_port: 8000
  ---
 
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app/api/__init__.py ADDED
@@ -0,0 +1 @@
+ from app.api.router import router as api_router
app/api/health.py ADDED
@@ -0,0 +1,23 @@
+ from fastapi import APIRouter, status
+ from app.schemas.health import HealthResponse
+ from app.services.health import health_service
+
+ router = APIRouter()
+
+
+ @router.get(
+     "/",
+     response_model=HealthResponse,
+     status_code=status.HTTP_200_OK,
+     summary="Health Check",
+     description="Check the health status of the application"
+ )
+ async def health_check():
+     """
+     Health-check endpoint.
+
+     Returns:
+     - status: overall status (healthy/degraded)
+     - version: application version
+     """
+     return await health_service.check_health()
app/api/llm.py ADDED
@@ -0,0 +1,64 @@
+ from fastapi import APIRouter, HTTPException
+ from app.services.llm import llm_service
+ from app.schemas.llm import (
+     StoryGenerationRequest,
+     StoryGenerationResponse,
+     ImageGenerationRequest,
+     ImageGenerationResponse,
+ )
+ from loguru import logger
+ from enum import Enum
+ from typing import List, Dict
+
+ router = APIRouter()
+
+ class LLMType(str, Enum):
+     TEXT = "text"
+     IMAGE = "image"
+     VIDEO = "video"
+
+
+ @router.post("/story", response_model=StoryGenerationResponse)
+ async def generate_story(request: StoryGenerationRequest) -> StoryGenerationResponse:
+     """Generate a story."""
+     try:
+         # generate_story is a coroutine, so it must be awaited
+         segments = await llm_service.generate_story(request)
+         return StoryGenerationResponse(segments=segments)
+     except Exception as e:
+         logger.error(f"Failed to generate story: {e}")
+         raise HTTPException(status_code=500, detail=str(e))
+
+
+ @router.post("/image", response_model=ImageGenerationResponse)
+ async def generate_image(request: ImageGenerationRequest) -> ImageGenerationResponse:
+     """Generate an image."""
+     try:
+         image_url = llm_service.generate_image(
+             prompt=request.prompt,
+             image_llm_provider=request.image_llm_provider,
+             image_llm_model=request.image_llm_model,
+             resolution=request.resolution,
+         )
+         return ImageGenerationResponse(image_url=image_url)
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+
+ @router.post("/story-with-images", response_model=StoryGenerationResponse)
+ async def generate_story_with_images(request: StoryGenerationRequest) -> StoryGenerationResponse:
+     """Generate a story together with illustrations."""
+     try:
+         # The service method takes the whole request object, not individual fields
+         segments = await llm_service.generate_story_with_images(request=request)
+         return StoryGenerationResponse(segments=segments)
+     except Exception as e:
+         logger.error(f"Failed to generate story with images: {e}")
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @router.get("/providers", response_model=Dict[str, List[str]])
+ async def get_llm_providers():
+     """
+     Get the list of available LLM providers.
+     """
+     return llm_service.get_llm_providers()
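
As wired up in app/api/router.py just below, these routes are served under `/api/llm`. A minimal client-side sketch against a locally running instance (the host and port are assumptions taken from the Dockerfile's Uvicorn command):

    # sketch: exercise the /api/llm/story endpoint from a client
    import requests

    resp = requests.post(
        "http://127.0.0.1:8000/api/llm/story",  # assumed local deployment
        json={"story_prompt": "一只勇敢的小猫", "segments": 3, "language": "zh-CN"},
    )
    resp.raise_for_status()
    for seg in resp.json()["segments"]:
        print(seg["text"], "->", seg["image_prompt"])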
app/api/router.py ADDED
@@ -0,0 +1,7 @@
+ from fastapi import APIRouter
+ from app.api import voice, video, llm
+
+ router = APIRouter(prefix="/api")
+ router.include_router(voice.router, prefix="/voice", tags=["voice"])
+ router.include_router(video.router, prefix="/video", tags=["video"])
+ router.include_router(llm.router, prefix="/llm", tags=["llm"])
app/api/stories.py ADDED
@@ -0,0 +1,57 @@
+ from fastapi import APIRouter, HTTPException, Query
+ from typing import List
+ from ..schemas.story import Story, StoryCreate, StoryUpdate
+ from ..services.story import story_service
+
+ router = APIRouter()
+
+
+ @router.get("/", response_model=List[Story])
+ async def list_stories(
+     skip: int = Query(0, ge=0),
+     limit: int = Query(10, ge=1, le=100)
+ ):
+     """
+     List stories.
+     """
+     return story_service.get_stories(skip=skip, limit=limit)
+
+
+ @router.post("/", response_model=Story)
+ async def create_story(story: StoryCreate):
+     """
+     Create a new story.
+     """
+     return story_service.create_story(story)
+
+
+ @router.get("/{story_id}", response_model=Story)
+ async def get_story(story_id: str):
+     """
+     Get the details of a specific story.
+     """
+     story = story_service.get_story(story_id)
+     if story is None:
+         raise HTTPException(status_code=404, detail="Story not found")
+     return story
+
+
+ @router.put("/{story_id}", response_model=Story)
+ async def update_story(story_id: str, story: StoryUpdate):
+     """
+     Update a story.
+     """
+     updated_story = story_service.update_story(story_id, story)
+     if updated_story is None:
+         raise HTTPException(status_code=404, detail="Story not found")
+     return updated_story
+
+
+ @router.delete("/{story_id}")
+ async def delete_story(story_id: str):
+     """
+     Delete a story.
+     """
+     if not story_service.delete_story(story_id):
+         raise HTTPException(status_code=404, detail="Story not found")
+     return {"message": "Story deleted successfully"}
app/api/video.py ADDED
@@ -0,0 +1,32 @@
+ from fastapi import APIRouter, HTTPException, Query
+ from loguru import logger
+ from app.services.video import generate_video, create_video_with_scenes, generate_voice
+ from app.schemas.video import VideoGenerateRequest, VideoGenerateResponse, StoryScene
+ import os
+ import json
+ from app.utils.utils import extract_id
+
+ router = APIRouter()
+
+ @router.post("/generate")
+ async def generate_video_endpoint(
+     request: VideoGenerateRequest
+ ):
+     """Generate a video."""
+     try:
+         video_file = await generate_video(request)
+         task_id = extract_id(video_file)
+         # Build the URL for the generated video
+         video_url = "http://127.0.0.1:8000/tasks/" + task_id + "/video.mp4"
+         return VideoGenerateResponse(
+             success=True,
+             data={"video_url": video_url}
+         )
+     except Exception as e:
+         logger.error(f"Failed to generate video: {str(e)}")
+         return VideoGenerateResponse(
+             success=False,
+             message=str(e)
+         )
app/api/voice.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, Request, Query
2
+ from fastapi.responses import JSONResponse
3
+ from app.schemas.voice import VoiceGenerationRequest, VoiceGenerationResponse
4
+ from app.schemas.video import VideoGenerateResponse, StoryScene
5
+ from app.services.voice import generate_voice, get_all_azure_voices
6
+ from app.services.video import create_video_with_scenes
7
+ import os
8
+ import json
9
+ from typing import List, Optional
10
+ from pydantic import BaseModel
11
+
12
+ router = APIRouter()
13
+
14
+
15
+ class VoiceRequest(BaseModel):
16
+ area: Optional[List[str]] = None
17
+
18
+
19
+ @router.post("/test_subtitle")
20
+ async def test_subtitle_endpoint(task_id: str = Query(..., description="任务ID,对应 storage/tasks/ 下的目录名")) -> VideoGenerateResponse:
21
+ """测试字幕添加功能"""
22
+ try:
23
+ # 构建任务目录路径
24
+ task_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "storage", "tasks", task_id)
25
+ if not os.path.exists(task_dir):
26
+ raise HTTPException(status_code=404, detail=f"Task directory not found: {task_id}")
27
+
28
+ # 读取 story.json
29
+ story_file = os.path.join(task_dir, "story.json")
30
+ if not os.path.exists(story_file):
31
+ raise HTTPException(status_code=404, detail=f"Story file not found: {story_file}")
32
+
33
+ with open(story_file, 'r', encoding='utf-8') as f:
34
+ scenes_data = json.load(f)
35
+
36
+ # 转换为 StoryScene 对象
37
+ scenes = [StoryScene(**scene) for scene in scenes_data]
38
+
39
+ # 生成语音和字幕
40
+ voice_name = "zh-CN-XiaoxiaoNeural"
41
+ voice_rate = 0
42
+ for i, scene in enumerate(scenes, 1):
43
+ audio_file = os.path.join(task_dir, f"{i}.mp3")
44
+ subtitle_file = os.path.join(task_dir, f"{i}.srt")
45
+ await generate_voice(scene.text, voice_name, voice_rate, audio_file, subtitle_file)
46
+
47
+ # 创建视频
48
+ video_file = await create_video_with_scenes(task_dir, scenes, voice_name, voice_rate)
49
+
50
+ video_url = "/" + video_file.split("/tasks/")[-1]
51
+ return VideoGenerateResponse(video_url=video_url, scenes=scenes)
52
+ except Exception as e:
53
+ logger.error(f"Failed to test subtitle: {str(e)}")
54
+ raise HTTPException(status_code=500, detail=str(e))
55
+
56
+ @router.post("/generate", response_model=VoiceGenerationResponse)
57
+ async def generate_voice_api(request: Request) -> VoiceGenerationResponse:
58
+ """
59
+ 生成语音和字幕文件
60
+
61
+ Args:
62
+ request: 包含文本内容和语音配置的请求
63
+
64
+ Returns:
65
+ 生成的音频和字幕文件的URL
66
+ """
67
+ try:
68
+ # 手动解析请求体
69
+ body = await request.json()
70
+ req = VoiceGenerationRequest(**body)
71
+
72
+ audio_file, subtitle_file = await generate_voice(
73
+ text=req.text,
74
+ voice_name=req.voice_name,
75
+ voice_rate=req.voice_rate
76
+ )
77
+
78
+ if not audio_file or not subtitle_file:
79
+ raise HTTPException(status_code=500, detail="Failed to generate voice")
80
+
81
+ # 将文件路径转换为URL路径
82
+ audio_url = f"/tasks/{os.path.basename(audio_file)}"
83
+ subtitle_url = f"/tasks/{os.path.basename(subtitle_file)}"
84
+
85
+ return VoiceGenerationResponse(
86
+ audio_url=audio_url,
87
+ subtitle_url=subtitle_url
88
+ )
89
+ except Exception as e:
90
+ raise HTTPException(status_code=500, detail=str(e))
91
+
92
+
93
+ @router.post("/voices")
94
+ async def list_voices(request: VoiceRequest) -> dict:
95
+ """
96
+ 获取所有支持的语音列表
97
+ """
98
+ return {"voices": get_all_azure_voices(request.area)}
app/config.py ADDED
@@ -0,0 +1,38 @@
+ from pydantic import BaseModel
+ from pydantic_settings import BaseSettings, SettingsConfigDict
+ from functools import lru_cache
+ import os
+
+ class Settings(BaseSettings):
+     app_name: str = "Story Flicks"
+     debug: bool = True
+     version: str = "1.0.0"
+
+     # provider configuration
+     text_provider: str = "openai"
+     image_provider: str = "openai"
+
+     # base url configuration
+     openai_base_url: str = "https://api.openai.com/v1"
+     aliyun_base_url: str = "https://dashscope.aliyuncs.com/compatible-mode/v1"
+     deepseek_base_url: str = "https://api.deepseek.com/v1"
+     ollama_base_url: str = "http://localhost:11434/v1"
+     siliconflow_base_url: str = "https://api.siliconflow.cn/v1"
+
+     # api key
+     openai_api_key: str = ""
+     aliyun_api_key: str = ""
+     deepseek_api_key: str = ""
+     ollama_api_key: str = ""
+     siliconflow_api_key: str = ""
+
+     text_llm_model: str = "gpt-4o"
+     image_llm_model: str = "dall-e-3"
+
+     class Config:
+         env_file = ".env"
+         # env_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), ".env")
+
+ @lru_cache()
+ def get_settings() -> Settings:
+     return Settings()
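
Since `get_settings` is memoized with `lru_cache`, the `.env` file is parsed once per process and every caller shares a single `Settings` instance; a quick sketch of that behavior:

    # sketch: get_settings returns one cached Settings instance per process
    from app.config import get_settings

    a = get_settings()
    b = get_settings()
    assert a is b        # the lru_cache makes this a singleton
    print(a.app_name)    # "StoryFlicks" when the .env above is present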
app/exceptions.py ADDED
@@ -0,0 +1,5 @@
+ class LLMResponseValidationError(Exception):
+     """Raised when an LLM response fails validation."""
+     def __init__(self, message: str):
+         self.message = message
+         super().__init__(self.message)
app/models/__init__.py ADDED
@@ -0,0 +1 @@
+
app/models/const.py ADDED
@@ -0,0 +1,60 @@
+ from enum import Enum
+
+ class StoryType(str, Enum):
+     """Story types"""
+     custom = "custom"          # custom story
+     bedtime = "bedtime"        # bedtime story
+     fairy_tale = "fairy_tale"  # fairy tale
+     adventure = "adventure"    # adventure story
+     science = "science"        # popular-science story
+     moral = "moral"            # fable / moral story
+
+ class ImageStyle(str, Enum):
+     """Image styles"""
+     realistic = "realistic"        # realistic style
+     cartoon = "cartoon"            # cartoon style
+     watercolor = "watercolor"      # watercolor style
+     oil_painting = "oil_painting"  # oil-painting style
+
+ class Language(str, Enum):
+     """Supported languages"""
+     CHINESE_CN = "zh-CN"  # Chinese (Simplified)
+     CHINESE_TW = "zh-TW"  # Chinese (Traditional)
+     ENGLISH_US = "en-US"  # English (US)
+     JAPANESE = "ja-JP"    # Japanese
+     KOREAN = "ko-KR"      # Korean
+
+ # Display names for each language
+ LANGUAGE_NAMES = {
+     Language.CHINESE_CN: "中文(简体)",
+     Language.CHINESE_TW: "中文(繁体)",
+     Language.ENGLISH_US: "English",
+     Language.JAPANESE: "日本語",
+     Language.KOREAN: "한국어"
+ }
+
+ PUNCTUATIONS = [
+     "?",
+     ",",
+     ".",
+     "、",
+     ";",
+     ":",
+     "!",
+     "…",
+     "?",
+     ",",
+     "。",
+     "、",
+     ";",
+     ":",
+     "!",
+     "...",
+ ]
+
+ TASK_STATE_FAILED = -1
+ TASK_STATE_COMPLETE = 1
+ TASK_STATE_PROCESSING = 4
+
+ FILE_TYPE_VIDEOS = ["mp4", "mov", "mkv", "webm"]
+ FILE_TYPE_IMAGES = ["jpg", "jpeg", "png", "bmp"]
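
Because the enums subclass `str`, raw locale codes from a request body coerce directly into `Language`, and `LANGUAGE_NAMES` maps them back to display names; a small sketch:

    # sketch: str-valued enums accept the raw wire value and compare as strings
    from app.models.const import Language, LANGUAGE_NAMES

    lang = Language("zh-CN")
    print(lang == "zh-CN")       # True
    print(LANGUAGE_NAMES[lang])  # 中文(简体)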
app/schemas/__init__.py ADDED
@@ -0,0 +1 @@
+
app/schemas/health.py ADDED
@@ -0,0 +1,7 @@
+ from pydantic import BaseModel, Field
+ from typing import Dict, Any
+
+
+ class HealthResponse(BaseModel):
+     status: str = Field(..., description="Overall health status of the application")
+     version: str = Field(..., description="Application version")
app/schemas/llm.py ADDED
@@ -0,0 +1,35 @@
+ from pydantic import BaseModel, Field
+ from typing import List, Dict, Any
+ from app.models.const import Language
+ from typing import Optional
+
+ class StoryGenerationRequest(BaseModel):
+     resolution: Optional[str] = Field(default="1024*1024", description="Image resolution")
+     text_llm_provider: Optional[str] = Field(default=None, description="Text LLM provider")
+     text_llm_model: Optional[str] = Field(default=None, description="Text LLM model")
+     image_llm_provider: Optional[str] = Field(default=None, description="Image LLM provider")
+     image_llm_model: Optional[str] = Field(default=None, description="Image LLM model")
+     segments: int = Field(..., ge=1, le=10, description="Number of story segments to generate")
+     story_prompt: str = Field(..., min_length=1, max_length=4000, description="Theme or topic of the story")
+     language: Language = Field(default=Language.CHINESE_CN, description="Story language")
+
+
+ class StorySegment(BaseModel):
+     text: str = Field(..., description="Story text")
+     image_prompt: str = Field(..., description="Image generation prompt")
+     url: Optional[str] = Field(default=None, description="Generated image URL")
+
+
+ class StoryGenerationResponse(BaseModel):
+     segments: List[StorySegment] = Field(..., description="Generated story segments")
+
+
+ class ImageGenerationRequest(BaseModel):
+     prompt: str = Field(..., min_length=1, max_length=4000, description="Description of the image to generate")
+     image_llm_provider: Optional[str] = Field(default=None, description="Image LLM provider")
+     image_llm_model: Optional[str] = Field(default=None, description="Image LLM model")
+     resolution: Optional[str] = Field(default="1024*1024", description="Image resolution")
+
+
+ class ImageGenerationResponse(BaseModel):
+     image_url: str = Field(..., description="Generated image URL")
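
The `ge`/`le` bounds on `segments` are enforced when the model is constructed, so out-of-range values never reach the service layer; a sketch:

    # sketch: pydantic enforces the 1..10 segment bound at parse time
    from pydantic import ValidationError
    from app.schemas.llm import StoryGenerationRequest

    req = StoryGenerationRequest(story_prompt="A brave little cat", segments=3)
    print(req.resolution)  # "1024*1024" default

    try:
        StoryGenerationRequest(story_prompt="x", segments=11)
    except ValidationError as e:
        print("rejected:", e.error_count(), "error(s)")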
app/schemas/story.py ADDED
@@ -0,0 +1,22 @@
+ from pydantic import BaseModel
+ from typing import List, Optional
+
+
+ class StoryBase(BaseModel):
+     title: str
+     description: Optional[str] = None
+
+
+ class StoryCreate(StoryBase):
+     pass
+
+
+ class StoryUpdate(StoryBase):
+     title: Optional[str] = None
+
+
+ class Story(StoryBase):
+     id: str
+
+     class Config:
+         from_attributes = True
app/schemas/video.py ADDED
@@ -0,0 +1,318 @@
+ import warnings
+ from enum import Enum
+ from typing import Any, List, Optional, Union, Dict
+
+ import pydantic
+ from pydantic import BaseModel, Field
+ from app.models.const import Language
+
+ # Ignore a specific Pydantic warning
+ warnings.filterwarnings(
+     "ignore",
+     category=UserWarning,
+     message="Field name.*shadows an attribute in parent.*",
+ )
+
+
+ class VideoConcatMode(str, Enum):
+     random = "random"
+     sequential = "sequential"
+
+
+ class VideoAspect(str, Enum):
+     landscape = "16:9"
+     portrait = "9:16"
+     square = "1:1"
+
+     def to_resolution(self):
+         if self == VideoAspect.landscape.value:
+             return 1920, 1080
+         elif self == VideoAspect.portrait.value:
+             return 1080, 1920
+         elif self == VideoAspect.square.value:
+             return 1080, 1080
+         return 1080, 1920
+
+
+ class _Config:
+     arbitrary_types_allowed = True
+
+
+ @pydantic.dataclasses.dataclass(config=_Config)
+ class MaterialInfo:
+     provider: str = "pexels"
+     url: str = ""
+     duration: int = 0
+
+ class VideoParams(BaseModel):
+     """
+     {
+       "video_subject": "",
+       "video_aspect": "横屏 16:9(西瓜视频)",
+       "voice_name": "女生-晓晓",
+       "bgm_name": "random",
+       "font_name": "STHeitiMedium 黑体-中",
+       "text_color": "#FFFFFF",
+       "font_size": 60,
+       "stroke_color": "#000000",
+       "stroke_width": 1.5
+     }
+     """
+
+     video_subject: str
+     video_script: str = ""  # Script used to generate the video
+     video_terms: Optional[str | list] = None  # Keywords used to generate the video
+     video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
+     video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
+     video_clip_duration: Optional[int] = 5
+     video_count: Optional[int] = 1
+
+     video_source: Optional[str] = "pexels"
+     video_materials: Optional[List[MaterialInfo]] = None  # Materials used to generate the video
+
+     video_language: Optional[str] = ""  # auto detect
+
+     voice_name: Optional[str] = ""
+     voice_volume: Optional[float] = 1.0
+     voice_rate: Optional[float] = 1.0
+     bgm_type: Optional[str] = "random"
+     bgm_file: Optional[str] = ""
+     bgm_volume: Optional[float] = 0.2
+
+     subtitle_enabled: Optional[bool] = True
+     subtitle_position: Optional[str] = "bottom"  # top, bottom, center
+     custom_position: float = 70.0
+     font_name: Optional[str] = "STHeitiMedium.ttc"
+     text_fore_color: Optional[str] = "#FFFFFF"
+     text_background_color: Union[bool, str] = True
+
+     font_size: int = 60
+     stroke_color: Optional[str] = "#000000"
+     stroke_width: float = 1.5
+     n_threads: Optional[int] = 2
+     paragraph_number: Optional[int] = 1
+
+
+ class SubtitleRequest(BaseModel):
+     video_script: str
+     video_language: Optional[str] = ""
+     voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
+     voice_volume: Optional[float] = 1.0
+     voice_rate: Optional[float] = 1.2
+     bgm_type: Optional[str] = "random"
+     bgm_file: Optional[str] = ""
+     bgm_volume: Optional[float] = 0.2
+     subtitle_position: Optional[str] = "bottom"
+     font_name: Optional[str] = "STHeitiMedium.ttc"
+     text_fore_color: Optional[str] = "#FFFFFF"
+     text_background_color: Union[bool, str] = True
+     font_size: int = 60
+     stroke_color: Optional[str] = "#000000"
+     stroke_width: float = 1.5
+     video_source: Optional[str] = "local"
+     subtitle_enabled: Optional[str] = "true"
+
+
+ class AudioRequest(BaseModel):
+     video_script: str
+     video_language: Optional[str] = ""
+     voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
+     voice_volume: Optional[float] = 1.0
+     voice_rate: Optional[float] = 1.2
+     bgm_type: Optional[str] = "random"
+     bgm_file: Optional[str] = ""
+     bgm_volume: Optional[float] = 0.2
+     video_source: Optional[str] = "local"
+
+
+ class VideoScriptParams:
+     """
+     {
+       "video_subject": "春天的花海",
+       "video_language": "",
+       "paragraph_number": 1
+     }
+     """
+
+     video_subject: Optional[str] = "春天的花海"
+     video_language: Optional[str] = ""
+     paragraph_number: Optional[int] = 1
+
+
+ class VideoTermsParams:
+     """
+     {
+       "video_subject": "",
+       "video_script": "",
+       "amount": 5
+     }
+     """
+
+     video_subject: Optional[str] = "春天的花海"
+     video_script: Optional[str] = (
+         "春天的花海,如诗如画般展现在眼前。万物复苏的季节里,大地披上了一袭绚丽多彩的盛装。金黄的迎春、粉嫩的樱花、洁白的梨花、艳丽的郁金香……"
+     )
+     amount: Optional[int] = 5
+
+
+ class BaseResponse(BaseModel):
+     status: int = 200
+     message: Optional[str] = "success"
+     data: Any = None
+
+
+ class TaskVideoRequest(VideoParams, BaseModel):
+     pass
+
+
+ class TaskQueryRequest(BaseModel):
+     pass
+
+
+ class VideoScriptRequest(VideoScriptParams, BaseModel):
+     pass
+
+
+ class VideoTermsRequest(VideoTermsParams, BaseModel):
+     pass
+
+ class TaskResponse(BaseResponse):
+     class TaskResponseData(BaseModel):
+         task_id: str
+
+     data: TaskResponseData
+
+     class Config:
+         json_schema_extra = {
+             "example": {
+                 "status": 200,
+                 "message": "success",
+                 "data": {"task_id": "6c85c8cc-a77a-42b9-bc30-947815aa0558"},
+             },
+         }
+
+
+ class TaskQueryResponse(BaseResponse):
+     class Config:
+         json_schema_extra = {
+             "example": {
+                 "status": 200,
+                 "message": "success",
+                 "data": {
+                     "state": 1,
+                     "progress": 100,
+                     "videos": [
+                         "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/final-1.mp4"
+                     ],
+                     "combined_videos": [
+                         "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/combined-1.mp4"
+                     ],
+                 },
+             },
+         }
+
+
+ class TaskDeletionResponse(BaseResponse):
+     class Config:
+         json_schema_extra = {
+             "example": {
+                 "status": 200,
+                 "message": "success",
+                 "data": {
+                     "state": 1,
+                     "progress": 100,
+                     "videos": [
+                         "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/final-1.mp4"
+                     ],
+                     "combined_videos": [
+                         "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/combined-1.mp4"
+                     ],
+                 },
+             },
+         }
+
+
+ class VideoScriptResponse(BaseResponse):
+     class Config:
+         json_schema_extra = {
+             "example": {
+                 "status": 200,
+                 "message": "success",
+                 "data": {
+                     "video_script": "春天的花海,是大自然的一幅美丽画卷。在这个季节里,大地复苏,万物生长,花朵争相绽放,形成了一片五彩斑斓的花海..."
+                 },
+             },
+         }
+
+
+ class VideoTermsResponse(BaseResponse):
+     class Config:
+         json_schema_extra = {
+             "example": {
+                 "status": 200,
+                 "message": "success",
+                 "data": {"video_terms": ["sky", "tree"]},
+             },
+         }
+
+
+ class BgmRetrieveResponse(BaseResponse):
+     class Config:
+         json_schema_extra = {
+             "example": {
+                 "status": 200,
+                 "message": "success",
+                 "data": {
+                     "files": [
+                         {
+                             "name": "output013.mp3",
+                             "size": 1891269,
+                             "file": "/MoneyPrinterTurbo/resource/songs/output013.mp3",
+                         }
+                     ]
+                 },
+             },
+         }
+
+
+ class BgmUploadResponse(BaseResponse):
+     class Config:
+         json_schema_extra = {
+             "example": {
+                 "status": 200,
+                 "message": "success",
+                 "data": {"file": "/MoneyPrinterTurbo/resource/songs/example.mp3"},
+             },
+         }
+
+
+ from app.models.const import StoryType, ImageStyle
+
+ class StoryScene(BaseModel):
+     """Story scene"""
+     text: str = Field(description="Scene text")
+     image_prompt: str = Field(description="Image generation prompt")
+     url: Optional[str] = Field(default=None, description="Generated image URL")
+
+ class VideoGenerateRequest(BaseModel):
+     """Video generation request"""
+     text_llm_provider: Optional[str] = Field(default=None, description="Text LLM provider")
+     image_llm_provider: Optional[str] = Field(default=None, description="Image LLM provider")
+     text_llm_model: Optional[str] = Field(default=None, description="Text LLM model")
+     image_llm_model: Optional[str] = Field(default=None, description="Image LLM model")
+     test_mode: bool = Field(default=False, description="Whether to run in test mode")
+     task_id: Optional[str] = Field(default=None, description="Task ID")
+     segments: int = Field(default=3, ge=1, le=10, description="Number of segments")
+     language: Language = Field(default=Language.CHINESE_CN, description="Story language")
+     story_prompt: Optional[str] = Field(default=None, description="Story prompt")
+     image_style: ImageStyle = Field(default=ImageStyle.realistic, description="Image style")
+     voice_name: str = Field(default="zh-CN-XiaoxiaoNeural", description="Voice name")
+     voice_rate: float = Field(default=1.0, description="Voice rate")
+     resolution: Optional[str] = Field(default="1024*1024", description="Resolution")
+
+
+ class VideoGenerateResponse(BaseModel):
+     """Video generation response"""
+     success: bool
+     data: Optional[Dict[str, Any]] = None
+     message: Optional[str] = None
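
A sketch of the request/response pair used by the `/api/video/generate` endpoint; every field of `VideoGenerateRequest` except the defaults shown is optional, so a prompt alone is enough:

    # sketch: minimal video-generation request and its wire shape
    from app.schemas.video import VideoGenerateRequest, VideoGenerateResponse

    req = VideoGenerateRequest(story_prompt="月球旅行")
    print(req.segments, req.voice_name)  # 3 zh-CN-XiaoxiaoNeural

    ok = VideoGenerateResponse(success=True, data={"video_url": "/tasks/1/video.mp4"})
    print(ok.model_dump())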
app/schemas/voice.py ADDED
@@ -0,0 +1,43 @@
+ from pydantic import BaseModel, Field
+
+
+ class VoiceGenerationRequest(BaseModel):
+     text: str = Field(..., min_length=1, max_length=5000, description="Text content to convert to speech")
+     voice_name: str = Field(
+         default="zh-CN-XiaoxiaoNeural",
+         description="Voice name, e.g. zh-CN-XiaoxiaoNeural, zh-CN-YunxiNeural"
+     )
+     voice_rate: float = Field(
+         default=0,
+         ge=-1,
+         le=1,
+         description="Speech rate adjustment, from -1.0 to 1.0; 0 means normal speed"
+     )
+
+     model_config = {
+         "json_schema_extra": {
+             "examples": [
+                 {
+                     "text": "你好,这是一个测试文本,用于生成语音和字幕文件。",
+                     "voice_name": "zh-CN-XiaoxiaoNeural",
+                     "voice_rate": 0
+                 }
+             ]
+         }
+     }
+
+
+ class VoiceGenerationResponse(BaseModel):
+     audio_url: str = Field(..., description="URL of the generated audio file")
+     subtitle_url: str = Field(..., description="URL of the generated subtitle file")
+
+     model_config = {
+         "json_schema_extra": {
+             "examples": [
+                 {
+                     "audio_url": "/tasks/audio_1234567890_abcd1234.mp3",
+                     "subtitle_url": "/tasks/subtitle_1234567890_abcd1234.srt"
+                 }
+             ]
+         }
+     }
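
`voice_rate` is bounded to [-1, 1] at validation time, so malformed rates are rejected before any TTS call; a sketch:

    # sketch: voice_rate outside [-1, 1] is rejected by validation
    from pydantic import ValidationError
    from app.schemas.voice import VoiceGenerationRequest

    req = VoiceGenerationRequest(text="你好")
    print(req.voice_name, req.voice_rate)  # zh-CN-XiaoxiaoNeural 0.0

    try:
        VoiceGenerationRequest(text="hi", voice_rate=2.0)
    except ValidationError:
        print("voice_rate must be between -1.0 and 1.0")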
app/services/__init__.py ADDED
@@ -0,0 +1 @@
+
app/services/health.py ADDED
@@ -0,0 +1,19 @@
+ from app.config import get_settings
+
+ settings = get_settings()
+
+
+ class HealthService:
+     async def check_health(self) -> dict:
+         """
+         Check the health status of the individual services.
+         """
+
+         return {
+             "status": "healthy",
+             # Settings defines the lowercase field `version`; `settings.VERSION` would raise AttributeError
+             "version": settings.version,
+         }
+
+
+ # Create the service instance
+ health_service = HealthService()
app/services/llm.py ADDED
@@ -0,0 +1,332 @@
+ from openai import OpenAI
+ from app.config import get_settings
+ from loguru import logger
+ from typing import List, Dict, Any
+ import json
+ from http import HTTPStatus
+ from pathlib import PurePosixPath
+ import requests
+ from urllib.parse import urlparse, unquote
+ import random
+
+ from app.models.const import LANGUAGE_NAMES, Language
+ from app.exceptions import LLMResponseValidationError
+ import dashscope
+
+ from dashscope import ImageSynthesis
+ from app.schemas.llm import (
+     StoryGenerationRequest,
+ )
+ settings = get_settings()
+
+
+ # Initialize every provider client to None so that a missing API key fails
+ # clearly at call time instead of raising NameError
+ openai_client = None
+ deepseek_client = None
+ ollama_client = None
+ siliconflow_client = None
+ if settings.openai_api_key:
+     openai_client = OpenAI(api_key=settings.openai_api_key, base_url=settings.openai_base_url or "https://api.openai.com/v1")
+ aliyun_text_client = None
+ if settings.aliyun_api_key:
+     dashscope.api_key = settings.aliyun_api_key
+     aliyun_text_client = OpenAI(base_url=settings.aliyun_base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1", api_key=settings.aliyun_api_key)
+ if settings.deepseek_api_key:
+     deepseek_client = OpenAI(api_key=settings.deepseek_api_key, base_url=settings.deepseek_base_url or "https://api.deepseek.com/v1")
+ if settings.ollama_api_key:
+     ollama_client = OpenAI(api_key=settings.ollama_api_key, base_url=settings.ollama_base_url or "http://localhost:11434/v1")
+ if settings.siliconflow_api_key:
+     siliconflow_client = OpenAI(api_key=settings.siliconflow_api_key, base_url=settings.siliconflow_base_url or "https://api.siliconflow.cn/v1")
+
+ class LLMService:
+     def __init__(self):
+         self.openai_client = openai_client
+         self.aliyun_text_client = aliyun_text_client
+         self.text_llm_model = settings.text_llm_model
+         self.image_llm_model = settings.image_llm_model
+
+     async def generate_story(self, request: StoryGenerationRequest) -> List[Dict[str, Any]]:
+         """Generate story scenes.
+
+         Args:
+             request: story generation request (prompt, language, segment count)
+
+         Returns:
+             List[Dict[str, Any]]: list of story scenes
+         """
+
+         messages = [
+             {"role": "system", "content": "你是一个专业的故事创作者,善于创作引人入胜的故事。请只返回JSON格式的内容。"},
+             {"role": "user", "content": await self._get_story_prompt(request.story_prompt, request.language, request.segments)}
+         ]
+         logger.info(f"prompt messages: {json.dumps(messages, indent=4, ensure_ascii=False)}")
+         response = await self._generate_response(text_llm_provider=request.text_llm_provider or None, text_llm_model=request.text_llm_model or None, messages=messages, response_format="json_object")
+         response = response["list"]
+         response = self.normalize_keys(response)
+
+         logger.info(f"Generated story: {json.dumps(response, indent=4, ensure_ascii=False)}")
+         # Validate the response format
+         self._validate_story_response(response)
+
+         return response
+
+     def normalize_keys(self, data):
+         """
+         Aliyun and OpenAI models return differently shaped results; reconcile them
+         by renaming any key other than `text` to `image_prompt`:
+         - for a dict, rename the single non-`text` key to `image_prompt`
+         - for a list, process each item recursively
+         """
+         if isinstance(data, dict):
+             if "text" in data:
+                 # Find the keys other than `text`
+                 other_keys = [key for key in data.keys() if key != "text"]
+                 # Only handle the case of exactly one non-`text` key
+                 if len(other_keys) == 1:
+                     data["image_prompt"] = data.pop(other_keys[0])
+                 elif len(other_keys) > 1:
+                     raise ValueError(f"Unexpected extra keys: {other_keys}. Only one non-'text' key is allowed.")
+             return data
+         elif isinstance(data, list):
+             # Recurse into each item of the list
+             return [self.normalize_keys(item) for item in data]
+         else:
+             raise TypeError("Input must be a dict or list of dicts")
+
+     def generate_image(self, *, prompt: str, image_llm_provider: str = None, image_llm_model: str = None, resolution: str = "1024x1024") -> str:
+         # return "https://dashscope-result-bj.oss-cn-beijing.aliyuncs.com/1d/56/20250118/3c4cc727/4fc622b5-54a6-484c-bf1f-f1cfb66ace2d-1.png?Expires=1737290655&OSSAccessKeyId=LTAI5tQZd8AEcZX6KZV4G8qL&Signature=W8D4CN3uonQ2pL1e9xGMWufz33E%3D"
+         """Generate an image.
+
+         Args:
+             prompt (str): image description
+             resolution (str): image resolution, defaults to 1024x1024
+
+         Returns:
+             str: image URL
+         """
+
+         image_llm_provider = image_llm_provider or settings.image_provider
+         image_llm_model = image_llm_model or settings.image_llm_model
+
+         try:
+             # Prepend a safety preamble to the prompt
+             safe_prompt = f"Create a safe, family-friendly illustration. {prompt} The image should be appropriate for all ages, non-violent, and non-controversial."
+
+             if image_llm_provider == "aliyun":
+                 rsp = ImageSynthesis.call(model=image_llm_model,
+                                           prompt=prompt,
+                                           size=resolution)
+                 if rsp.status_code == HTTPStatus.OK:
+                     # print("aliyun image response", rsp.output)
+                     for result in rsp.output.results:
+                         return result.url
+                 else:
+                     error_message = f'Failed, status_code: {rsp.status_code}, code: {rsp.code}, message: {rsp.message}'
+                     logger.error(error_message)
+                     raise Exception(error_message)
+             elif image_llm_provider == "openai":
+                 if resolution is not None:
+                     resolution = resolution.replace("*", "x")
+                 response = self.openai_client.images.generate(
+                     model=image_llm_model,
+                     prompt=safe_prompt,
+                     size=resolution,
+                     quality="standard",
+                     n=1
+                 )
+                 logger.info(f"image generate res: {response.data[0].url}")
+                 return response.data[0].url
+             elif image_llm_provider == "siliconflow":
+                 if resolution is not None:
+                     resolution = resolution.replace("*", "x")
+                 payload = {
+                     "model": image_llm_model,
+                     "prompt": safe_prompt,
+                     "seed": random.randint(1000000, 4999999999),
+                     "image_size": resolution,
+                     "guidance_scale": 7.5,
+                     "batch_size": 1,
+                 }
+                 headers = {
+                     "Authorization": "Bearer " + settings.siliconflow_api_key,
+                     "Content-Type": "application/json"
+                 }
+                 response = requests.request("POST", "https://api.siliconflow.cn/v1/images/generations", json=payload, headers=headers)
+                 if response.text is not None:
+                     response = json.loads(response.text)
+                     return response["images"][0]["url"]
+                 else:
+                     raise Exception(response.text)
+         except Exception as e:
+             logger.error(f"Failed to generate image: {e}")
+             return ""
+
+     async def generate_story_with_images(self, request: StoryGenerationRequest) -> List[Dict[str, Any]]:
+         """Generate a story and its illustrations.
+
+         Args:
+             request: story generation request
+
+         Returns:
+             List[Dict[str, Any]]: story scenes, each with text, image prompt, and image URL
+         """
+         # Generate the story first
+         story_segments = await self.generate_story(
+             request,
+         )
+
+         # Generate an image for each scene
+         for segment in story_segments:
+             try:
+                 image_url = self.generate_image(prompt=segment["image_prompt"], resolution=request.resolution, image_llm_provider=request.image_llm_provider, image_llm_model=request.image_llm_model)
+                 segment["url"] = image_url
+             except Exception as e:
+                 logger.error(f"Failed to generate image for segment: {e}")
+                 segment["url"] = None
+
+         return story_segments
+
+     def get_llm_providers(self) -> Dict[str, List[str]]:
+         imgLLMList = []
+         textLLMList = []
+         if settings.openai_api_key:
+             textLLMList.append("openai")
+             imgLLMList.append("openai")
+         if settings.aliyun_api_key:
+             textLLMList.append("aliyun")
+             imgLLMList.append("aliyun")
+         if settings.deepseek_api_key:
+             textLLMList.append("deepseek")
+         if settings.ollama_api_key:
+             textLLMList.append("ollama")
+         if settings.siliconflow_api_key:
+             textLLMList.append("siliconflow")
+         return {"textLLMProviders": textLLMList, "imageLLMProviders": imgLLMList}
+
+     def _validate_story_response(self, response: Any) -> None:
+         """Validate the story generation response.
+
+         Args:
+             response: LLM response
+
+         Raises:
+             LLMResponseValidationError: if the response format is invalid
+         """
+         if not isinstance(response, list):
+             raise LLMResponseValidationError("Response must be an array")
+
+         for i, scene in enumerate(response):
+             if not isinstance(scene, dict):
+                 raise LLMResponseValidationError(f"story item {i} must be an object")
+
+             if "text" not in scene:
+                 raise LLMResponseValidationError(f"Scene {i} missing 'text' field")
+
+             if "image_prompt" not in scene:
+                 raise LLMResponseValidationError(f"Scene {i} missing 'image_prompt' field")
+
+             if not isinstance(scene["text"], str):
+                 raise LLMResponseValidationError(f"Scene {i} 'text' must be a string")
+
+             if not isinstance(scene["image_prompt"], str):
+                 raise LLMResponseValidationError(f"Scene {i} 'image_prompt' must be a string")
+
+     async def _generate_response(self, *, text_llm_provider: str = None, text_llm_model: str = None, messages: List[Dict[str, str]], response_format: str = "json_object") -> Any:
+         """Generate an LLM response.
+
+         Args:
+             messages: message list
+             response_format: response format, defaults to json_object
+
+         Returns:
+             Dict[str, Any]: parsed response
+
+         Raises:
+             Exception: if the request or parsing fails
+         """
+         if text_llm_provider is None:
+             # Note: the Settings field is named `text_provider`
+             text_llm_provider = settings.text_provider
+         if text_llm_provider == "aliyun":
+             text_client = self.aliyun_text_client
+         elif text_llm_provider == "openai":
+             text_client = self.openai_client
+         elif text_llm_provider == "deepseek":
+             text_client = deepseek_client
+         elif text_llm_provider == "ollama":
+             text_client = ollama_client
+         elif text_llm_provider == "siliconflow":
+             text_client = siliconflow_client
+         else:
+             raise ValueError(f"Unsupported text LLM provider: {text_llm_provider}")
+         if text_llm_model is None:
+             text_llm_model = settings.text_llm_model
+         response = text_client.chat.completions.create(
+             model=text_llm_model,
+             response_format={"type": response_format},
+             messages=messages,
+         )
+         try:
+             content = response.choices[0].message.content
+             result = json.loads(content)
+             return result
+         except Exception as e:
+             logger.error(f"Failed to parse response: {e}")
+             raise e
+
+     async def _get_story_prompt(self, story_prompt: str = None, language: Language = Language.CHINESE_CN, segments: int = 3) -> str:
+         """Build the story prompt.
+
+         Args:
+             story_prompt (str, optional): story theme. Defaults to None.
+             segments (int, optional): number of story segments. Defaults to 3.
+
+         Returns:
+             str: the full prompt
+         """
+
+         languageValue = LANGUAGE_NAMES[language]
+         if story_prompt:
+             base_prompt = f"讲一个故事,主题是:{story_prompt}"
+
+         return f"""
+         {base_prompt}. The story needs to be divided into {segments} scenes, and each scene must include descriptive text and an image prompt.
+
+         Please return the result in the following JSON format, where the key `list` contains an array of objects:
+
+         **Expected JSON format**:
+         {{
+             "list": [
+                 {{
+                     "text": "Descriptive text for the scene",
+                     "image_prompt": "Detailed image generation prompt, described in English"
+                 }},
+                 {{
+                     "text": "Another scene description text",
+                     "image_prompt": "Another detailed image generation prompt in English"
+                 }}
+             ]
+         }}
+
+         **Requirements**:
+         1. The root object must contain a key named `list`, and its value must be an array of scene objects.
+         2. Each object in the `list` array must include:
+             - `text`: A descriptive text for the scene, written in {languageValue}.
+             - `image_prompt`: A detailed prompt for generating an image, written in English.
+         3. Ensure the JSON format matches the above example exactly. Avoid extra fields or incorrect key names like `cimage_prompt` or `inage_prompt`.
+
+         **Important**:
+         - If there is only one scene, the array under `list` should contain a single object.
+         - The output must be a valid JSON object. Do not include explanations, comments, or additional content outside the JSON.
+
+         Example output:
+         {{
+             "list": [
+                 {{
+                     "text": "Scene description text",
+                     "image_prompt": "Detailed image generation prompt in English"
+                 }}
+             ]
+         }}
+         """
+
+
+
+ # Create the service instance
+ llm_service = LLMService()
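
`normalize_keys` papers over provider differences by renaming whatever single non-`text` key a scene carries (e.g. a misspelled `inage_prompt`) to `image_prompt`; a standalone sketch of that behavior:

    # sketch: normalize_keys renames the lone non-"text" key to "image_prompt"
    from app.services.llm import llm_service

    scenes = [
        {"text": "场景一", "image_prompt": "a cat"},  # already correct: unchanged
        {"text": "场景二", "inage_prompt": "a dog"},  # misspelled key gets renamed
    ]
    print(llm_service.normalize_keys(scenes))
    # [{'text': '场景一', 'image_prompt': 'a cat'}, {'text': '场景二', 'image_prompt': 'a dog'}]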
app/services/story.py ADDED
@@ -0,0 +1,49 @@
+ from typing import List, Optional
+ from app.schemas.story import Story, StoryCreate, StoryUpdate
+ import uuid
+
+
+ class StoryService:
+     def __init__(self):
+         # In-memory storage for example data
+         self._stories = {}
+
+     def get_stories(self, skip: int = 0, limit: int = 10) -> List[Story]:
+         stories = list(self._stories.values())
+         return stories[skip : skip + limit]
+
+     def get_story(self, story_id: str) -> Optional[Story]:
+         return self._stories.get(story_id)
+
+     def create_story(self, story: StoryCreate) -> Story:
+         story_id = str(uuid.uuid4())
+         story_data = Story(
+             id=story_id,
+             title=story.title,
+             description=story.description,
+         )
+         self._stories[story_id] = story_data
+         return story_data
+
+     def update_story(self, story_id: str, story: StoryUpdate) -> Optional[Story]:
+         if story_id not in self._stories:
+             return None
+
+         stored_story = self._stories[story_id]
+         update_data = story.model_dump(exclude_unset=True)
+
+         for field, value in update_data.items():
+             setattr(stored_story, field, value)
+
+         self._stories[story_id] = stored_story
+         return stored_story
+
+     def delete_story(self, story_id: str) -> bool:
+         if story_id not in self._stories:
+             return False
+         del self._stories[story_id]
+         return True
+
+
+ # Create a global service instance
+ story_service = StoryService()
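
The store is purely in-memory (a dict keyed by UUID), so data lives only for the process lifetime; a sketch of the CRUD round-trip:

    # sketch: in-memory CRUD round-trip on the story service
    from app.services.story import story_service
    from app.schemas.story import StoryCreate, StoryUpdate

    created = story_service.create_story(StoryCreate(title="Moon trip"))
    story_service.update_story(created.id, StoryUpdate(title="Mars trip"))
    print(story_service.get_story(created.id).title)  # Mars trip
    print(story_service.delete_story(created.id))     # True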
app/services/video.py ADDED
@@ -0,0 +1,277 @@
+ import os
+ import time
+ import json
+ from typing import List
+ from app.schemas.llm import StoryGenerationRequest
+ from loguru import logger
+ from app.models.const import StoryType, ImageStyle
+ from app.schemas.video import VideoGenerateRequest, StoryScene
+ from app.services.llm import llm_service
+ from app.services.voice import generate_voice
+ from app.utils import utils
+ from moviepy import (
+     VideoFileClip,
+     ImageClip,
+     AudioFileClip,
+     TextClip,
+     CompositeVideoClip,
+     concatenate_videoclips,
+     afx,
+ )
+ from moviepy.video.tools import subtitles
+ from moviepy.video.tools.subtitles import SubtitlesClip
+ from PIL import Image, ImageDraw, ImageFont
+ import numpy as np
+ import requests
+ import random
+
+ def wrap_text(text, max_width, font="Arial", fontsize=60):
+     # Create ImageFont
+     font = ImageFont.truetype(font, fontsize)
+
+     def get_text_size(inner_text):
+         inner_text = inner_text.strip()
+         left, top, right, bottom = font.getbbox(inner_text)
+         return right - left, bottom - top
+
+     width, height = get_text_size(text)
+     if width <= max_width:
+         return text, height
+
+     # logger.warning(f"wrapping text, max_width: {max_width}, text_width: {width}, text: {text}")
+
+     processed = True
+
+     _wrapped_lines_ = []
+     words = text.split(" ")
+     _txt_ = ""
+     for word in words:
+         _before = _txt_
+         _txt_ += f"{word} "
+         _width, _height = get_text_size(_txt_)
+         if _width <= max_width:
+             continue
+         else:
+             if _txt_.strip() == word.strip():
+                 processed = False
+                 break
+             _wrapped_lines_.append(_before)
+             _txt_ = f"{word} "
+     _wrapped_lines_.append(_txt_)
+     if processed:
+         _wrapped_lines_ = [line.strip() for line in _wrapped_lines_]
+         result = "\n".join(_wrapped_lines_).strip()
+         height = len(_wrapped_lines_) * height
+         # logger.warning(f"wrapped text: {result}")
+         return result, height
+
+     # Word-based wrapping failed (a single word is wider than max_width),
+     # so fall back to character-based wrapping
+     _wrapped_lines_ = []
+     chars = list(text)
+     _txt_ = ""
+     for word in chars:
+         _txt_ += word
+         _width, _height = get_text_size(_txt_)
+         if _width <= max_width:
+             continue
+         else:
+             _wrapped_lines_.append(_txt_)
+             _txt_ = ""
+     _wrapped_lines_.append(_txt_)
+     result = "\n".join(_wrapped_lines_).strip()
+     height = len(_wrapped_lines_) * height
+     # logger.warning(f"wrapped text: {result}")
+     return result, height
+
+ async def create_video_with_scenes(task_dir: str, scenes: List[StoryScene], voice_name: str, voice_rate: float, test_mode: bool = False) -> str:
+     """Create a video from the given scenes.
+
+     Args:
+         task_dir (str): task directory
+         scenes (List[StoryScene]): scene list
+         voice_name (str): voice name
+         voice_rate (float): voice rate
+         test_mode (bool): if True, reuse existing image, audio, and subtitle files
+     """
+     clips = []
+     for i, scene in enumerate(scenes, 1):
+         try:
+             # Resolve file paths
+             image_file = os.path.join(task_dir, f"{i}.png")
+             audio_file = os.path.join(task_dir, f"{i}.mp3")
+             subtitle_file = os.path.join(task_dir, f"{i}.srt")
+
+             # In test mode, check that the files already exist
+             if test_mode:
+                 if not (os.path.exists(image_file) and os.path.exists(audio_file) and os.path.exists(subtitle_file)):
+                     logger.warning(f"Test mode: Required files not found for scene {i}")
+                     raise FileNotFoundError("Required files not found")
+             else:
+                 # In normal mode, generate the required files
+                 logger.info(f"Processing scene {i}")
+                 audio_file, subtitle_file = await generate_voice(
+                     scene.text,
+                     voice_name,
+                     voice_rate,
+                     audio_file,
+                     subtitle_file
+                 )
+
+             # Get the total duration of the subtitles
+             subs = subtitles.file_to_subtitles(subtitle_file, encoding="utf-8")
+             subtitle_duration = max([tb for ((ta, tb), txt) in subs])
+
+             # Create the image clip
+             image_clip = ImageClip(image_file)
+             origin_image_w, origin_image_h = image_clip.size  # original image size
+             image_scale = 1.2
+             image_clip = image_clip.resized((origin_image_w * image_scale, origin_image_h * image_scale))
+             image_w, image_h = image_clip.size  # size after upscaling
+             # Make the image clip at least as long as the subtitles
+             image_clip = image_clip.with_duration(subtitle_duration)
+
+             width_diff = origin_image_w * (image_scale - 1)
+             def debug_position(t):
+                 # Pan the enlarged image from right to left over the clip duration
+                 return (-width_diff / subtitle_duration * t, 'center')
+             image_clip = image_clip.with_position(debug_position)
+             # Create the audio clip
+             audio_clip = AudioFileClip(audio_file)
+             image_clip = image_clip.with_audio(audio_clip)
+             # Use the bundled font
+             font_path = os.path.join(utils.resource_dir(), "fonts", "STHeitiLight.ttc")
+             if not os.path.exists(font_path):
+                 logger.error(f"Font file not found: {font_path}")
+                 raise FileNotFoundError("Font file not found: " + font_path)
+             else:
+                 logger.info(f"Using font: {font_path}")
+
+             print(f"Using font: {font_path}")
+             # Add subtitles
+             if os.path.exists(subtitle_file):
+                 logger.info(f"Loading subtitle file: {subtitle_file}")
+                 try:
+                     def make_textclip(text):
+                         return TextClip(
+                             text=text,
+                             font=font_path,
+                             font_size=60,
+                         )
+                     def create_text_clip(subtitle_item):
+                         phrase = subtitle_item[1]
+                         max_width = (origin_image_w * 0.9)
+                         wrapped_txt, txt_height = wrap_text(
+                             phrase, max_width=max_width, font=font_path, fontsize=60
+                         )
+                         _clip = TextClip(
+                             text=wrapped_txt,
+                             font=font_path,
+                             font_size=60,
+                             color="white",
+                             stroke_color="black",
+                             stroke_width=2,
+                         )
+                         duration = subtitle_item[0][1] - subtitle_item[0][0]
+                         _clip = _clip.with_start(subtitle_item[0][0])
+                         _clip = _clip.with_end(subtitle_item[0][1])
+                         _clip = _clip.with_duration(duration)
+                         _clip = _clip.with_position(("center", origin_image_h * 0.95 - _clip.h - 50))
+                         return _clip
+
+                     sub = SubtitlesClip(subtitle_file, encoding="utf-8", make_textclip=make_textclip)
+
+                     text_clips = []
+                     for item in sub.subtitles:
+                         clip = create_text_clip(subtitle_item=item)
+                         text_clips.append(clip)
+                     video_clip = CompositeVideoClip([image_clip, *text_clips], (origin_image_w, origin_image_h))
+                     clips.append(video_clip)
+                     logger.info(f"Added subtitles for scene {i}")
+                 except Exception as e:
+                     logger.error(f"Failed to add subtitles for scene {i}: {str(e)}")
+                     clips.append(image_clip)
+             else:
+                 logger.warning(f"Subtitle file not found: {subtitle_file}")
+                 clips.append(image_clip)
+         except Exception as e:
+             logger.error(f"Failed to process scene {i}: {str(e)}")
+             raise e
+
+     if not clips:
+         raise ValueError("No valid clips to combine")
+
+     # Concatenate all clips
+     logger.info("Merging all clips")
+     final_clip = concatenate_videoclips(clips)
+     video_file = os.path.join(task_dir, "video.mp4")
+     logger.info(f"Writing video to {video_file}")
+     final_clip.write_videofile(video_file, fps=24, codec='libx264', audio_codec='aac')
+
+     return video_file
+
+
+ async def generate_video(request: VideoGenerateRequest):
+     """Generate a video.
+
+     Args:
+         request (VideoGenerateRequest): video generation request
+     """
+     try:
+         # In test mode, read the request parameters from story.json
+         if request.test_mode:
+             task_id = request.task_id or str(int(time.time()))
+             task_dir = utils.task_dir(task_id)
+             if not os.path.exists(task_dir):
+                 raise ValueError(f"Task directory not found: {task_dir}")
+             # Read the data from story.json
+             story_file = os.path.join(task_dir, "story.json")
+             if not os.path.exists(story_file):
+                 raise ValueError(f"Story file not found: {story_file}")
+
+             with open(story_file, "r", encoding="utf-8") as f:
+                 story_data = json.load(f)
+             print("story_data", story_data)
+
+             request = VideoGenerateRequest(**story_data)
+             request.test_mode = True
+             scenes = [StoryScene(**scene) for scene in story_data.get("scenes", [])]
+         else:
+             req = StoryGenerationRequest(
+                 resolution=request.resolution,
+                 story_prompt=request.story_prompt,
+                 language=request.language,
+                 segments=request.segments,
+                 text_llm_provider=request.text_llm_provider,
+                 text_llm_model=request.text_llm_model,
+                 image_llm_provider=request.image_llm_provider,
+                 image_llm_model=request.image_llm_model
+             )
+             story_list = await llm_service.generate_story_with_images(request=req)
+             scenes = [StoryScene(text=scene["text"], image_prompt=scene["image_prompt"], url=scene["url"]) for scene in story_list]
+
+             # Save story.json and download the generated images
+             story_data = request.model_dump()
+             story_data["scenes"] = [scene.model_dump() for scene in scenes]
+             task_id = str(int(time.time()))
+             task_dir = utils.task_dir(task_id)
+             os.makedirs(task_dir, exist_ok=True)
+             story_file = os.path.join(task_dir, "story.json")
+             for i, scene in enumerate(story_list, 1):
+                 if scene.get("url"):
+                     image_path = os.path.join(task_dir, f"{i}.png")
+                     try:
+                         response = requests.get(scene["url"])
+                         if response.status_code == 200:
+                             with open(image_path, "wb") as f:
+                                 f.write(response.content)
+                             logger.info(f"Downloaded image {i} to {image_path}")
+                     except Exception as e:
+                         logger.error(f"Failed to download image {i}: {e}")
+
+             with open(story_file, "w", encoding="utf-8") as f:
+                 json.dump(story_data, f, ensure_ascii=False, indent=2)
+         # Generate the video
+         return await create_video_with_scenes(task_dir, scenes, request.voice_name, request.voice_rate, request.test_mode)
+     except Exception as e:
+         logger.error(f"Failed to generate video: {e}")
+         raise e
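
The pan effect comes from enlarging each image by 20% and sliding it left as the clip plays: `debug_position(t)` maps time linearly onto the extra width. A dependency-free sketch of the same arithmetic (the 1024-px width and 8-second duration are made-up numbers):

    # sketch: linear left-pan offsets, mirroring debug_position above
    image_scale = 1.2
    origin_image_w = 1024      # hypothetical source-image width
    subtitle_duration = 8.0    # hypothetical scene length in seconds

    width_diff = origin_image_w * (image_scale - 1)  # extra pixels gained by upscaling

    def position(t: float) -> tuple[float, str]:
        # x drifts from 0 to -width_diff so the whole overscan is traversed exactly once
        return (-width_diff / subtitle_duration * t, "center")

    for t in (0.0, 4.0, 8.0):
        print(t, position(t))  # 0.0, -102.4, -204.8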
app/services/voice.py ADDED
@@ -0,0 +1,1275 @@
+ import os
+ import asyncio
+ import time
+ import uuid
+ import json
+ import edge_tts
+ import re
+ from edge_tts import SubMaker
+ from edge_tts.submaker import mktimestamp
+ from moviepy.video.tools import subtitles
+ from loguru import logger
+ from typing import Tuple
+ from xml.sax.saxutils import unescape
+
+ PUNCTUATIONS = [
+ "?",
+ ",",
+ ".",
+ "、",
+ ";",
+ ":",
+ "!",
+ "…",
+ "?",
+ ",",
+ "。",
+ ";",
+ ":",
+ "!",
+ "...",
+ ]
+
+ def split_string_by_punctuations(s):
+ result = []
+ txt = ""
+
+ previous_char = ""
+ next_char = ""
+ for i in range(len(s)):
+ char = s[i]
+ if char == "\n":
+ if txt.strip(): # only append non-empty segments
+ result.append(txt.strip())
+ txt = ""
+ continue
+
+ if i > 0:
+ previous_char = s[i - 1]
+ if i < len(s) - 1:
+ next_char = s[i + 1]
+
+ if char == "." and previous_char.isdigit() and next_char.isdigit():
+ txt += char
+ continue
+
+ if char not in PUNCTUATIONS:
+ txt += char
+ else:
+ if txt.strip(): # only append non-empty segments
+ result.append(txt.strip())
+ txt = ""
+ if txt.strip(): # append the trailing segment if non-empty
+ result.append(txt.strip())
+
+ # drop segments that are empty or contain only punctuation
+ def is_valid_segment(segment):
+ # a segment is valid if anything remains after stripping punctuation
+ return bool(re.sub(r'[^\w\s]', '', segment).strip())
+
+ result = list(filter(is_valid_segment, result))
+ return result
+
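A quick sanity check of the splitter above (a sketch; expected output shown in the comments):

segments = split_string_by_punctuations("Hello, world. The fee is 2.5%! ...")
print(segments)  # ['Hello', 'world', 'The fee is 2.5%']
# the decimal point in "2.5" survives, and the punctuation-only "..." is filtered out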
+ def get_all_azure_voices(filter_locals=None) -> list[str]:
+ if filter_locals is None:
+ filter_locals = ["zh-CN", "en-US", "zh-TW", "ja-JP", "ko-KR"]
+ voices_str = """
+ Name: af-ZA-AdriNeural
+ Gender: Female
+
+ Name: af-ZA-WillemNeural
+ Gender: Male
+
+ Name: am-ET-AmehaNeural
+ Gender: Male
+
+ Name: am-ET-MekdesNeural
+ Gender: Female
+
+ Name: ar-AE-FatimaNeural
+ Gender: Female
+
+ Name: ar-AE-HamdanNeural
+ Gender: Male
+
+ Name: ar-BH-AliNeural
+ Gender: Male
+
+ Name: ar-BH-LailaNeural
+ Gender: Female
+
+ Name: ar-DZ-AminaNeural
+ Gender: Female
+
+ Name: ar-DZ-IsmaelNeural
+ Gender: Male
+
+ Name: ar-EG-SalmaNeural
+ Gender: Female
+
+ Name: ar-EG-ShakirNeural
+ Gender: Male
+
+ Name: ar-IQ-BasselNeural
+ Gender: Male
+
+ Name: ar-IQ-RanaNeural
+ Gender: Female
+
+ Name: ar-JO-SanaNeural
+ Gender: Female
+
+ Name: ar-JO-TaimNeural
+ Gender: Male
+
+ Name: ar-KW-FahedNeural
+ Gender: Male
+
+ Name: ar-KW-NouraNeural
+ Gender: Female
+
+ Name: ar-LB-LaylaNeural
+ Gender: Female
+
+ Name: ar-LB-RamiNeural
+ Gender: Male
+
+ Name: ar-LY-ImanNeural
+ Gender: Female
+
+ Name: ar-LY-OmarNeural
+ Gender: Male
+
+ Name: ar-MA-JamalNeural
+ Gender: Male
+
+ Name: ar-MA-MounaNeural
+ Gender: Female
+
+ Name: ar-OM-AbdullahNeural
+ Gender: Male
+
+ Name: ar-OM-AyshaNeural
+ Gender: Female
+
+ Name: ar-QA-AmalNeural
+ Gender: Female
+
+ Name: ar-QA-MoazNeural
+ Gender: Male
+
+ Name: ar-SA-HamedNeural
+ Gender: Male
+
+ Name: ar-SA-ZariyahNeural
+ Gender: Female
+
+ Name: ar-SY-AmanyNeural
+ Gender: Female
+
+ Name: ar-SY-LaithNeural
+ Gender: Male
+
+ Name: ar-TN-HediNeural
+ Gender: Male
+
+ Name: ar-TN-ReemNeural
+ Gender: Female
+
+ Name: ar-YE-MaryamNeural
+ Gender: Female
+
+ Name: ar-YE-SalehNeural
+ Gender: Male
+
+ Name: az-AZ-BabekNeural
+ Gender: Male
+
+ Name: az-AZ-BanuNeural
+ Gender: Female
+
+ Name: bg-BG-BorislavNeural
+ Gender: Male
+
+ Name: bg-BG-KalinaNeural
+ Gender: Female
+
+ Name: bn-BD-NabanitaNeural
+ Gender: Female
+
+ Name: bn-BD-PradeepNeural
+ Gender: Male
+
+ Name: bn-IN-BashkarNeural
+ Gender: Male
+
+ Name: bn-IN-TanishaaNeural
+ Gender: Female
+
+ Name: bs-BA-GoranNeural
+ Gender: Male
+
+ Name: bs-BA-VesnaNeural
+ Gender: Female
+
+ Name: ca-ES-EnricNeural
+ Gender: Male
+
+ Name: ca-ES-JoanaNeural
+ Gender: Female
+
+ Name: cs-CZ-AntoninNeural
+ Gender: Male
+
+ Name: cs-CZ-VlastaNeural
+ Gender: Female
+
+ Name: cy-GB-AledNeural
+ Gender: Male
+
+ Name: cy-GB-NiaNeural
+ Gender: Female
+
+ Name: da-DK-ChristelNeural
+ Gender: Female
+
+ Name: da-DK-JeppeNeural
+ Gender: Male
+
+ Name: de-AT-IngridNeural
+ Gender: Female
+
+ Name: de-AT-JonasNeural
+ Gender: Male
+
+ Name: de-CH-JanNeural
+ Gender: Male
+
+ Name: de-CH-LeniNeural
+ Gender: Female
+
+ Name: de-DE-AmalaNeural
+ Gender: Female
+
+ Name: de-DE-ConradNeural
+ Gender: Male
+
+ Name: de-DE-FlorianMultilingualNeural
+ Gender: Male
+
+ Name: de-DE-KatjaNeural
+ Gender: Female
+
+ Name: de-DE-KillianNeural
+ Gender: Male
+
+ Name: de-DE-SeraphinaMultilingualNeural
+ Gender: Female
+
+ Name: el-GR-AthinaNeural
+ Gender: Female
+
+ Name: el-GR-NestorasNeural
+ Gender: Male
+
+ Name: en-AU-NatashaNeural
+ Gender: Female
+
+ Name: en-AU-WilliamNeural
+ Gender: Male
+
+ Name: en-CA-ClaraNeural
+ Gender: Female
+
+ Name: en-CA-LiamNeural
+ Gender: Male
+
+ Name: en-GB-LibbyNeural
+ Gender: Female
+
+ Name: en-GB-MaisieNeural
+ Gender: Female
+
+ Name: en-GB-RyanNeural
+ Gender: Male
+
+ Name: en-GB-SoniaNeural
+ Gender: Female
+
+ Name: en-GB-ThomasNeural
+ Gender: Male
+
+ Name: en-HK-SamNeural
+ Gender: Male
+
+ Name: en-HK-YanNeural
+ Gender: Female
+
+ Name: en-IE-ConnorNeural
+ Gender: Male
+
+ Name: en-IE-EmilyNeural
+ Gender: Female
+
+ Name: en-IN-NeerjaExpressiveNeural
+ Gender: Female
+
+ Name: en-IN-NeerjaNeural
+ Gender: Female
+
+ Name: en-IN-PrabhatNeural
+ Gender: Male
+
+ Name: en-KE-AsiliaNeural
+ Gender: Female
+
+ Name: en-KE-ChilembaNeural
+ Gender: Male
+
+ Name: en-NG-AbeoNeural
+ Gender: Male
+
+ Name: en-NG-EzinneNeural
+ Gender: Female
+
+ Name: en-NZ-MitchellNeural
+ Gender: Male
+
+ Name: en-NZ-MollyNeural
+ Gender: Female
+
+ Name: en-PH-JamesNeural
+ Gender: Male
+
+ Name: en-PH-RosaNeural
+ Gender: Female
+
+ Name: en-SG-LunaNeural
+ Gender: Female
+
+ Name: en-SG-WayneNeural
+ Gender: Male
+
+ Name: en-TZ-ElimuNeural
+ Gender: Male
+
+ Name: en-TZ-ImaniNeural
+ Gender: Female
+
+ Name: en-US-AnaNeural
+ Gender: Female
+
+ Name: en-US-AndrewMultilingualNeural
+ Gender: Male
+
+ Name: en-US-AndrewNeural
+ Gender: Male
+
+ Name: en-US-AriaNeural
+ Gender: Female
+
+ Name: en-US-AvaMultilingualNeural
+ Gender: Female
+
+ Name: en-US-AvaNeural
+ Gender: Female
+
+ Name: en-US-BrianMultilingualNeural
+ Gender: Male
+
+ Name: en-US-BrianNeural
+ Gender: Male
+
+ Name: en-US-ChristopherNeural
+ Gender: Male
+
+ Name: en-US-EmmaMultilingualNeural
+ Gender: Female
+
+ Name: en-US-EmmaNeural
+ Gender: Female
+
+ Name: en-US-EricNeural
+ Gender: Male
+
+ Name: en-US-GuyNeural
+ Gender: Male
+
+ Name: en-US-JennyNeural
+ Gender: Female
+
+ Name: en-US-MichelleNeural
+ Gender: Female
+
+ Name: en-US-RogerNeural
+ Gender: Male
+
+ Name: en-US-SteffanNeural
+ Gender: Male
+
+ Name: en-ZA-LeahNeural
+ Gender: Female
+
+ Name: en-ZA-LukeNeural
+ Gender: Male
+
+ Name: es-AR-ElenaNeural
+ Gender: Female
+
+ Name: es-AR-TomasNeural
+ Gender: Male
+
+ Name: es-BO-MarceloNeural
+ Gender: Male
+
+ Name: es-BO-SofiaNeural
+ Gender: Female
+
+ Name: es-CL-CatalinaNeural
+ Gender: Female
+
+ Name: es-CL-LorenzoNeural
+ Gender: Male
+
+ Name: es-CO-GonzaloNeural
+ Gender: Male
+
+ Name: es-CO-SalomeNeural
+ Gender: Female
+
+ Name: es-CR-JuanNeural
+ Gender: Male
+
+ Name: es-CR-MariaNeural
+ Gender: Female
+
+ Name: es-CU-BelkysNeural
+ Gender: Female
+
+ Name: es-CU-ManuelNeural
+ Gender: Male
+
+ Name: es-DO-EmilioNeural
+ Gender: Male
+
+ Name: es-DO-RamonaNeural
+ Gender: Female
+
+ Name: es-EC-AndreaNeural
+ Gender: Female
+
+ Name: es-EC-LuisNeural
+ Gender: Male
+
+ Name: es-ES-AlvaroNeural
+ Gender: Male
+
+ Name: es-ES-ElviraNeural
+ Gender: Female
+
+ Name: es-ES-XimenaNeural
+ Gender: Female
+
+ Name: es-GQ-JavierNeural
+ Gender: Male
+
+ Name: es-GQ-TeresaNeural
+ Gender: Female
+
+ Name: es-GT-AndresNeural
+ Gender: Male
+
+ Name: es-GT-MartaNeural
+ Gender: Female
+
+ Name: es-HN-CarlosNeural
+ Gender: Male
+
+ Name: es-HN-KarlaNeural
+ Gender: Female
+
+ Name: es-MX-DaliaNeural
+ Gender: Female
+
+ Name: es-MX-JorgeNeural
+ Gender: Male
+
+ Name: es-NI-FedericoNeural
+ Gender: Male
+
+ Name: es-NI-YolandaNeural
+ Gender: Female
+
+ Name: es-PA-MargaritaNeural
+ Gender: Female
+
+ Name: es-PA-RobertoNeural
+ Gender: Male
+
+ Name: es-PE-AlexNeural
+ Gender: Male
+
+ Name: es-PE-CamilaNeural
+ Gender: Female
+
+ Name: es-PR-KarinaNeural
+ Gender: Female
+
+ Name: es-PR-VictorNeural
+ Gender: Male
+
+ Name: es-PY-MarioNeural
+ Gender: Male
+
+ Name: es-PY-TaniaNeural
+ Gender: Female
+
+ Name: es-SV-LorenaNeural
+ Gender: Female
+
+ Name: es-SV-RodrigoNeural
+ Gender: Male
+
+ Name: es-US-AlonsoNeural
+ Gender: Male
+
+ Name: es-US-PalomaNeural
+ Gender: Female
+
+ Name: es-UY-MateoNeural
+ Gender: Male
+
+ Name: es-UY-ValentinaNeural
+ Gender: Female
+
+ Name: es-VE-PaolaNeural
+ Gender: Female
+
+ Name: es-VE-SebastianNeural
+ Gender: Male
+
+ Name: et-EE-AnuNeural
+ Gender: Female
+
+ Name: et-EE-KertNeural
+ Gender: Male
+
+ Name: fa-IR-DilaraNeural
+ Gender: Female
+
+ Name: fa-IR-FaridNeural
+ Gender: Male
+
+ Name: fi-FI-HarriNeural
+ Gender: Male
+
+ Name: fi-FI-NooraNeural
+ Gender: Female
+
+ Name: fil-PH-AngeloNeural
+ Gender: Male
+
+ Name: fil-PH-BlessicaNeural
+ Gender: Female
+
+ Name: fr-BE-CharlineNeural
+ Gender: Female
+
+ Name: fr-BE-GerardNeural
+ Gender: Male
+
+ Name: fr-CA-AntoineNeural
+ Gender: Male
+
+ Name: fr-CA-JeanNeural
+ Gender: Male
+
+ Name: fr-CA-SylvieNeural
+ Gender: Female
+
+ Name: fr-CA-ThierryNeural
+ Gender: Male
+
+ Name: fr-CH-ArianeNeural
+ Gender: Female
+
+ Name: fr-CH-FabriceNeural
+ Gender: Male
+
+ Name: fr-FR-DeniseNeural
+ Gender: Female
+
+ Name: fr-FR-EloiseNeural
+ Gender: Female
+
+ Name: fr-FR-HenriNeural
+ Gender: Male
+
+ Name: fr-FR-RemyMultilingualNeural
+ Gender: Male
+
+ Name: fr-FR-VivienneMultilingualNeural
+ Gender: Female
+
+ Name: ga-IE-ColmNeural
+ Gender: Male
+
+ Name: ga-IE-OrlaNeural
+ Gender: Female
+
+ Name: gl-ES-RoiNeural
+ Gender: Male
+
+ Name: gl-ES-SabelaNeural
+ Gender: Female
+
+ Name: gu-IN-DhwaniNeural
+ Gender: Female
+
+ Name: gu-IN-NiranjanNeural
+ Gender: Male
+
+ Name: he-IL-AvriNeural
+ Gender: Male
+
+ Name: he-IL-HilaNeural
+ Gender: Female
+
+ Name: hi-IN-MadhurNeural
+ Gender: Male
+
+ Name: hi-IN-SwaraNeural
+ Gender: Female
+
+ Name: hr-HR-GabrijelaNeural
+ Gender: Female
+
+ Name: hr-HR-SreckoNeural
+ Gender: Male
+
+ Name: hu-HU-NoemiNeural
+ Gender: Female
+
+ Name: hu-HU-TamasNeural
+ Gender: Male
+
+ Name: id-ID-ArdiNeural
+ Gender: Male
+
+ Name: id-ID-GadisNeural
+ Gender: Female
+
+ Name: is-IS-GudrunNeural
+ Gender: Female
+
+ Name: is-IS-GunnarNeural
+ Gender: Male
+
+ Name: it-IT-DiegoNeural
+ Gender: Male
+
+ Name: it-IT-ElsaNeural
+ Gender: Female
+
+ Name: it-IT-GiuseppeMultilingualNeural
+ Gender: Male
+
+ Name: it-IT-IsabellaNeural
+ Gender: Female
+
+ Name: iu-Cans-CA-SiqiniqNeural
+ Gender: Female
+
+ Name: iu-Cans-CA-TaqqiqNeural
+ Gender: Male
+
+ Name: iu-Latn-CA-SiqiniqNeural
+ Gender: Female
+
+ Name: iu-Latn-CA-TaqqiqNeural
+ Gender: Male
+
+ Name: ja-JP-KeitaNeural
+ Gender: Male
+
+ Name: ja-JP-NanamiNeural
+ Gender: Female
+
+ Name: jv-ID-DimasNeural
+ Gender: Male
+
+ Name: jv-ID-SitiNeural
+ Gender: Female
+
+ Name: ka-GE-EkaNeural
+ Gender: Female
+
+ Name: ka-GE-GiorgiNeural
+ Gender: Male
+
+ Name: kk-KZ-AigulNeural
+ Gender: Female
+
+ Name: kk-KZ-DauletNeural
+ Gender: Male
+
+ Name: km-KH-PisethNeural
+ Gender: Male
+
+ Name: km-KH-SreymomNeural
+ Gender: Female
+
+ Name: kn-IN-GaganNeural
+ Gender: Male
+
+ Name: kn-IN-SapnaNeural
+ Gender: Female
+
+ Name: ko-KR-HyunsuMultilingualNeural
+ Gender: Male
+
+ Name: ko-KR-InJoonNeural
+ Gender: Male
+
+ Name: ko-KR-SunHiNeural
+ Gender: Female
+
+ Name: lo-LA-ChanthavongNeural
+ Gender: Male
+
+ Name: lo-LA-KeomanyNeural
+ Gender: Female
+
+ Name: lt-LT-LeonasNeural
+ Gender: Male
+
+ Name: lt-LT-OnaNeural
+ Gender: Female
+
+ Name: lv-LV-EveritaNeural
+ Gender: Female
+
+ Name: lv-LV-NilsNeural
+ Gender: Male
+
+ Name: mk-MK-AleksandarNeural
+ Gender: Male
+
+ Name: mk-MK-MarijaNeural
+ Gender: Female
+
+ Name: ml-IN-MidhunNeural
+ Gender: Male
+
+ Name: ml-IN-SobhanaNeural
+ Gender: Female
+
+ Name: mn-MN-BataaNeural
+ Gender: Male
+
+ Name: mn-MN-YesuiNeural
+ Gender: Female
+
+ Name: mr-IN-AarohiNeural
+ Gender: Female
+
+ Name: mr-IN-ManoharNeural
+ Gender: Male
+
+ Name: ms-MY-OsmanNeural
+ Gender: Male
+
+ Name: ms-MY-YasminNeural
+ Gender: Female
+
+ Name: mt-MT-GraceNeural
+ Gender: Female
+
+ Name: mt-MT-JosephNeural
+ Gender: Male
+
+ Name: my-MM-NilarNeural
+ Gender: Female
+
+ Name: my-MM-ThihaNeural
+ Gender: Male
+
+ Name: nb-NO-FinnNeural
+ Gender: Male
+
+ Name: nb-NO-PernilleNeural
+ Gender: Female
+
+ Name: ne-NP-HemkalaNeural
+ Gender: Female
+
+ Name: ne-NP-SagarNeural
+ Gender: Male
+
+ Name: nl-BE-ArnaudNeural
+ Gender: Male
+
+ Name: nl-BE-DenaNeural
+ Gender: Female
+
+ Name: nl-NL-ColetteNeural
+ Gender: Female
+
+ Name: nl-NL-FennaNeural
+ Gender: Female
+
+ Name: nl-NL-MaartenNeural
+ Gender: Male
+
+ Name: pl-PL-MarekNeural
+ Gender: Male
+
+ Name: pl-PL-ZofiaNeural
+ Gender: Female
+
+ Name: ps-AF-GulNawazNeural
+ Gender: Male
+
+ Name: ps-AF-LatifaNeural
+ Gender: Female
+
+ Name: pt-BR-AntonioNeural
+ Gender: Male
+
+ Name: pt-BR-FranciscaNeural
+ Gender: Female
+
+ Name: pt-BR-ThalitaMultilingualNeural
+ Gender: Female
+
+ Name: pt-PT-DuarteNeural
+ Gender: Male
+
+ Name: pt-PT-RaquelNeural
+ Gender: Female
+
+ Name: ro-RO-AlinaNeural
+ Gender: Female
+
+ Name: ro-RO-EmilNeural
+ Gender: Male
+
+ Name: ru-RU-DmitryNeural
+ Gender: Male
+
+ Name: ru-RU-SvetlanaNeural
+ Gender: Female
+
+ Name: si-LK-SameeraNeural
+ Gender: Male
+
+ Name: si-LK-ThiliniNeural
+ Gender: Female
+
+ Name: sk-SK-LukasNeural
+ Gender: Male
+
+ Name: sk-SK-ViktoriaNeural
+ Gender: Female
+
+ Name: sl-SI-PetraNeural
+ Gender: Female
+
+ Name: sl-SI-RokNeural
+ Gender: Male
+
+ Name: so-SO-MuuseNeural
+ Gender: Male
+
+ Name: so-SO-UbaxNeural
+ Gender: Female
+
+ Name: sq-AL-AnilaNeural
+ Gender: Female
+
+ Name: sq-AL-IlirNeural
+ Gender: Male
+
+ Name: sr-RS-NicholasNeural
+ Gender: Male
+
+ Name: sr-RS-SophieNeural
+ Gender: Female
+
+ Name: su-ID-JajangNeural
+ Gender: Male
+
+ Name: su-ID-TutiNeural
+ Gender: Female
+
+ Name: sv-SE-MattiasNeural
+ Gender: Male
+
+ Name: sv-SE-SofieNeural
+ Gender: Female
+
+ Name: sw-KE-RafikiNeural
+ Gender: Male
+
+ Name: sw-KE-ZuriNeural
+ Gender: Female
+
+ Name: sw-TZ-DaudiNeural
+ Gender: Male
+
+ Name: sw-TZ-RehemaNeural
+ Gender: Female
+
+ Name: ta-IN-PallaviNeural
+ Gender: Female
+
+ Name: ta-IN-ValluvarNeural
+ Gender: Male
+
+ Name: ta-LK-KumarNeural
+ Gender: Male
+
+ Name: ta-LK-SaranyaNeural
+ Gender: Female
+
+ Name: ta-MY-KaniNeural
+ Gender: Female
+
+ Name: ta-MY-SuryaNeural
+ Gender: Male
+
+ Name: ta-SG-AnbuNeural
+ Gender: Male
+
+ Name: ta-SG-VenbaNeural
+ Gender: Female
+
+ Name: te-IN-MohanNeural
+ Gender: Male
+
+ Name: te-IN-ShrutiNeural
+ Gender: Female
+
+ Name: th-TH-NiwatNeural
+ Gender: Male
+
+ Name: th-TH-PremwadeeNeural
+ Gender: Female
+
+ Name: tr-TR-AhmetNeural
+ Gender: Male
+
+ Name: tr-TR-EmelNeural
+ Gender: Female
+
+ Name: uk-UA-OstapNeural
+ Gender: Male
+
+ Name: uk-UA-PolinaNeural
+ Gender: Female
+
+ Name: ur-IN-GulNeural
+ Gender: Female
+
+ Name: ur-IN-SalmanNeural
+ Gender: Male
+
+ Name: ur-PK-AsadNeural
+ Gender: Male
+
+ Name: ur-PK-UzmaNeural
+ Gender: Female
+
+ Name: uz-UZ-MadinaNeural
+ Gender: Female
+
+ Name: uz-UZ-SardorNeural
+ Gender: Male
+
+ Name: vi-VN-HoaiMyNeural
+ Gender: Female
+
+ Name: vi-VN-NamMinhNeural
+ Gender: Male
+
+ Name: zh-CN-XiaoxiaoNeural
+ Gender: Female
+
+ Name: zh-CN-XiaoyiNeural
+ Gender: Female
+
+ Name: zh-CN-YunjianNeural
+ Gender: Male
+
+ Name: zh-CN-YunxiNeural
+ Gender: Male
+
+ Name: zh-CN-YunxiaNeural
+ Gender: Male
+
+ Name: zh-CN-YunyangNeural
+ Gender: Male
+
+ Name: zh-CN-liaoning-XiaobeiNeural
+ Gender: Female
+
+ Name: zh-CN-shaanxi-XiaoniNeural
+ Gender: Female
+
+ Name: zh-HK-HiuGaaiNeural
+ Gender: Female
+
+ Name: zh-HK-HiuMaanNeural
+ Gender: Female
+
+ Name: zh-HK-WanLungNeural
+ Gender: Male
+
+ Name: zh-TW-HsiaoChenNeural
+ Gender: Female
+
+ Name: zh-TW-HsiaoYuNeural
+ Gender: Female
+
+ Name: zh-TW-YunJheNeural
+ Gender: Male
+
+ Name: zu-ZA-ThandoNeural
+ Gender: Female
+
+ Name: zu-ZA-ThembaNeural
+ Gender: Male
+ """.strip()
+ voices = []
+ name = ""
+ for line in voices_str.split("\n"):
+ line = line.strip()
+ if not line:
+ continue
+ if line.startswith("Name: "):
+ name = line[6:].strip()
+ if line.startswith("Gender: "):
+ gender = line[8:].strip()
+ if name and gender:
+ if filter_locals:
+ for filter_local in filter_locals:
+ if name.lower().startswith(filter_local.lower()):
+ voices.append(f"{name}-{gender}")
+ else:
+ voices.append(f"{name}-{gender}")
+ name = ""
+ voices.sort()
+ return voices
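A sketch of what the parser yields; each entry is a "<voice name>-<gender>" string, filtered by locale prefix and sorted:

voices = get_all_azure_voices(filter_locals=["zh-CN"])
print(voices[0])  # e.g. "zh-CN-XiaoxiaoNeural-Female"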
+
+
+ def parse_voice_name(name: str):
+ # zh-CN-XiaoyiNeural-Female
+ # zh-CN-YunxiNeural-Male
+ # zh-CN-XiaoxiaoMultilingualNeural-V2-Female
+ name = name.replace("-Female", "").replace("-Male", "").strip()
+ return name
+
+
+ def convert_rate_to_percent(rate: float) -> str:
+ if rate == 1.0:
+ return "+0%"
+ percent = round((rate - 1.0) * 100)
+ if percent > 0:
+ return f"+{percent}%"
+ else:
+ return f"{percent}%"
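The rate helper maps a speed multiplier onto Edge TTS's signed-percent syntax, with 1.0 as neutral; a few worked values:

assert convert_rate_to_percent(1.0) == "+0%"
assert convert_rate_to_percent(1.2) == "+20%"  # 20% faster
assert convert_rate_to_percent(0.9) == "-10%"  # 10% slower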
+
+
+ async def generate_voice(text: str, voice_name: str, voice_rate: float = 1.0, audio_file: str = None, subtitle_file: str = None) -> Tuple[str, str]:
+ """Generate speech audio and subtitles.
+
+ Args:
+ text (str): Text content.
+ voice_name (str): Voice name.
+ voice_rate (float, optional): Speech rate, where 1.0 is normal speed. Defaults to 1.0.
+ audio_file (str, optional): Audio file path. Defaults to None.
+ subtitle_file (str, optional): Subtitle file path. Defaults to None.
+
+ Returns:
+ Tuple[str, str]: audio file path, subtitle file path
+ """
+ if audio_file is None:
+ audio_file = f"temp_{uuid.uuid4()}.mp3"
+ if subtitle_file is None:
+ subtitle_file = f"temp_{uuid.uuid4()}.srt"
+
+ # Generate the speech audio
+ sub_maker = await edge_tts_voice(text, voice_name, audio_file, voice_rate)
+ # Generate the subtitles
+ if sub_maker:
+ await generate_subtitle(sub_maker, text, subtitle_file)
+ else:
+ logger.error("Failed to generate sub_maker")
+
+ return audio_file, subtitle_file
+
+
+ async def edge_tts_voice(text: str, voice_name: str, voice_file: str, voice_rate: float = 1.0) -> SubMaker:
+ """Generate speech with Edge TTS."""
+ rate_str = convert_rate_to_percent(voice_rate)
+ for i in range(3):
+ try:
+ logger.info(f"start, voice name: {voice_name}, try: {i + 1}")
+
+ communicate = edge_tts.Communicate(text, voice_name, rate=rate_str)
+ sub_maker = edge_tts.SubMaker()
+
+ with open(voice_file, "wb") as file:
+ async for chunk in communicate.stream():
+ if chunk["type"] == "audio":
+ file.write(chunk["data"])
+ elif chunk["type"] == "WordBoundary":
+ logger.debug(f"Got word boundary: {chunk}")
+ # Record the subtitle entry via SubMaker.create_sub
+ sub_maker.create_sub((chunk["offset"], chunk["duration"]), chunk["text"])
+
+ if not sub_maker or not sub_maker.subs:
+ logger.warning("failed, sub_maker is None or sub_maker.subs is None")
+ continue
+
+ logger.info(f"completed, output file: {voice_file}")
+ return sub_maker
+ except Exception as e:
+ logger.error(f"failed, error: {str(e)}")
+ continue
+ return None
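A minimal driver for the coroutine above (illustrative only; the voice name is one entry from get_all_azure_voices with its gender suffix stripped by parse_voice_name, and the output paths are hypothetical):

import asyncio

async def demo():
    audio, srt = await generate_voice(
        text="Hello, world. This is a test.",
        voice_name=parse_voice_name("en-US-JennyNeural-Female"),
        voice_rate=1.0,
        audio_file="demo.mp3",
        subtitle_file="demo.srt",
    )
    print(audio, srt)

asyncio.run(demo())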
+
+
+ async def generate_subtitle(sub_maker: edge_tts.SubMaker, text: str, subtitle_file: str):
+ """Write the subtitle file for the given SubMaker."""
+ try:
+ if not sub_maker or not hasattr(sub_maker, "subs") or not sub_maker.subs:
+ logger.warning("No subtitles to generate: sub_maker is None or sub_maker.subs is empty")
+ return
+
+ logger.info(f"Generating subtitles with {len(sub_maker.subs)} items")
+
+ # Delegate to the subtitle-building function
+ await create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)
+
+ except Exception as e:
+ logger.error(f"failed to generate subtitle: {str(e)}")
+ import traceback
+ logger.error(traceback.format_exc())
+
+
+ def get_audio_duration(sub_maker: edge_tts.SubMaker) -> float:
+ """Return the audio duration in seconds."""
+ if not sub_maker or not hasattr(sub_maker, "offset") or not sub_maker.offset:
+ return 0
+ start, duration = sub_maker.offset[-1]
+ return (start + duration) / 10000000 # convert 100-nanosecond ticks to seconds
+
+
+ def _format_text(text: str) -> str:
+ text = text.replace("[", " ")
+ text = text.replace("]", " ")
+ text = text.replace("(", " ")
+ text = text.replace(")", " ")
+ text = text.replace("{", " ")
+ text = text.replace("}", " ")
+ text = text.strip()
+ return text
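Edge TTS reports word boundaries in 100-nanosecond ticks, hence the division by 10,000,000 above; a tiny worked example (assuming SubMaker.create_sub appends the (offset, duration) pair as in edge_tts 6.1):

sm = edge_tts.SubMaker()
sm.create_sub((10_000_000, 5_000_000), "hello")  # starts at 1.0 s, lasts 0.5 s
print(get_audio_duration(sm))  # 1.5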
+
+
+ async def create_subtitle(sub_maker: edge_tts.SubMaker, text: str, subtitle_file: str):
+ """
+ Build an optimized subtitle file:
+ 1. Split the text into lines at punctuation marks.
+ 2. Match each line against the text accumulated from the SubMaker entries.
+ 3. Write the matched lines out as a new subtitle file.
+ """
+ text = _format_text(text)
+
+ def formatter(idx: int, start_time: float, end_time: float, sub_text: str) -> str:
+ """
+ 1
+ 00:00:00,000 --> 00:00:02,360
+ Running is a simple, accessible form of exercise
+ """
+ start_t = mktimestamp(start_time).replace(".", ",")
+ end_t = mktimestamp(end_time).replace(".", ",")
+ return f"{idx}\n{start_t} --> {end_t}\n{sub_text}\n"
+
+ start_time = -1.0
+ sub_items = []
+ sub_index = 0
+
+ script_lines = split_string_by_punctuations(text)
+ logger.debug(f"Split text into {len(script_lines)} lines: {script_lines}")
+
+ def match_line(_sub_line: str, _sub_index: int):
+ if len(script_lines) <= _sub_index:
+ return ""
+
+ _line = script_lines[_sub_index]
+ if _sub_line == _line:
+ return script_lines[_sub_index].strip()
+
+ _sub_line_ = re.sub(r"[^\w\s]", "", _sub_line)
+ _line_ = re.sub(r"[^\w\s]", "", _line)
+ if _sub_line_ == _line_:
+ return _line_.strip()
+
+ _sub_line_ = re.sub(r"\W+", "", _sub_line)
+ _line_ = re.sub(r"\W+", "", _line)
+ if _sub_line_ == _line_:
+ return _line.strip()
+
+ return ""
+
+ sub_line = ""
+
+ try:
+ for _, (offset, sub) in enumerate(zip(sub_maker.offset, sub_maker.subs)):
+ _start_time, end_time = offset
+ if start_time < 0:
+ start_time = _start_time
+
+ sub = unescape(sub)
+ sub_line += sub
+ sub_text = match_line(sub_line, sub_index)
+ if sub_text:
+ sub_index += 1
+ line = formatter(
+ idx=sub_index,
+ start_time=start_time,
+ end_time=end_time,
+ sub_text=sub_text,
+ )
+ sub_items.append(line)
+ start_time = -1.0
+ sub_line = ""
+ if len(sub_items) == len(script_lines):
+ with open(subtitle_file, "w", encoding="utf-8") as file:
+ file.write("\n".join(sub_items) + "\n")
+ try:
+ sbs = subtitles.file_to_subtitles(subtitle_file, encoding="utf-8")
+ duration = max([tb for ((ta, tb), txt) in sbs])
+ logger.info(
+ f"completed, subtitle file created: {subtitle_file}, duration: {duration}"
+ )
+ except Exception as e:
+ logger.error(f"failed, error: {str(e)}")
+ os.remove(subtitle_file)
+ else:
+ logger.error(
+ f"failed, sub_items len: {len(sub_items)}, script_lines len: {len(script_lines)}"
+ )
+
+ except Exception as e:
+ logger.error(f"failed, error: {str(e)}")
app/utils/__init__.py ADDED
@@ -0,0 +1 @@
+
app/utils/utils.py ADDED
@@ -0,0 +1,248 @@
+ import json
+ import locale
+ import os
+ import random
+ import re
+ import string
+ import threading
+ import urllib3
+ from typing import Any, List
+ from uuid import uuid4
+ from pathlib import Path
+
+ from loguru import logger
+
+ from app.models import const
+
+ urllib3.disable_warnings()
+
+
+ def get_uuid(remove_hyphen: bool = False):
+ u = str(uuid4())
+ if remove_hyphen:
+ u = u.replace("-", "")
+ return u
+
+
+ def get_root_dir():
+ return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
+
+
+ def resource_dir(sub_dir: str = ""):
+ d = os.path.join(get_root_dir(), "resource")
+ if sub_dir:
+ d = os.path.join(d, sub_dir)
+ return d
+
+ def task_dir(sub_dir: str = "") -> str:
+ """Get the task directory path.
+ Args:
+ sub_dir (str, optional): Subdirectory name. Defaults to "".
+ Returns:
+ str: Absolute path of the task directory.
+ """
+ # Locate the backend root directory
+ root_dir = get_root_dir()
+ # Task directory
+ d = os.path.join(root_dir, "tasks")
+ if sub_dir:
+ d = os.path.join(d, sub_dir)
+
+ # Make sure the directory exists
+ os.makedirs(d, exist_ok=True)
+
+ return d
+
56
+
57
+ def font_dir(sub_dir: str = ""):
58
+ d = resource_dir("fonts")
59
+ if sub_dir:
60
+ d = os.path.join(d, sub_dir)
61
+ if not os.path.exists(d):
62
+ os.makedirs(d)
63
+ return d
64
+
65
+
66
+ def song_dir(sub_dir: str = ""):
67
+ d = resource_dir("songs")
68
+ if sub_dir:
69
+ d = os.path.join(d, sub_dir)
70
+ if not os.path.exists(d):
71
+ os.makedirs(d)
72
+ return d
73
+
74
+
75
+ def public_dir(sub_dir: str = ""):
76
+ d = resource_dir("public")
77
+ if sub_dir:
78
+ d = os.path.join(d, sub_dir)
79
+ if not os.path.exists(d):
80
+ os.makedirs(d)
81
+ return d
82
+
83
+
84
+ def run_in_background(func, *args, **kwargs):
85
+ def run():
86
+ try:
87
+ func(*args, **kwargs)
88
+ except Exception as e:
89
+ logger.error(f"run_in_background error: {e}")
90
+
91
+ thread = threading.Thread(target=run)
92
+ thread.start()
93
+ return thread
94
+
95
+
96
+ def time_convert_seconds_to_hmsm(seconds) -> str:
97
+ hours = int(seconds // 3600)
98
+ seconds = seconds % 3600
99
+ minutes = int(seconds // 60)
100
+ milliseconds = int(seconds * 1000) % 1000
101
+ seconds = int(seconds % 60)
102
+ return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, minutes, seconds, milliseconds)
103
+
104
+
105
+ def text_to_srt(idx: int, msg: str, start_time: float, end_time: float) -> str:
106
+ start_time = time_convert_seconds_to_hmsm(start_time)
107
+ end_time = time_convert_seconds_to_hmsm(end_time)
108
+ srt = """%d
109
+ %s --> %s
110
+ %s
111
+ """ % (
112
+ idx,
113
+ start_time,
114
+ end_time,
115
+ msg,
116
+ )
117
+ return srt
118
+
119
+
120
+ def str_contains_punctuation(word):
121
+ for p in const.PUNCTUATIONS:
122
+ if p in word:
123
+ return True
124
+ return False
125
+
126
+
127
+ def split_string_by_punctuations(s):
128
+ result = []
129
+ txt = ""
130
+
131
+ previous_char = ""
132
+ next_char = ""
133
+ for i in range(len(s)):
134
+ char = s[i]
135
+ if char == "\n":
136
+ result.append(txt.strip())
137
+ txt = ""
138
+ continue
139
+
140
+ if i > 0:
141
+ previous_char = s[i - 1]
142
+ if i < len(s) - 1:
143
+ next_char = s[i + 1]
144
+
145
+ if char == "." and previous_char.isdigit() and next_char.isdigit():
146
+ # # In the case of "withdraw 10,000, charged at 2.5% fee", the dot in "2.5" should not be treated as a line break marker
147
+ txt += char
148
+ continue
149
+
150
+ if char not in const.PUNCTUATIONS:
151
+ txt += char
152
+ else:
153
+ result.append(txt.strip())
154
+ txt = ""
155
+ result.append(txt.strip())
156
+ # filter empty string
157
+ result = list(filter(None, result))
158
+ return result
159
+
160
+
161
+ def split_string_by_punctuations_new(text: str) -> List[str]:
162
+ """按标点符号分割文本"""
163
+ result = []
164
+ txt = ""
165
+
166
+ previous_char = ""
167
+ next_char = ""
168
+ for i in range(len(text)):
169
+ char = text[i]
170
+ if char == "\n":
171
+ if txt.strip():
172
+ result.append(txt.strip())
173
+ txt = ""
174
+ continue
175
+
176
+ if i > 0:
177
+ previous_char = text[i - 1]
178
+ if i < len(text) - 1:
179
+ next_char = text[i + 1]
180
+
181
+ if char == "." and previous_char.isdigit() and next_char.isdigit():
182
+ txt += char
183
+ continue
184
+
185
+ if char not in [".", "。", "!", "?", "...", "…"]:
186
+ txt += char
187
+ else:
188
+ txt += char
189
+ if txt.strip():
190
+ result.append(txt.strip())
191
+ txt = ""
192
+
193
+ if txt.strip():
194
+ result.append(txt.strip())
195
+ return result
196
+
197
+
198
+ def random_str(length: int = 8) -> str:
199
+ """生成随机字符串"""
200
+ letters = string.ascii_lowercase + string.digits
201
+ return ''.join(random.choice(letters) for _ in range(length))
202
+
203
+
204
+ def md5(text):
205
+ import hashlib
206
+
207
+ return hashlib.md5(text.encode("utf-8")).hexdigest()
208
+
209
+
210
+ def get_system_locale():
211
+ try:
212
+ loc = locale.getdefaultlocale()
213
+ # zh_CN, zh_TW return zh
214
+ # en_US, en_GB return en
215
+ language_code = loc[0].split("_")[0]
216
+ return language_code
217
+ except Exception:
218
+ return "en"
219
+
220
+
221
+ def load_locales(i18n_dir):
222
+ _locales = {}
223
+ for root, dirs, files in os.walk(i18n_dir):
224
+ for file in files:
225
+ if file.endswith(".json"):
226
+ lang = file.split(".")[0]
227
+ with open(os.path.join(root, file), "r", encoding="utf-8") as f:
228
+ _locales[lang] = json.loads(f.read())
229
+ return _locales
230
+
231
+
232
+ def parse_extension(filename):
233
+ return os.path.splitext(filename)[1].strip().lower().replace(".", "")
234
+
235
+ def extract_id(video_file: str) -> str:
236
+ """
237
+ 从路径中提取 ID(tasks 目录下的第一级子目录名)
238
+ 兼容 Windows 和 Linux
239
+ """
240
+ path = Path(video_file)
241
+
242
+ # 遍历路径的所有部分,查找 "tasks" 目录
243
+ try:
244
+ parts = path.parts
245
+ index = parts.index("tasks") # 找到 "tasks" 目录的位置
246
+ return parts[index + 1] # 返回紧跟其后的部分作为 ID
247
+ except (ValueError, IndexError):
248
+ raise ValueError(f"Invalid path format: {video_file}")
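A sketch of extract_id in action, with hypothetical paths:

print(extract_id("tasks/1736999999/video.mp4"))          # "1736999999"
print(extract_id(r"C:\app\tasks\1736999999\video.mp4"))  # same ID when run on Windows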
main.py ADDED
@@ -0,0 +1,40 @@
+ from fastapi import FastAPI, APIRouter
+ from fastapi.staticfiles import StaticFiles
+
+ from fastapi.middleware.cors import CORSMiddleware
+ from app.api import api_router
+ import os
+
+ app = FastAPI(
+ title="StoryFlicks Backend API",
+ description="Backend API for StoryFlicks application",
+ docs_url="/docs",
+ redoc_url="/redoc",
+ )
+
+ # Configure CORS
+ app.add_middleware(
+ CORSMiddleware,
+ allow_origins=["http://localhost:5173", "http://127.0.0.1:5173"], # In production, replace with the deployed frontend origin
+ allow_credentials=True,
+ allow_methods=["*"],
+ allow_headers=["*"],
+ )
+
+ if not os.path.exists('tasks'):
+ os.makedirs('tasks')
+
+ app.mount("/tasks", StaticFiles(directory=os.path.abspath("tasks")), name="tasks")
+ # Include API router
+ app.include_router(api_router)
+
+ @app.get("/")
+ async def root():
+ return {
+ "app_name": "StoryFlicks Backend API",
+ "docs_url": "/docs"
+ }
+
+ if __name__ == "__main__":
+ import uvicorn
+ uvicorn.run("main:app", host="127.0.0.1", port=8000, reload=True)
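For a quick smoke test of the app object, a sketch using FastAPI's bundled test client (which additionally requires the httpx package, not pinned in requirements.txt):

from fastapi.testclient import TestClient
from main import app

client = TestClient(app)
resp = client.get("/")
assert resp.status_code == 200
assert resp.json()["docs_url"] == "/docs"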
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ fastapi==0.104.1
+ uvicorn==0.24.0
+ python-multipart==0.0.6
+ pydantic==2.5.2
+ pydantic-settings==2.1.0
+ python-dotenv==1.0.0
+ openai==1.59.7
+ dashscope==1.22.0
+ edge_tts==6.1.19
+ loguru==0.7.2
+ numpy==1.25.0
+ moviepy==2.1.1
+ decorator==4.4.2
+ imageio-ffmpeg==0.4.9
+ requests==2.31.0
+ Pillow==9.5.0
resource/fonts/MicrosoftYaHeiBold.ttc ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:519309b7ab0479c4dc3ace5e291de5a8702175be5586e165bc810267bd4619a5
+ size 16880832
resource/fonts/MicrosoftYaHeiNormal.ttc ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3084f1f88369af6bf9989c909024164d953d1e38d08734f05f28ef24b2f9d577
+ size 19701556
resource/fonts/STHeitiLight.ttc ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a57b0316cc0544f682b8fb9855e14ade79ae77340ef6a01ba9210e25b4c5a5b7
+ size 55783456
resource/fonts/STHeitiMedium.ttc ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f8fa4a63e2cf500e98e64d4c73260daaba049306cf85dec9e3729bc285b7d645
+ size 55754164