Spaces:

chaowenguo
/

aegwe4

Running

App Files Files Community

chaowenguo commited on Jun 8

Commit

3b13b0e

verified ·

1 Parent(s): 6fa337a

Upload 121 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +13 -0
Dockerfile +63 -0
LICENSE +21 -0
README-en.md +115 -0
README-ja.md +84 -0
README.md +118 -10
app/__init__.py +0 -0
app/asgi.py +90 -0
app/config/__init__.py +56 -0
app/config/config.py +85 -0
app/controllers/base.py +31 -0
app/controllers/manager/base_manager.py +64 -0
app/controllers/manager/memory_manager.py +18 -0
app/controllers/manager/redis_manager.py +56 -0
app/controllers/ping.py +14 -0
app/controllers/v1/base.py +11 -0
app/controllers/v1/llm.py +93 -0
app/controllers/v1/video.py +271 -0
app/controllers/v2/base.py +11 -0
app/controllers/v2/script.py +170 -0
app/models/__init__.py +0 -0
app/models/const.py +25 -0
app/models/exception.py +28 -0
app/models/schema.py +391 -0
app/models/schema_v2.py +63 -0
app/router.py +21 -0
app/services/SDE/prompt.py +97 -0
app/services/SDE/short_drama_explanation.py +456 -0
app/services/SDP/generate_script_short.py +37 -0
app/services/SDP/utils/short_schema.py +60 -0
app/services/SDP/utils/step1_subtitle_analyzer_openai.py +157 -0
app/services/SDP/utils/step5_merge_script.py +69 -0
app/services/SDP/utils/utils.py +45 -0
app/services/__init__.py +0 -0
app/services/audio_merger.py +171 -0
app/services/clip_video.py +237 -0
app/services/generate_narration_script.py +264 -0
app/services/generate_video.py +393 -0
app/services/llm.py +808 -0
app/services/material.py +561 -0
app/services/merger_video.py +662 -0
app/services/script_service.py +400 -0
app/services/state.py +122 -0
app/services/subtitle.py +462 -0
app/services/subtitle_merger.py +202 -0
app/services/task.py +398 -0
app/services/update_script.py +266 -0
app/services/video.py +365 -0
app/services/video_service.py +56 -0
app/services/voice.py +1469 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,16 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+docs/check-en.png filter=lfs diff=lfs merge=lfs -text
+docs/check-zh.png filter=lfs diff=lfs merge=lfs -text
+docs/img001-en.png filter=lfs diff=lfs merge=lfs -text
+docs/img001-zh.png filter=lfs diff=lfs merge=lfs -text
+docs/img004-en.png filter=lfs diff=lfs merge=lfs -text
+docs/img004-zh.png filter=lfs diff=lfs merge=lfs -text
+docs/img005-zh.png filter=lfs diff=lfs merge=lfs -text
+docs/img006-en.png filter=lfs diff=lfs merge=lfs -text
+docs/img006-zh.png filter=lfs diff=lfs merge=lfs -text
+docs/img007-en.png filter=lfs diff=lfs merge=lfs -text
+docs/img007-zh.png filter=lfs diff=lfs merge=lfs -text
+docs/index-en.png filter=lfs diff=lfs merge=lfs -text
+docs/index-zh.png filter=lfs diff=lfs merge=lfs -text

Dockerfile ADDED Viewed

	@@ -0,0 +1,63 @@

+# Build stage
+FROM python:3.10-slim-bullseye as builder
+# Set the working directory
+WORKDIR /build
+# Install build dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    git-lfs \
+    && rm -rf /var/lib/apt/lists/*
+# Create a virtual environment
+RUN python -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+# Install PyTorch first (it is the largest dependency)
+RUN pip install --no-cache-dir torch torchvision torchaudio
+# Then install the remaining dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Runtime stage
+FROM python:3.10-slim-bullseye
+# Set the working directory
+WORKDIR /NarratoAI
+# Copy the virtual environment from the builder stage
+COPY --from=builder /opt/venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+# Install runtime dependencies, then delete the ImageMagick policy line that
+# matches the path pattern "@*" from policy.xml
+RUN apt-get update && apt-get install -y \
+    imagemagick \
+    ffmpeg \
+    wget \
+    git-lfs \
+    && rm -rf /var/lib/apt/lists/* \
+    && sed -i '/<policy domain="path" rights="none" pattern="@\*"/d' /etc/ImageMagick-6/policy.xml
+# Set environment variables
+ENV PYTHONPATH="/NarratoAI" \
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1
+# Set directory permissions
+RUN chmod 777 /NarratoAI
+# Install git lfs
+RUN git lfs install
+# Copy the application code
+COPY . .
+# Expose ports
+EXPOSE 8501 8080
+# Use the script as the entrypoint
+COPY docker-entrypoint.sh /usr/local/bin/
+RUN chmod +x /usr/local/bin/docker-entrypoint.sh
+ENTRYPOINT ["docker-entrypoint.sh"]

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2024 linyq
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README-en.md ADDED Viewed

	@@ -0,0 +1,115 @@

+<div align="center">
+<h1 align="center" style="font-size: 2cm;"> NarratoAI 😎📽️ </h1>
+<h3 align="center">An all-in-one AI-powered tool for film commentary and automated video editing.🎬🎞️ </h3>
+<h3>📖 English | <a href="README.md">简体中文</a> | <a href="README-ja.md">日本語</a> </h3>
+<div align="center">
+[//]: # (  <a href="https://trendshift.io/repositories/8731" target="_blank"><img src="https://trendshift.io/api/badge/repositories/8731" alt="harry0703%2FNarratoAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>)
+</div>
+<br>
+NarratoAI is an automated video narration tool that provides an all-in-one solution for script writing, automated video editing, voice-over, and subtitle generation, powered by LLM to enhance efficient content creation.
+<br>
+[![madewithlove](https://img.shields.io/badge/made_with-%E2%9D%A4-red?style=for-the-badge&labelColor=orange)](https://github.com/linyqh/NarratoAI)
+[![GitHub license](https://img.shields.io/github/license/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/blob/main/LICENSE)
+[![GitHub issues](https://img.shields.io/github/issues/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/issues)
+[![GitHub stars](https://img.shields.io/github/stars/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/stargazers)
+<a href="https://discord.com/invite/V2pbAqqQNb" target="_blank">💬 Join the open source community to get project updates and the latest news.</a>
+<h2><a href="https://p9mf6rjv3c.feishu.cn/wiki/SP8swLLZki5WRWkhuFvc2CyInDg?from=from_copylink" target="_blank">🎉🎉🎉 Official Documentation 🎉🎉🎉</a> </h2>
+<h3>Home</h3>
+![](docs/index-en.png)
+<h3>Video Review Interface</h3>
+![](docs/check-en.png)
+</div>
+## Latest News
+- 2025.05.11 Released new version 0.6.0, supports **short drama commentary** and optimized editing process
+- 2025.03.06 Released new version 0.5.2, supports DeepSeek R1 and DeepSeek V3 models for short drama mixing
+- 2024.12.16 Released new version 0.3.9, supports Alibaba Qwen2-VL model for video understanding; supports short drama mixing
+- 2024.11.24 Opened Discord community: https://discord.com/invite/V2pbAqqQNb
+- 2024.11.11 Migrated open source community, welcome to join! [Join the official community](https://github.com/linyqh/NarratoAI/wiki)
+- 2024.11.10 Released official documentation, details refer to [Official Documentation](https://p9mf6rjv3c.feishu.cn/wiki/SP8swLLZki5WRWkhuFvc2CyInDg)
+- 2024.11.10 Released new version v0.3.5; optimized video editing process,
+## Major Benefits 🎉
+DeepSeek models are now fully supported! Register to receive 20 million free tokens (worth 14 yuan of platform quota); editing a 10-minute video costs only 0.1 yuan!
+🔥 Quick benefits:
+1️⃣ Click the link to register: https://cloud.siliconflow.cn/i/pyOKqFCV
+2️⃣ Log in with your phone number, **be sure to fill in the invitation code: pyOKqFCV**
+3️⃣ Receive the 14 yuan quota and quickly try out highly cost-effective AI editing!
+💡 Low cost, high creativity:
+Silicon Flow API Key can be integrated with one click, doubling intelligent editing efficiency!
+(Note: The invitation code is the only proof for benefit collection, automatically credited after registration)
+Act now and unlock your AI productivity with "pyOKqFCV"!
+😊 Update Steps:
+Integration Package: Click update.bat one-click update script
+Code Build: Use git pull to fetch the latest code
+## Announcement 📢
+_**Note⚠️: Recently, someone has been impersonating the author on x (Twitter) to issue tokens on the pump.fun platform! This is a scam!!! Do not be deceived! Currently, NarratoAI has not made any official promotions on x (Twitter), please be cautious**_
+Below is a screenshot of this person's x (Twitter) homepage
+<img src="https://github.com/user-attachments/assets/c492ab99-52cd-4ba2-8695-1bd2073ecf12" alt="Screenshot_20250109_114131_Samsung Internet" style="width:30%; height:auto;">
+## Future Plans 🥳
+- [x] Windows Integration Pack Release
+- [x] Optimized the story generation process and improved the generation effect
+- [x] Released version 0.3.5 integration package
+- [x] Support Alibaba Qwen2-VL large model for video understanding
+- [x] Support short drama mixing
+  - [x] One-click merge materials
+  - [x] One-click transcription
+  - [x] One-click clear cache
+- [ ] Support exporting to Jianying drafts
+- [X] Support short drama commentary
+- [ ] Character face matching
+- [ ] Support automatic matching based on voiceover, script, and video materials
+- [ ] Support more TTS engines
+- [ ] ...
+## System Requirements 📦
+- Recommended minimum: CPU with 4 cores or more, 8GB RAM or more, GPU is not required
+- Windows 10/11 or MacOS 11.0 or above
+- [Python 3.12+](https://www.python.org/downloads/)
+## Feedback & Suggestions 📢
+👏 1. You can submit [issue](https://github.com/linyqh/NarratoAI/issues) or [pull request](https://github.com/linyqh/NarratoAI/pulls)
+💬 2. [Join the open source community exchange group](https://github.com/linyqh/NarratoAI/wiki)
+📷 3. Follow the official account [NarratoAI助手] to grasp the latest news
+## Reference Projects 📚
+- https://github.com/FujiwaraChoki/MoneyPrinter
+- https://github.com/harry0703/MoneyPrinterTurbo
+This project was refactored based on the above projects with the addition of video narration features. Thanks to the original authors for their open-source spirit 🥳🥳🥳
+## Buy the Author a Cup of Coffee ☕️
+<div style="display: flex; justify-content: space-between;">
+  <img src="https://github.com/user-attachments/assets/5038ccfb-addf-4db1-9966-99415989fd0c" alt="Image 1" style="width: 350px; height: 350px; margin: auto;"/>
+  <img src="https://github.com/user-attachments/assets/07d4fd58-02f0-425c-8b59-2ab94b4f09f8" alt="Image 2" style="width: 350px; height: 350px; margin: auto;"/>
+</div>
+## License 📝
+Click to view [`LICENSE`](LICENSE) file
+## Star History
+[![Star History Chart](https://api.star-history.com/svg?repos=linyqh/NarratoAI&type=Date)](https://star-history.com/#linyqh/NarratoAI&Date)

README-ja.md ADDED Viewed

	@@ -0,0 +1,84 @@

+<div align="center">
+<h1 align="center" style="font-size: 2cm;"> NarratoAI 😎📽️ </h1>
+<h3 align="center">一体型AI映画解説および自動ビデオ編集ツール🎬🎞️ </h3>
+<h3>📖 <a href="README.md">简体中文</a> | <a href="README-en.md">English</a> | 日本語 </h3>
+<div align="center">
+[//]: # (  <a href="https://trendshift.io/repositories/8731" target="_blank"><img src="https://trendshift.io/api/badge/repositories/8731" alt="harry0703%2FNarratoAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>)
+</div>
+<br>
+NarratoAIは、LLMを活用してスクリプト作成、自動ビデオ編集、ナレーション、字幕生成の一体型ソリューションを提供する自動化ビデオナレーションツールです。
+<br>
+[![madewithlove](https://img.shields.io/badge/made_with-%E2%9D%A4-red?style=for-the-badge&labelColor=orange)](https://github.com/linyqh/NarratoAI)
+[![GitHub license](https://img.shields.io/github/license/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/blob/main/LICENSE)
+[![GitHub issues](https://img.shields.io/github/issues/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/issues)
+[![GitHub stars](https://img.shields.io/github/stars/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/stargazers)
+<a href="https://discord.gg/uVAJftcm" target="_blank">💬 Discordオープンソースコミュニティに参加して、プロジェクトの最新情報を入手しましょう。</a>
+<h2><a href="https://p9mf6rjv3c.feishu.cn/wiki/SP8swLLZki5WRWkhuFvc2CyInDg?from=from_copylink" target="_blank">🎉🎉🎉 公式ドキュメント 🎉🎉🎉</a> </h2>
+<h3>ホーム</h3>
+![](docs/index-zh.png)
+<h3>ビデオレビューインターフェース</h3>
+![](docs/check-zh.png)
+</div>
+## 最新情報
+- 2024.11.24 Discordコミュニティ開設：https://discord.gg/uVAJftcm
+- 2024.11.11 オープンソースコミュニティに移行、参加を歓迎します！ [公式コミュニティに参加](https://github.com/linyqh/NarratoAI/wiki)
+- 2024.11.10 公式ドキュメント公開、詳細は [公式ドキュメント](https://p9mf6rjv3c.feishu.cn/wiki/SP8swLLZki5WRWkhuFvc2CyInDg) を参照
+- 2024.11.10 新バージョンv0.3.5リリース；ビデオ編集プロセスの最適化
+## 今後の計画 🥳
+- [x] Windows統合パックリリース
+- [x] ストーリー生成プロセスの最適化、生成効果の向上
+- [x] バージョン0.3.5統合パックリリース
+- [x] アリババQwen2-VL大規模モデルのビデオ理解サポート
+- [x] 短編ドラマの解説サポート
+  - [x] 一クリックで素材を統合
+  - [x] 一クリックで文字起こし
+  - [x] 一クリックでキャッシュをクリア
+- [ ] ジャン映草稿のエクスポートをサポート
+- [ ] 主役の顔のマッチング
+- [ ] 音声、スクリプト、ビデオ素材に基づいて自動マッチングをサポート
+- [ ] より多くのTTSエンジンをサポート
+- [ ] ...
+## システム要件 📦
+- 推奨最低：CPU 4コア以上、メモリ8GB以上、GPUは必須ではありません
+- Windows 10またはMacOS 11.0以上
+## フィードバックと提案 📢
+👏 1. [issue](https://github.com/linyqh/NarratoAI/issues)または[pull request](https://github.com/linyqh/NarratoAI/pulls)を提出できます
+💬 2. [オープンソースコミュニティ交流グループに参加](https://github.com/linyqh/NarratoAI/wiki)
+📷 3. 公式アカウント【NarratoAI助手】をフォローして最新情報を入手
+## 参考プロジェクト 📚
+- https://github.com/FujiwaraChoki/MoneyPrinter
+- https://github.com/harry0703/MoneyPrinterTurbo
+このプロジェクトは上記のプロジェクトを基にリファクタリングされ、映画解説機能が追加されました。オリジナルの作者に感謝します 🥳🥳🥳
+## 作者にコーヒーを一杯おごる ☕️
+<div style="display: flex; justify-content: space-between;">
+  <img src="https://github.com/user-attachments/assets/5038ccfb-addf-4db1-9966-99415989fd0c" alt="Image 1" style="width: 350px; height: 350px; margin: auto;"/>
+  <img src="https://github.com/user-attachments/assets/07d4fd58-02f0-425c-8b59-2ab94b4f09f8" alt="Image 2" style="width: 350px; height: 350px; margin: auto;"/>
+</div>
+## ライセンス 📝
+[`LICENSE`](LICENSE) ファイルをクリックして表示
+## Star History
+[![Star History Chart](https://api.star-history.com/svg?repos=linyqh/NarratoAI&type=Date)](https://star-history.com/#linyqh/NarratoAI&Date)

README.md CHANGED Viewed

@@ -1,10 +1,118 @@
----
-title: Aegwe4
-emoji: 🔥
-colorFrom: blue
-colorTo: indigo
-sdk: docker
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+<div align="center">
+<h1 align="center" style="font-size: 2cm;"> NarratoAI 😎📽️ </h1>
+<h3 align="center">一站式 AI 影视解说+自动化剪辑工具🎬🎞️ </h3>
+<h3>📖 <a href="README-en.md">English</a> | 简体中文 | <a href="README-ja.md">日本語</a> </h3>
+<div align="center">
+[//]: # (  <a href="https://trendshift.io/repositories/8731" target="_blank"><img src="https://trendshift.io/api/badge/repositories/8731" alt="harry0703%2FNarratoAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>)
+</div>
+<br>
+NarratoAI 是一个自动化影视解说工具，基于LLM实现文案撰写、自动化视频剪辑、配音和字幕生成的一站式流程，助力高效内容创作。
+<br>
+[![madewithlove](https://img.shields.io/badge/made_with-%E2%9D%A4-red?style=for-the-badge&labelColor=orange)](https://github.com/linyqh/NarratoAI)
+[![GitHub license](https://img.shields.io/github/license/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/blob/main/LICENSE)
+[![GitHub issues](https://img.shields.io/github/issues/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/issues)
+[![GitHub stars](https://img.shields.io/github/stars/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/stargazers)
+<a href="https://discord.com/invite/V2pbAqqQNb" target="_blank">💬 加入 discord 开源社区，获取项目动态和最新资讯。</a>
+<h2><a href="https://p9mf6rjv3c.feishu.cn/wiki/SP8swLLZki5WRWkhuFvc2CyInDg?from=from_copylink" target="_blank">🎉🎉🎉 官方文档 🎉🎉🎉</a> </h2>
+<h3>首页</h3>
+![](docs/index-zh.png)
+<h3>视频审查界面</h3>
+![](docs/check-zh.png)
+</div>
+## 最新资讯
+- 2025.05.11 发布新版本 0.6.0，支持 **短剧解说** 和 优化剪辑流程
+- 2025.03.06 发布新版本 0.5.2，支持 DeepSeek R1 和 DeepSeek V3 模型进行短剧混剪
+- 2024.12.16 发布新版本 0.3.9，支持阿里 Qwen2-VL 模型理解视频；支持短剧混剪
+- 2024.11.24 开通 discord 社群：https://discord.com/invite/V2pbAqqQNb
+- 2024.11.11 迁移开源社群，欢迎加入！ [加入官方社群](https://github.com/linyqh/NarratoAI/wiki)
+- 2024.11.10 发布官方文档，详情参见 [官方文档](https://p9mf6rjv3c.feishu.cn/wiki/SP8swLLZki5WRWkhuFvc2CyInDg)
+- 2024.11.10 发布新版本 v0.3.5；优化视频剪辑流程，
+## 重磅福利 🎉
+即日起全面支持DeepSeek模型！注册即享2000万免费Token（价值14元平台配额），剪辑10分钟视频仅需0.1元！
+🔥 快速领福利：
+1️⃣ 点击链接注册：https://cloud.siliconflow.cn/i/pyOKqFCV
+2️⃣ 使用手机号登录，**务必填写邀请码：pyOKqFCV**
+3️⃣ 领取14元配额，极速体验高性价比AI剪辑
+💡 小成本大创作：
+硅基流动API Key一键接入，智能剪辑效率翻倍！
+（注：邀请码为福利领取唯一凭证，注册后自动到账）
+立即行动，用「pyOKqFCV」解锁你的AI生产力！
+😊 更新步骤：
+整合包：点击 update.bat 一键更新脚本
+代码构建：使用 git pull 拉取最新代码
+## 公告 📢
+_**注意⚠️：近期在 x (推特) 上发现有人冒充作者在 pump.fun 平台上发行代币！ 这是骗子！！！ 不要被割了韭菜
+！！！目前 NarratoAI 没有在 x(推特) 上做任何官方宣传，注意甄别**_
+下面是此人 x(推特) 首页截图
+<img src="https://github.com/user-attachments/assets/c492ab99-52cd-4ba2-8695-1bd2073ecf12" alt="Screenshot_20250109_114131_Samsung Internet" style="width:30%; height:auto;">
+## 未来计划 🥳
+- [x] windows 整合包发布
+- [x] 优化剧情生成流程，提升生成效果
+- [x] 发布 0.3.5 整合包
+- [x] 支持阿里 Qwen2-VL 大模型理解视频
+- [x] 支持短剧混剪
+  - [x] 一键合并素材
+  - [x] 一键转录
+  - [x] 一键清理缓存
+- [ ] 支持导出剪映草稿
+- [X] 支持短剧解说
+- [ ] 主角人脸匹配
+- [ ] 支持根据口播，文案，视频素材自动匹配
+- [ ] 支持更多 TTS 引擎
+- [ ] ...
+## 配置要求 📦
+- 建议最低 CPU 4核或以上，内存 8G 或以上，显卡非必须
+- Windows 10/11 或 MacOS 11.0 以上系统
+- [Python 3.12+](https://www.python.org/downloads/)
+## 反馈建议 📢
+👏 1. 可以提交 [issue](https://github.com/linyqh/NarratoAI/issues)或者 [pull request](https://github.com/linyqh/NarratoAI/pulls)
+💬 2. [加入开源社区交流群](https://github.com/linyqh/NarratoAI/wiki)
+📷 3. 关注公众号【NarratoAI助手】，掌握最新资讯
+## 参考项目 📚
+- https://github.com/FujiwaraChoki/MoneyPrinter
+- https://github.com/harry0703/MoneyPrinterTurbo
+该项目基于以上项目重构而来，增加了影视解说功能，感谢大佬的开源精神 🥳🥳🥳
+## 请作者喝一杯咖啡 ☕️
+<div style="display: flex; justify-content: space-between;">
+  <img src="https://github.com/user-attachments/assets/5038ccfb-addf-4db1-9966-99415989fd0c" alt="Image 1" style="width: 350px; height: 350px; margin: auto;"/>
+  <img src="https://github.com/user-attachments/assets/07d4fd58-02f0-425c-8b59-2ab94b4f09f8" alt="Image 2" style="width: 350px; height: 350px; margin: auto;"/>
+</div>
+## 许可证 📝
+点击查看 [`LICENSE`](LICENSE) 文件
+## Star History
+[![Star History Chart](https://api.star-history.com/svg?repos=linyqh/NarratoAI&type=Date)](https://star-history.com/#linyqh/NarratoAI&Date)

app/__init__.py ADDED Viewed

File without changes

app/asgi.py ADDED Viewed

	@@ -0,0 +1,90 @@

+"""Application implementation - ASGI."""
+import os
+from fastapi import FastAPI, Request
+from fastapi.exceptions import RequestValidationError
+from fastapi.responses import JSONResponse
+from loguru import logger
+from fastapi.staticfiles import StaticFiles
+from fastapi.middleware.cors import CORSMiddleware
+from app.config import config
+from app.models.exception import HttpException
+from app.router import root_api_router
+from app.utils import utils
+from app.utils import ffmpeg_utils
+def exception_handler(request: Request, e: HttpException):
+    return JSONResponse(
+        status_code=e.status_code,
+        content=utils.get_response(e.status_code, e.data, e.message),
+    )
+def validation_exception_handler(request: Request, e: RequestValidationError):
+    return JSONResponse(
+        status_code=400,
+        content=utils.get_response(
+            status=400, data=e.errors(), message="field required"
+        ),
+    )
+def get_application() -> FastAPI:
+    """Initialize FastAPI application.
+    Returns:
+       FastAPI: Application object instance.
+    """
+    instance = FastAPI(
+        title=config.project_name,
+        description=config.project_description,
+        version=config.project_version,
+        debug=False,
+    )
+    instance.include_router(root_api_router)
+    instance.add_exception_handler(HttpException, exception_handler)
+    instance.add_exception_handler(RequestValidationError, validation_exception_handler)
+    return instance
+app = get_application()
+# Configures the CORS middleware for the FastAPI app
+cors_allowed_origins_str = os.getenv("CORS_ALLOWED_ORIGINS", "")
+origins = cors_allowed_origins_str.split(",") if cors_allowed_origins_str else ["*"]
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+task_dir = utils.task_dir()
+app.mount(
+    "/tasks", StaticFiles(directory=task_dir, html=True, follow_symlink=True), name=""
+)
+public_dir = utils.public_dir()
+app.mount("/", StaticFiles(directory=public_dir, html=True), name="")
+@app.on_event("shutdown")
+def shutdown_event():
+    """Log that the application is shutting down."""
+    logger.info("shutdown event")
+@app.on_event("startup")
+def startup_event():
+    logger.info("startup event")
+    # 检测FFmpeg硬件加速
+    hwaccel_info = ffmpeg_utils.detect_hardware_acceleration()
+    if hwaccel_info["available"]:
+        logger.info(f"FFmpeg硬件加速检测结果: 可用 | 类型: {hwaccel_info['type']} | 编码器: {hwaccel_info['encoder']} | 独立显卡: {hwaccel_info['is_dedicated_gpu']} | 参数: {hwaccel_info['hwaccel_args']}")
+    else:
+        logger.warning(f"FFmpeg硬件加速不可用: {hwaccel_info['message']}, 将使用CPU软件编码")

app/config/__init__.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import os
+import sys
+from loguru import logger
+from app.config import config
+from app.utils import utils
+def __init_logger():
+    # _log_file = utils.storage_dir("logs/server.log")
+    _lvl = config.log_level
+    root_dir = os.path.dirname(
+        os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+    )
+    def format_record(record):
+        # 获取日志记录中的文件全路径
+        file_path = record["file"].path
+        # 将绝对路径转换为相对于项目根目录的路径
+        relative_path = os.path.relpath(file_path, root_dir)
+        # 更新记录中的文件路径
+        record["file"].path = f"./{relative_path}"
+        # 返回修改后的格式字符串
+        # 您可以根据需要调整这里的格式
+        _format = (
+            "<green>{time:%Y-%m-%d %H:%M:%S}</> | "
+            + "<level>{level}</> | "
+            + '"{file.path}:{line}":<blue> {function}</> '
+            + "- <level>{message}</>"
+            + "\n"
+        )
+        return _format
+    logger.remove()
+    logger.add(
+        sys.stdout,
+        level=_lvl,
+        format=format_record,
+        colorize=True,
+    )
+    # logger.add(
+    #     _log_file,
+    #     level=_lvl,
+    #     format=format_record,
+    #     rotation="00:00",
+    #     retention="3 days",
+    #     backtrace=True,
+    #     diagnose=True,
+    #     enqueue=True,
+    # )
+__init_logger()

app/config/config.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import os
+import socket
+import toml
+import shutil
+from loguru import logger
+root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
+config_file = f"{root_dir}/config.toml"
+version_file = f"{root_dir}/project_version"
+def get_version_from_file():
+    """从project_version文件中读取版本号"""
+    try:
+        if os.path.isfile(version_file):
+            with open(version_file, "r", encoding="utf-8") as f:
+                return f.read().strip()
+        return "0.1.0"  # 默认版本号
+    except Exception as e:
+        logger.error(f"读取版本号文件失败: {str(e)}")
+        return "0.1.0"  # 默认版本号
+def load_config():
+    # fix: IsADirectoryError: [Errno 21] Is a directory: '/NarratoAI/config.toml'
+    if os.path.isdir(config_file):
+        shutil.rmtree(config_file)
+    if not os.path.isfile(config_file):
+        example_file = f"{root_dir}/config.example.toml"
+        if os.path.isfile(example_file):
+            shutil.copyfile(example_file, config_file)
+            logger.info(f"copy config.example.toml to config.toml")
+    logger.info(f"load config from file: {config_file}")
+    try:
+        _config_ = toml.load(config_file)
+    except Exception as e:
+        logger.warning(f"load config failed: {str(e)}, try to load as utf-8-sig")
+        with open(config_file, mode="r", encoding="utf-8-sig") as fp:
+            _cfg_content = fp.read()
+            _config_ = toml.loads(_cfg_content)
+    return _config_
+def save_config():
+    with open(config_file, "w", encoding="utf-8") as f:
+        _cfg["app"] = app
+        _cfg["azure"] = azure
+        _cfg["ui"] = ui
+        f.write(toml.dumps(_cfg))
+# Parsed config.toml; the names below expose its sections and top-level keys.
+_cfg = load_config()
+# Section tables; each falls back to an empty dict when the section is absent.
+app = _cfg.get("app", {})
+whisper = _cfg.get("whisper", {})
+proxy = _cfg.get("proxy", {})
+azure = _cfg.get("azure", {})
+ui = _cfg.get("ui", {})
+frames = _cfg.get("frames", {})
+hostname = socket.gethostname()
+# Top-level settings with their defaults.
+log_level = _cfg.get("log_level", "DEBUG")
+listen_host = _cfg.get("listen_host", "0.0.0.0")
+listen_port = _cfg.get("listen_port", 8080)
+project_name = _cfg.get("project_name", "NarratoAI")
+project_description = _cfg.get(
+    "project_description",
+    "<a href='https://github.com/linyqh/NarratoAI'>https://github.com/linyqh/NarratoAI</a>",
+)
+# Read the version from the project_version file rather than from the config file.
+project_version = get_version_from_file()
+reload_debug = False
+# Export the configured binary paths through environment variables, but only
+# when the configured path points at an existing file.
+imagemagick_path = app.get("imagemagick_path", "")
+if imagemagick_path and os.path.isfile(imagemagick_path):
+    os.environ["IMAGEMAGICK_BINARY"] = imagemagick_path
+ffmpeg_path = app.get("ffmpeg_path", "")
+if ffmpeg_path and os.path.isfile(ffmpeg_path):
+    os.environ["IMAGEIO_FFMPEG_EXE"] = ffmpeg_path
+logger.info(f"{project_name} v{project_version}")

app/controllers/base.py ADDED Viewed

	@@ -0,0 +1,31 @@

+from uuid import uuid4
+from fastapi import Request
+from app.config import config
+from app.models.exception import HttpException
+def get_task_id(request: Request):
+    task_id = request.headers.get("x-task-id")
+    if not task_id:
+        task_id = uuid4()
+    return str(task_id)
+def get_api_key(request: Request):
+    api_key = request.headers.get("x-api-key")
+    return api_key
+def verify_token(request: Request):
+    token = get_api_key(request)
+    if token != config.app.get("api_key", ""):
+        request_id = get_task_id(request)
+        request_url = request.url
+        user_agent = request.headers.get("user-agent")
+        raise HttpException(
+            task_id=request_id,
+            status_code=401,
+            message=f"invalid token: {request_url}, {user_agent}",
+        )

app/controllers/manager/base_manager.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import threading
+from typing import Callable, Any, Dict
+class TaskManager:
+    def __init__(self, max_concurrent_tasks: int):
+        self.max_concurrent_tasks = max_concurrent_tasks
+        self.current_tasks = 0
+        self.lock = threading.Lock()
+        self.queue = self.create_queue()
+    def create_queue(self):
+        raise NotImplementedError()
+    def add_task(self, func: Callable, *args: Any, **kwargs: Any):
+        with self.lock:
+            if self.current_tasks < self.max_concurrent_tasks:
+                print(f"add task: {func.__name__}, current_tasks: {self.current_tasks}")
+                self.execute_task(func, *args, **kwargs)
+            else:
+                print(
+                    f"enqueue task: {func.__name__}, current_tasks: {self.current_tasks}"
+                )
+                self.enqueue({"func": func, "args": args, "kwargs": kwargs})
+    def execute_task(self, func: Callable, *args: Any, **kwargs: Any):
+        thread = threading.Thread(
+            target=self.run_task, args=(func, *args), kwargs=kwargs
+        )
+        thread.start()
+    def run_task(self, func: Callable, *args: Any, **kwargs: Any):
+        try:
+            with self.lock:
+                self.current_tasks += 1
+            func(*args, **kwargs)  # 在这里调用函数，传递*args和**kwargs
+        finally:
+            self.task_done()
+    def check_queue(self):
+        with self.lock:
+            if (
+                self.current_tasks < self.max_concurrent_tasks
+                and not self.is_queue_empty()
+            ):
+                task_info = self.dequeue()
+                func = task_info["func"]
+                args = task_info.get("args", ())
+                kwargs = task_info.get("kwargs", {})
+                self.execute_task(func, *args, **kwargs)
+    def task_done(self):
+        with self.lock:
+            self.current_tasks -= 1
+        self.check_queue()
+    def enqueue(self, task: Dict):
+        raise NotImplementedError()
+    def dequeue(self):
+        raise NotImplementedError()
+    def is_queue_empty(self):
+        raise NotImplementedError()

app/controllers/manager/memory_manager.py ADDED Viewed

	@@ -0,0 +1,18 @@

+from queue import Queue
+from typing import Dict
+from app.controllers.manager.base_manager import TaskManager
+class InMemoryTaskManager(TaskManager):
+    def create_queue(self):
+        return Queue()
+    def enqueue(self, task: Dict):
+        self.queue.put(task)
+    def dequeue(self):
+        return self.queue.get()
+    def is_queue_empty(self):
+        return self.queue.empty()

app/controllers/manager/redis_manager.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import json
+from typing import Dict
+import redis
+from app.controllers.manager.base_manager import TaskManager
+from app.models.schema import VideoParams
+from app.services import task as tm
+FUNC_MAP = {
+    "start": tm.start,
+    # 'start_test': tm.start_test
+}
+class RedisTaskManager(TaskManager):
+    def __init__(self, max_concurrent_tasks: int, redis_url: str):
+        self.redis_client = redis.Redis.from_url(redis_url)
+        super().__init__(max_concurrent_tasks)
+    def create_queue(self):
+        return "task_queue"
+    def enqueue(self, task: Dict):
+        task_with_serializable_params = task.copy()
+        if "params" in task["kwargs"] and isinstance(
+            task["kwargs"]["params"], VideoParams
+        ):
+            task_with_serializable_params["kwargs"]["params"] = task["kwargs"][
+                "params"
+            ].dict()
+        # 将函数对象转换为其名称
+        task_with_serializable_params["func"] = task["func"].__name__
+        self.redis_client.rpush(self.queue, json.dumps(task_with_serializable_params))
+    def dequeue(self):
+        task_json = self.redis_client.lpop(self.queue)
+        if task_json:
+            task_info = json.loads(task_json)
+            # 将函数名称转换回函数对象
+            task_info["func"] = FUNC_MAP[task_info["func"]]
+            if "params" in task_info["kwargs"] and isinstance(
+                task_info["kwargs"]["params"], dict
+            ):
+                task_info["kwargs"]["params"] = VideoParams(
+                    **task_info["kwargs"]["params"]
+                )
+            return task_info
+        return None
+    def is_queue_empty(self):
+        return self.redis_client.llen(self.queue) == 0

app/controllers/ping.py ADDED Viewed

	@@ -0,0 +1,14 @@

+from fastapi import APIRouter
+from fastapi import Request
+router = APIRouter()
+@router.get(
+    "/ping",
+    tags=["Health Check"],
+    description="检查服务可用性",
+    response_description="pong",
+)
+def ping(request: Request) -> str:
+    """Health-check endpoint: always returns the string "pong"."""
+    return "pong"

app/controllers/v1/base.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from fastapi import APIRouter, Depends
+def new_router(dependencies=None):
+    router = APIRouter()
+    router.tags = ["V1"]
+    router.prefix = "/api/v1"
+    # 将认证依赖项应用于所有路由
+    if dependencies:
+        router.dependencies = dependencies
+    return router

app/controllers/v1/llm.py ADDED Viewed

	@@ -0,0 +1,93 @@

+from fastapi import Request, File, UploadFile
+import os
+from app.controllers.v1.base import new_router
+from app.models.schema import (
+    VideoScriptResponse,
+    VideoScriptRequest,
+    VideoTermsResponse,
+    VideoTermsRequest,
+    VideoTranscriptionRequest,
+    VideoTranscriptionResponse,
+)
+from app.services import llm
+from app.utils import utils
+from app.config import config
+# Authentication dependency (currently disabled):
+# router = new_router(dependencies=[Depends(base.verify_token)])
+router = new_router()
+# Upload directory: "uploads" under the directory three levels above this file.
+UPLOAD_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "uploads")
+@router.post(
+    "/scripts",
+    response_model=VideoScriptResponse,
+    summary="Create a script for the video",
+)
+def generate_video_script(request: Request, body: VideoScriptRequest):
+    video_script = llm.generate_script(
+        video_subject=body.video_subject,
+        language=body.video_language,
+        paragraph_number=body.paragraph_number,
+    )
+    response = {"video_script": video_script}
+    return utils.get_response(200, response)
+@router.post(
+    "/terms",
+    response_model=VideoTermsResponse,
+    summary="Generate video terms based on the video script",
+)
+def generate_video_terms(request: Request, body: VideoTermsRequest):
+    video_terms = llm.generate_terms(
+        video_subject=body.video_subject,
+        video_script=body.video_script,
+        amount=body.amount,
+    )
+    response = {"video_terms": video_terms}
+    return utils.get_response(200, response)
+@router.post(
+    "/transcription",
+    response_model=VideoTranscriptionResponse,
+    summary="Transcribe video content using Gemini"
+)
+async def transcribe_video(
+    request: Request,
+    video_name: str,
+    language: str = "zh-CN",
+    video_file: UploadFile = File(...)
+):
+    """
+    使用 Gemini 转录视频内容,包括时间戳、画面描述和语音内容
+    Args:
+        video_name: 视频名称
+        language: 语言代码,默认zh-CN
+        video_file: 上传的视频文件
+    """
+    # 创建临时目录用于存储上传的视频
+    os.makedirs(UPLOAD_DIR, exist_ok=True)
+    # 保存上传的视频文件
+    video_path = os.path.join(UPLOAD_DIR, video_file.filename)
+    with open(video_path, "wb") as buffer:
+        content = await video_file.read()
+        buffer.write(content)
+    try:
+        transcription = llm.gemini_video_transcription(
+            video_name=video_name,
+            video_path=video_path,
+            language=language,
+            llm_provider_video=config.app.get("video_llm_provider", "gemini")
+        )
+        response = {"transcription": transcription}
+        return utils.get_response(200, response)
+    finally:
+        # 处理完成后删除临时文件
+        if os.path.exists(video_path):
+            os.remove(video_path)

app/controllers/v1/video.py ADDED Viewed

	@@ -0,0 +1,271 @@

+import glob
+import os
+import pathlib
+import shutil
+from typing import Union
+from fastapi import BackgroundTasks, Depends, Path, Request, UploadFile
+from fastapi.params import File
+from fastapi.responses import FileResponse, StreamingResponse
+from loguru import logger
+from app.config import config
+from app.controllers import base
+from app.controllers.manager.memory_manager import InMemoryTaskManager
+from app.controllers.manager.redis_manager import RedisTaskManager
+from app.controllers.v1.base import new_router
+from app.models.exception import HttpException
+from app.models.schema import (
+    AudioRequest,
+    BgmRetrieveResponse,
+    BgmUploadResponse,
+    SubtitleRequest,
+    TaskDeletionResponse,
+    TaskQueryRequest,
+    TaskQueryResponse,
+    TaskResponse,
+    TaskVideoRequest,
+)
+from app.services import state as sm
+from app.services import task as tm
+from app.utils import utils
+# 认证依赖项
+# router = new_router(dependencies=[Depends(base.verify_token)])
+router = new_router()
+_enable_redis = config.app.get("enable_redis", False)
+_redis_host = config.app.get("redis_host", "localhost")
+_redis_port = config.app.get("redis_port", 6379)
+_redis_db = config.app.get("redis_db", 0)
+_redis_password = config.app.get("redis_password", None)
+_max_concurrent_tasks = config.app.get("max_concurrent_tasks", 5)
+redis_url = f"redis://:{_redis_password}@{_redis_host}:{_redis_port}/{_redis_db}"
+# 根据配置选择合适的任务管理器
+if _enable_redis:
+    task_manager = RedisTaskManager(
+        max_concurrent_tasks=_max_concurrent_tasks, redis_url=redis_url
+    )
+else:
+    task_manager = InMemoryTaskManager(max_concurrent_tasks=_max_concurrent_tasks)
+@router.post("/videos", response_model=TaskResponse, summary="Generate a short video")
+def create_video(
+    background_tasks: BackgroundTasks, request: Request, body: TaskVideoRequest
+):
+    # Delegates to create_task with stop_at="video".
+    # ``background_tasks`` is accepted but not used by the body.
+    return create_task(request, body, stop_at="video")
+@router.post("/subtitle", response_model=TaskResponse, summary="Generate subtitle only")
+def create_subtitle(
+    background_tasks: BackgroundTasks, request: Request, body: SubtitleRequest
+):
+    # Delegates to create_task with stop_at="subtitle".
+    # ``background_tasks`` is accepted but not used by the body.
+    return create_task(request, body, stop_at="subtitle")
+@router.post("/audio", response_model=TaskResponse, summary="Generate audio only")
+def create_audio(
+    background_tasks: BackgroundTasks, request: Request, body: AudioRequest
+):
+    # Delegates to create_task with stop_at="audio".
+    # ``background_tasks`` is accepted but not used by the body.
+    return create_task(request, body, stop_at="audio")
+def create_task(
+    request: Request,
+    body: Union[TaskVideoRequest, SubtitleRequest, AudioRequest],
+    stop_at: str,
+):
+    task_id = utils.get_uuid()
+    request_id = base.get_task_id(request)
+    try:
+        task = {
+            "task_id": task_id,
+            "request_id": request_id,
+            "params": body.model_dump(),
+        }
+        sm.state.update_task(task_id)
+        task_manager.add_task(tm.start, task_id=task_id, params=body, stop_at=stop_at)
+        logger.success(f"Task created: {utils.to_json(task)}")
+        return utils.get_response(200, task)
+    except ValueError as e:
+        raise HttpException(
+            task_id=task_id, status_code=400, message=f"{request_id}: {str(e)}"
+        )
+@router.get(
+    "/tasks/{task_id}", response_model=TaskQueryResponse, summary="Query task status"
+)
+def get_task(
+    request: Request,
+    task_id: str = Path(..., description="Task ID"),
+    query: TaskQueryRequest = Depends(),
+):
+    endpoint = config.app.get("endpoint", "")
+    if not endpoint:
+        endpoint = str(request.base_url)
+    endpoint = endpoint.rstrip("/")
+    request_id = base.get_task_id(request)
+    task = sm.state.get_task(task_id)
+    if task:
+        task_dir = utils.task_dir()
+        def file_to_uri(file):
+            if not file.startswith(endpoint):
+                _uri_path = v.replace(task_dir, "tasks").replace("\\", "/")
+                _uri_path = f"{endpoint}/{_uri_path}"
+            else:
+                _uri_path = file
+            return _uri_path
+        if "videos" in task:
+            videos = task["videos"]
+            urls = []
+            for v in videos:
+                urls.append(file_to_uri(v))
+            task["videos"] = urls
+        if "combined_videos" in task:
+            combined_videos = task["combined_videos"]
+            urls = []
+            for v in combined_videos:
+                urls.append(file_to_uri(v))
+            task["combined_videos"] = urls
+        return utils.get_response(200, task)
+    raise HttpException(
+        task_id=task_id, status_code=404, message=f"{request_id}: task not found"
+    )
+@router.delete(
+    "/tasks/{task_id}",
+    response_model=TaskDeletionResponse,
+    summary="Delete a generated short video task",
+)
+def delete_video(request: Request, task_id: str = Path(..., description="Task ID")):
+    request_id = base.get_task_id(request)
+    task = sm.state.get_task(task_id)
+    if task:
+        tasks_dir = utils.task_dir()
+        current_task_dir = os.path.join(tasks_dir, task_id)
+        if os.path.exists(current_task_dir):
+            shutil.rmtree(current_task_dir)
+        sm.state.delete_task(task_id)
+        logger.success(f"video deleted: {utils.to_json(task)}")
+        return utils.get_response(200)
+    raise HttpException(
+        task_id=task_id, status_code=404, message=f"{request_id}: task not found"
+    )
+# @router.get(
+#     "/musics", response_model=BgmRetrieveResponse, summary="Retrieve local BGM files"
+# )
+# def get_bgm_list(request: Request):
+#     suffix = "*.mp3"
+#     song_dir = utils.song_dir()
+#     files = glob.glob(os.path.join(song_dir, suffix))
+#     bgm_list = []
+#     for file in files:
+#         bgm_list.append(
+#             {
+#                 "name": os.path.basename(file),
+#                 "size": os.path.getsize(file),
+#                 "file": file,
+#             }
+#         )
+#     response = {"files": bgm_list}
+#     return utils.get_response(200, response)
+#
+# @router.post(
+#     "/musics",
+#     response_model=BgmUploadResponse,
+#     summary="Upload the BGM file to the songs directory",
+# )
+# def upload_bgm_file(request: Request, file: UploadFile = File(...)):
+#     request_id = base.get_task_id(request)
+#     # check file ext
+#     if file.filename.endswith("mp3"):
+#         song_dir = utils.song_dir()
+#         save_path = os.path.join(song_dir, file.filename)
+#         # save file
+#         with open(save_path, "wb+") as buffer:
+#             # If the file already exists, it will be overwritten
+#             file.file.seek(0)
+#             buffer.write(file.file.read())
+#         response = {"file": save_path}
+#         return utils.get_response(200, response)
+#
+#     raise HttpException(
+#         "", status_code=400, message=f"{request_id}: Only *.mp3 files can be uploaded"
+#     )
+#
+#
+# @router.get("/stream/{file_path:path}")
+# async def stream_video(request: Request, file_path: str):
+#     tasks_dir = utils.task_dir()
+#     video_path = os.path.join(tasks_dir, file_path)
+#     range_header = request.headers.get("Range")
+#     video_size = os.path.getsize(video_path)
+#     start, end = 0, video_size - 1
+#
+#     length = video_size
+#     if range_header:
+#         range_ = range_header.split("bytes=")[1]
+#         start, end = [int(part) if part else None for part in range_.split("-")]
+#         if start is None:
+#             start = video_size - end
+#             end = video_size - 1
+#         if end is None:
+#             end = video_size - 1
+#         length = end - start + 1
+#
+#     def file_iterator(file_path, offset=0, bytes_to_read=None):
+#         with open(file_path, "rb") as f:
+#             f.seek(offset, os.SEEK_SET)
+#             remaining = bytes_to_read or video_size
+#             while remaining > 0:
+#                 bytes_to_read = min(4096, remaining)
+#                 data = f.read(bytes_to_read)
+#                 if not data:
+#                     break
+#                 remaining -= len(data)
+#                 yield data
+#
+#     response = StreamingResponse(
+#         file_iterator(video_path, start, length), media_type="video/mp4"
+#     )
+#     response.headers["Content-Range"] = f"bytes {start}-{end}/{video_size}"
+#     response.headers["Accept-Ranges"] = "bytes"
+#     response.headers["Content-Length"] = str(length)
+#     response.status_code = 206  # Partial Content
+#
+#     return response
+#
+#
+# @router.get("/download/{file_path:path}")
+# async def download_video(_: Request, file_path: str):
+#     """
+#     download video
+#     :param _: Request request
+#     :param file_path: video file path, eg: /cd1727ed-3473-42a2-a7da-4faafafec72b/final-1.mp4
+#     :return: video file
+#     """
+#     tasks_dir = utils.task_dir()
+#     video_path = os.path.join(tasks_dir, file_path)
+#     file_path = pathlib.Path(video_path)
+#     filename = file_path.stem
+#     extension = file_path.suffix
+#     headers = {"Content-Disposition": f"attachment; filename={filename}{extension}"}
+#     return FileResponse(
+#         path=video_path,
+#         headers=headers,
+#         filename=f"{filename}{extension}",
+#         media_type=f"video/{extension[1:]}",
+#     )

app/controllers/v2/base.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from fastapi import APIRouter, Depends
+def v2_router(dependencies=None):
+    router = APIRouter()
+    router.tags = ["V2"]
+    router.prefix = "/api/v2"
+    # 将认证依赖项应用于所有路由
+    if dependencies:
+        router.dependencies = dependencies
+    return router

app/controllers/v2/script.py ADDED Viewed

	@@ -0,0 +1,170 @@

+from fastapi import APIRouter, BackgroundTasks
+from loguru import logger
+import os
+from app.models.schema_v2 import (
+    GenerateScriptRequest,
+    GenerateScriptResponse,
+    CropVideoRequest,
+    CropVideoResponse,
+    DownloadVideoRequest,
+    DownloadVideoResponse,
+    StartSubclipRequest,
+    StartSubclipResponse
+)
+from app.models.schema import VideoClipParams
+from app.services.script_service import ScriptGenerator
+from app.services.video_service import VideoService
+from app.utils import utils
+from app.controllers.v2.base import v2_router
+from app.models.schema import VideoClipParams
+from app.services.youtube_service import YoutubeService
+from app.services import task as task_service
+router = v2_router()
+@router.post(
+    "/scripts/generate",
+    response_model=GenerateScriptResponse,
+    summary="同步请求；生成视频脚本 (V2)"
+)
+async def generate_script(
+    request: GenerateScriptRequest,
+    background_tasks: BackgroundTasks
+):
+    """
+    生成视频脚本的V2版本API
+    """
+    task_id = utils.get_uuid()
+    try:
+        generator = ScriptGenerator()
+        script = await generator.generate_script(
+            video_path=request.video_path,
+            video_theme=request.video_theme,
+            custom_prompt=request.custom_prompt,
+            skip_seconds=request.skip_seconds,
+            threshold=request.threshold,
+            vision_batch_size=request.vision_batch_size,
+            vision_llm_provider=request.vision_llm_provider
+        )
+        return {
+            "task_id": task_id,
+            "script": script
+        }
+    except Exception as e:
+        logger.exception(f"Generate script failed: {str(e)}")
+        raise
+@router.post(
+    "/scripts/crop",
+    response_model=CropVideoResponse,
+    summary="同步请求；裁剪视频 (V2)"
+)
+async def crop_video(
+    request: CropVideoRequest,
+    background_tasks: BackgroundTasks
+):
+    """
+    根据脚本裁剪视频的V2版本API
+    """
+    try:
+        # 调用视频裁剪服务
+        video_service = VideoService()
+        task_id, subclip_videos = await video_service.crop_video(
+            video_path=request.video_origin_path,
+            video_script=request.video_script
+        )
+        logger.debug(f"裁剪视频成功，视频片段路径: {subclip_videos}")
+        logger.debug(type(subclip_videos))
+        return {
+            "task_id": task_id,
+            "subclip_videos": subclip_videos
+        }
+    except Exception as e:
+        logger.exception(f"Crop video failed: {str(e)}")
+        raise
+@router.post(
+    "/youtube/download",
+    response_model=DownloadVideoResponse,
+    summary="同步请求；下载YouTube视频 (V2)"
+)
+async def download_youtube_video(
+    request: DownloadVideoRequest,
+    background_tasks: BackgroundTasks
+):
+    """
+    下载指定分辨率的YouTube视频
+    """
+    try:
+        youtube_service = YoutubeService()
+        task_id, output_path, filename = await youtube_service.download_video(
+            url=request.url,
+            resolution=request.resolution,
+            output_format=request.output_format,
+            rename=request.rename
+        )
+        return {
+            "task_id": task_id,
+            "output_path": output_path,
+            "resolution": request.resolution,
+            "format": request.output_format,
+            "filename": filename
+        }
+    except Exception as e:
+        logger.exception(f"Download YouTube video failed: {str(e)}")
+        raise
+@router.post(
+    "/scripts/start-subclip",
+    response_model=StartSubclipResponse,
+    summary="异步请求；开始视频剪辑任务 (V2)"
+)
+async def start_subclip(
+    request: VideoClipParams,
+    task_id: str,
+    subclip_videos: dict,
+    background_tasks: BackgroundTasks
+):
+    """
+    开始视频剪辑任务的V2版本API
+    """
+    try:
+        # 构建参数对象
+        params = VideoClipParams(
+            video_origin_path=request.video_origin_path,
+            video_clip_json_path=request.video_clip_json_path,
+            voice_name=request.voice_name,
+            voice_rate=request.voice_rate,
+            voice_pitch=request.voice_pitch,
+            subtitle_enabled=request.subtitle_enabled,
+            video_aspect=request.video_aspect,
+            n_threads=request.n_threads
+        )
+        # 在后台任务中执行视频剪辑
+        background_tasks.add_task(
+            task_service.start_subclip,
+            task_id=task_id,
+            params=params,
+            subclip_path_videos=subclip_videos
+        )
+        return {
+            "task_id": task_id,
+            "state": "PROCESSING"  # 初始状态
+        }
+    except Exception as e:
+        logger.exception(f"Start subclip task failed: {str(e)}")
+        raise

app/models/__init__.py ADDED Viewed

File without changes

app/models/const.py ADDED Viewed

	@@ -0,0 +1,25 @@

+# Punctuation marks, ASCII entries first and full-width entries after.
+# NOTE(review): "、" appears twice in this list (in both groups) — looks like an
+# unintended duplicate; confirm before relying on the list length.
+PUNCTUATIONS = [
+    "?",
+    ",",
+    ".",
+    "、",
+    ";",
+    ":",
+    "!",
+    "…",
+    "？",
+    "，",
+    "。",
+    "、",
+    "；",
+    "：",
+    "！",
+    "...",
+]
+# Numeric task state codes.
+TASK_STATE_FAILED = -1
+TASK_STATE_COMPLETE = 1
+TASK_STATE_PROCESSING = 4
+# File extensions (without the dot) grouped by media type.
+FILE_TYPE_VIDEOS = ["mp4", "mov", "mkv", "webm"]
+FILE_TYPE_IMAGES = ["jpg", "jpeg", "png", "bmp"]

app/models/exception.py ADDED Viewed

	@@ -0,0 +1,28 @@

+import traceback
+from typing import Any
+from loguru import logger
+class HttpException(Exception):
+    def __init__(
+        self, task_id: str, status_code: int, message: str = "", data: Any = None
+    ):
+        self.message = message
+        self.status_code = status_code
+        self.data = data
+        # 获取异常堆栈信息
+        tb_str = traceback.format_exc().strip()
+        if not tb_str or tb_str == "NoneType: None":
+            msg = f"HttpException: {status_code}, {task_id}, {message}"
+        else:
+            msg = f"HttpException: {status_code}, {task_id}, {message}\n{tb_str}"
+        if status_code == 400:
+            logger.warning(msg)
+        else:
+            logger.error(msg)
+class FileNotFoundException(Exception):
+    """Project-specific exception type with no extra behaviour."""
+    pass

app/models/schema.py ADDED Viewed

	@@ -0,0 +1,391 @@

+import warnings
+from enum import Enum
+from typing import Any, List, Optional
+import pydantic
+from pydantic import BaseModel, Field
+# 忽略 Pydantic 的特定警告
+warnings.filterwarnings(
+    "ignore",
+    category=UserWarning,
+    message="Field name.*shadows an attribute in parent.*",
+)
+# String-valued enum of clip concatenation modes.
+class VideoConcatMode(str, Enum):
+    random = "random"
+    sequential = "sequential"
+class VideoAspect(str, Enum):
+    landscape = "16:9"
+    landscape_2 = "4:3"
+    portrait = "9:16"
+    portrait_2 = "3:4"
+    square = "1:1"
+    def to_resolution(self):
+        if self == VideoAspect.landscape.value:
+            return 1920, 1080
+        elif self == VideoAspect.portrait.value:
+            return 1080, 1920
+        elif self == VideoAspect.square.value:
+            return 1080, 1080
+        return 1080, 1920
+# Config holder passed to the pydantic dataclass decorator used for MaterialInfo.
+class _Config:
+    arbitrary_types_allowed = True
+# Pydantic dataclass describing one video material (provider, URL, duration).
+# NOTE(review): the unit of ``duration`` is not stated here — presumably seconds.
+@pydantic.dataclasses.dataclass(config=_Config)
+class MaterialInfo:
+    provider: str = "pexels"
+    url: str = ""
+    duration: int = 0
+# VoiceNames = [
+#     # zh-CN
+#     "female-zh-CN-XiaoxiaoNeural",
+#     "female-zh-CN-XiaoyiNeural",
+#     "female-zh-CN-liaoning-XiaobeiNeural",
+#     "female-zh-CN-shaanxi-XiaoniNeural",
+#
+#     "male-zh-CN-YunjianNeural",
+#     "male-zh-CN-YunxiNeural",
+#     "male-zh-CN-YunxiaNeural",
+#     "male-zh-CN-YunyangNeural",
+#
+#     # "female-zh-HK-HiuGaaiNeural",
+#     # "female-zh-HK-HiuMaanNeural",
+#     # "male-zh-HK-WanLungNeural",
+#     #
+#     # "female-zh-TW-HsiaoChenNeural",
+#     # "female-zh-TW-HsiaoYuNeural",
+#     # "male-zh-TW-YunJheNeural",
+#
+#     # en-US
+#     "female-en-US-AnaNeural",
+#     "female-en-US-AriaNeural",
+#     "female-en-US-AvaNeural",
+#     "female-en-US-EmmaNeural",
+#     "female-en-US-JennyNeural",
+#     "female-en-US-MichelleNeural",
+#
+#     "male-en-US-AndrewNeural",
+#     "male-en-US-BrianNeural",
+#     "male-en-US-ChristopherNeural",
+#     "male-en-US-EricNeural",
+#     "male-en-US-GuyNeural",
+#     "male-en-US-RogerNeural",
+#     "male-en-US-SteffanNeural",
+# ]
+# Parameters for generating a video; only ``video_subject`` has no default.
+class VideoParams(BaseModel):
+    """
+    {
+      "video_subject": "",
+      "video_aspect": "横屏 16:9（西瓜视频）",
+      "voice_name": "女生-晓晓",
+      "bgm_name": "random",
+      "font_name": "STHeitiMedium 黑体-中",
+      "text_color": "#FFFFFF",
+      "font_size": 60,
+      "stroke_color": "#000000",
+      "stroke_width": 1.5
+    }
+    """
+    video_subject: str
+    video_script: str = ""  # script used to generate the video
+    video_terms: Optional[str | list] = None  # keywords used to generate the video
+    video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
+    video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
+    video_clip_duration: Optional[int] = 5
+    video_count: Optional[int] = 1
+    video_source: Optional[str] = "pexels"
+    video_materials: Optional[List[MaterialInfo]] = None  # materials used to generate the video
+    video_language: Optional[str] = ""  # auto detect
+    voice_name: Optional[str] = ""
+    voice_volume: Optional[float] = 1.0
+    voice_rate: Optional[float] = 1.0
+    bgm_type: Optional[str] = "random"
+    bgm_file: Optional[str] = ""
+    bgm_volume: Optional[float] = 0.2
+    subtitle_enabled: Optional[bool] = True
+    subtitle_position: Optional[str] = "bottom"  # top, bottom, center
+    custom_position: float = 70.0
+    font_name: Optional[str] = "STHeitiMedium.ttc"
+    text_fore_color: Optional[str] = "#FFFFFF"
+    text_background_color: Optional[str] = "transparent"
+    font_size: int = 60
+    stroke_color: Optional[str] = "#000000"
+    stroke_width: float = 1.5
+    n_threads: Optional[int] = 2
+    paragraph_number: Optional[int] = 1
+# Request body of the subtitle-only endpoint; only ``video_script`` is required.
+class SubtitleRequest(BaseModel):
+    video_script: str
+    video_language: Optional[str] = ""
+    voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
+    voice_volume: Optional[float] = 1.0
+    voice_rate: Optional[float] = 1.2
+    bgm_type: Optional[str] = "random"
+    bgm_file: Optional[str] = ""
+    bgm_volume: Optional[float] = 0.2
+    subtitle_position: Optional[str] = "bottom"
+    font_name: Optional[str] = "STHeitiMedium.ttc"
+    text_fore_color: Optional[str] = "#FFFFFF"
+    text_background_color: Optional[str] = "transparent"
+    font_size: int = 60
+    stroke_color: Optional[str] = "#000000"
+    stroke_width: float = 1.5
+    video_source: Optional[str] = "local"
+    # NOTE(review): declared as a string ("true") here, whereas VideoParams
+    # declares subtitle_enabled as a bool — confirm which one consumers expect.
+    subtitle_enabled: Optional[str] = "true"
+# Request body of the audio-only endpoint; only ``video_script`` is required.
+class AudioRequest(BaseModel):
+    video_script: str
+    video_language: Optional[str] = ""
+    voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
+    voice_volume: Optional[float] = 1.0
+    voice_rate: Optional[float] = 1.2
+    bgm_type: Optional[str] = "random"
+    bgm_file: Optional[str] = ""
+    bgm_volume: Optional[float] = 0.2
+    video_source: Optional[str] = "local"
+# Plain (non-pydantic) class holding the script-generation fields; it is mixed
+# into VideoScriptRequest together with BaseModel.
+class VideoScriptParams:
+    """
+    {
+      "video_subject": "春天的花海",
+      "video_language": "",
+      "paragraph_number": 1
+    }
+    """
+    video_subject: Optional[str] = "春天的花海"
+    video_language: Optional[str] = ""
+    paragraph_number: Optional[int] = 1
+# Plain (non-pydantic) class holding the term-generation fields; it is mixed
+# into VideoTermsRequest together with BaseModel.
+class VideoTermsParams:
+    """
+    {
+      "video_subject": "",
+      "video_script": "",
+      "amount": 5
+    }
+    """
+    video_subject: Optional[str] = "春天的花海"
+    video_script: Optional[str] = (
+        "春天的花海，如诗如画般展现在眼前。万物复苏的季节里，大地披上了一袭绚丽多彩的盛装。金黄的迎春、粉嫩的樱花、洁白的梨花、艳丽的郁金香……"
+    )
+    amount: Optional[int] = 5
+# Common response envelope: status code, message and an arbitrary payload.
+class BaseResponse(BaseModel):
+    status: int = 200
+    message: Optional[str] = "success"
+    data: Any = None
+# Request models that add no fields of their own; each one only combines the
+# listed parameter class with BaseModel.
+class TaskVideoRequest(VideoParams, BaseModel):
+    pass
+# Empty query model used as a dependency by the task status endpoint.
+class TaskQueryRequest(BaseModel):
+    pass
+class VideoScriptRequest(VideoScriptParams, BaseModel):
+    pass
+class VideoTermsRequest(VideoTermsParams, BaseModel):
+    pass
+######################################################################################################
+######################################################################################################
+######################################################################################################
+######################################################################################################
+# Response envelope whose ``data`` carries the id of a newly created task.
+class TaskResponse(BaseResponse):
+    class TaskResponseData(BaseModel):
+        task_id: str
+    data: TaskResponseData
+    class Config:
+        # Example payload attached to the generated JSON schema.
+        json_schema_extra = {
+            "example": {
+                "status": 200,
+                "message": "success",
+                "data": {"task_id": "6c85c8cc-a77a-42b9-bc30-947815aa0558"},
+            },
+        }
+# Response envelope for the task status query; only adds a schema example.
+class TaskQueryResponse(BaseResponse):
+    class Config:
+        # Example payload attached to the generated JSON schema.
+        json_schema_extra = {
+            "example": {
+                "status": 200,
+                "message": "success",
+                "data": {
+                    "state": 1,
+                    "progress": 100,
+                    "videos": [
+                        "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/final-1.mp4"
+                    ],
+                    "combined_videos": [
+                        "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/combined-1.mp4"
+                    ],
+                },
+            },
+        }
+# Response envelope for task deletion; only adds a schema example.
+# NOTE(review): the example payload is identical to TaskQueryResponse's — confirm
+# that it reflects what the delete endpoint actually returns.
+class TaskDeletionResponse(BaseResponse):
+    class Config:
+        # Example payload attached to the generated JSON schema.
+        json_schema_extra = {
+            "example": {
+                "status": 200,
+                "message": "success",
+                "data": {
+                    "state": 1,
+                    "progress": 100,
+                    "videos": [
+                        "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/final-1.mp4"
+                    ],
+                    "combined_videos": [
+                        "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/combined-1.mp4"
+                    ],
+                },
+            },
+        }
+# Response envelope for script generation; only adds a schema example.
+class VideoScriptResponse(BaseResponse):
+    class Config:
+        # Example payload attached to the generated JSON schema.
+        json_schema_extra = {
+            "example": {
+                "status": 200,
+                "message": "success",
+                "data": {
+                    "video_script": "春天的花海，是大自然的一幅美丽画卷。在这个季节里，大地复苏，万物生长，花朵争相绽放，形成了一片五彩斑斓的花海..."
+                },
+            },
+        }
+# Response envelope for term generation; only adds a schema example.
+class VideoTermsResponse(BaseResponse):
+    class Config:
+        # Example payload attached to the generated JSON schema.
+        json_schema_extra = {
+            "example": {
+                "status": 200,
+                "message": "success",
+                "data": {"video_terms": ["sky", "tree"]},
+            },
+        }
+# Response envelope for listing background-music files; only adds a schema example.
+class BgmRetrieveResponse(BaseResponse):
+    class Config:
+        # Example payload attached to the generated JSON schema.
+        json_schema_extra = {
+            "example": {
+                "status": 200,
+                "message": "success",
+                "data": {
+                    "files": [
+                        {
+                            "name": "output013.mp3",
+                            "size": 1891269,
+                            "file": "/NarratoAI/resource/songs/output013.mp3",
+                        }
+                    ]
+                },
+            },
+        }
+# Response envelope for a background-music upload; only adds a schema example.
+class BgmUploadResponse(BaseResponse):
+    class Config:
+        # Example payload attached to the generated JSON schema.
+        json_schema_extra = {
+            "example": {
+                "status": 200,
+                "message": "success",
+                "data": {"file": "/NarratoAI/resource/songs/example.mp3"},
+            },
+        }
+# Parameters for the clip/narration pipeline; every field has a default.
+class VideoClipParams(BaseModel):
+    """
+    NarratoAI 数据模型
+    """
+    video_clip_json: Optional[list] = Field(default=[], description="LLM 生成的视频剪辑脚本内容")
+    video_clip_json_path: Optional[str] = Field(default="", description="LLM 生成的视频剪辑脚本路径")
+    video_origin_path: Optional[str] = Field(default="", description="原视频路径")
+    video_aspect: Optional[VideoAspect] = Field(default=VideoAspect.portrait.value, description="视频比例")
+    video_language: Optional[str] = Field(default="zh-CN", description="视频语言")
+    # video_clip_duration: Optional[int] = 5      # clip duration
+    # video_count: Optional[int] = 1      # number of clips
+    # video_source: Optional[str] = "local"
+    # video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
+    voice_name: Optional[str] = Field(default="zh-CN-YunjianNeural", description="语音名称")
+    voice_volume: Optional[float] = Field(default=1.0, description="解说语音音量")
+    voice_rate: Optional[float] = Field(default=1.0, description="语速")
+    voice_pitch: Optional[float] = Field(default=1.0, description="语调")
+    bgm_name: Optional[str] = Field(default="random", description="背景音乐名称")
+    bgm_type: Optional[str] = Field(default="random", description="背景音乐类型")
+    bgm_file: Optional[str] = Field(default="", description="背景音乐文件")
+    subtitle_enabled: bool = True
+    font_name: str = "SimHei"  # SimHei (a Hei-style font) is the default
+    font_size: int = 36
+    text_fore_color: str = "white"              # text foreground colour
+    text_back_color: Optional[str] = None       # text background colour
+    stroke_color: str = "black"                 # outline colour
+    stroke_width: float = 1.5                   # outline width
+    subtitle_position: str = "bottom"   # top, bottom, center, custom
+    custom_position: float = 70.0       # custom position
+    n_threads: Optional[int] = Field(default=16, description="线程数")    # thread count; helps speed up video processing
+    tts_volume: Optional[float] = Field(default=1.0, description="解说语音音量（后处理）")
+    original_volume: Optional[float] = Field(default=1.0, description="视频原声音量")
+    bgm_volume: Optional[float] = Field(default=0.3, description="背景音乐音量")
+# Request model naming the video to transcribe and the language code to use.
+class VideoTranscriptionRequest(BaseModel):
+    video_name: str
+    language: str = "zh-CN"
+    class Config:
+        arbitrary_types_allowed = True
+# Response model holding the transcription text.
+class VideoTranscriptionResponse(BaseModel):
+    transcription: str
+# String-valued enum of subtitle positions.
+class SubtitlePosition(str, Enum):
+    TOP = "top"
+    CENTER = "center"
+    BOTTOM = "bottom"

app/models/schema_v2.py ADDED Viewed

	@@ -0,0 +1,63 @@

+from typing import Optional, List
+from pydantic import BaseModel
+# Request body for V2 script generation; only ``video_path`` is required.
+class GenerateScriptRequest(BaseModel):
+    video_path: str
+    video_theme: Optional[str] = ""
+    custom_prompt: Optional[str] = ""
+    # NOTE(review): the V2 generate_script handler shown in app/controllers/v2/script.py
+    # does not forward this field to the generator — confirm whether it is still used.
+    frame_interval_input: Optional[int] = 5
+    skip_seconds: Optional[int] = 0
+    threshold: Optional[int] = 30
+    vision_batch_size: Optional[int] = 5
+    vision_llm_provider: Optional[str] = "gemini"
+# Response for V2 script generation: task id plus the script as a list of dicts.
+class GenerateScriptResponse(BaseModel):
+    task_id: str
+    script: List[dict]
+# Request body for V2 cropping: source video path and the script to crop by.
+class CropVideoRequest(BaseModel):
+    video_origin_path: str
+    video_script: List[dict]
+# Response for V2 cropping: task id plus a dict of produced clips.
+class CropVideoResponse(BaseModel):
+    task_id: str
+    subclip_videos: dict
+# Request body for the V2 YouTube download endpoint.
+class DownloadVideoRequest(BaseModel):
+    url: str
+    resolution: str
+    output_format: Optional[str] = "mp4"
+    rename: Optional[str] = None
+# Response for the V2 YouTube download endpoint.
+class DownloadVideoResponse(BaseModel):
+    task_id: str
+    output_path: str
+    resolution: str
+    format: str
+    filename: str
+# Request model describing a subclip job.
+# NOTE(review): the V2 start_subclip handler shown in app/controllers/v2/script.py
+# takes VideoClipParams, not this model — confirm whether this class is still used.
+class StartSubclipRequest(BaseModel):
+    task_id: str
+    video_origin_path: str
+    video_clip_json_path: str
+    voice_name: Optional[str] = None
+    voice_rate: Optional[int] = 0
+    voice_pitch: Optional[int] = 0
+    subtitle_enabled: Optional[bool] = True
+    video_aspect: Optional[str] = "16:9"
+    n_threads: Optional[int] = 4
+    # Video clips obtained from the crop endpoint. NOTE(review): the original comment
+    # calls this a dictionary and CropVideoResponse.subclip_videos is a dict, but the
+    # annotation here is ``list`` — confirm the intended type.
+    subclip_videos: list
+# Response for starting a subclip job: task id, state string and optional outputs.
+class StartSubclipResponse(BaseModel):
+    task_id: str
+    state: str
+    videos: Optional[List[str]] = None
+    combined_videos: Optional[List[str]] = None

app/router.py ADDED Viewed

	@@ -0,0 +1,21 @@

+"""Application configuration - root APIRouter.
+Defines all FastAPI application endpoints.
+Resources:
+    1. https://fastapi.tiangolo.com/tutorial/bigger-applications
+"""
+from fastapi import APIRouter
+from app.controllers.v1 import llm, video
+from app.controllers.v2 import script
+root_api_router = APIRouter()
+# v1
+root_api_router.include_router(video.router)
+root_api_router.include_router(llm.router)
+# v2
+root_api_router.include_router(script.router)

app/services/SDE/prompt.py ADDED Viewed

	@@ -0,0 +1,97 @@

+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''
+@Project: NarratoAI
+@File   : prompt
+@Author : 小林同学
+@Date   : 2025/5/9 上午12:57
+'''
+# 字幕剧情分析提示词
+subtitle_plot_analysis_v1 = """
+# 角色
+你是一位专业的剧本分析师和剧情概括助手。
+# 任务
+我将为你提供一部短剧的完整字幕文本。请你基于这些字幕，完成以下任务：
+1.  **整体剧情分析**：简要概括整个短剧的核心剧情脉络、主要冲突和结局（如果有的话）。
+2.  **分段剧情解析与时间戳定位**：
+    *   将整个短剧划分为若干个关键的剧情段落（例如：开端、发展、转折、高潮、结局，或根据具体情节自然划分）。
+    *   段落数应该与字幕长度成正比。
+    *   对于每一个剧情段落：
+        *   **概括该段落的主要内容**：用简洁的语言描述这段剧情发生了什么。
+        *   **标注对应的时间戳范围**：明确指出该剧情段落对应的开始字幕时间戳和结束字幕时间戳。请直接从字幕中提取时间信息。
+# 输入格式
+字幕内容通常包含时间戳和对话，例如：
+```
+00:00:05,000 --> 00:00:10,000
+[角色A]: 你好吗？
+00:00:10,500 --> 00:00:15,000
+[角色B]: 我很好，谢谢。发生了一些有趣的事情。
+... (更多字幕内容) ...
+```
+我将把实际字幕粘贴在下方。
+# 输出格式要求
+请按照以下格式清晰地呈现分析结果：
+**一、整体剧情概括：**
+[此处填写对整个短剧剧情的概括]
+**二、分段剧情解析：**
+**剧情段落 1：[段落主题/概括，例如：主角登场与背景介绍]**
+*   **时间戳：** [开始时间戳] --> [结束时间戳]
+*   **内容概要：** [对这段剧情的详细描述]
+**剧情段落 2：[段落主题/概括，例如：第一个冲突出现]**
+*   **时间戳：** [开始时间戳] --> [结束时间戳]
+*   **内容概要：** [对这段剧情的详细描述]
+... (根据实际剧情段落数量继续) ...
+**剧情段落 N：[段落主题/概括，例如：结局与反思]**
+*   **时间戳：** [开始时间戳] --> [结束时间戳]
+*   **内容概要：** [对这段剧情的详细描述]
+# 注意事项
+*   请确保时间戳的准确性，直接引用字幕中的时间。
+*   剧情段落的划分应合乎逻辑，能够反映剧情的起承转合。
+*   语言表达应简洁、准确、客观。
+# 限制
+1. 严禁输出与分析结果无关的内容
+2.
+# 请处理以下字幕：
+"""
+plot_writing = """
+我是一个影视解说up主，需要为我的粉丝讲解短剧《%s》的剧情，目前正在解说剧情，希望能让粉丝通过我的解说了解剧情，并且产生 继续观看的兴趣，请生成一篇解说脚本，包含解说文案，以及穿插原声的片段，下面<plot>中的内容是短剧的剧情概述：
+<plot>
+%s
+</plot>
+请使用 json 格式进行输出；使用 <output> 中的输出格式：
+<output>
+{
+  "items": [
+    {
+        "_id": 1, # 唯一递增id
+        "timestamp": "00:00:05,390-00:00:10,430",
+        "picture": "剧情描述或者备注",
+        "narration": "解说文案，如果片段为穿插的原片片段，可以直接使用 ‘播放原片+_id‘ 进行占位",
+        "OST": "值为 0 表示当前片段为解说片段，值为 1 表示当前片段为穿插的原片"
+    }
+}
+</output>
+<restriction>
+1. 只输出 json 内容，不要输出其他任何说明性的文字
+2. 解说文案的语言使用 简体中文
+3. 严禁虚构剧情，所有画面只能从 <polt> 中摘取
+4. 严禁虚构时间戳，所有时间戳范围只能从 <polt> 中摘取
+</restriction>
+"""

app/services/SDE/short_drama_explanation.py ADDED Viewed

	@@ -0,0 +1,456 @@

+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''
+@Project: NarratoAI
+@File   : 短剧解说
+@Author : 小林同学
+@Date   : 2025/5/9 上午12:36
+'''
+import os
+import json
+import requests
+from typing import Dict, Any, Optional
+from loguru import logger
+from app.config import config
+from app.utils.utils import get_uuid, storage_dir
+from app.services.SDE.prompt import subtitle_plot_analysis_v1, plot_writing
+class SubtitleAnalyzer:
+    """字幕剧情分析器，负责分析字幕内容并提取关键剧情段落"""
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        model: Optional[str] = None,
+        base_url: Optional[str] = None,
+        custom_prompt: Optional[str] = None,
+        temperature: Optional[float] = 1.0,
+    ):
+        """
+        初始化字幕分析器
+        Args:
+            api_key: API密钥，如果不提供则从配置中读取
+            model: 模型名称，如果不提供则从配置中读取
+            base_url: API基础URL，如果不提供则从配置中读取或使用默认值
+            custom_prompt: 自定义提示词，如果不提供则使用默认值
+            temperature: 模型温度
+        """
+        # 使用传入的参数或从配置中获取
+        self.api_key = api_key
+        self.model = model
+        self.base_url = base_url
+        self.temperature = temperature
+        # 设置提示词模板
+        self.prompt_template = custom_prompt or subtitle_plot_analysis_v1
+        # 初始化HTTP请求所需的头信息
+        self._init_headers()
+    def _init_headers(self):
+        """初始化HTTP请求头"""
+        try:
+            # 基础请求头，包含API密钥和内容类型
+            self.headers = {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.api_key}"
+            }
+            # logger.debug(f"初始化成功 - API Key: {self.api_key[:8]}... - Base URL: {self.base_url}")
+        except Exception as e:
+            logger.error(f"初始化请求头失败: {str(e)}")
+            raise
+    def analyze_subtitle(self, subtitle_content: str) -> Dict[str, Any]:
+        """
+        分析字幕内容
+        Args:
+            subtitle_content: 字幕内容文本
+        Returns:
+            Dict[str, Any]: 包含分析结果的字典
+        """
+        try:
+            # 构建完整提示词
+            prompt = f"{self.prompt_template}\n\n{subtitle_content}"
+            # 构建请求体数据
+            payload = {
+                "model": self.model,
+                "messages": [
+                    {"role": "system", "content": "你是一位专业的剧本分析师和剧情概括助手。"},
+                    {"role": "user", "content": prompt}
+                ],
+                "temperature": self.temperature
+            }
+            # 构建请求地址
+            url = f"{self.base_url}/chat/completions"
+            # 发送HTTP请求
+            response = requests.post(url, headers=self.headers, json=payload)
+            # 解析响应
+            if response.status_code == 200:
+                response_data = response.json()
+                # 提取响应内容
+                if "choices" in response_data and len(response_data["choices"]) > 0:
+                    analysis_result = response_data["choices"][0]["message"]["content"]
+                    logger.debug(f"字幕分析完成，消耗的tokens: {response_data.get('usage', {}).get('total_tokens', 0)}")
+                    # 返回结果
+                    return {
+                        "status": "success",
+                        "analysis": analysis_result,
+                        "tokens_used": response_data.get("usage", {}).get("total_tokens", 0),
+                        "model": self.model,
+                        "temperature": self.temperature
+                    }
+                else:
+                    logger.error("字幕分析失败: 未获取到有效响应")
+                    return {
+                        "status": "error",
+                        "message": "未获取到有效响应",
+                        "temperature": self.temperature
+                    }
+            else:
+                error_msg = f"请求失败，状态码: {response.status_code}, 响应: {response.text}"
+                logger.error(error_msg)
+                return {
+                    "status": "error",
+                    "message": error_msg,
+                    "temperature": self.temperature
+                }
+        except Exception as e:
+            logger.error(f"字幕分析过程中发生错误: {str(e)}")
+            return {
+                "status": "error",
+                "message": str(e),
+                "temperature": self.temperature
+            }
+    def analyze_subtitle_from_file(self, subtitle_file_path: str) -> Dict[str, Any]:
+        """
+        从文件读取字幕并分析
+        Args:
+            subtitle_file_path: 字幕文件的路径
+        Returns:
+            Dict[str, Any]: 包含分析结果的字典
+        """
+        try:
+            # 检查文件是否存在
+            if not os.path.exists(subtitle_file_path):
+                return {
+                    "status": "error",
+                    "message": f"字幕文件不存在: {subtitle_file_path}",
+                    "temperature": self.temperature
+                }
+            # 读取文件内容
+            with open(subtitle_file_path, 'r', encoding='utf-8') as f:
+                subtitle_content = f.read()
+            # 分析字幕
+            return self.analyze_subtitle(subtitle_content)
+        except Exception as e:
+            logger.error(f"从文件读取字幕并分析过程中发生错误: {str(e)}")
+            return {
+                "status": "error",
+                "message": str(e),
+                "temperature": self.temperature
+            }
+    def save_analysis_result(self, analysis_result: Dict[str, Any], output_path: Optional[str] = None) -> str:
+        """
+        保存分析结果到文件
+        Args:
+            analysis_result: 分析结果
+            output_path: 输出文件路径，如果不提供则自动生成
+        Returns:
+            str: 输出文件的路径
+        """
+        try:
+            # 如果未提供输出路径，则自动生成
+            if not output_path:
+                output_dir = storage_dir("drama_analysis", create=True)
+                output_path = os.path.join(output_dir, f"analysis_{get_uuid(True)}.txt")
+            # 确保目录存在
+            os.makedirs(os.path.dirname(output_path), exist_ok=True)
+            # 保存结果
+            with open(output_path, 'w', encoding='utf-8') as f:
+                if analysis_result["status"] == "success":
+                    f.write(analysis_result["analysis"])
+                else:
+                    f.write(f"分析失败: {analysis_result['message']}")
+            logger.info(f"分析结果已保存到: {output_path}")
+            return output_path
+        except Exception as e:
+            logger.error(f"保存分析结果时发生错误: {str(e)}")
+            return ""
+    def generate_narration_script(self, short_name:str, plot_analysis: str, temperature: float = 0.7) -> Dict[str, Any]:
+        """
+        根据剧情分析生成解说文案
+        Args:
+            short_name: 短剧名称
+            plot_analysis: 剧情分析内容
+            temperature: 生成温度，控制创造性，默认0.7
+        Returns:
+            Dict[str, Any]: 包含生成结果的字典
+        """
+        try:
+            # 构建完整提示词
+            prompt = plot_writing % (short_name, plot_analysis)
+            # 构建请求体数据
+            payload = {
+                "model": self.model,
+                "messages": [
+                    {"role": "system", "content": "你是一位专业的短视频解说脚本撰写专家。"},
+                    {"role": "user", "content": prompt}
+                ],
+                "temperature": temperature
+            }
+            # 对特定模型添加响应格式设置
+            if self.model not in ["deepseek-reasoner"]:
+                payload["response_format"] = {"type": "json_object"}
+            # 构建请求地址
+            url = f"{self.base_url}/chat/completions"
+            # 发送HTTP请求
+            response = requests.post(url, headers=self.headers, json=payload)
+            # 解析响应
+            if response.status_code == 200:
+                response_data = response.json()
+                # 提取响应内容
+                if "choices" in response_data and len(response_data["choices"]) > 0:
+                    narration_script = response_data["choices"][0]["message"]["content"]
+                    logger.debug(f"解说文案生成完成，消耗的tokens: {response_data.get('usage', {}).get('total_tokens', 0)}")
+                    # 返回结果
+                    return {
+                        "status": "success",
+                        "narration_script": narration_script,
+                        "tokens_used": response_data.get("usage", {}).get("total_tokens", 0),
+                        "model": self.model,
+                        "temperature": self.temperature
+                    }
+                else:
+                    logger.error("解说文案生成失败: 未获取到有效响应")
+                    return {
+                        "status": "error",
+                        "message": "未获取到有效���应",
+                        "temperature": self.temperature
+                    }
+            else:
+                error_msg = f"请求失败，状态码: {response.status_code}, 响应: {response.text}"
+                logger.error(error_msg)
+                return {
+                    "status": "error",
+                    "message": error_msg,
+                    "temperature": self.temperature
+                }
+        except Exception as e:
+            logger.error(f"解说文案生成过程中发生错误: {str(e)}")
+            return {
+                "status": "error",
+                "message": str(e),
+                "temperature": self.temperature
+            }
+    def save_narration_script(self, narration_result: Dict[str, Any], output_path: Optional[str] = None) -> str:
+        """
+        保存解说文案到文件
+        Args:
+            narration_result: 解说文案生成结果
+            output_path: 输出文件路径，如果不提供则自动生成
+        Returns:
+            str: 输出文件的路径
+        """
+        try:
+            # 如果未提供输出路径，则自动生成
+            if not output_path:
+                output_dir = storage_dir("narration_scripts", create=True)
+                output_path = os.path.join(output_dir, f"narration_{get_uuid(True)}.json")
+            # 确保目录存在
+            os.makedirs(os.path.dirname(output_path), exist_ok=True)
+            # 保存结果
+            with open(output_path, 'w', encoding='utf-8') as f:
+                if narration_result["status"] == "success":
+                    f.write(narration_result["narration_script"])
+                else:
+                    f.write(f"生成失败: {narration_result['message']}")
+            logger.info(f"解说文案已保存到: {output_path}")
+            return output_path
+        except Exception as e:
+            logger.error(f"保存解说文案时发生错误: {str(e)}")
+            return ""
+def analyze_subtitle(
+        subtitle_content: str = None,
+        subtitle_file_path: str = None,
+        api_key: Optional[str] = None,
+        model: Optional[str] = None,
+        base_url: Optional[str] = None,
+        custom_prompt: Optional[str] = None,
+        temperature: float = 1.0,
+        save_result: bool = False,
+        output_path: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    分析字幕内容的便捷函数
+    Args:
+        subtitle_content: 字幕内容文本
+        subtitle_file_path: 字幕文件路径
+        custom_prompt: 自定义提示词
+        api_key: API密钥
+        model: 模型名称
+        base_url: API基础URL
+        temperature: 模型温度
+        save_result: 是否保存结果到文件
+        output_path: 输出文件路径
+    Returns:
+        Dict[str, Any]: 包含分析结果的字典
+    """
+    # 初始化分析器
+    analyzer = SubtitleAnalyzer(
+        temperature=temperature,
+        api_key=api_key,
+        model=model,
+        base_url=base_url,
+        custom_prompt=custom_prompt
+    )
+    logger.debug(f"使用模型: {analyzer.model} 开始分析, 温度: {analyzer.temperature}")
+    # 分析字幕
+    if subtitle_content:
+        result = analyzer.analyze_subtitle(subtitle_content)
+    elif subtitle_file_path:
+        result = analyzer.analyze_subtitle_from_file(subtitle_file_path)
+    else:
+        return {
+            "status": "error",
+            "message": "必须提供字幕内容或字幕文件路径",
+            "temperature": temperature
+        }
+    # 保存结果
+    if save_result and result["status"] == "success":
+        result["output_path"] = analyzer.save_analysis_result(result, output_path)
+    return result
+def generate_narration_script(
+    short_name: str = None,
+    plot_analysis: str = None,
+    api_key: Optional[str] = None,
+    model: Optional[str] = None,
+    base_url: Optional[str] = None,
+    temperature: float = 1.0,
+    save_result: bool = False,
+    output_path: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    根据剧情分析生成解说文案的便捷函数
+    Args:
+        short_name: 短剧名称
+        plot_analysis: 剧情分析内容，直接提供
+        api_key: API密钥
+        model: 模型名称
+        base_url: API基础URL
+        temperature: 生成温度，控制创造性
+        save_result: 是否保存结果到文件
+        output_path: 输出文件路径
+    Returns:
+        Dict[str, Any]: 包含生成结果的字典
+    """
+    # 初始化分析器
+    analyzer = SubtitleAnalyzer(
+        temperature=temperature,
+        api_key=api_key,
+        model=model,
+        base_url=base_url
+    )
+    # 生成解说文案
+    result = analyzer.generate_narration_script(short_name, plot_analysis, temperature)
+    # 保存结果
+    if save_result and result["status"] == "success":
+        result["output_path"] = analyzer.save_narration_script(result, output_path)
+    return result
+if __name__ == '__main__':
+    text_api_key = "skxxxx"
+    text_model = "gemini-2.0-flash"
+    text_base_url = "https://api.narratoai.cn/v1/chat/completions"  # 确保URL不以斜杠结尾，便于后续拼接
+    subtitle_path = "/Users/apple/Desktop/home/NarratoAI/resource/srt/家里家外1-5.srt"
+    # 示例用法
+    if subtitle_path:
+        # 分析字幕总结剧情
+        analysis_result = analyze_subtitle(
+            subtitle_file_path=subtitle_path,
+            api_key=text_api_key,
+            model=text_model,
+            base_url=text_base_url,
+            save_result=True
+        )
+        if analysis_result["status"] == "success":
+            print("字幕分析成功！")
+            print("分析结果：")
+            print(analysis_result["analysis"])
+            # 根据剧情生成解说文案
+            narration_result = generate_narration_script(
+                plot_analysis=analysis_result["analysis"],
+                api_key=text_api_key,
+                model=text_model,
+                base_url=text_base_url,
+                save_result=True
+            )
+            if narration_result["status"] == "success":
+                print("\n解说文案生成成功！")
+                print("解说文案：")
+                print(narration_result["narration_script"])
+            else:
+                print(f"\n解说文案生成失败: {narration_result['message']}")
+        else:
+            print(f"分析失败: {analysis_result['message']}")

app/services/SDP/generate_script_short.py ADDED Viewed

	@@ -0,0 +1,37 @@

+"""
+视频脚本生成pipeline，串联各个处理步骤
+"""
+import os
+from .utils.step1_subtitle_analyzer_openai import analyze_subtitle
+from .utils.step5_merge_script import merge_script
+def generate_script(srt_path: str, api_key: str, model_name: str, output_path: str, base_url: str = None, custom_clips: int = 5):
+    """生成视频混剪脚本
+    Args:
+        srt_path: 字幕文件路径
+        output_path: 输出文件路径，可选
+    Returns:
+        str: 生成的脚本内容
+    """
+    # 验证输入文件
+    if not os.path.exists(srt_path):
+        raise FileNotFoundError(f"字幕文件不存在: {srt_path}")
+    # 分析字幕
+    print("开始分析...")
+    openai_analysis = analyze_subtitle(
+        srt_path=srt_path,
+        api_key=api_key,
+        model_name=model_name,
+        base_url=base_url,
+        custom_clips=custom_clips
+    )
+    # 合并生成最终脚本
+    adjusted_results = openai_analysis['plot_points']
+    final_script = merge_script(adjusted_results, output_path)
+    return final_script

app/services/SDP/utils/short_schema.py ADDED Viewed

	@@ -0,0 +1,60 @@

+"""
+定义项目中使用的数据类型
+"""
+from typing import List, Dict, Optional
+from dataclasses import dataclass
+@dataclass
+class PlotPoint:
+    """A located plot point.
+    NOTE(review): field names match the "plot_points" JSON objects requested
+    from the model in step1 (timestamp range, title, picture description) -
+    confirm that this type is what those dicts are meant to become.
+    """
+    timestamp: str
+    title: str
+    picture: str
+@dataclass
+class Commentary:
+    """A commentary entry: timestamp, title and copywriting text (all strings)."""
+    timestamp: str
+    title: str
+    copywriter: str
+@dataclass
+class SubtitleSegment:
+    """A subtitle segment with numeric start/end times and its text.
+    NOTE(review): the time unit is presumably seconds - confirm against the producer.
+    """
+    start_time: float
+    end_time: float
+    text: str
+@dataclass
+class ScriptItem:
+    """A final script entry combining the PlotPoint fields with copywriting text."""
+    timestamp: str
+    title: str
+    picture: str
+    copywriter: str
+@dataclass
+class PipelineResult:
+    """Aggregate result of a pipeline run.
+    Holds the output video path, the intermediate lists produced along the way
+    and an optional error message (None by default).
+    """
+    output_video_path: str
+    plot_points: List[PlotPoint]
+    subtitle_segments: List[SubtitleSegment]
+    commentaries: List[Commentary]
+    final_script: List[ScriptItem]
+    error: Optional[str] = None
+class VideoProcessingError(Exception):
+    """Error type for video processing failures."""
+    pass
+class SubtitleProcessingError(Exception):
+    """Error type for subtitle processing failures."""
+    pass
+class PlotAnalysisError(Exception):
+    """Error type for plot analysis failures."""
+    pass
+class CopywritingError(Exception):
+    """Error type for copywriting failures."""
+    pass

app/services/SDP/utils/step1_subtitle_analyzer_openai.py ADDED Viewed

	@@ -0,0 +1,157 @@

+"""
+使用OpenAI API，分析字幕文件，返回剧情梗概和爆点
+"""
+import traceback
+from openai import OpenAI, BadRequestError
+import os
+import json
+from .utils import load_srt
+def analyze_subtitle(
+    srt_path: str,
+    model_name: str,
+    api_key: str = None,
+    base_url: str = None,
+    custom_clips: int = 5
+) -> dict:
+    """分析字幕内容，返回完整的分析结果
+    Args:
+        srt_path (str): SRT字幕文件路径
+        api_key (str, optional): 大模型API密钥. Defaults to None.
+        model_name (str, optional): 大模型名称. Defaults to "gpt-4o-2024-11-20".
+        base_url (str, optional): 大模型API基础URL. Defaults to None.
+    Returns:
+        dict: 包含剧情梗概和结构化的时间段分析的字典
+    """
+    try:
+        # 加载字幕文件
+        subtitles = load_srt(srt_path)
+        subtitle_content = "\n".join([f"{sub['timestamp']}\n{sub['text']}" for sub in subtitles])
+        # 初始化客户端
+        global client
+        if "deepseek" in model_name.lower():
+            client = OpenAI(
+                api_key=api_key or os.getenv('DeepSeek_API_KEY'),
+                base_url="https://api.siliconflow.cn/v1"    # 使用第三方 硅基流动 API
+            )
+        else:
+            client = OpenAI(
+                api_key=api_key or os.getenv('OPENAI_API_KEY'),
+                base_url=base_url
+            )
+        messages = [
+            {
+                "role": "system",
+                "content": """你是一名经验丰富的短剧编剧，擅长根据字幕内容按照先后顺序分析关键剧情,并找出 %s 个关键片段。
+                请返回一个JSON对象，包含以下字段：
+                {
+                    "summary": "整体剧情梗概",
+                    "plot_titles": [
+                        "关键剧情1",
+                        "关键剧情2",
+                        "关键剧情3",
+                        "关键剧情4",
+                        "关键剧情5",
+                        "..."
+                    ]
+                }
+                请确保返回的是合法的JSON格式, 请确保返回的是 %s 个片段。
+                """ % (custom_clips, custom_clips)
+            },
+            {
+                "role": "user",
+                "content": f"srt字幕如下：{subtitle_content}"
+            }
+        ]
+        # DeepSeek R1 和 V3 不支持 response_format=json_object
+        try:
+            completion = client.chat.completions.create(
+                model=model_name,
+                messages=messages,
+                response_format={"type": "json_object"}
+            )
+            summary_data = json.loads(completion.choices[0].message.content)
+        except BadRequestError as e:
+            completion = client.chat.completions.create(
+                model=model_name,
+                messages=messages
+            )
+            # 去除 completion 字符串前的 ```json 和 结尾的 ```
+            completion = completion.choices[0].message.content.replace("```json", "").replace("```", "")
+            summary_data = json.loads(completion)
+        except Exception as e:
+            raise Exception(f"大模型解析发生错误：{str(e)}\n{traceback.format_exc()}")
+        print(json.dumps(summary_data, indent=4, ensure_ascii=False))
+        # 获取爆点时间段分析
+        prompt = f"""剧情梗概：
+            {summary_data['summary']}
+            需要定位的爆点内容：
+            """
+        print(f"找到 {len(summary_data['plot_titles'])} 个片段")
+        for i, point in enumerate(summary_data['plot_titles'], 1):
+            prompt += f"{i}. {point}\n"
+        messages = [
+            {
+                "role": "system",
+                "content": """你是一名短剧编剧，非常擅长根据字幕中分析视频中关键剧情出现的具体时间段。
+                请仔细阅读剧情梗概和爆点内容，然后在字幕中找出每个爆点发生的具体时间段和爆点前后的详细剧情。
+                请返回一个JSON对象，包含一个名为"plot_points"的数组，数组中包含多个对象，每个对象都要包含以下字段：
+                {
+                    "plot_points": [
+                        {
+                            "timestamp": "时间段，格式为xx:xx:xx,xxx-xx:xx:xx,xxx",
+                            "title": "关键剧情的主题",
+                            "picture": "关键剧情前后的详细剧情描述"
+                        }
+                    ]
+                }
+                请确保返回的是合法的JSON格式。"""
+            },
+            {
+                "role": "user",
+                "content": f"""字幕内容：
+{subtitle_content}
+{prompt}"""
+            }
+        ]
+        # DeepSeek R1 和 V3 不支持 response_format=json_object
+        try:
+            completion = client.chat.completions.create(
+                model=model_name,
+                messages=messages,
+                response_format={"type": "json_object"}
+            )
+            plot_points_data = json.loads(completion.choices[0].message.content)
+        except BadRequestError as e:
+            completion = client.chat.completions.create(
+                model=model_name,
+                messages=messages
+            )
+            # 去除 completion 字符串前的 ```json 和 结尾的 ```
+            completion = completion.choices[0].message.content.replace("```json", "").replace("```", "")
+            plot_points_data = json.loads(completion)
+        except Exception as e:
+            raise Exception(f"大模型解析错误：{str(e)}\n{traceback.format_exc()}")
+        print(json.dumps(plot_points_data, indent=4, ensure_ascii=False))
+        # 合并结果
+        return {
+            "plot_summary": summary_data,
+            "plot_points": plot_points_data["plot_points"]
+        }
+    except Exception as e:
+        raise Exception(f"分析字幕时发生错误：{str(e)}\n{traceback.format_exc()}")

app/services/SDP/utils/step5_merge_script.py ADDED Viewed

	@@ -0,0 +1,69 @@

+"""
+合并生成最终脚本
+"""
+import os
+import json
+from typing import List, Dict, Tuple
+def merge_script(
+        plot_points: List[Dict],
+        output_path: str
+):
+    """合并生成最终脚本
+    Args:
+        plot_points: 校对后的剧情点
+        output_path: 输出文件路径，如果提供则保存到文件
+    Returns:
+        str: 最终合并的脚本
+    """
+    def parse_timestamp(ts: str) -> Tuple[float, float]:
+        """解析时间戳，返回开始和结束时间（秒）"""
+        start, end = ts.split('-')
+        def parse_time(time_str: str) -> float:
+            time_str = time_str.strip()
+            if ',' in time_str:
+                time_parts, ms_parts = time_str.split(',')
+                ms = float(ms_parts) / 1000
+            else:
+                time_parts = time_str
+                ms = 0
+            hours, minutes, seconds = map(int, time_parts.split(':'))
+            return hours * 3600 + minutes * 60 + seconds + ms
+        return parse_time(start), parse_time(end)
+    def format_timestamp(seconds: float) -> str:
+        """将秒数转换为时间戳格式 HH:MM:SS"""
+        hours = int(seconds // 3600)
+        minutes = int((seconds % 3600) // 60)
+        secs = int(seconds % 60)
+        return f"{hours:02d}:{minutes:02d}:{secs:02d}"
+    # 创建包含所有信息的临时列表
+    final_script = []
+    # 处理原生画面条目
+    number = 1
+    for plot_point in plot_points:
+        start, end = parse_timestamp(plot_point["timestamp"])
+        script_item = {
+            "_id": number,
+            "timestamp": plot_point["timestamp"],
+            "picture": plot_point["picture"],
+            "narration": f"播放原生_{os.urandom(4).hex()}",
+            "OST": 1,  # OST=0 仅保留解说 OST=2 保留解说和原声
+        }
+        final_script.append(script_item)
+        number += 1
+    # 保存结果
+    with open(output_path, 'w', encoding='utf-8') as f:
+        json.dump(final_script, f, ensure_ascii=False, indent=4)
+    print(f"脚本生成完成：{output_path}")
+    return final_script

app/services/SDP/utils/utils.py ADDED Viewed

	@@ -0,0 +1,45 @@

+# 公共方法
+import json
+import requests  # 新增
+from typing import List, Dict
+def load_srt(file_path: str) -> List[Dict]:
+    """加载并解析SRT文件
+    Args:
+        file_path: SRT文件路径
+    Returns:
+        字幕内容列表
+    """
+    with open(file_path, 'r', encoding='utf-8-sig') as f:
+        content = f.read().strip()
+    # 按空行分割字幕块
+    subtitle_blocks = content.split('\n\n')
+    subtitles = []
+    for block in subtitle_blocks:
+        lines = block.split('\n')
+        if len(lines) >= 3:  # 确保块包含足够的行
+            try:
+                number = int(lines[0].strip())
+                timestamp = lines[1]
+                text = ' '.join(lines[2:])
+                # 解析时间戳
+                start_time, end_time = timestamp.split(' --> ')
+                subtitles.append({
+                    'number': number,
+                    'timestamp': timestamp,
+                    'text': text,
+                    'start_time': start_time,
+                    'end_time': end_time
+                })
+            except ValueError as e:
+                print(f"Warning: 跳过无效的字幕块: {e}")
+                continue
+    return subtitles

app/services/__init__.py ADDED Viewed

File without changes

app/services/audio_merger.py ADDED Viewed

	@@ -0,0 +1,171 @@

+import os
+import json
+import subprocess
+import edge_tts
+from edge_tts import submaker
+from pydub import AudioSegment
+from typing import List, Dict
+from loguru import logger
+from app.utils import utils
+def check_ffmpeg():
+    """检查FFmpeg是否已安装"""
+    try:
+        subprocess.run(['ffmpeg', '-version'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        return True
+    except FileNotFoundError:
+        return False
+def merge_audio_files(task_id: str, total_duration: float, list_script: list):
+    """
+    合并音频文件
+    Args:
+        task_id: 任务ID
+        total_duration: 总时长
+        list_script: 完整脚本信息，包含duration时长和audio路径
+    Returns:
+        str: 合并后的音频文件路径
+    """
+    # 检查FFmpeg是否安装
+    if not check_ffmpeg():
+        logger.error("FFmpeg未安装，无法合并音频文件")
+        return None
+    # 创建一个空的音频片段
+    final_audio = AudioSegment.silent(duration=total_duration * 1000)  # 总时长以毫秒为单位
+    # 计算每个片段的开始位置（基于duration字段）
+    current_position = 0  # 初始位置（秒）
+    # 遍历脚本中的每个片段
+    for segment in list_script:
+        try:
+            # 获取片段时长（秒）
+            duration = segment['duration']
+            # 检查audio字段是否为空
+            if segment['audio'] and os.path.exists(segment['audio']):
+                # 加载TTS音频文件
+                tts_audio = AudioSegment.from_file(segment['audio'])
+                # 将TTS音频添加到最终音频
+                final_audio = final_audio.overlay(tts_audio, position=current_position * 1000)
+            else:
+                # audio为空，不添加音频，仅保留间隔
+                logger.info(f"片段 {segment.get('timestamp', '')} 没有音频文件，保留 {duration} 秒的间隔")
+            # 更新下一个片段的开始位置
+            current_position += duration
+        except Exception as e:
+            logger.error(f"处理音频片段时出错: {str(e)}")
+            # 即使处理失败，也要更新位置，确保后续片段位置正确
+            if 'duration' in segment:
+                current_position += segment['duration']
+            continue
+    # 保存合并后的音频文件
+    output_audio_path = os.path.join(utils.task_dir(task_id), "merger_audio.mp3")
+    final_audio.export(output_audio_path, format="mp3")
+    logger.info(f"合并后的音频文件已保存: {output_audio_path}")
+    return output_audio_path
+def time_to_seconds(time_str):
+    """
+    将时间字符串转换为秒数，支持多种格式：
+    1. 'HH:MM:SS,mmm' (时:分:秒,毫秒)
+    2. 'MM:SS,mmm' (分:秒,毫秒)
+    3. 'SS,mmm' (秒,毫秒)
+    """
+    try:
+        # 处理毫秒部分
+        if ',' in time_str:
+            time_part, ms_part = time_str.split(',')
+            ms = float(ms_part) / 1000
+        else:
+            time_part = time_str
+            ms = 0
+        # 分割时间部分
+        parts = time_part.split(':')
+        if len(parts) == 3:  # HH:MM:SS
+            h, m, s = map(int, parts)
+            seconds = h * 3600 + m * 60 + s
+        elif len(parts) == 2:  # MM:SS
+            m, s = map(int, parts)
+            seconds = m * 60 + s
+        else:  # SS
+            seconds = int(parts[0])
+        return seconds + ms
+    except (ValueError, IndexError) as e:
+        logger.error(f"Error parsing time {time_str}: {str(e)}")
+        return 0.0
+def extract_timestamp(filename):
+    """
+    从文件名中提取开始和结束时间戳
+    例如: "audio_00_06,500-00_24,800.mp3" -> (6.5, 24.8)
+    """
+    try:
+        # 从文件名中提取时间部分
+        time_part = filename.split('_', 1)[1].split('.')[0]  # 获取 "00_06,500-00_24,800" 部分
+        start_time, end_time = time_part.split('-')  # 分割成开始和结束时间
+        # 将下划线格式转换回冒号格式
+        start_time = start_time.replace('_', ':')
+        end_time = end_time.replace('_', ':')
+        # 将时间戳转换为秒
+        start_seconds = time_to_seconds(start_time)
+        end_seconds = time_to_seconds(end_time)
+        return start_seconds, end_seconds
+    except Exception as e:
+        logger.error(f"Error extracting timestamp from {filename}: {str(e)}")
+        return 0.0, 0.0
if __name__ == "__main__":
    # Example usage: merge the narration audio files referenced by a sample
    # script into one track. Entries with OST == 1 have no narration audio
    # file (their 'audio' value is empty).
    total_duration = 90
    video_script = [
        {'picture': '【解说】好的，各位，欢迎回到我的频道！《庆余年 2》刚开播就给了我们一个王炸！范闲在北齐"死"了？这怎么可能！',
         'timestamp': '00:00:00-00:00:26',
         'narration': '好的各位，欢迎回到我的频道！《庆余年 2》刚开播就给了我们一个王炸！范闲在北齐"死"了？这怎么可能！上集片尾那个巨大的悬念，这一集就立刻揭晓了！范闲假死归来，他面临的第一个，也是最大的难关，就是如何面对他最敬爱的，同时也是最可怕的那个人——庆帝！',
         'OST': 0, 'duration': 26,
         'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3'},
        {'picture': '【解说】上一集我们看到，范闲在北齐遭遇了惊天变故，生死不明！', 'timestamp': '00:01:15-00:01:29',
         'narration': '但我们都知道，他绝不可能就这么轻易退场！第二集一开场，范闲就已经秘密回到了京都。他的生死传闻，可不像我们想象中那样只是小范围流传，而是…',
         'OST': 0, 'duration': 14,
         'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3'},
        {'picture': '画面切到王启年小心翼翼地向范闲汇报。', 'timestamp': '00:04:41-00:04:58',
         'narration': '我发现大人的死讯不光是在民间,在官场上也它传开了,所以呢,所以啊,可不是什么好事,将来您跟陛下怎么交代,这可是欺君之罪',
         'OST': 1, 'duration': 17,
         'audio': ''},
        {'picture': '【解说】"欺君之罪"！在封建王朝，这可是抄家灭族的大罪！搁一般人，肯定脚底抹油溜之大吉了。',
         'timestamp': '00:04:58-00:05:20',
         'narration': '"欺君之罪"！在封建王朝，这可是抄家灭族的大罪！搁一般人，肯定脚底抹油溜之大吉了。但范闲是谁啊？他偏要反其道而行之！他竟然决定，直接去见庆帝！冒着天大的风险，用"假死"这个事实去赌庆帝的态度！',
         'OST': 0, 'duration': 22,
         'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3'},
        {'picture': '【解说】但想见庆帝，哪有那么容易？范闲艺高人胆大，竟然选择了最激进的方式——闯宫！',
         'timestamp': '00:05:45-00:05:53',
         'narration': '但想见庆帝，哪有那么容易？范闲艺高人胆大，竟然选择了最激进的方式——闯宫！',
         'OST': 0, 'duration': 8,
         'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3'},
        {'picture': '画面切换到范闲蒙面闯入皇宫，被侍卫包围的场景。', 'timestamp': '00:06:00-00:06:03',
         'narration': '抓刺客',
         'OST': 1, 'duration': 3,
         'audio': ''}]
    output_file = merge_audio_files("test456", total_duration, video_script)
    print(output_file)

app/services/clip_video.py ADDED Viewed

	@@ -0,0 +1,237 @@

+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''
+@Project: NarratoAI
+@File   : clip_video
+@Author : 小林同学
+@Date   : 2025/5/6 下午6:14
+'''
+import os
+import subprocess
+import json
+import hashlib
+from loguru import logger
+from typing import Dict, List, Optional
+from pathlib import Path
+from app.utils import ffmpeg_utils
def parse_timestamp(timestamp: str) -> tuple:
    """
    Split a timestamp range string into its start and end parts.

    Args:
        timestamp: Range in the form 'HH:MM:SS-HH:MM:SS' or
            'HH:MM:SS,sss-HH:MM:SS,sss'. Whitespace around either part
            (e.g. '00:00:05 - 00:00:10') is tolerated and removed.

    Returns:
        tuple: (start_time, end_time), each in the same 'HH:MM:SS' or
        'HH:MM:SS,sss' form as the input.

    Raises:
        ValueError: If the string does not contain exactly one '-' separator.
    """
    parts = timestamp.split('-')
    if len(parts) != 2:
        # Same exception type as the implicit unpacking error, but with a
        # message that names the offending value.
        raise ValueError(
            f"Invalid timestamp range, expected 'start-end': {timestamp!r}"
        )
    start_time, end_time = (part.strip() for part in parts)
    return start_time, end_time
def calculate_end_time(start_time: str, duration: float, extra_seconds: float = 1.0) -> str:
    """
    Compute an end time from a start time and a duration.

    Args:
        start_time: Start time as 'HH:MM:SS' or 'HH:MM:SS,sss' (with milliseconds).
        duration: Duration in seconds.
        extra_seconds: Extra margin in seconds added on top of the duration
            (defaults to 1 second).

    Returns:
        str: The end time in the same format as ``start_time``. When the input
        has no millisecond part, the fractional part of the result is dropped.
    """
    has_milliseconds = ',' in start_time
    if has_milliseconds:
        time_part, ms_part = start_time.split(',')
        start_ms = int(ms_part)
    else:
        time_part, start_ms = start_time, 0
    h, m, s = map(int, time_part.split(':'))

    # round() rather than int(): a float product such as 1.001 * 1000 evaluates
    # to 1000.9999999999999, and truncation would silently lose a millisecond.
    offset_ms = round((duration + extra_seconds) * 1000)
    total_milliseconds = (h * 3600 + m * 60 + s) * 1000 + start_ms + offset_ms

    # Break the total back down into hours / minutes / seconds / milliseconds.
    total_seconds, ms_new = divmod(total_milliseconds, 1000)
    total_minutes, s_new = divmod(total_seconds, 60)
    h_new, m_new = divmod(total_minutes, 60)

    # Mirror the input format.
    if has_milliseconds:
        return f"{h_new:02d}:{m_new:02d}:{s_new:02d},{ms_new:03d}"
    return f"{h_new:02d}:{m_new:02d}:{s_new:02d}"
def check_hardware_acceleration() -> Optional[str]:
    """
    Report which hardware acceleration option is available for FFmpeg.

    Detection is delegated to the shared helper in ``ffmpeg_utils``; this
    function only forwards its answer.

    Returns:
        Optional[str]: The value returned by
        ``ffmpeg_utils.get_ffmpeg_hwaccel_type()`` (None when acceleration is
        not supported, per the original documentation).
    """
    hwaccel_type = ffmpeg_utils.get_ffmpeg_hwaccel_type()
    return hwaccel_type
def clip_video(
        video_origin_path: str,
        tts_result: List[Dict],
        output_dir: Optional[str] = None,
        task_id: Optional[str] = None
) -> Dict[str, str]:
    """
    Cut one clip out of the source video for every entry of ``tts_result``.

    Each clip starts at the start of the entry's ``timestamp`` and ends
    ``duration`` seconds later plus the default margin applied by
    ``calculate_end_time``. The end part of ``timestamp`` itself is not used.

    Args:
        video_origin_path: Path of the source video.
        tts_result: List of dicts; each must provide ``timestamp``
            ('start-end') and ``duration`` (seconds) and may provide ``_id``.
        output_dir: Directory for the clips. When None, a directory
            ``storage/temp/clip_video/<task_id>`` three levels above this file
            is used.
        task_id: Identifier used for the default output directory. When None,
            an MD5 of the video path and the JSON-serialised ``tts_result`` is
            used.

    Returns:
        Dict[str, str]: Maps each entry's ``_id`` (falling back to its
        ``timestamp`` when ``_id`` is absent) to the path of the generated clip.

    Raises:
        FileNotFoundError: If ``video_origin_path`` does not exist.
        RuntimeError: If FFmpeg exits with a non-zero status for any clip.
    """
    if not os.path.exists(video_origin_path):
        raise FileNotFoundError(f"视频文件不存在: {video_origin_path}")

    # Derive a deterministic id from the inputs when the caller gave none.
    if task_id is None:
        content_for_hash = f"{video_origin_path}_{json.dumps(tts_result)}"
        task_id = hashlib.md5(content_for_hash.encode()).hexdigest()

    if output_dir is None:
        output_dir = os.path.join(
            os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
            "storage", "temp", "clip_video", task_id
        )
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Hardware acceleration is probed once; it is the same for every clip.
    hwaccel = check_hardware_acceleration()
    hwaccel_args = ffmpeg_utils.get_ffmpeg_hwaccel_args() if hwaccel else []
    video_codec = "h264_videotoolbox" if hwaccel == "videotoolbox" else "libx264"

    # On Windows force UTF-8 decoding of FFmpeg's output to avoid GBK decode
    # errors; elsewhere keep the platform default (encoding=None).
    output_encoding = 'utf-8' if os.name == 'nt' else None

    result = {}
    for item in tts_result:
        _id = item.get("_id", item.get("timestamp", "unknown"))
        timestamp = item["timestamp"]
        start_time, _ = parse_timestamp(timestamp)

        # The real end time comes from the narration duration (plus margin).
        calculated_end_time = calculate_end_time(start_time, item["duration"])

        # FFmpeg wants '.' as the millisecond separator.
        ffmpeg_start_time = start_time.replace(',', '.')
        ffmpeg_end_time = calculated_end_time.replace(',', '.')

        # File names use '-' in place of ':' and ','.
        safe_start_time = start_time.replace(':', '-').replace(',', '-')
        safe_end_time = calculated_end_time.replace(':', '-').replace(',', '-')
        output_path = os.path.join(output_dir, f"vid_{safe_start_time}@{safe_end_time}.mp4")

        ffmpeg_cmd = [
            "ffmpeg", "-y", *hwaccel_args,
            "-i", video_origin_path,
            "-ss", ffmpeg_start_time,
            "-to", ffmpeg_end_time,
            "-c:v", video_codec,
            "-c:a", "aac",
            "-strict", "experimental",
            output_path
        ]

        try:
            logger.info(f"裁剪视频片段: {timestamp} -> {ffmpeg_start_time}到{ffmpeg_end_time}")
            subprocess.run(
                ffmpeg_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                encoding=output_encoding,
                # Undecodable bytes in FFmpeg's output must not abort a clip
                # that was otherwise cut successfully.
                errors='replace',
                text=True,
                check=True
            )
        except subprocess.CalledProcessError as e:
            logger.error(f"裁剪视频片段失败: {timestamp}")
            logger.error(f"错误信息: {e.stderr}")
            # Keep the original failure attached as the cause.
            raise RuntimeError(f"视频裁剪失败: {e.stderr}") from e
        result[_id] = output_path
    return result
if __name__ == "__main__":
    # Manual smoke test with sample data.
    video_origin_path = "/Users/apple/Desktop/home/NarratoAI/resource/videos/qyn2-2无片头片尾.mp4"
    tts_result = [{'timestamp': '00:00:00-00:01:15',
                   'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3',
                   'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt',
                   'duration': 25.55,
                   'text': '好的各位，欢迎回到我的频道！《庆余年 2》刚开播就给了我们一个王炸！范闲在北齐"死"了？这怎么可能！上集片尾那个巨大的悬念，这一集就立刻揭晓了！范闲假死归来，他面临的第一个，也是最大的难关，就是如何面对他最敬爱的，同时也是最可怕的那个人——庆帝！'},
                  {'timestamp': '00:01:15-00:04:40',
                   'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3',
                   'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt',
                   'duration': 13.488,
                   'text': '但我们都知道，他绝不可能就这么轻易退场！第二集一开场，范闲就已经秘密回到了京都。他的生死传闻，可不像我们想象中那样只是小范围流传，而是…'},
                  {'timestamp': '00:04:58-00:05:45',
                   'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3',
                   'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt',
                   'duration': 21.363,
                   'text': '"欺君之罪"！在封建王朝，这可是抄家灭族的大罪！搁一般人，肯定脚底抹油溜之大吉了。但范闲是谁啊？他偏要反其道而行之！他竟然决定，直接去见庆帝！冒着天大的风险，用"假死"这个事实去赌庆帝的态度！'},
                  {'timestamp': '00:05:45-00:06:00',
                   'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3',
                   'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt',
                   'duration': 7.675, 'text': '但想见庆帝，哪有那么容易？范闲艺高人胆大，竟然选择了最激进的方式——闯宫！'}]
    # NOTE(review): this looks like a sample timestamp -> clip-path mapping
    # kept for reference. It is NOT a valid `output_dir` (clip_video expects a
    # directory path string there), so it is no longer passed to clip_video.
    subclip_path_videos = {
        '00:00:00-00:01:15': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-00-00-00-01-15.mp4',
        '00:01:15-00:04:40': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-01-15-00-04-40.mp4',
        '00:04:41-00:04:58': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-04-41-00-04-58.mp4',
        '00:04:58-00:05:45': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-04-58-00-05-45.mp4',
        '00:05:45-00:06:00': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-05-45-00-06-00.mp4',
        '00:06:00-00:06:03': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-06-00-00-06-03.mp4',
    }
    # Usage example: let clip_video pick its default output directory.
    try:
        result = clip_video(video_origin_path, tts_result)
        print("裁剪结果:")
        print(json.dumps(result, indent=4, ensure_ascii=False))
    except Exception as e:
        print(f"发生错误: {e}")

app/services/generate_narration_script.py ADDED Viewed

	@@ -0,0 +1,264 @@

+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''
+@Project: NarratoAI
+@File   : 生成介绍文案
+@Author : 小林同学
+@Date   : 2025/5/8 上午11:33
+'''
+import json
+import os
+import traceback
+from openai import OpenAI
+from loguru import logger
def parse_frame_analysis_to_markdown(json_file_path):
    """
    Render a video frame-analysis JSON file as Markdown.

    One "## 片段 N" section is produced per entry of
    ``overall_activity_summaries``; the entries of ``frame_observations`` that
    share the summary's ``batch_index`` are listed under it.

    :param json_file_path: Path of the JSON file
    :return: The Markdown text, or an error message string when the file is
             missing or cannot be processed
    """
    if not os.path.exists(json_file_path):
        return f"错误: 文件 {json_file_path} 不存在"
    try:
        with open(json_file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        summaries = data.get('overall_activity_summaries', [])

        # Group the frame observations by the batch they belong to.
        frames_by_batch = {}
        for frame in data.get('frame_observations', []):
            frames_by_batch.setdefault(frame.get('batch_index'), []).append(frame)

        # Collect the output pieces and join once at the end.
        pieces = []
        for number, summary in enumerate(summaries, 1):
            time_range = summary.get('time_range', '')
            # Falsy summaries render as an empty description.
            description = summary.get('summary', '') or ''
            pieces.append(f"## 片段 {number}\n")
            pieces.append(f"- 时间范围：{time_range}\n")
            pieces.append(f"- 片段描述：{description}\n")
            pieces.append("- 详细描述：\n")
            for frame in frames_by_batch.get(summary.get('batch_index'), []):
                timestamp = frame.get('timestamp', '')
                # The observation text is emitted verbatim, never split.
                observation = frame.get('observation', '') or ''
                pieces.append(f"  - {timestamp}: {observation}\n")
            pieces.append("\n")
        return "".join(pieces)
    except Exception as e:
        return f"处理JSON文件时出错: {traceback.format_exc()}"
def generate_narration(markdown_content, api_key, base_url, model):
    """
    Ask an OpenAI-compatible chat API to write a narration script for the
    video described by the frame-analysis Markdown.

    :param markdown_content: Frame-analysis content in Markdown form; it is
                             inserted into the <video_frame_description> part
                             of the prompt
    :param api_key: API key passed to the OpenAI client
    :param base_url: API base URL (for non-official, OpenAI-compatible endpoints)
    :param model: Model name. "deepseek-reasoner" is requested without JSON
                  response mode and has ```json fences stripped from its reply
    :return: The generated narration script as a string (expected to be JSON
             text), or an error message string when the request fails or
             yields no usable content
    """
    try:
        # Build the prompt. The only placeholder is the %s inside
        # <video_frame_description>.
        prompt = """
我是一名荒野建造解说的博主，以下是一些同行的对标文案，请你深度学习并总结这些文案的风格特点跟内容特点：
<example_text_1>
解压助眠的天花板就是荒野建造，沉浸丝滑的搭建过程可以说每一帧都是极致享受，我保证强迫症来了都找不出一丁点毛病。更别说全屋严丝合缝的拼接工艺，还能轻松抵御零下二十度气温，让你居住的每一天都温暖如春。
在家闲不住的西姆今天也打算来一次野外建造，行走没多久他就发现许多倒塌的树，任由它们自生自灭不如将其利用起来。想到这他就开始挥舞铲子要把地基挖掘出来，虽然每次只能挖一点点，但架不住他体能惊人。没多长时间一个 2x3 的深坑就赫然出现，这深度住他一人绰绰有余。
随后他去附近收集来原木，这些都是搭建墙壁的最好材料。而在投入使用前自然要把表皮刮掉，防止森林中的白蚁蛀虫。处理好一大堆后西姆还在两端打孔，使用木钉固定在一起。这可不是用来做墙壁的，而是做庇护所的承重柱。只要木头间的缝隙足够紧密，那搭建出的木屋就能足够坚固。
每向上搭建一层，他都会在中间塞入苔藓防寒，保证不会泄露一丝热量。其他几面也是用相同方法，很快西姆就做好了三面墙壁，每一根木头都极其工整，保证强迫症来了都要点个赞再走。
在继续搭建墙壁前西姆决定将壁炉制作出来，毕竟森林夜晚的气温会很低，保暖措施可是重中之重。完成后他找来一块大树皮用来充当庇护所的大门，而上面刮掉的木屑还能作为壁炉的引火物，可以说再完美不过。
测试了排烟没问题后他才开始搭建最后一面墙壁，这一面要预留门和窗，所以在搭建到一半后还需要在原木中间开出卡口，让自己劈砍时能轻松许多。此时只需将另外一根如法炮制，两端拼接在一起后就是一扇大小适中的窗户。而随着随后一层苔藓铺好，最后一根原木落位，这个庇护所的雏形就算完成。
大门的安装他没选择用合页，而是在底端雕刻出榫头，门框上则雕刻出榫眼，只能说西姆的眼就是一把尺，这完全就是严丝合缝。此时他才开始搭建屋顶。这里西姆用的方法不同，他先把最外围的原木固定好，随后将原木平铺在上面，就能得到完美的斜面屋顶。等他将四周的围栏也装好后，工整的屋顶看起来十分舒服，西姆躺上去都不想动。
稍作休息后，他利用剩余的苔藓，对屋顶的缝隙处密封。可这样西姆觉得不够保险，于是他找来一些黏土，再次对原本的缝隙二次加工，保管这庇护所冬天也暖和。最后只需要平铺上枯叶，以及挖掘出的泥土，整个屋顶就算完成。
考虑到庇护所的美观性，自然少不了覆盖上苔藓，翠绿的颜色看起来十分舒服。就连门口的庭院旁，他都移植了许多小树做点缀，让这木屋与周边环境融为一体。西姆才刚完成好这件事，一场大雨就骤然降临。好在此时的他已经不用淋雨，更别说这屋顶防水十分不错，室内没一点雨水渗透进来。
等待温度回升的过程，西姆利用墙壁本身的凹槽，把床框镶嵌在上面，只需要铺上苔藓，以及自带的床单枕头，一张完美的单人床就做好。辛苦劳作一整天，西姆可不会亏待自己。他将自带的牛肉腌制好后，直接放到壁炉中烤，只需要等待三十分钟，就能享受这美味的一顿。
在辛苦建造一星期后，他终于可以在自己搭建的庇护所中，享受最纯正的野外露营。后面西姆回家补给了一堆物资，再次回来时森林已经大雪纷飞，让他原本翠绿的小屋，更换上了冬季限定皮肤。好在内部设施没受什么影响，和他离开时一样整洁。
就是房间中已经没多少柴火，让西姆今天又得劈柴。寒冷干燥的天气，让木头劈起来十分轻松。没多久他就收集到一大堆，这些足够燃烧好几天。虽然此时外面大雪纷飞，但小屋中却开始逐渐温暖。这次他除了带来一些食物外，还有几瓶调味料，以及一整套被褥，让自己的居住舒适度提高一大截。
而秋天他有收集干草的缘故，只需要塞入枕套中密封起来，就能作为靠垫用。就这居住条件，比一般人在家过的还要奢侈。趁着壁炉木头变木炭的过程，西姆则开始不紧不慢的处理食物。他取出一块牛排，改好花刀以后，撒上一堆调料腌制起来。接着用锡纸包裹好，放到壁炉中直接炭烤，搭配上自带的红酒，是一个非常好的选择。
随着时间来到第二天，外面的积雪融化了不少，西姆简单做顿煎蛋补充体力后，决定制作一个室外篝火堆，用来晚上驱散周边野兽。搭建这玩意没什么技巧，只需要找到一大堆木棍，利用大树的夹缝将其掰弯，然后将其堆积在一起，就是一个简易版的篝火堆。看这外形有点像帐篷，好在西姆没想那么多。
等待天色暗淡下来后，他才来到室外将其点燃，顺便处理下多余的废料。只可惜这场景没朋友陪在身边，对西姆来说可能是个遗憾。而哪怕森林只有他一个人，都依旧做了好几个小时。等到里面的篝火彻底燃尽后，西姆还找来雪球，覆盖到上面将火熄灭，这防火意识可谓十分好。最后在室内二十五度的高温下，裹着被子睡觉。
</example_text_1>
<example_text_2>
解压助眠的天花板就是荒野建造，沉浸丝滑的搭建过程每一帧都是极致享受，全屋严丝合缝的拼接工艺，能轻松抵御零下二十度气温，居住体验温暖如春。
在家闲不住的西姆开启野外建造。他发现倒塌的树，决定加以利用。先挖掘出 2x3 的深坑作为地基，接着收集原木，刮掉表皮防白蚁蛀虫，打孔用木钉固定制作承重柱。搭建墙壁时，每一层都塞入苔藓防寒，很快做好三面墙。
为应对森林夜晚低温，西姆制作壁炉，用大树皮当大门，刮下的木屑做引火物。搭建最后一面墙时预留门窗，通过在原木中间开口拼接做出窗户。大门采用榫卯结构安装，严丝合缝。
搭建屋顶时，先固定外围原木，再平铺原木形成斜面屋顶，之后用苔藓、黏土密封缝隙，铺上枯叶和泥土。为美观，在木屋覆盖苔藓，移植小树点缀。完工时遇大雨，木屋防水良好。
西姆利用墙壁凹槽镶嵌床框，铺上苔藓、床单枕头做成床。劳作一天后，他用壁炉烤牛肉享用。建造一星期后，他开始野外露营。
后来西姆回家补给物资，回来时森林大雪纷飞。他劈柴储备，带回食物、调味料和被褥，提高居住舒适度，还用干草做靠垫。他用壁炉烤牛排，搭配红酒。
第二天，积雪融化，西姆制作室外篝火堆防野兽。用大树夹缝掰弯木棍堆积而成，晚上点燃处理废料，结束后用雪球灭火，最后在室内二十五度的环境中裹被入睡。
</example_text_2>
<example_text_3>
如果战争到来，这个深埋地下十几米的庇护所绝对是 bug 般的存在。即使被敌人发现，还能通过快速通道一秒逃出。里面不仅有竹子、地暖、地下水井，还自制抽水机。在解决用水问题的同时，甚至自研无土栽培技术，过上完全自给自足的生活。
阿伟的老婆美如花，但阿伟从来不回家，来到野外他乐哈哈，一言不合就开挖。众所周知当战争来临时，地下堡垒的安全性是最高的。阿伟苦苦研习两载半，只为练就一身挖洞本领。在这双逆天麒麟臂的加持下，如此坚硬的泥土都只能当做炮灰。
得到了充足的空间后，他便开始对这些边缘进行打磨。随后阿伟将细线捆在木棍上，以此描绘出圆柱的轮廓。接着再一点点铲掉多余的部分。虽然是由泥土一体式打造，但这样的桌子保准用上千年都不成问题。
考虑到十几米的深度进出非常不方便，于是阿伟找来两根长达 66.6 米的木头，打算为庇护所打造一条快速通道。只见他将木桩牢牢地插入地下，并顺着洞口的方向延伸出去，直到贯穿整个山洞。接着在每个木桩的连接处钉入铁钉，确保轨道不能有一毫米的偏差。完成后再制作一个木质框架，从而达到前后滑动的效果。
不得不说阿伟这手艺简直就是大钢管子杵青蛙。在上面放上一个木制的车斗，还能加快搬运泥土的速度。没多久庇护所的内部就已经初见雏形。为了住起来更加舒适，还需要为自己打造一张床。虽然深处的泥土同样很坚固，但好处就是不用担心垮塌的风险。
阿伟不仅设计了更加符合人体工学的拱形，并且还在一旁雕刻处壁龛。就是这氛围怎么看着有点不太吉利。别看阿伟一身腱子肉，但这身体里的艺术细菌可不少。每个边缘的地方他都做了精雕细琢，瞬间让整个卧室的颜值提升一大截。
住在地下的好处就是房子面积全靠挖，每平方消耗两个半馒头。不仅没有了房贷的压力，就连买墓地的钱也省了。阿伟将中间的墙壁挖空，从而得到取暖的壁炉。当然最重要的还有排烟问题，要想从上往下打通十几米的山体是件极其困难的事。好在阿伟年轻时报过忆坤年的古墓派补习班，这打洞技术堪比隔壁学校的土拨鼠专业。虽然深度长达十几米，但排烟效果却一点不受影响，一个字专业！
随后阿伟继续对壁炉底部雕刻，打通了底部放柴火的空间，并制作出放锅的灶头。完成后阿伟从侧面将壁炉打通，并制作出一条导热的通道，以此连接到床铺的位置。毕竟住在这么一个风湿宝地，不注意保暖除湿很容易得老寒腿。
阿伟在床面上挖出一条条管道，以便于温度能传输到床的每个角落。接下来就可以根据这些通道的长度裁切出同样长短的竹子，根据竹筒的大小凿出相互连接的孔洞，最后再将竹筒内部打通，以达到温度传送的效果。
而后阿伟将这些管道安装到凹槽内，在他严谨的制作工艺下，每根竹子刚好都能镶嵌进去。在铺设床面之前还需要用木塞把圆孔堵住，防止泥土掉落进管道。泥土虽然不能隔绝湿气，但却是十分优良的导热材料。等他把床面都压平后就可以小心的将这些木塞拔出来，最后再用黏土把剩余的管道也遮盖起来，直到整个墙面恢复原样。
接下来还需要测试一下加热效果，当他把火点起来后，温度很快就传送到了管道内，把火力一点点加大，直到热气流淌到更远的床面。随着小孔里的青烟冒出，也预示着阿伟的地暖可以投入使用。而后阿伟制作了一些竹条，并用细绳将它们喜结连理。
千里之行始于足下，美好的家园要靠自己双手打造。明明可以靠才艺吃饭的阿伟偏偏要用八块腹肌征服大家，就问这样的男人哪个野生婆娘不喜欢？完成后阿伟还用自己 35 码的大腚感受了一下，真烫！
随后阿伟来到野区找到一根上好的雷击木，他当即就把木头咔嚓成两段，并取下两节较为完整的带了回去，刚好能和圆桌配套。另外一个在里面凿出凹槽，并插入木棍连接，得到一个夯土的木锤。住过农村的小伙伴都知道，这样夯出来的地面堪比水泥地，不仅坚硬耐磨，还不用担心脚底打滑。忙碌了一天的阿伟已经饥渴难耐，拿出野生小烤肠，安安心心住新房，光脚爬上大热炕，一觉能睡到天亮。
第二天阿伟打算将房间扩宽，毕竟吃住的地方有了，还要解决个人卫生的问题。阿伟在另一侧增加了一个房间，他打算将这里打造成洗澡的地方。为了防止泥土垮塌，他将顶部做成圆弧形，等挖出足够的空间后，旁边的泥土已经堆成了小山。
为了方便清理这些泥土，阿伟在之前的轨道增加了转弯，交接处依然是用铁钉固定，一直延伸到房间的最里面。有了运输车的帮助，这些成吨的泥土也能轻松的运送出去，并且还能体验过山车的感觉。很快他就完成了清理工作。
为了更方便的在里面洗澡，他将底部一点点挖空，这么大的浴缸，看来阿伟并不打算一个人住。完成后他将墙面雕刻的凹凸有致，让这里看起来更加豪华。接着用洛阳铲挖出排水口，并用一根相同大小的竹筒作为开关。
由于四周都是泥土还不能防水，阿伟特意找了一些白蚁巢，用来制作可以防水的野生水泥。现在就可以将里里外外，能接触到水的地方都涂抹一遍。细心的阿伟还找来这种 500 克一斤的鹅卵石，对池子表面进行装饰。
没错，水源问题阿伟早已经考虑在内，他打算直接在旁边挖个水井，毕竟已经挖了这么深，再向下挖一挖，应该就能到达地下水的深度。经过几日的奋战，能看得出阿伟已经消瘦了不少，但一想到马上就能拥有的豪宅，他直接化身为无情的挖土机器，很快就挖到了好几米的深度。
考虑到自己的弹跳力有限，阿伟在一旁定入木桩，然后通过绳子爬上爬下。随着深度越来越深，井底已经开始渗出水来，这也预示着打井成功。没多久这里面将渗满泉水，仅凭一次就能挖到水源，看来这里还真是块风湿宝地。
随后阿伟在井口四周挖出凹槽，以便于井盖的安置。这一量才知道，井的深度已经达到了足足的 5 米。阿伟把木板组合在一起，再沿着标记切掉多余部分，他甚至还给井盖做了把手。可是如何从这么深的井里打水还是个问题，但从阿伟坚定的眼神来看，他应该想到了解决办法。
只见他将树桩锯成两半，然后用凿子把里面一点点掏空，另外一半也是如法炮制。接着还要在底部挖出圆孔，要想成功将水从 5 米深的地方抽上来，那就不得不提到大家熟知的勾股定理。没错，这跟勾股定理没什么关系。
阿伟给竹筒做了一个木塞，并在里面打上安装连接轴的孔。为了增加密闭性，阿伟不得不牺牲了自己的 AJ，剪出与木塞相同的大小后，再用木钉固定住。随后他收集了一些树胶，并放到火上加热融化。接下来就可以涂在木塞上增加使用寿命。
现在将竹筒组装完成，就可以利用虹吸原理将水抽上来。完成后就可以把井盖盖上去，再用泥土在上面覆盖，现在就不用担心失足掉下去了。
接下来阿伟去采集了一些大漆，将它涂抹在木桶接缝处，就能将其二合为一。完了再接入旁边浴缸的入水口，每个连接的地方都要做好密封，不然后面很容易漏水。随后就可以安装上活塞，并用一根木桩作为省力杠杆，根据空气压强的原理将井水抽上来。
经过半小时的来回拉扯，硕大的浴缸终于被灌满，阿伟也是忍不住洗了把脸。接下来还需要解决排水的问题，阿伟在地上挖出沟渠，一直贯穿到屋外，然后再用竹筒从出水口连接，每个接口处都要抹上胶水，就连门外的出水口他都做了隐藏。
在野外最重要的就是庇护所、水源还有食物。既然已经完成了前二者，那么阿伟还需要拥有可持续发展的食物来源。他先是在地上挖了两排地洞，然后在每根竹筒的表面都打上无数孔洞，这就是他打算用来种植的载体。在此之前，还需要用大火对竹筒进行杀菌消毒。
趁着这时候，他去搬了一麻袋的木屑，先用芭蕉叶覆盖在上面，再铺上厚厚的黏土隔绝温度。在火焰的温度下，能让里面的木屑达到生长条件。
等到第二天所有材料都晾凉后，阿伟才将竹筒内部掏空，并将木屑一点点地塞入竹筒。一切准备就绪，就可以将竹筒插入提前挖好的地洞。最后再往竹筒里塞入种子，依靠房间内的湿度和温度，就能达到大棚种植的效果。稍加时日，这些种子就会慢慢发芽。
虽然暂时还吃不上自己培养的食物，但好在阿伟从表哥贺强那里学到不少钓鱼本领，哪怕只有一根小小的竹竿，也能让他钓上两斤半的大鲶鱼。新鲜的食材，那肯定是少不了高温消毒的过程。趁着鱼没熟，阿伟直接爬进浴缸，冰凉的井水瞬间洗去了身上的疲惫。这一刻的阿伟是无比的享受。
不久后鱼也烤得差不多了，阿伟的生活现在可以说是有滋有味。住在十几米的地下，不仅能安全感满满，哪怕遇到危险，还能通过轨道快速逃生。
</example_text_3>
<video_frame_description>
%s
</video_frame_description>
我正在尝试做这个内容的解说纪录片视频，我需要你以 <video_frame_description> </video_frame_description> 中的内容为解说目标，根据我刚才提供给你的对标文案 <example_text> 特点，以及你总结的特点，帮我生成一段关于荒野建造的解说文案，文案需要符合平台受欢迎的解说风格，请使用 json 格式进行输出；使用 <output> 中的输出格式：
<output>
{
  "items": [
    {
        "_id": 1, # 唯一递增id
        "timestamp": "00:00:05,390-00:00:10,430",
        "picture": "画面描述",
        "narration": "解说文案"
    }
  ]
}
</output>
<restriction>
1. 只输出 json 内容，不要输出其他任何说明性的文字
2. 解说文案的语言使用 简体中文
3. 严禁虚构画面，所有画面只能从 <video_frame_description> 中摘取
</restriction>
""" % (markdown_content,)

        # Initialise the client through the OpenAI SDK.
        client = OpenAI(
            api_key=api_key,
            base_url=base_url
        )

        # deepseek-reasoner does not support JSON response mode, so it is
        # requested without response_format and its reply is cleaned below.
        supports_json_mode = model not in ["deepseek-reasoner"]
        request_kwargs = {
            "model": model,
            "messages": [
                {"role": "system", "content": "你是一名专业的短视频解说文案撰写专家。"},
                {"role": "user", "content": prompt}
            ],
            "temperature": 1.5,
        }
        if supports_json_mode:
            request_kwargs["response_format"] = {"type": "json_object"}
        response = client.chat.completions.create(**request_kwargs)

        # No choices, or a choice without text, counts as "no valid response".
        if not response.choices:
            return "生成解说文案失败: 未获取到有效响应"
        narration_script = response.choices[0].message.content
        if narration_script is None:
            return "生成解说文案失败: 未获取到有效响应"

        # Token accounting is informational only; a provider that omits
        # `usage` must not cost us the generated script.
        if response.usage is not None:
            token_label = "消耗的tokens" if supports_json_mode else "文案消耗的tokens"
            logger.debug(f"{token_label}: {response.usage.total_tokens}")

        if not supports_json_mode:
            # Strip the ```json ... ``` fences around the reply.
            narration_script = narration_script.replace("```json", "").replace("```", "")
        return narration_script
    except Exception as e:
        return f"调用API生成解说文案时出错: {traceback.format_exc()}"
if __name__ == '__main__':
    # Manual test: convert one frame-analysis JSON file to Markdown and save it.
    # The provider/key/model/base-url values below are only used by the
    # commented-out narration-generation step at the end.
    text_provider = 'openai'
    text_api_key = "sk-xxx"
    text_model = "deepseek-reasoner"
    text_base_url = "https://api.deepseek.com"
    video_frame_description_path = "/Users/apple/Desktop/home/NarratoAI/storage/temp/analysis/frame_analysis_20250508_1139.json"
    # Test with the newer JSON file
    test_file_path = "/Users/apple/Desktop/home/NarratoAI/storage/temp/analysis/frame_analysis_20250508_2258.json"
    markdown_output = parse_frame_analysis_to_markdown(test_file_path)
    # print(markdown_output)
    # Write the Markdown to a file so its format can be inspected
    output_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/家里家外1-5.md"
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(markdown_output)
    # print(f"\n已将Markdown输出保存到: {output_file}")
    # # Generate the narration script
    # narration = generate_narration(
    #     markdown_output,
    #     text_api_key,
    #     base_url=text_base_url,
    #     model=text_model
    # )
    #
    # # Save the narration script
    # print(narration)
    # print(type(narration))
    # narration_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/final_narration_script.json"
    # with open(narration_file, 'w', encoding='utf-8') as f:
    #     f.write(narration)
    # print(f"\n已将解说文案保存到: {narration_file}")

app/services/generate_video.py ADDED Viewed

	@@ -0,0 +1,393 @@

+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''
+@Project: NarratoAI
+@File   : generate_video
+@Author : 小林同学
+@Date   : 2025/5/7 上午11:55
+'''
+import os
+import traceback
+from typing import Optional, Dict, Any
+from loguru import logger
+from moviepy import (
+    VideoFileClip,
+    AudioFileClip,
+    CompositeAudioClip,
+    CompositeVideoClip,
+    TextClip,
+    afx
+)
+from moviepy.video.tools.subtitles import SubtitlesClip
+from PIL import ImageFont
+from app.utils import utils
def merge_materials(
    video_path: str,
    audio_path: str,
    output_path: str,
    subtitle_path: Optional[str] = None,
    bgm_path: Optional[str] = None,
    options: Optional[Dict[str, Any]] = None
) -> str:
    """
    Merge video, narration audio, background music and subtitles into the
    final video file.

    Args:
        video_path: Path of the video file.
        audio_path: Path of the narration audio file. A missing file is
            skipped (no narration track is added).
        output_path: Path of the output file.
        subtitle_path: Optional path of the subtitle file.
        bgm_path: Optional path of the background-music file.
        options: Optional settings; recognised keys:
            - voice_volume: narration volume, default 1.0
            - bgm_volume: background-music volume, default 0.3
            - original_audio_volume: volume of the video's own audio, default 0.0
            - keep_original_audio: keep the video's own audio, default False
              (only effective when original_audio_volume > 0)
            - subtitle_font: font file name inside ``utils.font_dir()``,
              default '' (no explicit font)
            - subtitle_font_size: subtitle font size, default 40
            - subtitle_color: subtitle colour, default '#FFFFFF'
            - subtitle_bg_color: subtitle background, default 'transparent'
            - subtitle_position: 'bottom', 'top', 'center' or 'custom',
              default 'bottom'
            - custom_position: vertical position in percent, used when
              subtitle_position is 'custom', default 70
            - stroke_color: outline colour, default '#000000'
            - stroke_width: outline width, default 1
            - threads: number of worker threads, default 2
            - fps: output frame rate, default 30

    Returns:
        The path of the output video (``output_path``).

    Raises:
        Exception: Errors raised while loading the video or writing the output
            are logged and re-raised. Failures while loading audio, BGM or
            subtitles are only logged and that material is skipped.
    """
    if options is None:
        options = {}
    voice_volume = options.get('voice_volume', 1.0)
    bgm_volume = options.get('bgm_volume', 0.3)
    original_audio_volume = options.get('original_audio_volume', 0.0)  # 0 means: do not keep the original audio
    keep_original_audio = options.get('keep_original_audio', False)
    subtitle_font = options.get('subtitle_font', '')
    subtitle_font_size = options.get('subtitle_font_size', 40)
    subtitle_color = options.get('subtitle_color', '#FFFFFF')
    subtitle_bg_color = options.get('subtitle_bg_color', 'transparent')
    subtitle_position = options.get('subtitle_position', 'bottom')
    custom_position = options.get('custom_position', 70)
    stroke_color = options.get('stroke_color', '#000000')
    stroke_width = options.get('stroke_width', 1)
    threads = options.get('threads', 2)
    fps = options.get('fps', 30)

    # Per the original author's note, MoviePy 2.1.1 does not accept
    # 'transparent'; None is used for a transparent background instead.
    if subtitle_bg_color == 'transparent':
        subtitle_bg_color = None

    # Create the output directory if needed. A bare file name has an empty
    # dirname, and os.makedirs('') would raise, so skip it in that case.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    logger.info(f"开始合并素材...")
    logger.info(f"  ① 视频: {video_path}")
    logger.info(f"  ② 音频: {audio_path}")
    if subtitle_path:
        logger.info(f"  ③ 字幕: {subtitle_path}")
    if bgm_path:
        logger.info(f"  ④ 背景音乐: {bgm_path}")
    logger.info(f"  ⑤ 输出: {output_path}")

    # File-backed clips opened here; they are closed after the export so
    # their readers do not leak.
    opened_clips = []

    # Load the video.
    try:
        source_clip = VideoFileClip(video_path)
        opened_clips.append(source_clip)
        logger.info(f"视频尺寸: {source_clip.size[0]}x{source_clip.size[1]}, 时长: {source_clip.duration}秒")
        # Extract the video's own audio if requested.
        original_audio = None
        if keep_original_audio and original_audio_volume > 0:
            try:
                original_audio = source_clip.audio
                if original_audio:
                    original_audio = original_audio.with_effects([afx.MultiplyVolume(original_audio_volume)])
                    logger.info(f"已提取视频原声，音量设置为: {original_audio_volume}")
                else:
                    logger.warning("视频没有音轨，无法提取原声")
            except Exception as e:
                logger.error(f"提取视频原声失败: {str(e)}")
                original_audio = None
        # Drop the original track; the mixed audio is attached further down.
        video_clip = source_clip.without_audio()
    except Exception as e:
        logger.error(f"加载视频失败: {str(e)}")
        raise

    # Collect every audio track that goes into the final mix.
    audio_tracks = []

    # Narration first.
    if audio_path and os.path.exists(audio_path):
        try:
            voice_source = AudioFileClip(audio_path)
            opened_clips.append(voice_source)
            audio_tracks.append(voice_source.with_effects([afx.MultiplyVolume(voice_volume)]))
            logger.info(f"已添加配音音频，音量: {voice_volume}")
        except Exception as e:
            logger.error(f"加载配音音频失败: {str(e)}")

    # Original audio, if kept.
    if original_audio is not None:
        audio_tracks.append(original_audio)
        logger.info(f"已添加视频原声，音量: {original_audio_volume}")

    # Background music, if any.
    if bgm_path and os.path.exists(bgm_path):
        try:
            bgm_source = AudioFileClip(bgm_path)
            opened_clips.append(bgm_source)
            # Effects are applied in order: loop to the video length first,
            # then fade out. Fading before looping would repeat the fade at
            # every loop boundary and leave the real ending un-faded.
            bgm_clip = bgm_source.with_effects([
                afx.AudioLoop(duration=video_clip.duration),
                afx.MultiplyVolume(bgm_volume),
                afx.AudioFadeOut(3),
            ])
            audio_tracks.append(bgm_clip)
            logger.info(f"已添加背景音乐，音量: {bgm_volume}")
        except Exception as e:
            logger.error(f"添加背景音乐失败: \n{traceback.format_exc()}")

    # Mix the tracks and attach them to the video.
    if audio_tracks:
        final_audio = CompositeAudioClip(audio_tracks)
        video_clip = video_clip.with_audio(final_audio)
        logger.info(f"已合成所有音频轨道，共{len(audio_tracks)}个")
    else:
        logger.warning("没有可用的音频轨道，输出视频将没有声音")

    # Resolve the font path (only needed when subtitles are rendered).
    font_path = None
    if subtitle_path and subtitle_font:
        font_path = os.path.join(utils.font_dir(), subtitle_font)
        if os.name == "nt":
            font_path = font_path.replace("\\", "/")
        logger.info(f"使用字体: {font_path}")

    video_width, video_height = video_clip.size

    def create_text_clip(subtitle_item):
        """Build the positioned, timed TextClip for one ((start, end), text) item."""
        phrase = subtitle_item[1]
        max_width = video_width * 0.9
        # Wrapping needs font metrics, so it only happens with an explicit font.
        wrapped_txt = phrase
        if font_path:
            wrapped_txt, _ = wrap_text(
                phrase,
                max_width=max_width,
                font=font_path,
                fontsize=subtitle_font_size
            )
        try:
            _clip = TextClip(
                text=wrapped_txt,
                font=font_path,
                font_size=subtitle_font_size,
                color=subtitle_color,
                bg_color=subtitle_bg_color,  # None means transparent (see above)
                stroke_color=stroke_color,
                stroke_width=stroke_width,
            )
        except Exception as e:
            logger.error(f"创建字幕片段失败: {str(e)}, 使用简化参数重试")
            # Retry with the minimal parameter set.
            _clip = TextClip(
                text=wrapped_txt,
                font=font_path,
                font_size=subtitle_font_size,
                color=subtitle_color,
            )
        # Timing.
        duration = subtitle_item[0][1] - subtitle_item[0][0]
        _clip = _clip.with_start(subtitle_item[0][0])
        _clip = _clip.with_end(subtitle_item[0][1])
        _clip = _clip.with_duration(duration)
        # Position.
        if subtitle_position == "bottom":
            _clip = _clip.with_position(("center", video_height * 0.95 - _clip.h))
        elif subtitle_position == "top":
            _clip = _clip.with_position(("center", video_height * 0.05))
        elif subtitle_position == "custom":
            # Percentage of the free vertical space, clamped to a 10px margin.
            margin = 10
            max_y = video_height - _clip.h - margin
            min_y = margin
            custom_y = (video_height - _clip.h) * (custom_position / 100)
            custom_y = max(min_y, min(custom_y, max_y))
            _clip = _clip.with_position(("center", custom_y))
        else:  # center
            _clip = _clip.with_position(("center", "center"))
        return _clip

    def make_textclip(text):
        """TextClip factory handed to SubtitlesClip."""
        return TextClip(
            text=text,
            font=font_path,
            font_size=subtitle_font_size,
            color=subtitle_color,
        )

    # Subtitles.
    if subtitle_path and os.path.exists(subtitle_path):
        try:
            sub = SubtitlesClip(
                subtitles=subtitle_path,
                encoding="utf-8",
                make_textclip=make_textclip
            )
            text_clips = [create_text_clip(subtitle_item=item) for item in sub.subtitles]
            video_clip = CompositeVideoClip([video_clip, *text_clips])
            logger.info(f"已添加{len(text_clips)}个字幕片段")
        except Exception as e:
            logger.error(f"处理字幕失败: \n{traceback.format_exc()}")

    # Export.
    try:
        video_clip.write_videofile(
            output_path,
            audio_codec="aac",
            temp_audiofile_path=output_dir,
            threads=threads,
            fps=fps,
        )
        logger.success(f"素材合并完成: {output_path}")
    except Exception as e:
        logger.error(f"导出视频失败: {str(e)}")
        raise
    finally:
        # Release the final clip and every file-backed clip opened above.
        # A failing close must not mask the export result.
        for resource in (video_clip, *opened_clips):
            try:
                resource.close()
            except Exception as close_error:
                logger.warning(f"释放资源失败: {str(close_error)}")
    return output_path
def wrap_text(text, max_width, font="Arial", fontsize=60):
    """
    Insert line breaks so that a long text fits a given pixel width.

    Wrapping is tried at spaces first. If a line would consist of a single
    word that is still too wide (typical for text without spaces), the text is
    wrapped character by character instead.

    Args:
        text: Text to wrap.
        max_width: Maximum line width in pixels.
        font: Font path passed to ``ImageFont.truetype``.
        fontsize: Font size.

    Returns:
        tuple: (wrapped text with '\\n' separators, estimated height), where
        the height is the number of lines times the height of the unwrapped
        text's bounding box.
    """
    try:
        font_obj = ImageFont.truetype(font, fontsize)
    except Exception:
        # Fall back to the default font when the requested one cannot be
        # loaded, without also swallowing KeyboardInterrupt / SystemExit.
        font_obj = ImageFont.load_default()

    def get_text_size(inner_text):
        # Surrounding whitespace is not measured.
        inner_text = inner_text.strip()
        left, top, right, bottom = font_obj.getbbox(inner_text)
        return right - left, bottom - top

    width, height = get_text_size(text)
    if width <= max_width:
        return text, height

    # Pass 1: wrap at spaces.
    word_wrap_ok = True
    lines = []
    current = ""
    for word in text.split(" "):
        previous = current
        current += f"{word} "
        line_width, _ = get_text_size(current)
        if line_width <= max_width:
            continue
        if current.strip() == word.strip():
            # A single word is already too wide: give up on word wrapping.
            word_wrap_ok = False
            break
        lines.append(previous)
        current = f"{word} "
    lines.append(current)
    if word_wrap_ok:
        lines = [line.strip() for line in lines]
        return "\n".join(lines).strip(), len(lines) * height

    # Pass 2: wrap character by character. The character that would overflow
    # starts the next line, so no line exceeds max_width (unless one character
    # alone is wider) and no empty trailing line inflates the height.
    lines = []
    current = ""
    for char in text:
        candidate = current + char
        if current and get_text_size(candidate)[0] > max_width:
            lines.append(current)
            current = char
        else:
            current = candidate
    if current:
        lines.append(current)
    return "\n".join(lines).strip(), len(lines) * height
+if __name__ == '__main__':
+    merger_mp4 = '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/merger.mp4'
+    merger_sub = '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/merged_subtitle_00_00_00-00_01_30.srt'
+    merger_audio = '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/merger_audio.mp3'
+    bgm_path = '/Users/apple/Desktop/home/NarratoAI/resource/songs/bgm.mp3'
+    output_video = '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/combined_test.mp4'
+    # 调用示例
+    options = {
+        'voice_volume': 1.0,            # 配音音量
+        'bgm_volume': 0.1,              # 背景音乐音量
+        'original_audio_volume': 1.0,   # 视频原声音量，0表示不保留
+        'keep_original_audio': True,    # 是否保留原声
+        'subtitle_font': 'MicrosoftYaHeiNormal.ttc',  # 这里使用相对字体路径，会自动在 font_dir() 目录下查找
+        'subtitle_font_size': 40,
+        'subtitle_color': '#FFFFFF',
+        'subtitle_bg_color': None,      # 直接使用None表示透明背景
+        'subtitle_position': 'bottom',
+        'threads': 2
+    }
+    try:
+        merge_materials(
+            video_path=merger_mp4,
+            audio_path=merger_audio,
+            subtitle_path=merger_sub,
+            bgm_path=bgm_path,
+            output_path=output_video,
+            options=options
+        )
+    except Exception as e:
+        logger.error(f"合并素材失败: \n{traceback.format_exc()}")

app/services/llm.py ADDED Viewed

	@@ -0,0 +1,808 @@

+import os
+import re
+import json
+import traceback
+import streamlit as st
+from typing import List
+from loguru import logger
+from openai import OpenAI
+from openai import AzureOpenAI
+from moviepy import VideoFileClip
+from openai.types.chat import ChatCompletion
+import google.generativeai as gemini
+from googleapiclient.errors import ResumableUploadError
+from google.api_core.exceptions import *
+from google.generativeai.types import *
+import subprocess
+from typing import Union, TextIO
+from app.config import config
+from app.utils.utils import clean_model_output
+_max_retries = 5  # number of attempts generate_terms makes before giving up
+Method = """
+重要提示：每一部剧的文案，前几句必须吸引人
+首先我们在看完看懂电影后，大脑里面要先有一个大概的轮廓，也就是一个类似于作文的大纲，电影主题线在哪里，首先要找到。
+一般将文案分为开头、内容、结尾
+## 开头部分
+文案开头三句话，是留住用户的关键！
+### 方式一：开头概括总结
+文案的前三句，是整部电影的概括总结，2-3句介绍后，开始叙述故事剧情！
+推荐新手（新号）做：（盘点型）
+盘点全球最恐怖的10部电影
+盘点全球最科幻的10部电影
+盘点全球最悲惨的10部电影
+盘全球最值得看的10部灾难电影
+盘点全球最值得看的10部励志电影
+下面的示例就是最简单的解说文案开头：
+1.这是XXX国20年来最大尺度的一部剧，极度烧脑，却让99%的人看得心潮澎湃、无法自拔，故事开始……
+2.这是有史以来电影院唯一一部全程开灯放完的电影，期间无数人尖叫昏厥，他被成为勇敢者的专属，因为99%的人都不敢看到结局，许多人看完它从此不愿再碰手机，他就是大名鼎鼎的暗黑神作《XXX》……
+3.这到底是一部什么样的电影，能被55个国家公开抵制，它甚至为了上映，不惜删减掉整整47分钟的剧情……
+4.是什么样的一个人被豆瓣网友称之为史上最牛P的老太太，都70岁了还要去贩毒……
+5.他是M国历史上最NB/惨/猖狂/冤枉……的囚犯/抢劫犯/……
+6.这到底是一部什么样的影片，他一个人就拿了4个顶级奖项，第一季8.7分，第二季直接干到9.5分，11万人给出5星好评，一共也就6集，却斩获26项国际大奖，看过的人都说，他是近年来最好的xxx剧，几乎成为了近年来xxx剧的标杆。故事发生在……
+7.他是国产电影的巅峰佳作，更是许多80-90后的青春启蒙，曾入选《时代》周刊，获得年度佳片第一，可在国内却被尘封多年，至今为止都无法在各大视频网站看到完整资源，他就是《xxxxxx》
+8.这是一部让所有人看得荷尔蒙飙升的爽片……
+9.他被成为世界上最虐心绝望的电影，至今无人敢看第二遍，很难想象，他是根据真实事件改编而来……
+10.这大概是有史以来最令人不寒而栗的电影，当年一经放映，就点燃了无数人的怒火，不少观众不等影片放完，就愤然离场，它比《xxx》更让人绝望，比比《xxx》更让人xxx，能坚持看完全片的人，更是万中无一，包括我。甚至观影结束后，有无数人抵制投诉这部电影，认为影片的导演玩弄了他们的情感！他是顶级神作《xxxx》……
+11.这是X国有史以来最高赞的一部悬疑电影，然而却因为某些原因，国内90%的人，没能看过这部片子，他就是《xxx》……
+12.有这样一部电影，这辈子，你绝对不想再看第二遍，并不是它剧情烂俗，而是它的结局你根本承受不起/想象不到……甚至有80%的观众在观影途中情绪崩溃中途离场，更让许多同行都不想解说这部电影，他就是大名鼎鼎的暗黑神作《xxx》…
+13.它被誉为史上最牛悬疑片无数人在看完它时候，一个月不敢照镜子，这样一部仅适合部分年龄段观看的影片，究竟有什么样的魅力，竟然获得某瓣8.2的高分，很多人说这部电影到处都是看点，他就是《xxx》….
+14.这是一部在某瓣上被70万人打出9.3分的高分的电影……到底是一部什么样的电影，能够在某瓣上被70万人打出9.3分的高分……
+15.这是一部细思极恐的科幻大片，整部电影颠覆你的三观，它的名字叫……
+16.史上最震撼的灾难片，每一点都不舍得快进的电影，他叫……
+17.今天给大家带来一部基于真实事件改编的（主题介绍一句……）的故事片，这是一部连环悬疑剧，如果不看到最后绝对想不到结局竟然是这样的反转……
+### 方式：情景式、假设性开头
+1.他叫……你以为他是……的吗？不。他是来……然后开始叙述
+2.你知道……吗？原来……然后开始叙述
+3.如果给你….，你会怎么样？
+4.如果你是….，你会怎么样？
+### 方式三：以国家为开头！简单明了。话语不需要多，但是需要讲解透彻！
+1.这是一部韩国最新灾难片，你一定没有看过……
+2.这是一部印度高分悬疑片，
+3.这部电影原在日本因为……而被下架，
+4.这是韩国最恐怖的犯罪片，
+5.这是最近国产片评分最高的悬疑片
+以上均按照影片国家来区分，然后简单介绍下主题。就可以开始直接叙述作品。也是一个很不错的方法！
+### 方式四：如何自由发挥
+正常情况下，每一部电影都有非常关键的一个大纲，这部电影的主题其实是可以用一句话、两句话概括的。只要看懂电影，就能找到这个主题大纲。
+我们提前把这个主题大纲给放到影视最前面，作为我们的前三句的文案，将会非常吸引人！
+例如：
+1.这不是电影，这是真实故事。两个女人和一个男人被关在可桑拿室。喊破喉咙也没有一丝回音。窒息感和热度让人抓狂，故事就是从这里开始！
+2.如果你男朋友出轨了，他不爱你了，还你家暴，怎么办？接下来这部电影就会教你如何让老公服服帖帖的呆在你身边！女主是一个……开始叙述了。
+3.他力大无穷，双眼放光，这不是拯救地球的超人吗？然而不是。今天给大家推荐的这部电影叫……
+以上是需要看完影片，看懂影片，然后从里面提炼出精彩的几句话,当然是比较难的，当你不会自己去总结前三句的经典的话。可以用前面方式一二三！
+实在想不出来如何去提炼，可以去搜索这部剧，对这部电影的影评，也会给你带过来很多灵感的！
+## 内容部分
+开头有了，剩下的就是开始叙述正文了。主题介绍是根据影片内容来介绍，如果实在自己想不出来。可以参考其他平台中对这部电影的精彩介绍，提取2-3句也可以！
+正常情况下，我们叙述的时候其实是非常简单的，把整部电影主题线，叙述下来，其实文案就是加些修饰词把电影重点内容叙述下来。加上一些修饰词。
+以悬疑剧为例：
+竟然，突然，原来，但是，但，可是，结果，直到，如果，而，果然，发现，只是，出奇，之后，没错，不止，更是，当然，因为，所以……等！
+以上是比较常用的，当然还有很多，需要靠平时思考和阅读的积累！因悬疑剧会有多处反转剧情。所以需要用到反转的修饰词比较多，只有用到这些词。才能体现出各种反转剧情！
+建议大家在刚开始做的时候，做8分钟内的，不要太长，分成三段。每段也是不超过三分钟，这样时间刚好。可以比较好的完成完播率！
+## 结尾部分
+最后故事的结局，除了反转，可以来点人生的道理！如果刚开始不会，可以不写。
+后面水平越来越高的时候，可以进行人生道理的讲评。
+比如：这部电影告诉我们……
+类似于哲理性质的作为一个总结！
+也可以把最后的影视反转，原生放出来，留下悬念。
+比如：也可以总结下这部短片如何的好，推荐/值得大家去观看之类的话语。
+其实就是给我们的作品来一个总结，总结我们所做的三个视频，有开始就要有结束。这个结束不一定是固定的模版。但是视频一定要有结尾。让人感觉有头有尾才最舒服！
+做解说第一次，可能会做两天。第二次可能就需要一天了。慢慢的。时间缩短到8个小时之内是我们平的制作全部时间！
+"""
+def handle_exception(err):
+    if isinstance(err, PermissionDenied):
+        raise Exception("403 用户没有权限访问该资源")
+    elif isinstance(err, ResourceExhausted):
+        raise Exception("429 您的配额已用尽。请稍后重试。请考虑设置自动重试来处理这些错误")
+    elif isinstance(err, InvalidArgument):
+        raise Exception("400 参数无效。例如，文件过大，超出了载荷大小限制。另一个事件提供了无效的 API 密钥。")
+    elif isinstance(err, AlreadyExists):
+        raise Exception("409 已存在具有相同 ID 的已调参模型。对新模型进行调参时，请指定唯一的模型 ID。")
+    elif isinstance(err, RetryError):
+        raise Exception("使用不支持 gRPC 的代理时可能会引起此错误。请尝试将 REST 传输与 genai.configure(..., transport=rest) 搭配使用。")
+    elif isinstance(err, BlockedPromptException):
+        raise Exception("400 出于安全原因，该提示已被屏蔽。")
+    elif isinstance(err, BrokenResponseError):
+        raise Exception("500 流式传输响应已损坏。在访问需要完整响应的内容（例如聊天记录）时引发。查看堆栈轨迹中提供的错误详情。")
+    elif isinstance(err, IncompleteIterationError):
+        raise Exception("500 访问需要完整 API 响应但流式响应尚未完全迭代的内容时引发。对响应对象调用 resolve() 以使用迭代器。")
+    elif isinstance(err, ConnectionError):
+        raise Exception("网络连接错误, 请检查您的网��连接(建议使用 NarratoAI 官方提供的 url)")
+    else:
+        raise Exception(f"大模型请求失败, 下面是具体报错信息: \n\n{traceback.format_exc()}")
+def _generate_response(prompt: str, llm_provider: str = None) -> str:
+    """
+    调用大模型通用方法
+        prompt：
+        llm_provider：
+    """
+    content = ""
+    if not llm_provider:
+        llm_provider = config.app.get("llm_provider", "openai")
+    logger.info(f"llm provider: {llm_provider}")
+    if llm_provider == "g4f":
+        model_name = config.app.get("g4f_model_name", "")
+        if not model_name:
+            model_name = "gpt-3.5-turbo-16k-0613"
+        import g4f
+        content = g4f.ChatCompletion.create(
+            model=model_name,
+            messages=[{"role": "user", "content": prompt}],
+        )
+    else:
+        api_version = ""  # for azure
+        if llm_provider == "moonshot":
+            api_key = config.app.get("moonshot_api_key")
+            model_name = config.app.get("moonshot_model_name")
+            base_url = "https://api.moonshot.cn/v1"
+        elif llm_provider == "ollama":
+            # api_key = config.app.get("openai_api_key")
+            api_key = "ollama"  # any string works but you are required to have one
+            model_name = config.app.get("ollama_model_name")
+            base_url = config.app.get("ollama_base_url", "")
+            if not base_url:
+                base_url = "http://localhost:11434/v1"
+        elif llm_provider == "openai":
+            api_key = config.app.get("openai_api_key")
+            model_name = config.app.get("openai_model_name")
+            base_url = config.app.get("openai_base_url", "")
+            if not base_url:
+                base_url = "https://api.openai.com/v1"
+        elif llm_provider == "oneapi":
+            api_key = config.app.get("oneapi_api_key")
+            model_name = config.app.get("oneapi_model_name")
+            base_url = config.app.get("oneapi_base_url", "")
+        elif llm_provider == "azure":
+            api_key = config.app.get("azure_api_key")
+            model_name = config.app.get("azure_model_name")
+            base_url = config.app.get("azure_base_url", "")
+            api_version = config.app.get("azure_api_version", "2024-02-15-preview")
+        elif llm_provider == "gemini":
+            api_key = config.app.get("gemini_api_key")
+            model_name = config.app.get("gemini_model_name")
+            base_url = "***"
+        elif llm_provider == "qwen":
+            api_key = config.app.get("qwen_api_key")
+            model_name = config.app.get("qwen_model_name")
+            base_url = "***"
+        elif llm_provider == "cloudflare":
+            api_key = config.app.get("cloudflare_api_key")
+            model_name = config.app.get("cloudflare_model_name")
+            account_id = config.app.get("cloudflare_account_id")
+            base_url = "***"
+        elif llm_provider == "deepseek":
+            api_key = config.app.get("deepseek_api_key")
+            model_name = config.app.get("deepseek_model_name")
+            base_url = config.app.get("deepseek_base_url")
+            if not base_url:
+                base_url = "https://api.deepseek.com"
+        elif llm_provider == "ernie":
+            api_key = config.app.get("ernie_api_key")
+            secret_key = config.app.get("ernie_secret_key")
+            base_url = config.app.get("ernie_base_url")
+            model_name = "***"
+            if not secret_key:
+                raise ValueError(
+                    f"{llm_provider}: secret_key is not set, please set it in the config.toml file."
+                )
+        else:
+            raise ValueError(
+                "llm_provider is not set, please set it in the config.toml file."
+            )
+        if not api_key:
+            raise ValueError(
+                f"{llm_provider}: api_key is not set, please set it in the config.toml file."
+            )
+        if not model_name:
+            raise ValueError(
+                f"{llm_provider}: model_name is not set, please set it in the config.toml file."
+            )
+        if not base_url:
+            raise ValueError(
+                f"{llm_provider}: base_url is not set, please set it in the config.toml file."
+            )
+        if llm_provider == "qwen":
+            import dashscope
+            from dashscope.api_entities.dashscope_response import GenerationResponse
+            dashscope.api_key = api_key
+            response = dashscope.Generation.call(
+                model=model_name, messages=[{"role": "user", "content": prompt}]
+            )
+            if response:
+                if isinstance(response, GenerationResponse):
+                    status_code = response.status_code
+                    if status_code != 200:
+                        raise Exception(
+                            f'[{llm_provider}] returned an error response: "{response}"'
+                        )
+                    content = response["output"]["text"]
+                    return content.replace("\n", "")
+                else:
+                    raise Exception(
+                        f'[{llm_provider}] returned an invalid response: "{response}"'
+                    )
+            else:
+                raise Exception(f"[{llm_provider}] returned an empty response")
+        if llm_provider == "gemini":
+            import google.generativeai as genai
+            genai.configure(api_key=api_key, transport="rest")
+            safety_settings = {
+                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
+                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
+                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
+                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
+            }
+            model = genai.GenerativeModel(
+                model_name=model_name,
+                safety_settings=safety_settings,
+            )
+            try:
+                response = model.generate_content(prompt)
+                return response.text
+            except Exception as err:
+                return handle_exception(err)
+        if llm_provider == "cloudflare":
+            import requests
+            response = requests.post(
+                f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/{model_name}",
+                headers={"Authorization": f"Bearer {api_key}"},
+                json={
+                    "messages": [
+                        {"role": "system", "content": "You are a friendly assistant"},
+                        {"role": "user", "content": prompt},
+                    ]
+                },
+            )
+            result = response.json()
+            logger.info(result)
+            return result["result"]["response"]
+        if llm_provider == "ernie":
+            import requests
+            params = {
+                "grant_type": "client_credentials",
+                "client_id": api_key,
+                "client_secret": secret_key,
+            }
+            access_token = (
+                requests.post("https://aip.baidubce.com/oauth/2.0/token", params=params)
+                .json()
+                .get("access_token")
+            )
+            url = f"{base_url}?access_token={access_token}"
+            payload = json.dumps(
+                {
+                    "messages": [{"role": "user", "content": prompt}],
+                    "temperature": 0.5,
+                    "top_p": 0.8,
+                    "penalty_score": 1,
+                    "disable_search": False,
+                    "enable_citation": False,
+                    "response_format": "text",
+                }
+            )
+            headers = {"Content-Type": "application/json"}
+            response = requests.request(
+                "POST", url, headers=headers, data=payload
+            ).json()
+            return response.get("result")
+        if llm_provider == "azure":
+            client = AzureOpenAI(
+                api_key=api_key,
+                api_version=api_version,
+                azure_endpoint=base_url,
+            )
+        else:
+            client = OpenAI(
+                api_key=api_key,
+                base_url=base_url,
+            )
+        response = client.chat.completions.create(
+            model=model_name, messages=[{"role": "user", "content": prompt}]
+        )
+        if response:
+            if isinstance(response, ChatCompletion):
+                content = response.choices[0].message.content
+            else:
+                raise Exception(
+                    f'[{llm_provider}] returned an invalid response: "{response}", please check your network '
+                    f"connection and try again."
+                )
+        else:
+            raise Exception(
+                f"[{llm_provider}] returned an empty response, please check your network connection and try again."
+            )
+    return content.replace("\n", "")
+def _generate_response_video(prompt: str, llm_provider_video: str, video_file: Union[str, TextIO]) -> str:
+    """
+    多模态能力大模型
+    """
+    if llm_provider_video == "gemini":
+        api_key = config.app.get("gemini_api_key")
+        model_name = config.app.get("gemini_model_name")
+        base_url = "***"
+    else:
+        raise ValueError(
+            "llm_provider 未设置，请在 config.toml 文件中进行设置。"
+        )
+    if llm_provider_video == "gemini":
+        import google.generativeai as genai
+        genai.configure(api_key=api_key, transport="rest")
+        safety_settings = {
+            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
+            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
+            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
+            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
+        }
+        model = genai.GenerativeModel(
+            model_name=model_name,
+            safety_settings=safety_settings,
+        )
+        try:
+            response = model.generate_content([prompt, video_file])
+            return response.text
+        except Exception as err:
+            return handle_exception(err)
+def compress_video(input_path: str, output_path: str):
+    """
+    压缩视频文件
+    Args:
+        input_path: 输入视频文件路径
+        output_path: 输出压缩后的视频文件路径
+    """
+    # 如果压缩后的视频文件已经存在，则直接使用
+    if os.path.exists(output_path):
+        logger.info(f"压缩视频文件已存在: {output_path}")
+        return
+    try:
+        clip = VideoFileClip(input_path)
+        clip.write_videofile(output_path, codec='libx264', audio_codec='aac', bitrate="500k", audio_bitrate="128k")
+    except subprocess.CalledProcessError as e:
+        logger.error(f"视频压缩失败: {e}")
+        raise
+def generate_script(
+    video_path: str, video_plot: str, video_name: str, language: str = "zh-CN", progress_callback=None
+) -> str:
+    """
+    生成视频剪辑脚本
+    Args:
+        video_path: 视频文件路径
+        video_plot: 视频剧情内容
+        video_name: 视频名称
+        language: 语言
+        progress_callback: 进度回调函数
+    Returns:
+        str: 生成的脚本
+    """
+    try:
+        # 1. 压缩视频
+        compressed_video_path = f"{os.path.splitext(video_path)[0]}_compressed.mp4"
+        compress_video(video_path, compressed_video_path)
+        # 在关键步骤更新进度
+        if progress_callback:
+            progress_callback(15, "压缩完成")  # 例如,在压缩视频后
+        # 2. 转录视频
+        transcription = gemini_video_transcription(
+            video_name=video_name,
+            video_path=compressed_video_path,
+            language=language,
+            llm_provider_video=config.app["video_llm_provider"],
+            progress_callback=progress_callback
+        )
+        if progress_callback:
+            progress_callback(60, "生成解说文案...")  # 例如,在转录视频后
+        # 3. 编写解说文案
+        script = writing_short_play(video_plot, video_name, config.app["llm_provider"], count=300)
+        # 在关键步骤更新进度
+        if progress_callback:
+            progress_callback(70, "匹配画面...")  # 例如,在生成脚本后
+        # 4. 文案匹配画面
+        if transcription != "":
+            matched_script = screen_matching(huamian=transcription, wenan=script, llm_provider=config.app["video_llm_provider"])
+            # 在关键步骤更新进度
+            if progress_callback:
+                progress_callback(80, "匹配成功")
+            return matched_script
+        else:
+            return ""
+    except Exception as e:
+        handle_exception(e)
+        raise
+def gemini_video_transcription(video_name: str, video_path: str, language: str, llm_provider_video: str, progress_callback=None):
+    '''
+    使用 gemini-1.5-xxx 进行视频画面转录
+    '''
+    api_key = config.app.get("gemini_api_key")
+    gemini.configure(api_key=api_key)
+    prompt = """
+    请转录音频，包括时间戳，并提供视觉描述，然后以 JSON 格式输出，当前视频中使用的语言为 %s。
+    在转录视频时，请通过确保以下条件来完成转录：
+    1. 画面描述使用语言: %s 进行输出。
+    2. 同一个画面合并为一个转录记录。
+    3. 使用以下 JSON schema:
+        Graphics = {"timestamp": "MM:SS-MM:SS"(时间戳格式), "picture": "str"(画面描述), "speech": "str"(台词，如果没有人说话，则使用空字符串。)}
+        Return: list[Graphics]
+    4. 请以严格的 JSON 格式返回数据，不要包含任何注释、标记或其他字符。数据应符合 JSON 语法，可以被 json.loads() 函数直接解析， 不要添加 ```json 或其他标记。
+    """ % (language, language)
+    logger.debug(f"视频名称: {video_name}")
+    try:
+        if progress_callback:
+            progress_callback(20, "上传视频至 Google cloud")
+        gemini_video_file = gemini.upload_file(video_path)
+        logger.debug(f"视频 {gemini_video_file.name} 上传至 Google cloud 成功, 开始解析...")
+        while gemini_video_file.state.name == "PROCESSING":
+            gemini_video_file = gemini.get_file(gemini_video_file.name)
+            if progress_callback:
+                progress_callback(30, "上传成功, 开始解析")  # 更新进度为20%
+        if gemini_video_file.state.name == "FAILED":
+            raise ValueError(gemini_video_file.state.name)
+        elif gemini_video_file.state.name == "ACTIVE":
+            if progress_callback:
+                progress_callback(40, "解析完成, 开始转录...")  # 更新进度为30%
+            logger.debug("解析完成, 开始转录...")
+    except ResumableUploadError as err:
+        logger.error(f"上传视频至 Google cloud 失败, 用户的位置信息不支持用于该API; \n{traceback.format_exc()}")
+        return False
+    except FailedPrecondition as err:
+        logger.error(f"400 用户位置不支持 Google API 使用。\n{traceback.format_exc()}")
+        return False
+    if progress_callback:
+        progress_callback(50, "开始转录")
+    try:
+        response = _generate_response_video(prompt=prompt, llm_provider_video=llm_provider_video, video_file=gemini_video_file)
+        logger.success("视频转录成功")
+        logger.debug(response)
+        print(type(response))
+        return response
+    except Exception as err:
+        return handle_exception(err)
+def generate_terms(video_subject: str, video_script: str, amount: int = 5) -> List[str]:
+    prompt = f"""
+# Role: Video Search Terms Generator
+## Goals:
+Generate {amount} search terms for stock videos, depending on the subject of a video.
+## Constrains:
+1. the search terms are to be returned as a json-array of strings.
+2. each search term should consist of 1-3 words, always add the main subject of the video.
+3. you must only return the json-array of strings. you must not return anything else. you must not return the script.
+4. the search terms must be related to the subject of the video.
+5. reply with english search terms only.
+## Output Example:
+["search term 1", "search term 2", "search term 3","search term 4","search term 5"]
+## Context:
+### Video Subject
+{video_subject}
+### Video Script
+{video_script}
+Please note that you must use English for generating video search terms; Chinese is not accepted.
+""".strip()
+    logger.info(f"subject: {video_subject}")
+    search_terms = []
+    response = ""
+    for i in range(_max_retries):
+        try:
+            response = _generate_response(prompt)
+            search_terms = json.loads(response)
+            if not isinstance(search_terms, list) or not all(
+                isinstance(term, str) for term in search_terms
+            ):
+                logger.error("response is not a list of strings.")
+                continue
+        except Exception as e:
+            logger.warning(f"failed to generate video terms: {str(e)}")
+            if response:
+                match = re.search(r"\[.*]", response)
+                if match:
+                    try:
+                        search_terms = json.loads(match.group())
+                    except Exception as e:
+                        logger.warning(f"failed to generate video terms: {str(e)}")
+                        pass
+        if search_terms and len(search_terms) > 0:
+            break
+        if i < _max_retries:
+            logger.warning(f"failed to generate video terms, trying again... {i + 1}")
+    logger.success(f"completed: \n{search_terms}")
+    return search_terms
+def gemini_video2json(video_origin_name: str, video_origin_path: str, video_plot: str, language: str) -> str:
+    '''
+    使用 gemini-1.5-pro 进行影视解析
+    Args:
+        video_origin_name: str - 影视作品的原始名称
+        video_origin_path: str - 影视作品的原始路径
+        video_plot: str - 影视作品的简介或剧情概述
+    Return:
+        str - 解析后的 JSON 格式字符串
+    '''
+    api_key = config.app.get("gemini_api_key")
+    model_name = config.app.get("gemini_model_name")
+    gemini.configure(api_key=api_key)
+    model = gemini.GenerativeModel(model_name=model_name)
+    prompt = """
+**角色设定：**
+你是一位影视解说专家，擅长根据剧情生成引人入胜的短视频解说文案，特别熟悉适用于TikTok/抖音风格的快速、抓人视频解说。
+**任务目标：**
+1. 根据给定剧情，详细描述画面，重点突出重要场景和情节。
+2. 生成符合TikTok/抖音风格的解说，节奏紧凑，语言简洁，吸引观众。
+3. 解说的时候需要解说一段播放一段原视频，原视频一般为有台词的片段，原视频的控制有 OST 字段控制。
+4. 结果输出为JSON格式，包含字段：
+   - "picture"：画面描述
+   - "timestamp"：画面出现的时间范围
+   - "narration"：解说内容
+   - "OST": 是否开启原声（true / false）
+**输入示例：**
+```text
+在一个���暗的小巷中，主角缓慢走进，四周静谧无声，只有远处隐隐传来猫的叫声。突然，背后出现一个神秘的身影。
+```
+**输出格式：**
+```json
+[
+    {
+        "picture": "黑暗的小巷，主角缓慢走入，四周安静，远处传来猫叫声。",
+        "timestamp": "00:00-00:17",
+        "narration": "静谧的小巷里，主角步步前行，气氛渐渐变得压抑。"
+        "OST": False
+    },
+    {
+        "picture": "神秘身影突然出现，紧张气氛加剧。",
+        "timestamp": "00:17-00:39",
+        "narration": "原声播放"
+        "OST": True
+    }
+]
+```
+**提示：**
+- 文案要简短有力，契合��视频平台用户的观赏习惯。
+- 保持强烈的悬念和情感代入，吸引观众继续观看。
+- 解说一段后播放一段原声，原声内容尽量和解说匹配。
+- 文案语言为：%s
+- 剧情内容：%s (为空则忽略)
+""" % (language, video_plot)
+    logger.debug(f"视频名称: {video_origin_name}")
+    # try:
+    gemini_video_file = gemini.upload_file(video_origin_path)
+    logger.debug(f"上传视频至 Google cloud 成功: {gemini_video_file.name}")
+    while gemini_video_file.state.name == "PROCESSING":
+        import time
+        time.sleep(1)
+        gemini_video_file = gemini.get_file(gemini_video_file.name)
+        logger.debug(f"视频当前状态(ACTIVE才可用): {gemini_video_file.state.name}")
+    if gemini_video_file.state.name == "FAILED":
+        raise ValueError(gemini_video_file.state.name)
+    # except Exception as err:
+    #     logger.error(f"上传视频至 Google cloud 失败, 请检查 VPN 配置和 APIKey 是否正确 \n{traceback.format_exc()}")
+    #     raise TimeoutError(f"上传视频至 Google cloud 失败, 请检查 VPN 配置和 APIKey 是否正确; {err}")
+    streams = model.generate_content([prompt, gemini_video_file], stream=True)
+    response = []
+    for chunk in streams:
+        response.append(chunk.text)
+    response = "".join(response)
+    logger.success(f"llm response: \n{response}")
+    return response
+def writing_movie(video_plot, video_name, llm_provider):
+    """
+    影视解说（电影解说）
+    """
+    prompt = f"""
+    **角色设定：**
+    你是一名有10年经验的影视解说文案的创作者，
+    下面是关于如何写解说文案的方法 {Method}，请认真阅读它，之后我会给你一部影视作品的名称，然后让你写一篇文案
+    请根据方法撰写 《{video_name}》的影视解说文案，《{video_name}》的大致剧情如下: {video_plot}
+    文案要符合以下要求:
+    **任务目标：**
+    1. 文案字数在 1500字左右，严格要求字数，最低不得少于 1000字。
+    2. 避免使用 markdown 格式输出文案。
+    3. 仅输出解说文案，不输出任何其他内容。
+    4. 不要包含小标题，每个段落以 \n 进行分隔。
+    """
+    try:
+        response = _generate_response(prompt, llm_provider)
+        logger.success("解说文案生成成功")
+        return response
+    except Exception as err:
+        return handle_exception(err)
+def writing_short_play(video_plot: str, video_name: str, llm_provider: str, count: int = 500):
+    """
+    影视解说（短剧解说）
+    """
+    if not video_plot:
+        raise ValueError("短剧的简介不能为空")
+    if not video_name:
+        raise ValueError("短剧名称不能为空")
+    prompt = f"""
+    **角色设定：**
+    你是一名有10年经验的短剧解说文案的创作者，
+    下面是关于如何写解说文案的方法 {Method}，请认真阅读它，之后我会给你一部短剧作品的简介，然后让你写一篇解说文案
+    请根据方法撰写 《{video_name}》的解说文案，《{video_name}》的大致剧情如下: {video_plot}
+    文案要符合以下要求:
+    **任务目标：**
+    1. 请严格要求文案字数, 字数控制在 {count} 字左右。
+    2. 避免使用 markdown 格式输出文案。
+    3. 仅输出解说文案，不输出任何其他内容。
+    4. 不要包含小标题，每个段落以 \\n 进行分隔。
+    """
+    try:
+        response = _generate_response(prompt, llm_provider)
+        logger.success("解说文案生成成功")
+        logger.debug(response)
+        return response
+    except Exception as err:
+        return handle_exception(err)
+def screen_matching(huamian: str, wenan: str, llm_provider: str):
+    """
+    画面匹配（一次性匹配）
+    """
+    if not huamian:
+        raise ValueError("画面不能为空")
+    if not wenan:
+        raise ValueError("文案不能为空")
+    prompt = """
+    你是一名有10年经验的影视解说创作者，
+    你的任务是根据视频转录脚本和解说文案，匹配出每段解说文案对应的画面时间戳, 结果以 json 格式输出。
+    注意：
+    转录脚本中
+        - timestamp: 表示视频时间戳
+        - picture: 表示当前画面描述
+        - speech": 表示当前视频中人物的台词
+    转录脚本和文案（由 XML 标记<PICTURE></PICTURE>和 <COPYWRITER></COPYWRITER>分隔）如下所示：
+    <PICTURE>
+    %s
+    </PICTURE>
+    <COPYWRITER>
+    %s
+    </COPYWRITER>
+    在匹配的过程中，请通过确保以下条件来完成匹配：
+    - 使用以下 JSON schema:
+        script = {'picture': str, 'timestamp': str(时间戳), "narration": str, "OST": bool(是否开启原声)}
+        Return: list[script]
+    - picture: 字段表示当前画面描述，与转录脚本保持一致
+    - timestamp: 字段表示某一段文案对应的画面的时间戳，不必和转���脚本的时间戳一致，应该充分考虑文案内容，匹配出与其描述最匹配的时间戳
+        - 请注意，请严格的执行已经出现的画面不能重复出现，即生成的脚本中 timestamp 不能有重叠的部分。
+    - narration: 字段表示需要解说文案，每段解说文案尽量不要超过30字
+    - OST: 字段表示是否开启原声，即当 OST 字段为 true 时，narration 字段为空字符串，当 OST 为 false 时，narration 字段为对应的解说文案
+    - 注意，在画面匹配的过程中，需要适当的加入原声播放，使得解说和画面更加匹配，请按照 1:1 的比例，生成原声和解说的脚本内容。
+    - 注意，在时间戳匹配上，一定不能原样照搬“转录脚本”，应当适当的合并或者删减一些片段。
+    - 注意，第一个画面一定是原声播放并且时长不少于 20 s，为了吸引观众，第一段一定是整个转录脚本中最精彩的片段。
+    - 请以严格的 JSON 格式返回数据，不要包含任何注释、标记或其他字符。数据应符合 JSON 语法，可以被 json.loads() 函数直接解析， 不要添加 ```json 或其他标记。
+    """ % (huamian, wenan)
+    try:
+        response = _generate_response(prompt, llm_provider)
+        logger.success("匹配成功")
+        logger.debug(response)
+        return response
+    except Exception as err:
+        return handle_exception(err)
+if __name__ == "__main__":
+    # 1. 视频转录
+    video_subject = "第二十条之无罪释放"
+    video_path = "/Users/apple/Desktop/home/pipedream_project/downloads/jianzao.mp4"
+    language = "zh-CN"
+    gemini_video_transcription(
+        video_name=video_subject,
+        video_path=video_path,
+        language=language,
+        progress_callback=print,
+        llm_provider_video="gemini"
+    )
+    # # 2. 解说文案
+    # video_path = "/Users/apple/Desktop/home/NarratoAI/resource/videos/1.mp4"
+    # # video_path = "E:\\projects\\NarratoAI\\resource\\videos\\1.mp4"
+    # video_plot = """
+    #     李自忠拿着儿子李牧名下的存折，去银行取钱给儿子救命，却被要求证明"你儿子是你儿子"。
+    # 走投无路时碰到银行被抢劫，劫匪给了他两沓钱救命，李自忠却因此被银行以抢劫罪起诉，并顶格判处20年有期徒刑。
+    # 苏醒后的李牧坚决为父亲做无罪辩护，面对银行的顶级律师团队，他一个法学院大一学生，能否力挽狂澜，创作奇迹？挥法律之利剑 ，持正义之天平！
+    # """
+    # res = generate_script(video_path, video_plot, video_name="第二十条之无罪释放")
+    # # res = generate_script(video_path, video_plot, video_name="海岸")
+    # print("脚本生成成功:\n", res)
+    # res = clean_model_output(res)
+    # aaa = json.loads(res)
+    # print(json.dumps(aaa, indent=2, ensure_ascii=False))

app/services/material.py ADDED Viewed

	@@ -0,0 +1,561 @@

+import os
+import subprocess
+import random
+import traceback
+from urllib.parse import urlencode
+from datetime import datetime
+import json
+import requests
+from typing import List, Optional
+from loguru import logger
+from moviepy.video.io.VideoFileClip import VideoFileClip
+from app.config import config
+from app.models.schema import VideoAspect, VideoConcatMode, MaterialInfo
+from app.utils import utils
+from app.utils import ffmpeg_utils
+requested_count = 0
+def get_api_key(cfg_key: str):
+    api_keys = config.app.get(cfg_key)
+    if not api_keys:
+        raise ValueError(
+            f"\n\n##### {cfg_key} is not set #####\n\nPlease set it in the config.toml file: {config.config_file}\n\n"
+            f"{utils.to_json(config.app)}"
+        )
+    # if only one key is provided, return it
+    if isinstance(api_keys, str):
+        return api_keys
+    global requested_count
+    requested_count += 1
+    return api_keys[requested_count % len(api_keys)]
+def search_videos_pexels(
+    search_term: str,
+    minimum_duration: int,
+    video_aspect: VideoAspect = VideoAspect.portrait,
+) -> List[MaterialInfo]:
+    aspect = VideoAspect(video_aspect)
+    video_orientation = aspect.name
+    video_width, video_height = aspect.to_resolution()
+    api_key = get_api_key("pexels_api_keys")
+    headers = {"Authorization": api_key}
+    # Build URL
+    params = {"query": search_term, "per_page": 20, "orientation": video_orientation}
+    query_url = f"https://api.pexels.com/videos/search?{urlencode(params)}"
+    logger.info(f"searching videos: {query_url}, with proxies: {config.proxy}")
+    try:
+        r = requests.get(
+            query_url,
+            headers=headers,
+            proxies=config.proxy,
+            verify=False,
+            timeout=(30, 60),
+        )
+        response = r.json()
+        video_items = []
+        if "videos" not in response:
+            logger.error(f"search videos failed: {response}")
+            return video_items
+        videos = response["videos"]
+        # loop through each video in the result
+        for v in videos:
+            duration = v["duration"]
+            # check if video has desired minimum duration
+            if duration < minimum_duration:
+                continue
+            video_files = v["video_files"]
+            # loop through each url to determine the best quality
+            for video in video_files:
+                w = int(video["width"])
+                h = int(video["height"])
+                if w == video_width and h == video_height:
+                    item = MaterialInfo()
+                    item.provider = "pexels"
+                    item.url = video["link"]
+                    item.duration = duration
+                    video_items.append(item)
+                    break
+        return video_items
+    except Exception as e:
+        logger.error(f"search videos failed: {str(e)}")
+    return []
+def search_videos_pixabay(
+    search_term: str,
+    minimum_duration: int,
+    video_aspect: VideoAspect = VideoAspect.portrait,
+) -> List[MaterialInfo]:
+    aspect = VideoAspect(video_aspect)
+    video_width, video_height = aspect.to_resolution()
+    api_key = get_api_key("pixabay_api_keys")
+    # Build URL
+    params = {
+        "q": search_term,
+        "video_type": "all",  # Accepted values: "all", "film", "animation"
+        "per_page": 50,
+        "key": api_key,
+    }
+    query_url = f"https://pixabay.com/api/videos/?{urlencode(params)}"
+    logger.info(f"searching videos: {query_url}, with proxies: {config.proxy}")
+    try:
+        r = requests.get(
+            query_url, proxies=config.proxy, verify=False, timeout=(30, 60)
+        )
+        response = r.json()
+        video_items = []
+        if "hits" not in response:
+            logger.error(f"search videos failed: {response}")
+            return video_items
+        videos = response["hits"]
+        # loop through each video in the result
+        for v in videos:
+            duration = v["duration"]
+            # check if video has desired minimum duration
+            if duration < minimum_duration:
+                continue
+            video_files = v["videos"]
+            # loop through each url to determine the best quality
+            for video_type in video_files:
+                video = video_files[video_type]
+                w = int(video["width"])
+                h = int(video["height"])
+                if w >= video_width:
+                    item = MaterialInfo()
+                    item.provider = "pixabay"
+                    item.url = video["url"]
+                    item.duration = duration
+                    video_items.append(item)
+                    break
+        return video_items
+    except Exception as e:
+        logger.error(f"search videos failed: {str(e)}")
+    return []
+def save_video(video_url: str, save_dir: str = "") -> str:
+    if not save_dir:
+        save_dir = utils.storage_dir("cache_videos")
+    if not os.path.exists(save_dir):
+        os.makedirs(save_dir)
+    url_without_query = video_url.split("?")[0]
+    url_hash = utils.md5(url_without_query)
+    video_id = f"vid-{url_hash}"
+    video_path = f"{save_dir}/{video_id}.mp4"
+    # if video already exists, return the path
+    if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
+        logger.info(f"video already exists: {video_path}")
+        return video_path
+    # if video does not exist, download it
+    with open(video_path, "wb") as f:
+        f.write(
+            requests.get(
+                video_url, proxies=config.proxy, verify=False, timeout=(60, 240)
+            ).content
+        )
+    if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
+        try:
+            clip = VideoFileClip(video_path)
+            duration = clip.duration
+            fps = clip.fps
+            clip.close()
+            if duration > 0 and fps > 0:
+                return video_path
+        except Exception as e:
+            try:
+                os.remove(video_path)
+            except Exception as e:
+                logger.warning(f"无效的视频文件: {video_path} => {str(e)}")
+    return ""
+def download_videos(
+    task_id: str,
+    search_terms: List[str],
+    source: str = "pexels",
+    video_aspect: VideoAspect = VideoAspect.portrait,
+    video_contact_mode: VideoConcatMode = VideoConcatMode.random,
+    audio_duration: float = 0.0,
+    max_clip_duration: int = 5,
+) -> List[str]:
+    valid_video_items = []
+    valid_video_urls = []
+    found_duration = 0.0
+    search_videos = search_videos_pexels
+    if source == "pixabay":
+        search_videos = search_videos_pixabay
+    for search_term in search_terms:
+        video_items = search_videos(
+            search_term=search_term,
+            minimum_duration=max_clip_duration,
+            video_aspect=video_aspect,
+        )
+        logger.info(f"found {len(video_items)} videos for '{search_term}'")
+        for item in video_items:
+            if item.url not in valid_video_urls:
+                valid_video_items.append(item)
+                valid_video_urls.append(item.url)
+                found_duration += item.duration
+    logger.info(
+        f"found total videos: {len(valid_video_items)}, required duration: {audio_duration} seconds, found duration: {found_duration} seconds"
+    )
+    video_paths = []
+    material_directory = config.app.get("material_directory", "").strip()
+    if material_directory == "task":
+        material_directory = utils.task_dir(task_id)
+    elif material_directory and not os.path.isdir(material_directory):
+        material_directory = ""
+    if video_contact_mode.value == VideoConcatMode.random.value:
+        random.shuffle(valid_video_items)
+    total_duration = 0.0
+    for item in valid_video_items:
+        try:
+            logger.info(f"downloading video: {item.url}")
+            saved_video_path = save_video(
+                video_url=item.url, save_dir=material_directory
+            )
+            if saved_video_path:
+                logger.info(f"video saved: {saved_video_path}")
+                video_paths.append(saved_video_path)
+                seconds = min(max_clip_duration, item.duration)
+                total_duration += seconds
+                if total_duration > audio_duration:
+                    logger.info(
+                        f"total duration of downloaded videos: {total_duration} seconds, skip downloading more"
+                    )
+                    break
+        except Exception as e:
+            logger.error(f"failed to download video: {utils.to_json(item)} => {str(e)}")
+    logger.success(f"downloaded {len(video_paths)} videos")
+    return video_paths
+def time_to_seconds(time_str: str) -> float:
+    """
+    将时间字符串转换为秒数
+    支持格式: 'HH:MM:SS,mmm' (时:分:秒,毫秒)
+    Args:
+        time_str: 时间字符串,如 "00:00:20,100"
+    Returns:
+        float: 转换后的秒数(包含毫秒)
+    """
+    try:
+        # 处理毫秒部分
+        if ',' in time_str:
+            time_part, ms_part = time_str.split(',')
+            ms = int(ms_part) / 1000
+        else:
+            time_part = time_str
+            ms = 0
+        # 处理时分秒
+        parts = time_part.split(':')
+        if len(parts) == 3:  # HH:MM:SS
+            h, m, s = map(int, parts)
+            seconds = h * 3600 + m * 60 + s
+        else:
+            raise ValueError("时间格式必须为 HH:MM:SS,mmm")
+        return seconds + ms
+    except ValueError as e:
+        logger.error(f"时间格式错误: {time_str}")
+        raise ValueError(f"时间格式错误: 必须为 HH:MM:SS,mmm 格式") from e
+def format_timestamp(seconds: float) -> str:
+    """
+    将秒数转换为可读的时间格式 (HH:MM:SS,mmm)
+    Args:
+        seconds: 秒数(可包含毫秒)
+    Returns:
+        str: 格式化的时间字符串,如 "00:00:20,100"
+    """
+    hours = int(seconds // 3600)
+    minutes = int((seconds % 3600) // 60)
+    seconds_remain = seconds % 60
+    whole_seconds = int(seconds_remain)
+    milliseconds = int((seconds_remain - whole_seconds) * 1000)
+    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"
+def _detect_hardware_acceleration() -> Optional[str]:
+    """
+    检测系统可用的硬件加速器
+    Returns:
+        Optional[str]: 硬件加速参数，如果不支持则返回None
+    """
+    # 使用集中式硬件加速检测
+    hwaccel_type = ffmpeg_utils.get_ffmpeg_hwaccel_type()
+    return hwaccel_type
+def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> str:
+    """
+    Cut the given time range out of a video with ffmpeg and save it as a new file.
+    The output is named 'vid_<start>@<end>.mp4' (colons and commas replaced by hyphens).
+    A non-empty file already present under that name is returned without re-encoding.
+    Args:
+        timestamp: range to cut, formatted as 'HH:MM:SS,mmm-HH:MM:SS,mmm',
+                  e.g. '00:00:00,000-00:00:20,100'
+        origin_video: path of the source video
+        save_dir: output directory; when empty, a directory named after the MD5 of
+                  origin_video under '<utils.temp_dir()>/clip_video' is used
+    Returns:
+        str: path of the clipped video, or '' when the source is missing, the range is
+             invalid, ffmpeg fails, or the result does not pass the ffprobe check
+    """
+    # Default output location: <temp dir>/clip_video/<md5 of source path>
+    if not save_dir:
+        base_dir = os.path.join(utils.temp_dir(), "clip_video")
+        video_hash = utils.md5(origin_video)
+        save_dir = os.path.join(base_dir, video_hash)
+    if not os.path.exists(save_dir):
+        os.makedirs(save_dir)
+    # Split the range into start and end; raises ValueError unless there is exactly one '-'
+    start_str, end_str = timestamp.split('-')
+    # Build the output file name (hyphens replace colons and commas)
+    safe_start_time = start_str.replace(':', '-').replace(',', '-')
+    safe_end_time = end_str.replace(':', '-').replace(',', '-')
+    output_filename = f"vid_{safe_start_time}@{safe_end_time}.mp4"
+    video_path = os.path.join(save_dir, output_filename)
+    # Reuse an existing clip
+    if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
+        logger.info(f"视频已存在: {video_path}")
+        return video_path
+    try:
+        # The source video must exist
+        if not os.path.exists(origin_video):
+            logger.error(f"源视频文件不存在: {origin_video}")
+            return ''
+        # Read the total duration of the source with ffprobe
+        try:
+            probe_cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration",
+                        "-of", "default=noprint_wrappers=1:nokey=1", origin_video]
+            total_duration = float(subprocess.check_output(probe_cmd).decode('utf-8').strip())
+        except subprocess.CalledProcessError as e:
+            logger.error(f"获取视频时长失败: {str(e)}")
+            return ''
+        # Convert both ends of the range to seconds
+        start = time_to_seconds(start_str)
+        end = time_to_seconds(end_str)
+        # Validate the range against the source duration
+        if start >= total_duration:
+            logger.warning(f"起始时间 {format_timestamp(start)} ({start:.3f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.3f}秒)")
+            return ''
+        if end > total_duration:
+            logger.warning(f"结束时间 {format_timestamp(end)} ({end:.3f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.3f}秒)，将自动调整为视频结尾")
+            end = total_duration
+        if end <= start:
+            logger.warning(f"结束时间 {format_timestamp(end)} 必须大于起始时间 {format_timestamp(start)}")
+            return ''
+        # Clip length; only referenced by the commented-out log line below
+        duration = end - start
+        # logger.info(f"开始剪辑视频: {format_timestamp(start)} - {format_timestamp(end)}，时长 {format_timestamp(duration)}")
+        # Hardware acceleration options
+        hwaccel = _detect_hardware_acceleration()
+        hwaccel_args = []
+        if hwaccel:
+            hwaccel_args = ffmpeg_utils.get_ffmpeg_hwaccel_args()
+        # ffmpeg expects '.' as the millisecond separator
+        # NOTE(review): the original start_str/end_str are passed on, so the clamped `end` above is not applied here
+        ffmpeg_start_time = start_str.replace(',', '.')
+        ffmpeg_end_time = end_str.replace(',', '.')
+        # Build the ffmpeg command
+        ffmpeg_cmd = [
+            "ffmpeg", "-y", *hwaccel_args,
+            "-i", origin_video,
+            "-ss", ffmpeg_start_time,
+            "-to", ffmpeg_end_time,
+            "-c:v", "h264_videotoolbox" if hwaccel == "videotoolbox" else "libx264",
+            "-c:a", "aac",
+            "-strict", "experimental",
+            video_path
+        ]
+        # Run ffmpeg
+        # logger.info(f"裁剪视频片段: {timestamp} -> {ffmpeg_start_time}到{ffmpeg_end_time}")
+        # logger.debug(f"执行命令: {' '.join(ffmpeg_cmd)}")
+        # On Windows decode the output as UTF-8 to avoid GBK decoding errors
+        is_windows = os.name == 'nt'
+        if is_windows:
+            process = subprocess.run(
+                ffmpeg_cmd,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                encoding='utf-8',  # explicitly decode output as UTF-8
+                text=True,
+                check=False  # do not raise; the return code is checked below
+            )
+        else:
+            process = subprocess.run(
+                ffmpeg_cmd,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                check=False  # do not raise; the return code is checked below
+            )
+        # A non-zero exit code means the cut failed; remove any partial output
+        if process.returncode != 0:
+            logger.error(f"视频剪辑失败: {process.stderr}")
+            if os.path.exists(video_path):
+                os.remove(video_path)
+            return ''
+        # Validate the generated file
+        if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
+            # ffprobe must be able to read the file
+            probe_cmd = ["ffprobe", "-v", "error", video_path]
+            # On Windows decode the output as UTF-8
+            if is_windows:
+                validate_result = subprocess.run(probe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf-8')
+            else:
+                validate_result = subprocess.run(probe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            if validate_result.returncode == 0:
+                logger.info(f"视频剪辑成功: {video_path}")
+                return video_path
+        # Missing, empty or unreadable output: report and clean up
+        logger.error("视频文件验证失败")
+        if os.path.exists(video_path):
+            os.remove(video_path)
+        return ''
+    except Exception as e:
+        # Any other error (e.g. ffprobe/ffmpeg not found, unparsable timestamp) is logged with its traceback
+        logger.error(f"视频剪辑过程中发生错误: \n{str(traceback.format_exc())}")
+        if os.path.exists(video_path):
+            os.remove(video_path)
+        return ''
+def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, progress_callback=None) -> dict:
+    """
+    剪辑视频
+    Args:
+        task_id: 任务id
+        timestamp_terms: 需要剪辑的时间戳列表，如:['00:00:00,000-00:00:20,100', '00:00:43,039-00:00:46,959']
+        origin_video: 原视频路径
+        progress_callback: 进度回调函数
+    Returns:
+        剪辑后的视频路径
+    """
+    video_paths = {}
+    total_items = len(timestamp_terms)
+    for index, item in enumerate(timestamp_terms):
+        material_directory = config.app.get("material_directory", "").strip()
+        try:
+            saved_video_path = save_clip_video(timestamp=item, origin_video=origin_video, save_dir=material_directory)
+            if saved_video_path:
+                video_paths.update({index+1:saved_video_path})
+            # 更新进度
+            if progress_callback:
+                progress_callback(index + 1, total_items)
+        except Exception as e:
+            logger.error(f"视频裁剪失败: {utils.to_json(item)} =>\n{str(traceback.format_exc())}")
+            return {}
+    logger.success(f"裁剪 {len(video_paths)} videos")
+    # logger.debug(json.dumps(video_paths, indent=4, ensure_ascii=False))
+    return video_paths
+def merge_videos(video_paths, ost_list):
+    """
+    合并多个视频为一个视频，可选择是否保留每个视频的原声。
+    :param video_paths: 视频文件路径列表
+    :param ost_list: 是否保留原声的布尔值列表
+    :return: 合并后的视频文件路径
+    """
+    if len(video_paths) != len(ost_list):
+        raise ValueError("视频路径列表和保留原声列表长度必须相同")
+    if not video_paths:
+        raise ValueError("视频路径列表不能为空")
+    # 准备临时文件列表
+    temp_file = "temp_file_list.txt"
+    with open(temp_file, "w") as f:
+        for video_path, keep_ost in zip(video_paths, ost_list):
+            if keep_ost:
+                f.write(f"file '{video_path}'\n")
+            else:
+                # 如果不保留原声，创建一个无声的临时视频
+                silent_video = f"silent_{os.path.basename(video_path)}"
+                subprocess.run(["ffmpeg", "-i", video_path, "-c:v", "copy", "-an", silent_video], check=True)
+                f.write(f"file '{silent_video}'\n")
+    # 合并视频
+    output_file = "combined.mp4"
+    ffmpeg_cmd = [
+        "ffmpeg",
+        "-f", "concat",
+        "-safe", "0",
+        "-i", temp_file,
+        "-c:v", "copy",
+        "-c:a", "aac",
+        "-strict", "experimental",
+        output_file
+    ]
+    try:
+        subprocess.run(ffmpeg_cmd, check=True)
+        print(f"视频合并成功：{output_file}")
+    except subprocess.CalledProcessError as e:
+        print(f"视频合并失败：{e}")
+        return None
+    finally:
+        # 清理临时文件
+        os.remove(temp_file)
+        for video_path, keep_ost in zip(video_paths, ost_list):
+            if not keep_ost:
+                silent_video = f"silent_{os.path.basename(video_path)}"
+                if os.path.exists(silent_video):
+                    os.remove(silent_video)
+    return output_file

app/services/merger_video.py ADDED Viewed

	@@ -0,0 +1,662 @@

+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''
+@Project: NarratoAI
+@File   : merger_video
+@Author : 小林同学
+@Date   : 2025/5/6 下午7:38
+'''
+import os
+import shutil
+import subprocess
+from enum import Enum
+from typing import List, Optional, Tuple
+from loguru import logger
+from app.utils import ffmpeg_utils
+class VideoAspect(Enum):
+    """视频宽高比枚举"""
+    landscape = "16:9"  # 横屏 16:9
+    landscape_2 = "4:3"
+    portrait = "9:16"   # 竖屏 9:16
+    portrait_2 = "3:4"
+    square = "1:1"      # 方形 1:1
+    def to_resolution(self) -> Tuple[int, int]:
+        """根据宽高比返回标准分辨率"""
+        if self == VideoAspect.portrait:
+            return 1080, 1920  # 竖屏 9:16
+        elif self == VideoAspect.portrait_2:
+            return 720, 1280   # 竖屏 4:3
+        elif self == VideoAspect.landscape:
+            return 1920, 1080  # 横屏 16:9
+        elif self == VideoAspect.landscape_2:
+            return 1280, 720   # 横屏 4:3
+        elif self == VideoAspect.square:
+            return 1080, 1080  # 方形 1:1
+        else:
+            return 1080, 1920  # 默认竖屏
+def check_ffmpeg_installation() -> bool:
+    """
+    检查ffmpeg是否已安装
+    Returns:
+        bool: 如果安装则返回True，否则返回False
+    """
+    try:
+        subprocess.run(['ffmpeg', '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
+        return True
+    except (subprocess.SubprocessError, FileNotFoundError):
+        logger.error("ffmpeg未安装或不在系统PATH中，请安装ffmpeg")
+        return False
+def get_hardware_acceleration_option() -> Optional[str]:
+    """
+    根据系统环境选择合适的硬件加速选项
+    Returns:
+        Optional[str]: 硬件加速参数，如果不支持则返回None
+    """
+    # 使用集中式硬件加速检测
+    return ffmpeg_utils.get_ffmpeg_hwaccel_type()
+def check_video_has_audio(video_path: str) -> bool:
+    """
+    检查视频是否包含音频流
+    Args:
+        video_path: 视频文件路径
+    Returns:
+        bool: 如果视频包含音频流则返回True，否则返回False
+    """
+    if not os.path.exists(video_path):
+        logger.warning(f"视频文件不存在: {video_path}")
+        return False
+    probe_cmd = [
+        'ffprobe', '-v', 'error',
+        '-select_streams', 'a:0',
+        '-show_entries', 'stream=codec_type',
+        '-of', 'csv=p=0',
+        video_path
+    ]
+    try:
+        result = subprocess.run(probe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False)
+        return result.stdout.strip() == 'audio'
+    except Exception as e:
+        logger.warning(f"检测视频音频流时出错: {str(e)}")
+        return False
+def create_ffmpeg_concat_file(video_paths: List[str], concat_file_path: str) -> str:
+    """
+    创建ffmpeg合并所需的concat文件
+    Args:
+        video_paths: 需要合并的视频文件路径列表
+        concat_file_path: concat文件的输出路径
+    Returns:
+        str: concat文件的路径
+    """
+    with open(concat_file_path, 'w', encoding='utf-8') as f:
+        for video_path in video_paths:
+            # 获取绝对路径
+            abs_path = os.path.abspath(video_path)
+            # 在Windows上将反斜杠替换为正斜杠
+            if os.name == 'nt':  # Windows系统
+                abs_path = abs_path.replace('\\', '/')
+            else:  # Unix/Mac系统
+                # 转义特殊字符
+                abs_path = abs_path.replace('\\', '\\\\').replace(':', '\\:')
+            # 处理路径中的单引号 (如果有)
+            abs_path = abs_path.replace("'", "\\'")
+            f.write(f"file '{abs_path}'\n")
+    return concat_file_path
+def process_single_video(
+        input_path: str,
+        output_path: str,
+        target_width: int,
+        target_height: int,
+        keep_audio: bool = True,
+        hwaccel: Optional[str] = None
+) -> str:
+    """
+    处理单个视频：调整分辨率、帧率等
+    Args:
+        input_path: 输入视频路径
+        output_path: 输出视频路径
+        target_width: 目标宽度
+        target_height: 目标高度
+        keep_audio: 是否保留音频
+        hwaccel: 硬件加速选项
+    Returns:
+        str: 处理后的视频路径
+    """
+    if not os.path.exists(input_path):
+        raise FileNotFoundError(f"找不到视频文件: {input_path}")
+    # 构建基本命令
+    command = ['ffmpeg', '-y']
+    # 安全检查：如果在Windows上，则慎用硬件加速
+    is_windows = os.name == 'nt'
+    if is_windows and hwaccel:
+        logger.info("在Windows系统上检测到硬件加速请求，将进行额外的兼容性检查")
+        try:
+            # 对视频进行快速探测，检测其基本信息
+            probe_cmd = [
+                'ffprobe', '-v', 'error',
+                '-select_streams', 'v:0',
+                '-show_entries', 'stream=codec_name,width,height',
+                '-of', 'csv=p=0',
+                input_path
+            ]
+            result = subprocess.run(probe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False)
+            # 如果探测成功，使用硬件加速；否则降级到软件编码
+            if result.returncode != 0:
+                logger.warning(f"视频探测失败，为安全起见，禁用硬件加速: {result.stderr}")
+                hwaccel = None
+        except Exception as e:
+            logger.warning(f"视频探测出错，禁用硬件加速: {str(e)}")
+            hwaccel = None
+    # 添加硬件加速参数（根据前面的安全检查可能已经被禁用）
+    if hwaccel:
+        try:
+            # 使用集中式硬件加速参数
+            hwaccel_args = ffmpeg_utils.get_ffmpeg_hwaccel_args()
+            command.extend(hwaccel_args)
+        except Exception as e:
+            logger.warning(f"应用硬件加速参数时出错: {str(e)}，将使用软件编码")
+            # 重置命令，移除可能添加了一半的硬件加速参数
+            command = ['ffmpeg', '-y']
+    # 输入文件
+    command.extend(['-i', input_path])
+    # 处理音频
+    if not keep_audio:
+        command.extend(['-an'])  # 移除音频
+    else:
+        # 检查输入视频是否有音频流
+        has_audio = check_video_has_audio(input_path)
+        if has_audio:
+            command.extend(['-c:a', 'aac', '-b:a', '128k'])  # 音频编码为AAC
+        else:
+            logger.warning(f"视频 {input_path} 没有音频流，将会忽略音频设置")
+            command.extend(['-an'])  # 没有音频流时移除音频设置
+    # 视频处理参数：缩放并添加填充以保持比例
+    scale_filter = f"scale={target_width}:{target_height}:force_original_aspect_ratio=decrease"
+    pad_filter = f"pad={target_width}:{target_height}:(ow-iw)/2:(oh-ih)/2"
+    command.extend([
+        '-vf', f"{scale_filter},{pad_filter}",
+        '-r', '30',  # 设置帧率为30fps
+    ])
+    # 选择编码器 - 考虑到Windows和特定硬件的兼容性
+    use_software_encoder = True
+    if hwaccel:
+        # 获取硬件加速类型和编码器信息
+        hwaccel_type = ffmpeg_utils.get_ffmpeg_hwaccel_type()
+        hwaccel_encoder = ffmpeg_utils.get_ffmpeg_hwaccel_encoder()
+        if hwaccel_type == 'cuda' or hwaccel_type == 'nvenc':
+            try:
+                # 检查NVENC编码器是否可用
+                encoders_cmd = subprocess.run(
+                    ["ffmpeg", "-hide_banner", "-encoders"],
+                    stderr=subprocess.PIPE, stdout=subprocess.PIPE, text=True, check=False
+                )
+                if "h264_nvenc" in encoders_cmd.stdout.lower():
+                    command.extend(['-c:v', 'h264_nvenc', '-preset', 'p4', '-profile:v', 'high'])
+                    use_software_encoder = False
+                else:
+                    logger.warning("NVENC编码器不可用，将使用软件编码")
+            except Exception as e:
+                logger.warning(f"NVENC编码器检测失败: {str(e)}，将使用软件编码")
+        elif hwaccel_type == 'qsv':
+            command.extend(['-c:v', 'h264_qsv', '-preset', 'medium'])
+            use_software_encoder = False
+        elif hwaccel_type == 'videotoolbox':  # macOS
+            command.extend(['-c:v', 'h264_videotoolbox', '-profile:v', 'high'])
+            use_software_encoder = False
+        elif hwaccel_type == 'vaapi':  # Linux VA-API
+            command.extend(['-c:v', 'h264_vaapi', '-profile', '100'])
+            use_software_encoder = False
+    # 如果前面的条件未能应用硬件编码器，使用软件编码
+    if use_software_encoder:
+        logger.info("使用软件编码器(libx264)")
+        command.extend(['-c:v', 'libx264', '-preset', 'medium', '-profile:v', 'high'])
+    # 设置视频比特率和其他参数
+    command.extend([
+        '-b:v', '5M',
+        '-maxrate', '8M',
+        '-bufsize', '10M',
+        '-pix_fmt', 'yuv420p',  # 兼容性更好的颜色格式
+    ])
+    # 输出文件
+    command.append(output_path)
+    # 执行命令
+    try:
+        # logger.info(f"执行FFmpeg命令: {' '.join(command)}")
+        process = subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        logger.info(f"视频处理成功: {output_path}")
+        return output_path
+    except subprocess.CalledProcessError as e:
+        error_msg = e.stderr.decode() if e.stderr else str(e)
+        logger.error(f"处理视频失败: {error_msg}")
+        # 如果使用硬件加速失败，尝试使用软件编码
+        if hwaccel:
+            logger.info("尝试使用软件编码作为备选方案")
+            try:
+                # 构建新的命令，使用软件编码
+                fallback_cmd = ['ffmpeg', '-y', '-i', input_path]
+                # 保持原有的音频设置
+                if not keep_audio:
+                    fallback_cmd.extend(['-an'])
+                else:
+                    has_audio = check_video_has_audio(input_path)
+                    if has_audio:
+                        fallback_cmd.extend(['-c:a', 'aac', '-b:a', '128k'])
+                    else:
+                        fallback_cmd.extend(['-an'])
+                # 保持原有的视频过滤器
+                fallback_cmd.extend([
+                    '-vf', f"{scale_filter},{pad_filter}",
+                    '-r', '30',
+                    '-c:v', 'libx264',
+                    '-preset', 'medium',
+                    '-profile:v', 'high',
+                    '-b:v', '5M',
+                    '-maxrate', '8M',
+                    '-bufsize', '10M',
+                    '-pix_fmt', 'yuv420p',
+                    output_path
+                ])
+                logger.info(f"执行备选FFmpeg命令: {' '.join(fallback_cmd)}")
+                subprocess.run(fallback_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                logger.info(f"使用软件编码成功处理视频: {output_path}")
+                return output_path
+            except subprocess.CalledProcessError as fallback_error:
+                fallback_error_msg = fallback_error.stderr.decode() if fallback_error.stderr else str(fallback_error)
+                logger.error(f"备选软件编码也失败: {fallback_error_msg}")
+                raise RuntimeError(f"无法处理视频 {input_path}: 硬件加速和软件编码都失败")
+        # 如果不是硬件加速导致的问题，或者备选方案也失败了，抛出原始错误
+        raise RuntimeError(f"处理视频失败: {error_msg}")
+def combine_clip_videos(
+        output_video_path: str,
+        video_paths: List[str],
+        video_ost_list: List[int],
+        video_aspect: VideoAspect = VideoAspect.portrait,
+        threads: int = 4,
+        force_software_encoding: bool = False,  # 新参数，强制使用软件编码
+) -> str:
+    """
+    合并子视频
+    Args:
+        output_video_path: 合并后的存储路径
+        video_paths: 子视频路径列表
+        video_ost_list: 原声播放列表 (0: 不保留原声, 1: 只保留原声, 2: 保留原声并保留解说)
+        video_aspect: 屏幕比例
+        threads: 线程数
+        force_software_encoding: 是否强制使用软件编码（忽略硬件加速检测）
+    Returns:
+        str: 合并后的视频路径
+    """
+    # 检查ffmpeg是否安装
+    if not check_ffmpeg_installation():
+        raise RuntimeError("未找到ffmpeg，请先安装")
+    # 准备输出目录
+    output_dir = os.path.dirname(output_video_path)
+    os.makedirs(output_dir, exist_ok=True)
+    # 获取目标分辨率
+    aspect = VideoAspect(video_aspect)
+    video_width, video_height = aspect.to_resolution()
+    # 检测可用的硬件加速选项
+    hwaccel = None if force_software_encoding else get_hardware_acceleration_option()
+    if hwaccel:
+        logger.info(f"将使用 {hwaccel} 硬件加速")
+    elif force_software_encoding:
+        logger.info("已强制使用软件编码，跳过硬件加速检测")
+    else:
+        logger.info("未检测到兼容的硬件加速，将使用软件编码")
+    # Windows系统上，默认使用软件编码以提高兼容性
+    if os.name == 'nt' and hwaccel:
+        logger.warning("在Windows系统上检测到硬件加速，但为了提高兼容性，建议使用软件编码")
+        # 不强制禁用hwaccel，而是在process_single_video中进行额外安全检查
+    # 重组视频路径和原声设置为一个字典列表结构
+    video_segments = []
+    # 检查视频路径和原声设置列表长度是否匹配
+    if len(video_paths) != len(video_ost_list):
+        logger.warning(f"视频路径列表({len(video_paths)})和原声设置列表({len(video_ost_list)})长度不匹配")
+        # 调整长度以匹配较短的列表
+        min_length = min(len(video_paths), len(video_ost_list))
+        video_paths = video_paths[:min_length]
+        video_ost_list = video_ost_list[:min_length]
+    # 创建视频处理配置字典列表
+    for i, (video_path, video_ost) in enumerate(zip(video_paths, video_ost_list)):
+        if not os.path.exists(video_path):
+            logger.warning(f"视频不存在，跳过: {video_path}")
+            continue
+        # 检查是否有音频流
+        has_audio = check_video_has_audio(video_path)
+        # 构建视频片段配置
+        segment = {
+            "index": i,
+            "path": video_path,
+            "ost": video_ost,
+            "has_audio": has_audio,
+            "keep_audio": video_ost > 0 and has_audio  # 只有当ost>0且实际有音频时才保留
+        }
+        # 记录日志
+        if video_ost > 0 and not has_audio:
+            logger.warning(f"视频 {video_path} 设置为保留原声(ost={video_ost})，但该视频没有音频流")
+        video_segments.append(segment)
+    # 处理每个视频片段
+    processed_videos = []
+    temp_dir = os.path.join(output_dir, "temp_videos")
+    os.makedirs(temp_dir, exist_ok=True)
+    try:
+        # 第一阶段：处理所有视频片段到中间文件
+        for segment in video_segments:
+            # 处理单个视频，去除或保留音频
+            temp_output = os.path.join(temp_dir, f"processed_{segment['index']}.mp4")
+            try:
+                process_single_video(
+                    input_path=segment['path'],
+                    output_path=temp_output,
+                    target_width=video_width,
+                    target_height=video_height,
+                    keep_audio=segment['keep_audio'],
+                    hwaccel=hwaccel
+                )
+                processed_videos.append({
+                    "index": segment["index"],
+                    "path": temp_output,
+                    "keep_audio": segment["keep_audio"]
+                })
+                logger.info(f"视频 {segment['index'] + 1}/{len(video_segments)} 处理完成")
+            except Exception as e:
+                logger.error(f"处理视频 {segment['path']} 时出错: {str(e)}")
+                # 如果使用硬件加速失败，尝试使用软件编码
+                if hwaccel and not force_software_encoding:
+                    logger.info(f"尝试使用软件编码处理视频 {segment['path']}")
+                    try:
+                        process_single_video(
+                            input_path=segment['path'],
+                            output_path=temp_output,
+                            target_width=video_width,
+                            target_height=video_height,
+                            keep_audio=segment['keep_audio'],
+                            hwaccel=None  # 使用软件编码
+                        )
+                        processed_videos.append({
+                            "index": segment["index"],
+                            "path": temp_output,
+                            "keep_audio": segment["keep_audio"]
+                        })
+                        logger.info(f"使用软件编码成功处理视频 {segment['index'] + 1}/{len(video_segments)}")
+                    except Exception as fallback_error:
+                        logger.error(f"使用软件编码处理视频 {segment['path']} 也失败: {str(fallback_error)}")
+                        continue
+                else:
+                    continue
+        if not processed_videos:
+            raise ValueError("没有有效的视频片段可以合并")
+        # 按原始索引排序处理后的视频
+        processed_videos.sort(key=lambda x: x["index"])
+        # 第二阶段：分步骤合并视频 - 避免复杂的filter_complex滤镜
+        try:
+            # 1. 首先，将所有没有音频的视频或音频被禁用的视频合并到一个临时文件中
+            video_paths_only = [video["path"] for video in processed_videos]
+            video_concat_path = os.path.join(temp_dir, "video_concat.mp4")
+            # 创建concat文件，用于合并视频流
+            concat_file = os.path.join(temp_dir, "concat_list.txt")
+            create_ffmpeg_concat_file(video_paths_only, concat_file)
+            # 合并所有视频流，但不包含音频
+            concat_cmd = [
+                'ffmpeg', '-y',
+                '-f', 'concat',
+                '-safe', '0',
+                '-i', concat_file,
+                '-c:v', 'libx264',
+                '-preset', 'medium',
+                '-profile:v', 'high',
+                '-an',  # 不包含音频
+                '-threads', str(threads),
+                video_concat_path
+            ]
+            subprocess.run(concat_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            logger.info("视频流合并完成")
+            # 2. 提取并合并有音频的片段
+            audio_segments = [video for video in processed_videos if video["keep_audio"]]
+            if not audio_segments:
+                # 如果没有音频片段，直接使用无音频的合并视频作为最终结果
+                shutil.copy(video_concat_path, output_video_path)
+                logger.info("无音频视频合并完成")
+                return output_video_path
+            # 创建音频中间文件
+            audio_files = []
+            for i, segment in enumerate(audio_segments):
+                # 提取音频
+                audio_file = os.path.join(temp_dir, f"audio_{i}.aac")
+                extract_audio_cmd = [
+                    'ffmpeg', '-y',
+                    '-i', segment["path"],
+                    '-vn',  # 不包含视频
+                    '-c:a', 'aac',
+                    '-b:a', '128k',
+                    audio_file
+                ]
+                subprocess.run(extract_audio_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                audio_files.append({
+                    "index": segment["index"],
+                    "path": audio_file
+                })
+                logger.info(f"提取音频 {i+1}/{len(audio_segments)} 完成")
+            # 3. 计算每个音频片段的时间位置
+            audio_timings = []
+            current_time = 0.0
+            # 获取每个视频片段的时长
+            for i, video in enumerate(processed_videos):
+                duration_cmd = [
+                    'ffprobe', '-v', 'error',
+                    '-show_entries', 'format=duration',
+                    '-of', 'csv=p=0',
+                    video["path"]
+                ]
+                result = subprocess.run(duration_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+                duration = float(result.stdout.strip())
+                # 如果当前片段需要保留音频，记录时间位置
+                if video["keep_audio"]:
+                    for audio in audio_files:
+                        if audio["index"] == video["index"]:
+                            audio_timings.append({
+                                "file": audio["path"],
+                                "start": current_time,
+                                "index": video["index"]
+                            })
+                            break
+                current_time += duration
+            # 4. 创建静音音频轨道作为基础
+            silence_audio = os.path.join(temp_dir, "silence.aac")
+            create_silence_cmd = [
+                'ffmpeg', '-y',
+                '-f', 'lavfi',
+                '-i', f'anullsrc=r=44100:cl=stereo',
+                '-t', str(current_time),  # 总时长
+                '-c:a', 'aac',
+                '-b:a', '128k',
+                silence_audio
+            ]
+            subprocess.run(create_silence_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            # 5. 创建复杂滤镜命令以混合音频
+            filter_script = os.path.join(temp_dir, "filter_script.txt")
+            with open(filter_script, 'w') as f:
+                f.write(f"[0:a]volume=0.0[silence];\n")  # 首先静音背景轨道
+                # 添加每个音频文件
+                for i, timing in enumerate(audio_timings):
+                    f.write(f"[{i+1}:a]adelay={int(timing['start']*1000)}|{int(timing['start']*1000)}[a{i}];\n")
+                # 混合所有音频
+                mix_str = "[silence]"
+                for i in range(len(audio_timings)):
+                    mix_str += f"[a{i}]"
+                mix_str += f"amix=inputs={len(audio_timings)+1}:duration=longest[aout]"
+                f.write(mix_str)
+            # 6. 构建音频合并命令
+            audio_inputs = ['-i', silence_audio]
+            for timing in audio_timings:
+                audio_inputs.extend(['-i', timing["file"]])
+            mixed_audio = os.path.join(temp_dir, "mixed_audio.aac")
+            audio_mix_cmd = [
+                'ffmpeg', '-y'
+            ] + audio_inputs + [
+                '-filter_complex_script', filter_script,
+                '-map', '[aout]',
+                '-c:a', 'aac',
+                '-b:a', '128k',
+                mixed_audio
+            ]
+            subprocess.run(audio_mix_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            logger.info("音频混合完成")
+            # 7. 将合并的视频和混合的音频组合在一起
+            final_cmd = [
+                'ffmpeg', '-y',
+                '-i', video_concat_path,
+                '-i', mixed_audio,
+                '-c:v', 'copy',
+                '-c:a', 'aac',
+                '-map', '0:v:0',
+                '-map', '1:a:0',
+                '-shortest',
+                output_video_path
+            ]
+            subprocess.run(final_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            logger.info("视频最终合并完成")
+            return output_video_path
+        except subprocess.CalledProcessError as e:
+            logger.error(f"合并视频过程中出错: {e.stderr.decode() if e.stderr else str(e)}")
+            # 尝试备用合并方法 - 最简单的无音频合并
+            logger.info("尝试备用合并方法 - 无音频合并")
+            try:
+                concat_file = os.path.join(temp_dir, "concat_list.txt")
+                video_paths_only = [video["path"] for video in processed_videos]
+                create_ffmpeg_concat_file(video_paths_only, concat_file)
+                backup_cmd = [
+                    'ffmpeg', '-y',
+                    '-f', 'concat',
+                    '-safe', '0',
+                    '-i', concat_file,
+                    '-c:v', 'copy',
+                    '-an',  # 无音频
+                    output_video_path
+                ]
+                subprocess.run(backup_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                logger.warning("使用备用方法（无音频）成功合并视频")
+                return output_video_path
+            except Exception as backup_error:
+                logger.error(f"备用合并方法也失败: {str(backup_error)}")
+                raise RuntimeError(f"无法合并视频: {str(backup_error)}")
+    except Exception as e:
+        logger.error(f"合并视频时出错: {str(e)}")
+        raise
+    finally:
+        # 清理临时文件
+        try:
+            if os.path.exists(temp_dir):
+                shutil.rmtree(temp_dir)
+                logger.info("已清理临时文件")
+        except Exception as e:
+            logger.warning(f"清理临时文件时出错: {str(e)}")
+if __name__ == '__main__':
+    video_paths = [
+        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/S01E02_00_14_09_440.mp4',
+        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/S01E08_00_27_11_110.mp4',
+        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/S01E08_00_34_44_480.mp4',
+        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/S01E08_00_42_47_630.mp4',
+        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/S01E09_00_29_48_160.mp4'
+        ]
+    combine_clip_videos(
+        output_video_path="/Users/apple/Desktop/home/NarratoAI/storage/temp/merge/merged_123.mp4",
+        video_paths=video_paths,
+        video_ost_list=[1, 1, 1,1,1],
+        video_aspect=VideoAspect.portrait,
+        force_software_encoding=False  # 默认不强制使用软件编码，让系统自动决定
+    )

app/services/script_service.py ADDED Viewed

	@@ -0,0 +1,400 @@

+import os
+import json
+import time
+import asyncio
+import requests
+from app.utils import video_processor
+from loguru import logger
+from typing import List, Dict, Any, Callable
+from app.utils import utils, gemini_analyzer, video_processor
+from app.utils.script_generator import ScriptProcessor
+from app.config import config
+class ScriptGenerator:
+    def __init__(self):
+        self.temp_dir = utils.temp_dir()
+        self.keyframes_dir = os.path.join(self.temp_dir, "keyframes")
+    async def generate_script(
+        self,
+        video_path: str,
+        video_theme: str = "",
+        custom_prompt: str = "",
+        frame_interval_input: int = 5,
+        skip_seconds: int = 0,
+        threshold: int = 30,
+        vision_batch_size: int = 5,
+        vision_llm_provider: str = "gemini",
+        progress_callback: Callable[[float, str], None] = None
+    ) -> List[Dict[Any, Any]]:
+        """
+        生成视频脚本的核心逻辑
+        Args:
+            video_path: 视频文件路径
+            video_theme: 视频主题
+            custom_prompt: 自定义提示词
+            skip_seconds: 跳过开始的秒数
+            threshold: 差异���值
+            vision_batch_size: 视觉处理批次大小
+            vision_llm_provider: 视觉模型提供商
+            progress_callback: 进度回调函数
+        Returns:
+            List[Dict]: 生成的视频脚本
+        """
+        if progress_callback is None:
+            progress_callback = lambda p, m: None
+        try:
+            # 提取关键帧
+            progress_callback(10, "正在提取关键帧...")
+            keyframe_files = await self._extract_keyframes(
+                video_path,
+                skip_seconds,
+                threshold
+            )
+            if vision_llm_provider == "gemini":
+                script = await self._process_with_gemini(
+                    keyframe_files,
+                    video_theme,
+                    custom_prompt,
+                    vision_batch_size,
+                    progress_callback
+                )
+            elif vision_llm_provider == "narratoapi":
+                script = await self._process_with_narrato(
+                    keyframe_files,
+                    video_theme,
+                    custom_prompt,
+                    vision_batch_size,
+                    progress_callback
+                )
+            else:
+                raise ValueError(f"Unsupported vision provider: {vision_llm_provider}")
+            return json.loads(script) if isinstance(script, str) else script
+        except Exception as e:
+            logger.exception("Generate script failed")
+            raise
+    async def _extract_keyframes(
+        self,
+        video_path: str,
+        skip_seconds: int,
+        threshold: int
+    ) -> List[str]:
+        """提取视频关键帧"""
+        video_hash = utils.md5(video_path + str(os.path.getmtime(video_path)))
+        video_keyframes_dir = os.path.join(self.keyframes_dir, video_hash)
+        # 检查缓存
+        keyframe_files = []
+        if os.path.exists(video_keyframes_dir):
+            for filename in sorted(os.listdir(video_keyframes_dir)):
+                if filename.endswith('.jpg'):
+                    keyframe_files.append(os.path.join(video_keyframes_dir, filename))
+            if keyframe_files:
+                logger.info(f"Using cached keyframes: {video_keyframes_dir}")
+                return keyframe_files
+        # 提取新的关键帧
+        os.makedirs(video_keyframes_dir, exist_ok=True)
+        try:
+            processor = video_processor.VideoProcessor(video_path)
+            processor.process_video_pipeline(
+                output_dir=video_keyframes_dir,
+                skip_seconds=skip_seconds,
+                threshold=threshold
+            )
+            for filename in sorted(os.listdir(video_keyframes_dir)):
+                if filename.endswith('.jpg'):
+                    keyframe_files.append(os.path.join(video_keyframes_dir, filename))
+            return keyframe_files
+        except Exception as e:
+            if os.path.exists(video_keyframes_dir):
+                import shutil
+                shutil.rmtree(video_keyframes_dir)
+            raise
+    async def _process_with_gemini(
+        self,
+        keyframe_files: List[str],
+        video_theme: str,
+        custom_prompt: str,
+        vision_batch_size: int,
+        progress_callback: Callable[[float, str], None]
+    ) -> str:
+        """使用Gemini处理视频帧"""
+        progress_callback(30, "正在初始化视觉分析器...")
+        # 获取Gemini配置
+        vision_api_key = config.app.get("vision_gemini_api_key")
+        vision_model = config.app.get("vision_gemini_model_name")
+        if not vision_api_key or not vision_model:
+            raise ValueError("未配置 Gemini API Key 或者模型")
+        analyzer = gemini_analyzer.VisionAnalyzer(
+            model_name=vision_model,
+            api_key=vision_api_key,
+        )
+        progress_callback(40, "正在分析关键帧...")
+        # 执行异步分析
+        results = await analyzer.analyze_images(
+            images=keyframe_files,
+            prompt=config.app.get('vision_analysis_prompt'),
+            batch_size=vision_batch_size
+        )
+        progress_callback(60, "正在整理分析结果...")
+        # 合并所有批次的分析结果
+        frame_analysis = ""
+        prev_batch_files = None
+        for result in results:
+            if 'error' in result:
+                logger.warning(f"批次 {result['batch_index']} 处理出现警告: {result['error']}")
+                continue
+            batch_files = self._get_batch_files(keyframe_files, result, vision_batch_size)
+            first_timestamp, last_timestamp, _ = self._get_batch_timestamps(batch_files, prev_batch_files)
+            # 添加带时间戳的分��结果
+            frame_analysis += f"\n=== {first_timestamp}-{last_timestamp} ===\n"
+            frame_analysis += result['response']
+            frame_analysis += "\n"
+            prev_batch_files = batch_files
+        if not frame_analysis.strip():
+            raise Exception("未能生成有效的帧分析结果")
+        progress_callback(70, "正在生成脚本...")
+        # 构建帧内容列表
+        frame_content_list = []
+        prev_batch_files = None
+        for result in results:
+            if 'error' in result:
+                continue
+            batch_files = self._get_batch_files(keyframe_files, result, vision_batch_size)
+            _, _, timestamp_range = self._get_batch_timestamps(batch_files, prev_batch_files)
+            frame_content = {
+                "timestamp": timestamp_range,
+                "picture": result['response'],
+                "narration": "",
+                "OST": 2
+            }
+            frame_content_list.append(frame_content)
+            prev_batch_files = batch_files
+        if not frame_content_list:
+            raise Exception("没有有效的帧内容可以处理")
+        progress_callback(90, "正在生成文案...")
+        # 获取文本生��配置
+        text_provider = config.app.get('text_llm_provider', 'gemini').lower()
+        text_api_key = config.app.get(f'text_{text_provider}_api_key')
+        text_model = config.app.get(f'text_{text_provider}_model_name')
+        processor = ScriptProcessor(
+            model_name=text_model,
+            api_key=text_api_key,
+            prompt=custom_prompt,
+            video_theme=video_theme
+        )
+        return processor.process_frames(frame_content_list)
+    async def _process_with_narrato(
+        self,
+        keyframe_files: List[str],
+        video_theme: str,
+        custom_prompt: str,
+        vision_batch_size: int,
+        progress_callback: Callable[[float, str], None]
+    ) -> str:
+        """使用NarratoAPI处理视频帧"""
+        # 创建临时目录
+        temp_dir = utils.temp_dir("narrato")
+        # 打包关键帧
+        progress_callback(30, "正在打包关键帧...")
+        zip_path = os.path.join(temp_dir, f"keyframes_{int(time.time())}.zip")
+        try:
+            if not utils.create_zip(keyframe_files, zip_path):
+                raise Exception("打包关键帧失败")
+            # 获取API配置
+            api_url = config.app.get("narrato_api_url")
+            api_key = config.app.get("narrato_api_key")
+            if not api_key:
+                raise ValueError("未配置 Narrato API Key")
+            headers = {
+                'X-API-Key': api_key,
+                'accept': 'application/json'
+            }
+            api_params = {
+                'batch_size': vision_batch_size,
+                'use_ai': False,
+                'start_offset': 0,
+                'vision_model': config.app.get('narrato_vision_model', 'gemini-1.5-flash'),
+                'vision_api_key': config.app.get('narrato_vision_key'),
+                'llm_model': config.app.get('narrato_llm_model', 'qwen-plus'),
+                'llm_api_key': config.app.get('narrato_llm_key'),
+                'custom_prompt': custom_prompt
+            }
+            progress_callback(40, "正在上传文件...")
+            with open(zip_path, 'rb') as f:
+                files = {'file': (os.path.basename(zip_path), f, 'application/x-zip-compressed')}
+                response = requests.post(
+                    f"{api_url}/video/analyze",
+                    headers=headers,
+                    params=api_params,
+                    files=files,
+                    timeout=30
+                )
+                response.raise_for_status()
+            task_data = response.json()
+            task_id = task_data["data"].get('task_id')
+            if not task_id:
+                raise Exception(f"无效的API��应: {response.text}")
+            progress_callback(50, "正在等待分析结果...")
+            retry_count = 0
+            max_retries = 60
+            while retry_count < max_retries:
+                try:
+                    status_response = requests.get(
+                        f"{api_url}/video/tasks/{task_id}",
+                        headers=headers,
+                        timeout=10
+                    )
+                    status_response.raise_for_status()
+                    task_status = status_response.json()['data']
+                    if task_status['status'] == 'SUCCESS':
+                        return task_status['result']['data']
+                    elif task_status['status'] in ['FAILURE', 'RETRY']:
+                        raise Exception(f"任务失败: {task_status.get('error')}")
+                    retry_count += 1
+                    time.sleep(2)
+                except requests.RequestException as e:
+                    logger.warning(f"获取任务状态失败，重试中: {str(e)}")
+                    retry_count += 1
+                    time.sleep(2)
+                    continue
+            raise Exception("任务执行超时")
+        finally:
+            # 清理临时文件
+            try:
+                if os.path.exists(zip_path):
+                    os.remove(zip_path)
+            except Exception as e:
+                logger.warning(f"清理临时文件失败: {str(e)}")
+    def _get_batch_files(
+        self,
+        keyframe_files: List[str],
+        result: Dict[str, Any],
+        batch_size: int
+    ) -> List[str]:
+        """获取当前批次的图片文件"""
+        batch_start = result['batch_index'] * batch_size
+        batch_end = min(batch_start + batch_size, len(keyframe_files))
+        return keyframe_files[batch_start:batch_end]
+    def _get_batch_timestamps(
+        self,
+        batch_files: List[str],
+        prev_batch_files: List[str] = None
+    ) -> tuple[str, str, str]:
+        """获取一批文件的时间戳范围，支持毫秒级精度"""
+        if not batch_files:
+            logger.warning("Empty batch files")
+            return "00:00:00,000", "00:00:00,000", "00:00:00,000-00:00:00,000"
+        if len(batch_files) == 1 and prev_batch_files and len(prev_batch_files) > 0:
+            first_frame = os.path.basename(prev_batch_files[-1])
+            last_frame = os.path.basename(batch_files[0])
+        else:
+            first_frame = os.path.basename(batch_files[0])
+            last_frame = os.path.basename(batch_files[-1])
+        first_time = first_frame.split('_')[2].replace('.jpg', '')
+        last_time = last_frame.split('_')[2].replace('.jpg', '')
+        def format_timestamp(time_str: str) -> str:
+            """将时间字符串转换为 HH:MM:SS,mmm 格式"""
+            try:
+                if len(time_str) < 4:
+                    logger.warning(f"Invalid timestamp format: {time_str}")
+                    return "00:00:00,000"
+                # 处理毫秒部分
+                if ',' in time_str:
+                    time_part, ms_part = time_str.split(',')
+                    ms = int(ms_part)
+                else:
+                    time_part = time_str
+                    ms = 0
+                # 处理时分秒
+                parts = time_part.split(':')
+                if len(parts) == 3:  # HH:MM:SS
+                    h, m, s = map(int, parts)
+                elif len(parts) == 2:  # MM:SS
+                    h = 0
+                    m, s = map(int, parts)
+                else:  # SS
+                    h = 0
+                    m = 0
+                    s = int(parts[0])
+                # 处理进位
+                if s >= 60:
+                    m += s // 60
+                    s = s % 60
+                if m >= 60:
+                    h += m // 60
+                    m = m % 60
+                return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
+            except Exception as e:
+                logger.error(f"时间戳格式转换错误 {time_str}: {str(e)}")
+                return "00:00:00,000"
+        first_timestamp = format_timestamp(first_time)
+        last_timestamp = format_timestamp(last_time)
+        timestamp_range = f"{first_timestamp}-{last_timestamp}"
+        return first_timestamp, last_timestamp, timestamp_range

app/services/state.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import ast
+from abc import ABC, abstractmethod
+from app.config import config
+from app.models import const
+# Base class for state management
+class BaseState(ABC):
+    @abstractmethod
+    def update_task(self, task_id: str, state: int, progress: int = 0, **kwargs):
+        pass
+    @abstractmethod
+    def get_task(self, task_id: str):
+        pass
+# Memory state management
+class MemoryState(BaseState):
+    def __init__(self):
+        self._tasks = {}
+    def update_task(
+        self,
+        task_id: str,
+        state: int = const.TASK_STATE_PROCESSING,
+        progress: int = 0,
+        **kwargs,
+    ):
+        progress = int(progress)
+        if progress > 100:
+            progress = 100
+        self._tasks[task_id] = {
+            "state": state,
+            "progress": progress,
+            **kwargs,
+        }
+    def get_task(self, task_id: str):
+        return self._tasks.get(task_id, None)
+    def delete_task(self, task_id: str):
+        if task_id in self._tasks:
+            del self._tasks[task_id]
+# Redis state management
+class RedisState(BaseState):
+    def __init__(self, host="localhost", port=6379, db=0, password=None):
+        import redis
+        self._redis = redis.StrictRedis(host=host, port=port, db=db, password=password)
+    def update_task(
+        self,
+        task_id: str,
+        state: int = const.TASK_STATE_PROCESSING,
+        progress: int = 0,
+        **kwargs,
+    ):
+        progress = int(progress)
+        if progress > 100:
+            progress = 100
+        fields = {
+            "state": state,
+            "progress": progress,
+            **kwargs,
+        }
+        for field, value in fields.items():
+            self._redis.hset(task_id, field, str(value))
+    def get_task(self, task_id: str):
+        task_data = self._redis.hgetall(task_id)
+        if not task_data:
+            return None
+        task = {
+            key.decode("utf-8"): self._convert_to_original_type(value)
+            for key, value in task_data.items()
+        }
+        return task
+    def delete_task(self, task_id: str):
+        self._redis.delete(task_id)
+    @staticmethod
+    def _convert_to_original_type(value):
+        """
+        Convert the value from byte string to its original data type.
+        You can extend this method to handle other data types as needed.
+        """
+        value_str = value.decode("utf-8")
+        try:
+            # try to convert byte string array to list
+            return ast.literal_eval(value_str)
+        except (ValueError, SyntaxError):
+            pass
+        if value_str.isdigit():
+            return int(value_str)
+        # Add more conversions here if needed
+        return value_str
+# Global state
+_enable_redis = config.app.get("enable_redis", False)
+_redis_host = config.app.get("redis_host", "localhost")
+_redis_port = config.app.get("redis_port", 6379)
+_redis_db = config.app.get("redis_db", 0)
+_redis_password = config.app.get("redis_password", None)
+state = (
+    RedisState(
+        host=_redis_host, port=_redis_port, db=_redis_db, password=_redis_password
+    )
+    if _enable_redis
+    else MemoryState()
+)

app/services/subtitle.py ADDED Viewed

	@@ -0,0 +1,462 @@

+import json
+import os.path
+import re
+import traceback
+from typing import Optional
+# from faster_whisper import WhisperModel
+from timeit import default_timer as timer
+from loguru import logger
+import google.generativeai as genai
+from moviepy import VideoFileClip
+import os
+from app.config import config
+from app.utils import utils
+# Whisper settings read from the [whisper] configuration section, with defaults.
+# NOTE(review): model_size is not referenced in the visible part of this module; create()
+# builds its model path from a fixed directory name — confirm whether it should use this value.
+model_size = config.whisper.get("model_size", "faster-whisper-large-v2")
+# Initial device / compute type; create() overwrites both once it has loaded the model.
+device = config.whisper.get("device", "cpu")
+compute_type = config.whisper.get("compute_type", "int8")
+# Lazily created model instance shared across create() calls; None until the first load.
+model = None
+def create(audio_file, subtitle_file: str = ""):
+    """
+    为给定的音频文件创建字幕文件。
+    参数:
+    - audio_file: 音频文件的路径。
+    - subtitle_file: 字幕文件的输出路径（可选）。如果未提供，将根据音频文件的路径生成字幕文件。
+    返回:
+    无返回值，但会在指定路径生成字幕文件。
+    """
+    global model, device, compute_type
+    if not model:
+        model_path = f"{utils.root_dir()}/app/models/faster-whisper-large-v3"
+        model_bin_file = f"{model_path}/model.bin"
+        if not os.path.isdir(model_path) or not os.path.isfile(model_bin_file):
+            logger.error(
+                "请先下载 whisper 模型\n\n"
+                "********************************************\n"
+                "下载地址：https://huggingface.co/guillaumekln/faster-whisper-large-v2\n"
+                "存放路径：app/models \n"
+                "********************************************\n"
+            )
+            return None
+        # 首先使用CPU模式，不触发CUDA检查
+        use_cuda = False
+        try:
+            # 在函数中延迟导入torch，而不是在全局范围内
+            # 使用安全的方式检查CUDA可用性
+            def check_cuda_available():
+                try:
+                    import torch
+                    return torch.cuda.is_available()
+                except (ImportError, RuntimeError) as e:
+                    logger.warning(f"检查CUDA可用性时出错: {e}")
+                    return False
+            # 仅当明确需要时才检查CUDA
+            use_cuda = check_cuda_available()
+            if use_cuda:
+                logger.info(f"尝试使用 CUDA 加载模型: {model_path}")
+                try:
+                    model = WhisperModel(
+                        model_size_or_path=model_path,
+                        device="cuda",
+                        compute_type="float16",
+                        local_files_only=True
+                    )
+                    device = "cuda"
+                    compute_type = "float16"
+                    logger.info("成功使用 CUDA 加载模型")
+                except Exception as e:
+                    logger.warning(f"CUDA 加载失败，错误信息: {str(e)}")
+                    logger.warning("回退到 CPU 模式")
+                    use_cuda = False
+            else:
+                logger.info("使用 CPU 模式")
+        except Exception as e:
+            logger.warning(f"CUDA检查过程出错: {e}")
+            logger.warning("默认使用CPU模式")
+            use_cuda = False
+        # 如果CUDA不可用或加载失败，使用CPU
+        if not use_cuda:
+            device = "cpu"
+            compute_type = "int8"
+            logger.info(f"使用 CPU 加载模型: {model_path}")
+            model = WhisperModel(
+                model_size_or_path=model_path,
+                device=device,
+                compute_type=compute_type,
+                local_files_only=True
+            )
+        logger.info(f"模型加载完成，使用设备: {device}, 计算类型: {compute_type}")
+    logger.info(f"start, output file: {subtitle_file}")
+    if not subtitle_file:
+        subtitle_file = f"{audio_file}.srt"
+    segments, info = model.transcribe(
+        audio_file,
+        beam_size=5,
+        word_timestamps=True,
+        vad_filter=True,
+        vad_parameters=dict(min_silence_duration_ms=500),
+        initial_prompt="以下是普通话的句子"
+    )
+    logger.info(
+        f"检测到的语言: '{info.language}', probability: {info.language_probability:.2f}"
+    )
+    start = timer()
+    subtitles = []
+    def recognized(seg_text, seg_start, seg_end):
+        seg_text = seg_text.strip()
+        if not seg_text:
+            return
+        msg = "[%.2fs -> %.2fs] %s" % (seg_start, seg_end, seg_text)
+        logger.debug(msg)
+        subtitles.append(
+            {"msg": seg_text, "start_time": seg_start, "end_time": seg_end}
+        )
+    for segment in segments:
+        words_idx = 0
+        words_len = len(segment.words)
+        seg_start = 0
+        seg_end = 0
+        seg_text = ""
+        if segment.words:
+            is_segmented = False
+            for word in segment.words:
+                if not is_segmented:
+                    seg_start = word.start
+                    is_segmented = True
+                seg_end = word.end
+                # 如果包含标点,则断句
+                seg_text += word.word
+                if utils.str_contains_punctuation(word.word):
+                    # remove last char
+                    seg_text = seg_text[:-1]
+                    if not seg_text:
+                        continue
+                    recognized(seg_text, seg_start, seg_end)
+                    is_segmented = False
+                    seg_text = ""
+                if words_idx == 0 and segment.start < word.start:
+                    seg_start = word.start
+                if words_idx == (words_len - 1) and segment.end > word.end:
+                    seg_end = word.end
+                words_idx += 1
+        if not seg_text:
+            continue
+        recognized(seg_text, seg_start, seg_end)
+    end = timer()
+    diff = end - start
+    logger.info(f"complete, elapsed: {diff:.2f} s")
+    idx = 1
+    lines = []
+    for subtitle in subtitles:
+        text = subtitle.get("msg")
+        if text:
+            lines.append(
+                utils.text_to_srt(
+                    idx, text, subtitle.get("start_time"), subtitle.get("end_time")
+                )
+            )
+            idx += 1
+    sub = "\n".join(lines) + "\n"
+    with open(subtitle_file, "w", encoding="utf-8") as f:
+        f.write(sub)
+    logger.info(f"subtitle file created: {subtitle_file}")
+def file_to_subtitles(filename):
+    """
+    Parse an SRT subtitle file into a list of cues.
+    Args:
+        filename (str): Path of the subtitle file.
+    Returns:
+        list: ``(index, time_line, text)`` tuples, where ``index`` is a 1-based
+        running number assigned here, ``time_line`` is the stripped timing line
+        and ``text`` is the stripped cue text. The list is empty when
+        ``filename`` is falsy or does not name an existing file.
+    """
+    if not filename or not os.path.isfile(filename):
+        return []
+    times_texts = []
+    current_times = None
+    current_text = ""
+    with open(filename, "r", encoding="utf-8") as f:
+        for line in f:
+            if re.search(r"[0-9]*:[0-9]*:[0-9]*,[0-9]*", line):
+                # A timing line opens (or re-opens) the cue being collected.
+                current_times = line
+            elif current_times is None:
+                # Cue numbers and stray text outside a cue are ignored.
+                continue
+            elif line.strip() == "":
+                # A blank line terminates the current cue.
+                times_texts.append(
+                    (len(times_texts) + 1, current_times.strip(), current_text.strip())
+                )
+                current_times, current_text = None, ""
+            else:
+                current_text += line
+    # Flush the final cue when the file does not end with a blank line;
+    # without this the last subtitle would be silently dropped.
+    if current_times is not None:
+        times_texts.append(
+            (len(times_texts) + 1, current_times.strip(), current_text.strip())
+        )
+    return times_texts
+def levenshtein_distance(s1, s2):
+    """
+    Return the Levenshtein (edit) distance between two sequences.
+    The distance is the minimum number of single-element insertions,
+    deletions and substitutions needed to turn one sequence into the other.
+    Only two rows of the dynamic-programming table are kept at a time.
+    """
+    # Iterate over the longer sequence in the outer loop, the shorter in the inner.
+    outer, inner = (s2, s1) if len(s1) < len(s2) else (s1, s2)
+    if len(inner) == 0:
+        return len(outer)
+    prev_row = range(len(inner) + 1)
+    for row_number, outer_item in enumerate(outer, start=1):
+        row = [row_number]
+        for col, inner_item in enumerate(inner):
+            row.append(
+                min(
+                    prev_row[col + 1] + 1,  # insertion
+                    row[col] + 1,  # deletion
+                    prev_row[col] + (outer_item != inner_item),  # substitution
+                )
+            )
+        prev_row = row
+    return prev_row[-1]
+def similarity(a, b):
+    """
+    Return a case-insensitive similarity ratio between two strings.
+    The ratio is ``1 - distance / longest_length``, where ``distance`` is the
+    Levenshtein distance between the lower-cased strings.
+    Args:
+        a (str): First string.
+        b (str): Second string.
+    Returns:
+        float: 1.0 for identical strings (including two empty strings),
+        decreasing towards 0.0 as the strings diverge.
+    """
+    lowered_a, lowered_b = a.lower(), b.lower()
+    # Measure the length on the same lower-cased text the distance is computed
+    # on; str.lower() can change the length of some strings.
+    max_length = max(len(lowered_a), len(lowered_b))
+    if max_length == 0:
+        # Two empty strings are identical; avoid dividing by zero.
+        return 1.0
+    distance = levenshtein_distance(lowered_a, lowered_b)
+    return 1 - (distance / max_length)
+def correct(subtitle_file, video_script):
+    """
+    Align the cues of a subtitle file with the lines of the video script.
+    Script lines and subtitle cues are walked in parallel. When a cue does not
+    equal its script line, following cues are merged into it for as long as
+    that raises the similarity to the script line; the merged cue then takes
+    the script line as its text. Script lines left over at the end reuse the
+    remaining cue timings, or a zero-length timing when none remain. The file
+    is rewritten in place only if at least one cue was changed.
+    Args:
+        subtitle_file: Path of the SRT file to check and possibly rewrite.
+        video_script: Full script text; split into lines by punctuation.
+    """
+    subtitle_items = file_to_subtitles(subtitle_file)
+    script_lines = utils.split_string_by_punctuations(video_script)
+    script_total = len(script_lines)
+    subtitle_total = len(subtitle_items)
+    rebuilt_items = []
+    changed = False
+    script_pos = 0
+    subtitle_pos = 0
+    while script_pos < script_total and subtitle_pos < subtitle_total:
+        script_line = script_lines[script_pos].strip()
+        current_item = subtitle_items[subtitle_pos]
+        subtitle_line = current_item[2].strip()
+        if script_line == subtitle_line:
+            # Exact match: keep the cue untouched.
+            rebuilt_items.append(current_item)
+            script_pos += 1
+            subtitle_pos += 1
+            continue
+        combined_subtitle = subtitle_line
+        time_range = current_item[1].split(" --> ")
+        start_time = time_range[0]
+        end_time = time_range[1]
+        lookahead = subtitle_pos + 1
+        # Greedily absorb following cues while doing so improves the match.
+        while lookahead < subtitle_total:
+            candidate = subtitle_items[lookahead][2].strip()
+            extended = combined_subtitle + " " + candidate
+            if similarity(script_line, extended) <= similarity(
+                script_line, combined_subtitle
+            ):
+                break
+            combined_subtitle = extended
+            end_time = subtitle_items[lookahead][1].split(" --> ")[1]
+            lookahead += 1
+        # Both outcomes replace the cue text with the script line; only the
+        # log message differs.
+        if similarity(script_line, combined_subtitle) > 0.8:
+            logger.warning(
+                f"Merged/Corrected - Script: {script_line}, Subtitle: {combined_subtitle}"
+            )
+        else:
+            logger.warning(
+                f"Mismatch - Script: {script_line}, Subtitle: {combined_subtitle}"
+            )
+        rebuilt_items.append(
+            (len(rebuilt_items) + 1, f"{start_time} --> {end_time}", script_line)
+        )
+        changed = True
+        script_pos += 1
+        subtitle_pos = lookahead
+    # Handle the script lines that are left over.
+    while script_pos < script_total:
+        extra_line = script_lines[script_pos]
+        logger.warning(f"Extra script line: {extra_line}")
+        if subtitle_pos < subtitle_total:
+            timing = subtitle_items[subtitle_pos][1]
+            subtitle_pos += 1
+        else:
+            timing = "00:00:00,000 --> 00:00:00,000"
+        rebuilt_items.append((len(rebuilt_items) + 1, timing, extra_line))
+        script_pos += 1
+        changed = True
+    if not changed:
+        logger.success("Subtitle is correct")
+        return
+    with open(subtitle_file, "w", encoding="utf-8") as fd:
+        for number, item in enumerate(rebuilt_items, start=1):
+            fd.write(f"{number}\n{item[1]}\n{item[2]}\n\n")
+    logger.info("Subtitle corrected")
+def create_with_gemini(audio_file: str, subtitle_file: str = "", api_key: Optional[str] = None) -> Optional[str]:
+    """
+    Transcribe an audio file to SRT subtitles with the Gemini model.
+    Args:
+        audio_file: Path of the audio file to transcribe.
+        subtitle_file: Output path; defaults to ``<audio_file>.srt`` when empty.
+        api_key: Gemini API key.
+    Returns:
+        The path of the written subtitle file, or None when no API key was
+        given or the request/transcription failed (the error is logged).
+    """
+    if not api_key:
+        logger.error("Gemini API key is not provided")
+        return None
+    genai.configure(api_key=api_key)
+    logger.info(f"开始使用Gemini模型处理音频文件: {audio_file}")
+    model = genai.GenerativeModel(model_name="gemini-1.5-flash")
+    prompt = "生成这段语音的转录文本。请以SRT格式输出，包含时间戳。"
+    # Audio MIME types keyed by file extension; unknown extensions fall back to WAV.
+    mime_types = {
+        ".wav": "audio/wav",
+        ".mp3": "audio/mp3",
+        ".aiff": "audio/aiff",
+        ".aac": "audio/aac",
+        ".ogg": "audio/ogg",
+        ".flac": "audio/flac",
+    }
+    try:
+        with open(audio_file, "rb") as f:
+            audio_data = f.read()
+        extension = os.path.splitext(audio_file)[1].lower()
+        # The SDK cannot turn bare bytes into a content part; inline media has
+        # to be passed as a {"mime_type", "data"} blob.
+        audio_part = {
+            "mime_type": mime_types.get(extension, "audio/wav"),
+            "data": audio_data,
+        }
+        response = model.generate_content([prompt, audio_part])
+        transcript = response.text
+        if not subtitle_file:
+            subtitle_file = f"{audio_file}.srt"
+        with open(subtitle_file, "w", encoding="utf-8") as f:
+            f.write(transcript)
+        logger.info(f"Gemini生成的字幕文件已保存: {subtitle_file}")
+        return subtitle_file
+    except Exception as e:
+        # Best effort: report the failure and let the caller fall back.
+        logger.error(f"使用Gemini处理音频时出错: {e}")
+        return None
+def extract_audio_and_create_subtitle(video_file: str, subtitle_file: str = "") -> Optional[str]:
+    """
+    Extract the audio track of a video and generate a subtitle file from it.
+    The audio is written to a temporary ``<video name>_audio.wav`` next to the
+    video, transcribed with ``create`` and then removed again.
+    Args:
+        video_file: Path of the video file.
+        subtitle_file: Output subtitle path (optional). When empty,
+            ``<video name>.srt`` next to the video is used.
+    Returns:
+        The subtitle file path, or None if any step failed (the error and
+        traceback are logged).
+    """
+    video = None
+    audio_file = ""
+    audio_started = False
+    try:
+        # Derive the temporary audio path and the default subtitle path
+        # from the video location.
+        video_dir = os.path.dirname(video_file)
+        video_name = os.path.splitext(os.path.basename(video_file))[0]
+        audio_file = os.path.join(video_dir, f"{video_name}_audio.wav")
+        if not subtitle_file:
+            subtitle_file = os.path.join(video_dir, f"{video_name}.srt")
+        logger.info(f"开始从视频提取音频: {video_file}")
+        video = VideoFileClip(video_file)
+        if video.audio is None:
+            # Nothing to transcribe when the clip has no audio track.
+            logger.error(f"处理视频文件时出错: 视频不包含音轨: {video_file}")
+            return None
+        logger.info(f"正在提取音频到: {audio_file}")
+        # From here on the temporary file may exist and must be cleaned up.
+        audio_started = True
+        video.audio.write_audiofile(audio_file, codec='pcm_s16le')
+        logger.info("音频提取完成，开始生成字幕")
+        # Transcribe the audio that was just extracted (not a fixed sample file).
+        create(audio_file, subtitle_file)
+        return subtitle_file
+    except Exception as e:
+        logger.error(f"处理视频文件时出错: {str(e)}")
+        logger.error(traceback.format_exc())
+        return None
+    finally:
+        # Release the clip and remove the temporary audio on success and on
+        # failure; cleanup problems are logged and never mask the result.
+        try:
+            if video is not None:
+                video.close()
+            if audio_started and os.path.exists(audio_file):
+                os.remove(audio_file)
+                logger.info("已清理临时音频文件")
+        except Exception as cleanup_error:
+            logger.warning(f"清理临时文件时出错: {cleanup_error}")
+if __name__ == "__main__":
+    # Manual smoke test; the absolute paths below only exist on the original
+    # developer's machine.
+    task_id = "123456"
+    task_dir = utils.task_dir(task_id)
+    subtitle_file = f"{task_dir}/subtitle_123456.srt"
+    audio_file = "/Users/apple/Desktop/WhisperX-zhuanlu/1_qyn2-2_Vocals.wav"
+    video_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/merge/qyn2-2-720p.mp4"
+    extract_audio_and_create_subtitle(video_file, subtitle_file)
+    # subtitles = file_to_subtitles(subtitle_file)
+    # print(subtitles)
+    # # script_file = f"{task_dir}/script.json"
+    # # with open(script_file, "r") as f:
+    # #     script_content = f.read()
+    # # s = json.loads(script_content)
+    # # script = s.get("script")
+    # #
+    # # correct(subtitle_file, script)
+    # subtitle_file = f"{task_dir}/subtitle111.srt"
+    # create(audio_file, subtitle_file)
+    # # # Process the audio with the Gemini model
+    # # gemini_api_key = config.app.get("gemini_api_key")  # replace with the real API key
+    # # gemini_subtitle_file = create_with_gemini(audio_file, api_key=gemini_api_key)
+    # #
+    # # if gemini_subtitle_file:
+    # #     print(f"Gemini生成的字幕文件: {gemini_subtitle_file}")

app/services/subtitle_merger.py ADDED Viewed

	@@ -0,0 +1,202 @@

+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''
+@Project: NarratoAI
+@File   : subtitle_merger
+@Author : viccy
+@Date   : 2025/5/6 下午4:00
+'''
+import re
+import os
+from datetime import datetime, timedelta
+def parse_time(time_str):
+    """Convert an SRT timestamp ("HH:MM:SS,mmm") into a timedelta."""
+    hour_text, minute_text, remainder = time_str.split(':')
+    second_text, millisecond_text = remainder.split(',')
+    # int() tolerates surrounding whitespace, so stray spaces or a trailing
+    # line break in the fields do not matter.
+    hours, minutes, seconds, milliseconds = map(
+        int, (hour_text, minute_text, second_text, millisecond_text)
+    )
+    return timedelta(
+        hours=hours, minutes=minutes, seconds=seconds, milliseconds=milliseconds
+    )
+def format_time(td):
+    """Render a timedelta as an SRT timestamp ("HH:MM:SS,mmm")."""
+    whole_seconds = int(td.total_seconds())
+    hours, leftover = divmod(whole_seconds, 3600)
+    minutes, seconds = divmod(leftover, 60)
+    # Sub-second part, truncated to milliseconds.
+    millis = td.microseconds // 1000
+    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"
+def parse_edited_time_range(time_range_str):
+    """
+    Extract the start and end of an editedTimeRange string.
+    Accepts "HH:MM:SS-HH:MM:SS" and, additionally, the millisecond form
+    "HH:MM:SS,mmm-HH:MM:SS,mmm". Digits after the comma are read as integer
+    milliseconds, matching parse_time.
+    Args:
+        time_range_str: The range string; may be empty or None.
+    Returns:
+        tuple: ``(start, end)`` as timedelta objects, or ``(None, None)`` when
+        the string is empty or is not a well-formed range.
+    """
+    if not time_range_str:
+        return None, None
+    parts = time_range_str.split('-')
+    if len(parts) != 2:
+        return None, None
+    def _to_timedelta(clock_text):
+        # Split off the optional ",mmm" suffix before reading HH:MM:SS.
+        clock, _, millis = clock_text.strip().partition(',')
+        hours, minutes, seconds = map(int, clock.split(':'))
+        return timedelta(
+            hours=hours,
+            minutes=minutes,
+            seconds=seconds,
+            milliseconds=int(millis) if millis.strip() else 0,
+        )
+    try:
+        return _to_timedelta(parts[0]), _to_timedelta(parts[1])
+    except ValueError:
+        # Non-numeric or wrongly shaped fields: report "no range" the same way
+        # as the other malformed inputs above instead of raising.
+        return None, None
+def merge_subtitle_files(subtitle_items, output_file=None):
+    """
+    Merge several SRT subtitle files into one file on a shared timeline.
+    Each item's cues are shifted by the start of its ``editedTimeRange`` and
+    renumbered consecutively. Items whose subtitle file is missing or whose
+    range cannot be parsed are skipped, as are malformed cues.
+    Args:
+        subtitle_items: List of dicts. The keys read here are ``subtitle``
+            (path of an SRT file), ``editedTimeRange`` ("HH:MM:SS-HH:MM:SS")
+            and ``_id`` (only used in the warning message).
+        output_file: Destination path. When None, a file named
+            ``merged_subtitle_<first start>-<last end>.srt`` is created next
+            to the first item that has a subtitle path.
+    Returns:
+        Path of the merged subtitle file.
+    Raises:
+        ValueError: If ``output_file`` is None and ``subtitle_items`` is
+            empty, so no default path can be derived.
+    """
+    # Parse every range once; it is needed for sorting, offsetting and naming.
+    ranged_items = [
+        (parse_edited_time_range(item.get('editedTimeRange', '')), item)
+        for item in subtitle_items
+    ]
+    # Sort by the start of editedTimeRange; unparsable ranges sort as zero.
+    ranged_items.sort(key=lambda pair: pair[0][0] or timedelta())
+    merged_subtitles = []
+    subtitle_index = 1
+    for (offset_time, _range_end), item in ranged_items:
+        subtitle_path = item.get('subtitle')
+        if not subtitle_path or not os.path.exists(subtitle_path):
+            continue
+        # The start of editedTimeRange is the offset applied to every cue.
+        if offset_time is None:
+            print(f"警告: 无法从项目 {item.get('_id')} 的editedTimeRange中提取时间范围，跳过该项")
+            continue
+        with open(subtitle_path, 'r', encoding='utf-8') as file:
+            content = file.read()
+        # Cues are separated by blank lines.
+        for block in re.split(r'\n\s*\n', content.strip()):
+            lines = block.strip().split('\n')
+            if len(lines) < 3:  # need index, timing and at least one text line
+                continue
+            time_parts = lines[1].split(' --> ')
+            if len(time_parts) != 2:
+                continue
+            try:
+                start_time = parse_time(time_parts[0])
+                end_time = parse_time(time_parts[1])
+            except ValueError:
+                # Malformed timestamp: skip this cue like other malformed
+                # blocks instead of aborting the whole merge.
+                continue
+            # Shift the cue onto the merged timeline and renumber it.
+            adjusted_time_line = (
+                f"{format_time(start_time + offset_time)} --> "
+                f"{format_time(end_time + offset_time)}"
+            )
+            merged_subtitles.append(
+                '\n'.join([str(subtitle_index), adjusted_time_line, *lines[2:]])
+            )
+            subtitle_index += 1
+    # Derive the output path when the caller did not supply one.
+    if output_file is None:
+        if not ranged_items:
+            raise ValueError("cannot derive an output path: subtitle_items is empty")
+        # Use the directory of the first item that actually has a subtitle
+        # path; fall back to the current directory when none has one.
+        dir_path = next(
+            (
+                os.path.dirname(item['subtitle'])
+                for _ranges, item in ranged_items
+                if item.get('subtitle')
+            ),
+            '',
+        )
+        def _stamp(moment):
+            # HH_MM_SS from the full duration (days included); an unparsable
+            # range counts as zero.
+            total = int((moment or timedelta()).total_seconds())
+            return f"{total // 3600:02d}_{total % 3600 // 60:02d}_{total % 60:02d}"
+        first_start_str = _stamp(ranged_items[0][0][0])
+        last_end_str = _stamp(ranged_items[-1][0][1])
+        output_file = os.path.join(dir_path, f"merged_subtitle_{first_start_str}-{last_end_str}.srt")
+    # Join all cues; end the last cue with a blank line too, so that readers
+    # which only emit a cue when they reach a blank line do not lose it.
+    merged_content = '\n\n'.join(merged_subtitles)
+    if merged_content:
+        merged_content += '\n\n'
+    # Write the merged content.
+    with open(output_file, 'w', encoding='utf-8') as file:
+        file.write(merged_content)
+    return output_file
+if __name__ == '__main__':
+    # Sample data for a manual run; the audio/subtitle paths point at files on
+    # the original developer's machine.
+    test_data = [
+        {'picture': '【解说】好的，各位，欢迎回到我的频道！《庆余年 2》刚开播就给了我们一个王炸！范闲在北齐"死"了？这怎么可能！',
+         'timestamp': '00:00:00-00:01:15',
+         'narration': '好的各位，欢迎回到我的频道！《庆余年 2》刚开播就给了我们一个王炸！范闲在北齐"死"了？这怎么可能！上集片尾那个巨大的悬念，这一集就立刻揭晓了！范闲假死归来，他面临的第一个，也是最大的难关，就是如何面对他最敬爱的，同时也是最可怕的那个人——庆帝！',
+         'OST': 0,
+         '_id': 1,
+         'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3',
+         'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt',
+         'sourceTimeRange': '00:00:00-00:00:26',
+         'duration': 26,
+         'editedTimeRange': '00:00:00-00:00:26'
+        },
+        {'picture': '【解说】上一集我们看到，范闲在北齐遭遇了惊天变故，生死不明！',
+         'timestamp': '00:01:15-00:04:40',
+         'narration': '但我们都知道，他绝不可能就这么轻易退场！第二集一开场，范闲就已经秘密回到了京都。他的生死传闻，可不像我们想象中那样只是小范围流传，而是…',
+         'OST': 0,
+         '_id': 2,
+         'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3',
+         'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt',
+         'sourceTimeRange': '00:01:15-00:01:29',
+         'duration': 14,
+         'editedTimeRange': '00:00:26-00:00:40'
+        },
+        {'picture': '【解说】"欺君之罪"！在封建王朝，这可是抄家灭族的大罪！搁一般人，肯定脚底抹油溜之大吉了。',
+         'timestamp': '00:04:58-00:05:45',
+         'narration': '"欺君之罪"！在封建王朝，这可是抄家灭族的大罪！搁一般人，肯定脚底抹油溜之大吉了。但范闲是谁啊？他偏要反其道而行之！他竟然决定，直接去见庆帝！冒着天大的风险，用"假死"这个事实去赌庆帝的态度！',
+         'OST': 0,
+         '_id': 4,
+         'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3',
+         'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt',
+         'sourceTimeRange': '00:04:58-00:05:20',
+         'duration': 22,
+         'editedTimeRange': '00:00:57-00:01:19'
+        },
+        {'picture': '【解说】但想见庆帝，哪有那么容易？范闲艺高人胆大，竟然选择了最激进的方式——闯宫！',
+         'timestamp': '00:05:45-00:06:00',
+         'narration': '但想见庆帝，哪有那么容易？范闲艺高人胆大，竟然选择了最激进的方式——闯宫！',
+         'OST': 0,
+         '_id': 5,
+         'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3',
+         'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt',
+         'sourceTimeRange': '00:05:45-00:05:53',
+         'duration': 8,
+         'editedTimeRange': '00:01:19-00:01:27'
+        }
+    ]
+    # No output_file is passed, so the merged file name is derived from the
+    # first start and last end of the editedTimeRange values.
+    output_file = merge_subtitle_files(test_data)
+    print(f"字幕文件已合并至: {output_file}")

app/services/task.py ADDED Viewed

	@@ -0,0 +1,398 @@

+import math
+import json
+import os.path
+import re
+import traceback
+from os import path
+from loguru import logger
+from app.config import config
+from app.models import const
+from app.models.schema import VideoConcatMode, VideoParams, VideoClipParams
+from app.services import (llm, material, subtitle, video, voice, audio_merger,
+                          subtitle_merger, clip_video, merger_video, update_script, generate_video)
+from app.services import state as sm
+from app.utils import utils
+# def generate_script(task_id, params):
+#     logger.info("\n\n## generating video script")
+#     video_script = params.video_script.strip()
+#     if not video_script:
+#         video_script = llm.generate_script(
+#             video_subject=params.video_subject,
+#             language=params.video_language,
+#             paragraph_number=params.paragraph_number,
+#         )
+#     else:
+#         logger.debug(f"video script: \n{video_script}")
+#     if not video_script:
+#         sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+#         logger.error("failed to generate video script.")
+#         return None
+#     return video_script
+# def generate_terms(task_id, params, video_script):
+#     logger.info("\n\n## generating video terms")
+#     video_terms = params.video_terms
+#     if not video_terms:
+#         video_terms = llm.generate_terms(
+#             video_subject=params.video_subject, video_script=video_script, amount=5
+#         )
+#     else:
+#         if isinstance(video_terms, str):
+#             video_terms = [term.strip() for term in re.split(r"[,，]", video_terms)]
+#         elif isinstance(video_terms, list):
+#             video_terms = [term.strip() for term in video_terms]
+#         else:
+#             raise ValueError("video_terms must be a string or a list of strings.")
+#         logger.debug(f"video terms: {utils.to_json(video_terms)}")
+#     if not video_terms:
+#         sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+#         logger.error("failed to generate video terms.")
+#         return None
+#     return video_terms
+# def save_script_data(task_id, video_script, video_terms, params):
+#     script_file = path.join(utils.task_dir(task_id), "script.json")
+#     script_data = {
+#         "script": video_script,
+#         "search_terms": video_terms,
+#         "params": params,
+#     }
+#     with open(script_file, "w", encoding="utf-8") as f:
+#         f.write(utils.to_json(script_data))
+# def generate_audio(task_id, params, video_script):
+#     logger.info("\n\n## generating audio")
+#     audio_file = path.join(utils.task_dir(task_id), "audio.mp3")
+#     sub_maker = voice.tts(
+#         text=video_script,
+#         voice_name=voice.parse_voice_name(params.voice_name),
+#         voice_rate=params.voice_rate,
+#         voice_file=audio_file,
+#     )
+#     if sub_maker is None:
+#         sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+#         logger.error(
+#             """failed to generate audio:
+# 1. check if the language of the voice matches the language of the video script.
+# 2. check if the network is available. If you are in China, it is recommended to use a VPN and enable the global traffic mode.
+#         """.strip()
+#         )
+#         return None, None, None
+#     audio_duration = math.ceil(voice.get_audio_duration(sub_maker))
+#     return audio_file, audio_duration, sub_maker
+# def generate_subtitle(task_id, params, video_script, sub_maker, audio_file):
+#     if not params.subtitle_enabled:
+#         return ""
+#     subtitle_path = path.join(utils.task_dir(task_id), "subtitle111.srt")
+#     subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
+#     logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")
+#     subtitle_fallback = False
+#     if subtitle_provider == "edge":
+#         voice.create_subtitle(
+#             text=video_script, sub_maker=sub_maker, subtitle_file=subtitle_path
+#         )
+#         if not os.path.exists(subtitle_path):
+#             subtitle_fallback = True
+#             logger.warning("subtitle file not found, fallback to whisper")
+#     if subtitle_provider == "whisper" or subtitle_fallback:
+#         subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
+#         logger.info("\n\n## correcting subtitle")
+#         subtitle.correct(subtitle_file=subtitle_path, video_script=video_script)
+#     subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
+#     if not subtitle_lines:
+#         logger.warning(f"subtitle file is invalid: {subtitle_path}")
+#         return ""
+#     return subtitle_path
+# def get_video_materials(task_id, params, video_terms, audio_duration):
+#     if params.video_source == "local":
+#         logger.info("\n\n## preprocess local materials")
+#         materials = video.preprocess_video(
+#             materials=params.video_materials, clip_duration=params.video_clip_duration
+#         )
+#         if not materials:
+#             sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+#             logger.error(
+#                 "no valid materials found, please check the materials and try again."
+#             )
+#             return None
+#         return [material_info.url for material_info in materials]
+#     else:
+#         logger.info(f"\n\n## downloading videos from {params.video_source}")
+#         downloaded_videos = material.download_videos(
+#             task_id=task_id,
+#             search_terms=video_terms,
+#             source=params.video_source,
+#             video_aspect=params.video_aspect,
+#             video_contact_mode=params.video_concat_mode,
+#             audio_duration=audio_duration * params.video_count,
+#             max_clip_duration=params.video_clip_duration,
+#         )
+#         if not downloaded_videos:
+#             sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+#             logger.error(
+#                 "failed to download videos, maybe the network is not available. if you are in China, please use a VPN."
+#             )
+#             return None
+#         return downloaded_videos
+def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict):
+    """
+    Background task: turn a narration script into the final narrated video.
+    The steps are: load the clip script, synthesize narration audio, cut the
+    source video to match, merge audio and subtitles, concatenate the clips,
+    and finally combine video, narration, subtitles and BGM.
+    Args:
+        task_id: Task ID; also selects the task directory for output files.
+        params: Video parameters.
+        subclip_path_videos: Mapping from script item ``_id`` to a pre-cut
+            clip path, used for items that end up without a ``video`` entry.
+    Returns:
+        dict: ``{"videos": [...], "combined_videos": [...]}`` holding the final
+        video path and the concatenated (pre-mux) video path.
+    Raises:
+        ValueError: If the script file does not exist or cannot be read.
+    """
+    global merged_audio_path, merged_subtitle_path
+    # Reset the module-level results up front so that a failed merge can never
+    # reuse paths left over from an earlier task or hit an undefined name.
+    merged_audio_path = ""
+    merged_subtitle_path = ""
+    logger.info(f"\n\n## 开始任务: {task_id}")
+    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=0)
+    # # Initialize ImageMagick
+    # if not utils.init_imagemagick():
+    #     logger.warning("ImageMagick 初始化失败，字幕可能无法正常显示")
+    # # TTS voice name
+    # voice_name = voice.parse_voice_name(params.voice_name)
+    # 1. Load the clip script
+    logger.info("\n\n## 1. 加载视频脚本")
+    video_script_path = path.join(params.video_clip_json_path)
+    if not path.exists(video_script_path):
+        # No exception is active here, so there is no traceback to attach.
+        logger.error(f"video_script_path: {video_script_path} \n\n")
+        raise ValueError("解说脚本不存在！请检查配置是否正确。")
+    try:
+        with open(video_script_path, "r", encoding="utf-8") as f:
+            list_script = json.load(f)
+        video_list = [i['narration'] for i in list_script]
+        video_ost = [i['OST'] for i in list_script]
+        time_list = [i['timestamp'] for i in list_script]
+        video_script = " ".join(video_list)
+        logger.debug(f"解说完整脚本: \n{video_script}")
+        logger.debug(f"解说 OST 列表: \n{video_ost}")
+        logger.debug(f"解说时间戳列表: \n{time_list}")
+    except Exception as e:
+        # Keep the root cause in the log and on the raised error.
+        logger.error(f"无法读取视频json脚本，请检查脚本格式是否正确: {e}")
+        raise ValueError("无法读取视频json脚本，请检查脚本格式是否正确") from e
+    # 2. Generate the narration audio with TTS
+    logger.info("\n\n## 2. 根据OST设置生成音频列表")
+    # Audio is only generated for OST 0 or 2: OST=0 keeps the narration only,
+    # OST=2 keeps both the narration and the original sound.
+    tts_segments = [
+        segment for segment in list_script
+        if segment['OST'] in [0, 2]
+    ]
+    logger.debug(f"需要生成TTS的片段数: {len(tts_segments)}")
+    tts_results = voice.tts_multiple(
+        task_id=task_id,
+        list_script=tts_segments,  # only the segments that need TTS
+        voice_name=params.voice_name,
+        voice_rate=params.voice_rate,
+        voice_pitch=params.voice_pitch,
+    )
+    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)
+    # """
+    # 3. (optional) generate subtitles with whisper
+    # """
+    # if merged_subtitle_path is None:
+    #     if audio_files:
+    #         merged_subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
+    #         subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
+    #         logger.info(f"\n\n使用 {subtitle_provider} 生成字幕")
+    #
+    #         subtitle.create(
+    #             audio_file=merged_audio_path,
+    #             subtitle_file=merged_subtitle_path,
+    #         )
+    #         subtitle_lines = subtitle.file_to_subtitles(merged_subtitle_path)
+    #         if not subtitle_lines:
+    #             logger.warning(f"字幕文件无效: {merged_subtitle_path}")
+    #
+    # sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)
+    # 3. Clip the video - trim video that runs longer than the audio
+    logger.info("\n\n## 3. 裁剪视频")
+    video_clip_result = clip_video.clip_video(params.video_origin_path, tts_results)
+    # Update the timestamps in list_script
+    tts_clip_result = {tts_result['_id']: tts_result['audio_file'] for tts_result in tts_results}
+    subclip_clip_result = {
+        tts_result['_id']: tts_result['subtitle_file'] for tts_result in tts_results
+    }
+    new_script_list = update_script.update_script_timestamps(list_script, video_clip_result, tts_clip_result, subclip_clip_result)
+    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60)
+    # 4. Merge audio and subtitles
+    logger.info("\n\n## 4. 合并音频和字幕")
+    total_duration = sum(script["duration"] for script in new_script_list)
+    if tts_segments:
+        try:
+            # Merge the audio files
+            merged_audio_path = audio_merger.merge_audio_files(
+                task_id=task_id,
+                total_duration=total_duration,
+                list_script=new_script_list
+            )
+            logger.info(f"音频文件合并成功->{merged_audio_path}")
+            # Merge the subtitle files
+            merged_subtitle_path = subtitle_merger.merge_subtitle_files(new_script_list)
+            logger.info(f"字幕文件合并成功->{merged_subtitle_path}")
+        except Exception as e:
+            # Best effort: carry on with whatever was merged; anything that
+            # failed keeps the empty path set at the top.
+            logger.error(f"合并音频文件失败: {str(e)}")
+    else:
+        logger.warning("没有需要合并的音频/字幕")
+    # 5. Concatenate the video clips
+    final_video_paths = []
+    combined_video_paths = []
+    combined_video_path = path.join(utils.task_dir(task_id), "merger.mp4")
+    logger.info(f"\n\n## 5. 合并视频: => {combined_video_path}")
+    # Items of new_script_list without a video fall back to subclip_path_videos
+    video_clips = [
+        new_script['video'] if new_script.get('video') else subclip_path_videos.get(new_script.get('_id', ''))
+        for new_script in new_script_list
+    ]
+    merger_video.combine_clip_videos(
+        output_video_path=combined_video_path,
+        video_paths=video_clips,
+        video_ost_list=video_ost,
+        video_aspect=params.video_aspect,
+        threads=params.n_threads
+    )
+    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=80)
+    # 6. Combine subtitles / BGM / narration / video
+    output_video_path = path.join(utils.task_dir(task_id), "combined.mp4")
+    logger.info(f"\n\n## 6. 最后一步: 合并字幕/BGM/配音/视频 -> {output_video_path}")
+    # bgm_path = '/Users/apple/Desktop/home/NarratoAI/resource/songs/bgm.mp3'
+    bgm_path = utils.get_bgm_file()
+    options = {
+        'voice_volume': params.tts_volume,  # narration volume
+        'bgm_volume': params.bgm_volume,  # background music volume
+        'original_audio_volume': params.original_volume,  # original video sound volume; 0 means do not keep it
+        'keep_original_audio': True,  # whether to keep the original sound
+        'subtitle_font': params.font_name,  # relative font path, looked up under font_dir()
+        'subtitle_font_size': params.font_size,
+        'subtitle_color': params.text_fore_color,
+        'subtitle_bg_color': None,  # None means a transparent background
+        'subtitle_position': params.subtitle_position,
+        'custom_position': params.custom_position,
+        'threads': params.n_threads
+    }
+    generate_video.merge_materials(
+        video_path=combined_video_path,
+        audio_path=merged_audio_path,
+        subtitle_path=merged_subtitle_path,
+        bgm_path=bgm_path,
+        output_path=output_video_path,
+        options=options
+    )
+    final_video_paths.append(output_video_path)
+    combined_video_paths.append(combined_video_path)
+    logger.success(f"任务 {task_id} 已完成, 生成 {len(final_video_paths)} 个视频.")
+    kwargs = {
+        "videos": final_video_paths,
+        "combined_videos": combined_video_paths
+    }
+    sm.state.update_task(task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs)
+    return kwargs
+def validate_params(video_path, audio_path, output_file, params):
+    """
+    Validate the inputs of a video job and make sure the output directory exists.
+    Args:
+        video_path: Path of the video file.
+        audio_path: Path of the audio file (may be an empty string).
+        output_file: Path of the output file.
+        params: Video parameters.
+    Raises:
+        FileNotFoundError: If the video file, or a given audio file, does not exist.
+        ValueError: If the video path, the output path or ``params`` is empty.
+    """
+    if not video_path:
+        raise ValueError("视频路径不能为空")
+    if not os.path.exists(video_path):
+        raise FileNotFoundError(f"视频文件不存在: {video_path}")
+    # The audio file is only checked when a path was provided.
+    if audio_path and not os.path.exists(audio_path):
+        raise FileNotFoundError(f"音频文件不存在: {audio_path}")
+    if not output_file:
+        raise ValueError("输出文件路径不能为空")
+    # Make sure the output directory exists. A bare file name has no directory
+    # part, and os.makedirs('') would raise, so there is nothing to create then.
+    output_dir = os.path.dirname(output_file)
+    if output_dir and not os.path.exists(output_dir):
+        # exist_ok tolerates another worker creating the directory first.
+        os.makedirs(output_dir, exist_ok=True)
+    if not params:
+        raise ValueError("视频参数不能为空")
+if __name__ == "__main__":
+    # Manual demo run; every path below refers to the original developer's machine.
+    task_id = "demo"
+    # The clips are cut ahead of time so that the videos are easy to inspect.
+    # NOTE(review): the clip file names below read "[email protected]", which
+    # looks like e-mail obfuscation applied when this page was scraped; they
+    # were presumably of the form vid_HH-MM-SS-mmm@HH-MM-SS-mmm.mp4 - confirm
+    # against the repository.
+    subclip_path_videos = {
+        1: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/[email protected]',
+        2: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/[email protected]',
+        3: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/[email protected]',
+        4: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/[email protected]',
+        5: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/[email protected]',
+    }
+    params = VideoClipParams(
+        video_clip_json_path="/Users/apple/Desktop/home/NarratoAI/resource/scripts/2025-0507-223311.json",
+        video_origin_path="/Users/apple/Desktop/home/NarratoAI/resource/videos/merged_video_4938.mp4",
+    )
+    start_subclip(task_id, params, subclip_path_videos)

app/services/update_script.py ADDED Viewed

	@@ -0,0 +1,266 @@

+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''
+@Project: NarratoAI
+@File   : update_script
+@Author : 小林同学
+@Date   : 2025/5/6 下午11:00
+'''
+import re
+import os
+from typing import Dict, List, Any, Tuple, Union
+def extract_timestamp_from_video_path(video_path: str) -> str:
+    """
+    Extract the time range encoded in a clip's file name.
+    Args:
+        video_path: Path of the video file.
+    Returns:
+        The time range as 'HH:MM:SS,sss-HH:MM:SS,sss' (new naming scheme) or
+        'HH:MM:SS-HH:MM:SS' (old naming scheme), or an empty string when the
+        file name matches neither scheme.
+    """
+    base_name = os.path.basename(video_path)
+    # New scheme with milliseconds: vid_00-00-00-000@00-00-00-000.mp4
+    with_millis = re.search(r'vid_(\d{2})-(\d{2})-(\d{2})-(\d{3})@(\d{2})-(\d{2})-(\d{2})-(\d{3})\.mp4', base_name)
+    if with_millis:
+        from_h, from_m, from_s, from_ms, to_h, to_m, to_s, to_ms = with_millis.groups()
+        return f"{from_h}:{from_m}:{from_s},{from_ms}-{to_h}:{to_m}:{to_s},{to_ms}"
+    # Old scheme without milliseconds: vid-00-00-00-00-00-00.mp4
+    legacy = re.search(r'vid-(\d{2}-\d{2}-\d{2})-(\d{2}-\d{2}-\d{2})\.mp4', base_name)
+    if not legacy:
+        return ""
+    begin, finish = (part.replace('-', ':') for part in legacy.groups())
+    return f"{begin}-{finish}"
+def calculate_duration(timestamp: str) -> float:
+    """
+    Length in seconds of a 'start-end' time range.
+    Args:
+        timestamp: range written as 'HH:MM:SS-HH:MM:SS' or 'HH:MM:SS,sss-HH:MM:SS,sss'
+    Returns:
+        End minus start in seconds, rounded to two decimals; 0.0 when the
+        value cannot be parsed (a ValueError or AttributeError is raised)
+    """
+    def _to_seconds(point):
+        # The digits after the comma are read as a decimal fraction of a second.
+        fraction = 0
+        if ',' in point:
+            pieces = point.split(',')
+            fraction = float('0.' + pieces[1])
+            point = pieces[0]
+        hours, minutes, seconds = map(int, point.split(':'))
+        return hours * 3600 + minutes * 60 + seconds + fraction
+    try:
+        begin, finish = timestamp.split('-')
+        begin_seconds = _to_seconds(begin)
+        finish_seconds = _to_seconds(finish)
+        return round(finish_seconds - begin_seconds, 2)
+    except (ValueError, AttributeError):
+        return 0.0
+def update_script_timestamps(
+    script_list: List[Dict[str, Any]],
+    video_result: Dict[Union[str, int], str],
+    audio_result: Dict[Union[str, int], str] = None,
+    subtitle_result: Dict[Union[str, int], str] = None,
+    calculate_edited_timerange: bool = True
+) -> List[Dict[str, Any]]:
+    """
+    Attach clip information to every script item.
+    Each item is shallow-copied (the input dicts are not modified) and receives:
+      * 'video', 'audio', 'subtitle': matching result paths, "" when nothing matches
+      * 'sourceTimeRange': the time range parsed from the clip file name, or the
+        item's original 'timestamp' when no clip range is available
+      * 'duration': length of 'sourceTimeRange' in seconds
+      * 'editedTimeRange': position of the clip in the final cut (optional)
+    Results are looked up by the item's '_id' first and by its original
+    'timestamp' second. The '_id' is used whenever it is not None, so falsy
+    ids such as 0 are matched too.
+    Args:
+        script_list: original script items
+        video_result: clip paths keyed by original timestamp or _id
+        audio_result: audio paths keyed by original timestamp or _id
+        subtitle_result: subtitle paths keyed by original timestamp or _id
+        calculate_edited_timerange: whether to compute and add 'editedTimeRange'
+    Returns:
+        The list of updated item copies, in the input order
+    """
+    def _pick(table, item_id, orig_timestamp, default):
+        # Value stored under the item's _id, else under its timestamp, else default.
+        if not table:
+            return default
+        if item_id is not None and item_id in table:
+            return table[item_id]
+        if orig_timestamp in table:
+            return table[orig_timestamp]
+        return default
+    def _clock(total_seconds):
+        # Whole-second HH:MM:SS rendering; fractions of a second are truncated.
+        hours = int(total_seconds // 3600)
+        minutes = int((total_seconds % 3600) // 60)
+        seconds = int(total_seconds % 60)
+        return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
+    # Time range encoded in each clip's file name, under the same key as the clip.
+    clip_ranges = {}
+    for key, video_path in video_result.items():
+        clip_range = extract_timestamp_from_video_path(video_path)
+        if clip_range:
+            clip_ranges[key] = clip_range
+    updated_script = []
+    # Running total of clip durations, used to place each clip in the final cut.
+    accumulated_duration = 0.0
+    for item in script_list:
+        item_copy = item.copy()
+        item_id = item_copy.get('_id')
+        orig_timestamp = item_copy.get('timestamp', '')
+        item_copy['audio'] = _pick(audio_result, item_id, orig_timestamp, "")
+        item_copy['subtitle'] = _pick(subtitle_result, item_id, orig_timestamp, "")
+        item_copy['video'] = _pick(video_result, item_id, orig_timestamp, "")
+        # Prefer the range taken from the clip; otherwise keep the original timestamp.
+        source_range = _pick(clip_ranges, item_id, orig_timestamp, orig_timestamp)
+        current_duration = 0.0
+        if source_range:
+            item_copy['sourceTimeRange'] = source_range
+            current_duration = calculate_duration(source_range)
+            item_copy['duration'] = current_duration
+        # Items without a positive duration do not advance the final-cut timeline.
+        if calculate_edited_timerange and current_duration > 0:
+            end_time_seconds = accumulated_duration + current_duration
+            item_copy['editedTimeRange'] = f"{_clock(accumulated_duration)}-{_clock(end_time_seconds)}"
+            accumulated_duration = end_time_seconds
+        updated_script.append(item_copy)
+    return updated_script
+# Manual demo: runs update_script_timestamps on a hard-coded sample script and prints every item.
+if __name__ == '__main__':
+    # Sample script: six items with _id 1..6 and comma-millisecond timestamps.
+    list_script = [
+        {
+            'picture': '【解说】好的，各位，欢迎回到我的频道！《庆余年 2》刚开播就给了我们一个王炸！范闲在北齐"死"了？这怎么可能！',
+            'timestamp': '00:00:00,001-00:01:15,001',
+            'narration': '好的各位，欢迎回到我的频道！《庆余年 2》刚开播就给了我们一个王炸！范闲在北齐"死"了？这怎么可能！上集片尾那个巨大的悬念，这一集就立刻揭晓了！范闲假死归来，他面临的第一个，也是最大的难关，就是如何面对他最敬爱的，同时也是最可怕的那个人——庆帝！',
+            'OST': 0,
+            '_id': 1
+        },
+        {
+            'picture': '【解说】上一集我们看到，范闲在北齐遭遇了惊天变故，生死不明！',
+            'timestamp': '00:01:15,001-00:04:40,001',
+            'narration': '但我们都知道，他绝不可能就这么轻易退场！第二集一开场，范闲就已经秘密回到了京都。他的生死传闻，可不像我们想象中那样只是小范围流传，而是…',
+            'OST': 0,
+            '_id': 2
+        },
+        {
+            'picture': '画面切到王启年小心翼翼地向范闲汇报。',
+            'timestamp': '00:04:41,001-00:04:58,001',
+            'narration': '我发现大人的死讯不光是在民间,在官场上也它传开了,所以呢,所以啊,可不是什么好事,将来您跟陛下怎么交代,这可是欺君之罪',
+            'OST': 1,
+            '_id': 3
+        },
+        {
+            'picture': '【解说】"欺君之罪"！在封建王朝，这可是抄家灭族的大罪！搁一般人，肯定脚底抹油溜之大吉了。',
+            'timestamp': '00:04:58,001-00:05:45,001',
+            'narration': '"欺君之罪"！在封建王朝，这可是抄家灭族的大罪！搁一般人，肯定脚底抹油溜之大吉了。但范闲是谁啊？他偏要反其道而行之！他竟然决定，直接去见庆帝！冒着天大的��险，用"假死"这个事实去赌庆帝的态度！',
+            'OST': 0,
+            '_id': 4
+        },
+        {
+            'picture': '【解说】但想见庆帝，哪有那么容易？范闲艺高人胆大，竟然选择了最激进的方式——闯宫！',
+            'timestamp': '00:05:45,001-00:06:00,001',
+            'narration': '但想见庆帝，哪有那么容易？范闲艺高人胆大，竟然选择了最激进的方式——闯宫！',
+            'OST': 0,
+            '_id': 5
+        },
+        {
+            'picture': '画面切换到范闲蒙面闯入皇宫，被侍卫包围的场景。',
+            'timestamp': '00:06:00,001-00:06:03,001',
+            'narration': '抓刺客',
+            'OST': 1,
+            '_id': 6
+        }]
+    # Clip, audio and subtitle results are keyed by _id; ids 3 and 6 have no entry in any of them.
+    video_res = {
+        1: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/[email protected]',
+        2: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/[email protected]',
+        4: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/[email protected]',
+        5: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/[email protected]'}
+    audio_res = {
+        1: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3',
+        2: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3',
+        4: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3',
+        5: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3'}
+    sub_res = {
+        1: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt',
+        2: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt',
+        4: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt',
+        5: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt'}
+    # Update the script and print the result, one line per item.
+    updated_list_script = update_script_timestamps(list_script, video_res, audio_res, sub_res)
+    for item in updated_list_script:
+        print(
+            f"ID: {item['_id']} | Picture: {item['picture'][:20]}... | Timestamp: {item['timestamp']} | " +
+            f"SourceTimeRange: {item['sourceTimeRange']} | EditedTimeRange: {item.get('editedTimeRange', '')} | " +
+            f"Duration: {item['duration']} 秒 | Audio: {item['audio']} | Video: {item['video']} | Subtitle: {item['subtitle']}")

app/services/video.py ADDED Viewed

	@@ -0,0 +1,365 @@

+import traceback
+# import pysrt
+from typing import Optional
+from typing import List
+from loguru import logger
+from moviepy import *
+from PIL import ImageFont
+from contextlib import contextmanager
+from moviepy import (
+    VideoFileClip,
+    AudioFileClip,
+    TextClip,
+    CompositeVideoClip,
+    CompositeAudioClip
+)
+from app.models.schema import VideoAspect, SubtitlePosition
+def wrap_text(text, max_width, font, fontsize=60):
+    """
+    Break text into lines that fit a maximum rendered width.
+    The text is first wrapped at spaces. If a single word is wider than
+    max_width, the whole text is wrapped character by character instead.
+    Args:
+        text: text to wrap
+        max_width: maximum line width, compared against the font's bounding-box width
+        font: path of the font file
+        fontsize: font size
+    Returns:
+        tuple: (wrapped text joined with newlines, total text height), where the
+        height is the number of lines times the height measured for the unwrapped text
+    """
+    loaded_font = ImageFont.truetype(font, fontsize)
+    def measure(fragment):
+        # Bounding-box size of the fragment with surrounding whitespace removed.
+        left, top, right, bottom = loaded_font.getbbox(fragment.strip())
+        return right - left, bottom - top
+    text_width, line_height = measure(text)
+    if text_width <= max_width:
+        return text, line_height
+    logger.debug(f"换行文本, 最大宽度: {max_width}, 文本宽度: {text_width}, 文本: {text}")
+    # Pass 1: wrap on spaces. The else-branch runs only if no single word overflowed.
+    rows = []
+    current = ""
+    for token in text.split(" "):
+        previous = current
+        current += f"{token} "
+        if measure(current)[0] <= max_width:
+            continue
+        if current.strip() == token.strip():
+            # This word alone is too wide; give up on word wrapping.
+            break
+        rows.append(previous)
+        current = f"{token} "
+    else:
+        rows.append(current)
+        rows = [row.strip() for row in rows]
+        return "\n".join(rows).strip(), len(rows) * line_height
+    # Pass 2: wrap per character; a line is closed by the character that makes it overflow.
+    rows = []
+    current = ""
+    for char in text:
+        current += char
+        if measure(current)[0] > max_width:
+            rows.append(current)
+            current = ""
+    rows.append(current)
+    wrapped = "\n".join(rows).strip()
+    total_height = len(rows) * line_height
+    logger.debug(f"换行文本: {wrapped}")
+    return wrapped, total_height
+@contextmanager
+def manage_clip(clip):
+    """
+    Context manager that closes a clip when the with-block ends.
+    Args:
+        clip: any object exposing a close() method
+    Yields:
+        The same clip object that was passed in
+    """
+    try:
+        yield clip
+    finally:
+        # Runs on normal exit and when the with-body raises.
+        clip.close()
+        # Drops only this generator's local reference to the clip.
+        del clip
+def resize_video_with_padding(clip, target_width: int, target_height: int):
+    """
+    Fit a clip into the target frame, padding with black where the aspect ratios differ.
+    Args:
+        clip: video clip exposing w, h, duration and resize()
+        target_width: width of the output frame
+        target_height: height of the output frame
+    Returns:
+        The resized clip when the aspect ratios are equal; otherwise a
+        CompositeVideoClip with the scaled clip centred on a black background
+    """
+    # NOTE(review): resize / set_duration / set_position look like MoviePy 1.x method
+    # names, while this file imports from the top-level `moviepy` package - confirm
+    # which MoviePy version is targeted.
+    source_ratio = clip.w / clip.h
+    wanted_ratio = target_width / target_height
+    if source_ratio == wanted_ratio:
+        return clip.resize((target_width, target_height))
+    # A relatively wider clip is limited by the frame width, otherwise by the frame height.
+    scale = target_width / clip.w if source_ratio > wanted_ratio else target_height / clip.h
+    fitted = clip.resize(newsize=(int(clip.w * scale), int(clip.h * scale)))
+    backdrop = ColorClip(size=(target_width, target_height), color=(0, 0, 0)).set_duration(clip.duration)
+    layers = [backdrop, fitted.set_position("center")]
+    return CompositeVideoClip(layers)
+def loop_audio_clip(audio_clip: AudioFileClip, target_duration: float) -> AudioFileClip:
+    """
+    Repeat an audio clip until it covers the target duration, then trim it.
+    Args:
+        audio_clip: the clip to repeat
+        target_duration: wanted length in seconds
+    Returns:
+        The repeated audio, cut to the range 0..target_duration
+    """
+    # One more repetition than the integer quotient, so the result is never too short.
+    repeats = int(target_duration / audio_clip.duration) + 1
+    stretched = audio_clip
+    remaining = repeats - 1
+    while remaining > 0:
+        # Each further copy starts where the audio built so far ends.
+        stretched = CompositeAudioClip([
+            stretched,
+            audio_clip.set_start(stretched.duration)
+        ])
+        remaining -= 1
+    return stretched.subclip(0, target_duration)
+def calculate_subtitle_position(position, video_height: int, text_height: int = 0) -> tuple:
+    """
+    Translate a subtitle position setting into clip coordinates.
+    Args:
+        position: a SubtitlePosition value, or a number read as the fraction of
+            the video height measured from the top
+        video_height: height of the video
+        text_height: height of the subtitle text
+    Returns:
+        tuple: ('center', y); any unrecognised position is treated like BOTTOM
+    """
+    edge_margin = 50  # gap kept between the subtitle and the top/bottom edge
+    if isinstance(position, (int, float)):
+        # Numeric position: fraction of the height from the top.
+        return ('center', int(video_height * position))
+    if position == SubtitlePosition.TOP:
+        offset_y = edge_margin
+    elif position == SubtitlePosition.CENTER:
+        offset_y = video_height // 2
+    elif position == SubtitlePosition.BOTTOM:
+        offset_y = video_height - edge_margin - text_height
+    else:
+        # Fallback: same placement as BOTTOM.
+        offset_y = video_height - edge_margin - text_height
+    return ('center', offset_y)
+def generate_video_v3(
+        video_path: str,
+        subtitle_style: dict,
+        volume_config: dict,
+        subtitle_path: Optional[str] = None,
+        bgm_path: Optional[str] = None,
+        narration_path: Optional[str] = None,
+        output_path: str = "output.mp4",
+        font_path: Optional[str] = None
+) -> None:
+    """
+    Merge a video with optional subtitles, background music and narration, then export it.
+    Every clip opened here is closed again when the function leaves, whether
+    it finishes normally or an error is raised on the way.
+    Args:
+        video_path: path of the source video file
+        subtitle_style: subtitle style settings; the code reads the keys
+            'fontsize', 'color' and 'position' ('position' is a SubtitlePosition
+            value or a 0-1 number giving the fraction of the height from the top)
+        volume_config: volume settings, may contain:
+            - original: volume of the video's own audio, default 1.0
+            - bgm: volume of the background music, default 0.3
+            - narration: volume of the narration, default 1.0
+        subtitle_path: path of an SRT subtitle file (optional)
+        bgm_path: path of a background-music file (optional)
+        narration_path: path of a narration audio file (optional)
+        output_path: path of the exported video
+        font_path: path of the font file (.ttf/.otf etc.) used for subtitles
+    Raises:
+        FileNotFoundError: when video_path does not exist
+    """
+    # Imported locally: `os` is used below but is not among this file's explicit imports.
+    import os
+    from contextlib import ExitStack
+    # NOTE(review): volumex / subclip / set_* and TextClip(text, fontsize=...) look like
+    # MoviePy 1.x API, while this file imports from the top-level `moviepy` package -
+    # confirm which MoviePy version is targeted.
+    if not os.path.exists(video_path):
+        raise FileNotFoundError(f"视频文件不存在: {video_path}")
+    # ExitStack runs every registered close() on exit, including after an exception.
+    with ExitStack() as cleanup:
+        video = VideoFileClip(video_path)
+        cleanup.callback(video.close)
+        subtitle_clips = []
+        if subtitle_path:
+            if os.path.exists(subtitle_path):
+                if font_path and not os.path.exists(font_path):
+                    logger.warning(f"警告：字体文件不存在: {font_path}")
+                try:
+                    # NOTE(review): `import pysrt` is commented out at the top of this file.
+                    # Unless pysrt reaches this namespace some other way, this line raises
+                    # NameError, the handler below logs it, and the video is exported
+                    # without subtitles.
+                    subs = pysrt.open(subtitle_path)
+                    logger.info(f"读取到 {len(subs)} 条字幕")
+                    for index, sub in enumerate(subs):
+                        # pysrt times are in milliseconds; clip times are in seconds.
+                        start_time = sub.start.ordinal / 1000
+                        end_time = sub.end.ordinal / 1000
+                        try:
+                            if not sub.text or sub.text.strip() == '':
+                                logger.info(f"警告：第 {index + 1} 条字幕内容为空，已跳过")
+                                continue
+                            # Make sure the text is one string, even if it arrives as a list/tuple.
+                            if isinstance(sub.text, (list, tuple)):
+                                subtitle_text = ' '.join(str(item) for item in sub.text if item is not None)
+                            else:
+                                subtitle_text = str(sub.text)
+                            subtitle_text = subtitle_text.strip()
+                            if not subtitle_text:
+                                logger.info(f"警告：第 {index + 1} 条字幕处理后为空，已跳过")
+                                continue
+                            # Build the text clip once; its height drives the vertical placement.
+                            text_clip = TextClip(
+                                subtitle_text,
+                                font=font_path,
+                                fontsize=subtitle_style['fontsize'],
+                                color=subtitle_style['color']
+                            )
+                            cleanup.callback(text_clip.close)
+                            position = calculate_subtitle_position(
+                                subtitle_style['position'],
+                                video.h,
+                                text_clip.h
+                            )
+                            subtitle_clips.append(
+                                text_clip
+                                .set_position(position)
+                                .set_duration(end_time - start_time)
+                                .set_start(start_time)
+                            )
+                        except Exception:
+                            # One bad subtitle must not abort the others.
+                            logger.error(f"警告：创建第 {index + 1} 条字幕时出错: {traceback.format_exc()}")
+                    logger.info(f"成功创建 {len(subtitle_clips)} 条字幕剪辑")
+                except Exception as e:
+                    # Best effort: a broken subtitle file does not stop the export.
+                    logger.info(f"警告：处理字幕文件时出错: {str(e)}")
+            else:
+                logger.info(f"提示：字幕文件不存在: {subtitle_path}")
+        audio_clips = []
+        logger.debug(f"音量配置: {volume_config}")
+        # The video's own audio track, if it has one.
+        if video.audio is not None:
+            audio_clips.append(video.audio.volumex(volume_config.get('original', 1.0)))
+        if bgm_path:
+            bgm_source = AudioFileClip(bgm_path)
+            cleanup.callback(bgm_source.close)
+            # Loop short music up to the video length; trim long music down to it.
+            if bgm_source.duration < video.duration:
+                bgm = loop_audio_clip(bgm_source, video.duration)
+            else:
+                bgm = bgm_source.subclip(0, video.duration)
+            audio_clips.append(bgm.volumex(volume_config.get('bgm', 0.3)))
+        if narration_path:
+            narration_source = AudioFileClip(narration_path)
+            cleanup.callback(narration_source.close)
+            audio_clips.append(narration_source.volumex(volume_config.get('narration', 1.0)))
+        if subtitle_clips:
+            final_video = CompositeVideoClip([video] + subtitle_clips, size=video.size)
+        else:
+            logger.info("警告：没有字幕被添加到视频中")
+            final_video = video
+        if audio_clips:
+            final_video = final_video.set_audio(CompositeAudioClip(audio_clips))
+        logger.info("开始导出视频...")
+        final_video.write_videofile(
+            output_path,
+            codec='libx264',
+            audio_codec='aac',
+            fps=video.fps
+        )
+        logger.info(f"视频已导出到: {output_path}")

app/services/video_service.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import os
+from uuid import uuid4
+from loguru import logger
+from typing import Dict, List, Optional, Tuple
+from app.services import material
+class VideoService:
+    """Service entry point for cutting a source video into script clips."""
+    @staticmethod
+    async def crop_video(
+        video_path: str,
+        video_script: List[dict]
+    ) -> Tuple[str, Dict[str, str]]:
+        """
+        Cut the source video into one clip per script scene.
+        A fresh task id is generated, the scenes' timestamps are handed to
+        material.clip_videos, and each scene dict gets a 'path' entry pointing
+        at its clip. Scenes whose timestamp has no clip are logged and skipped.
+        Args:
+            video_path: path of the source video file
+            video_script: list of scene dicts, each with a 'timestamp' key
+        Returns:
+            Tuple[str, Dict[str, str]]: (task_id, clips), where clips is the
+            value returned by material.clip_videos, indexed by timestamp
+        Raises:
+            ValueError: when material.clip_videos returns None
+            Exception: any other failure is logged and re-raised unchanged
+        """
+        try:
+            task_id = str(uuid4())
+            timestamps = [scene['timestamp'] for scene in video_script]
+            clips = material.clip_videos(
+                task_id=task_id,
+                timestamp_terms=timestamps,
+                origin_video=video_path
+            )
+            if clips is None:
+                raise ValueError("裁剪视频失败")
+            # Write each clip path back into its scene.
+            for scene in video_script:
+                try:
+                    clip_path = clips[scene['timestamp']]
+                except KeyError as err:
+                    logger.error(f"更新视频路径失败: {err}")
+                else:
+                    scene['path'] = clip_path
+            logger.debug(f"裁剪视频成功，共生成 {len(timestamps)} 个视频片段")
+            logger.debug(f"视频片段路径: {clips}")
+            return task_id, clips
+        except Exception:
+            logger.exception("裁剪视频失败")
+            raise

app/services/voice.py ADDED Viewed

	@@ -0,0 +1,1469 @@

+import os
+import re
+import json
+import traceback
+import edge_tts
+import asyncio
+from loguru import logger
+from typing import List, Union
+from datetime import datetime
+from xml.sax.saxutils import unescape
+from edge_tts import submaker, SubMaker
+from edge_tts.submaker import mktimestamp
+from moviepy.video.tools import subtitles
+import time
+from app.config import config
+from app.utils import utils
+def get_all_azure_voices(filter_locals=None) -> list[str]:
+    if filter_locals is None:
+        filter_locals = ["zh-CN", "en-US", "zh-HK", "zh-TW", "vi-VN"]
+    voices_str = """
+Name: af-ZA-AdriNeural
+Gender: Female
+Name: af-ZA-WillemNeural
+Gender: Male
+Name: am-ET-AmehaNeural
+Gender: Male
+Name: am-ET-MekdesNeural
+Gender: Female
+Name: ar-AE-FatimaNeural
+Gender: Female
+Name: ar-AE-HamdanNeural
+Gender: Male
+Name: ar-BH-AliNeural
+Gender: Male
+Name: ar-BH-LailaNeural
+Gender: Female
+Name: ar-DZ-AminaNeural
+Gender: Female
+Name: ar-DZ-IsmaelNeural
+Gender: Male
+Name: ar-EG-SalmaNeural
+Gender: Female
+Name: ar-EG-ShakirNeural
+Gender: Male
+Name: ar-IQ-BasselNeural
+Gender: Male
+Name: ar-IQ-RanaNeural
+Gender: Female
+Name: ar-JO-SanaNeural
+Gender: Female
+Name: ar-JO-TaimNeural
+Gender: Male
+Name: ar-KW-FahedNeural
+Gender: Male
+Name: ar-KW-NouraNeural
+Gender: Female
+Name: ar-LB-LaylaNeural
+Gender: Female
+Name: ar-LB-RamiNeural
+Gender: Male
+Name: ar-LY-ImanNeural
+Gender: Female
+Name: ar-LY-OmarNeural
+Gender: Male
+Name: ar-MA-JamalNeural
+Gender: Male
+Name: ar-MA-MounaNeural
+Gender: Female
+Name: ar-OM-AbdullahNeural
+Gender: Male
+Name: ar-OM-AyshaNeural
+Gender: Female
+Name: ar-QA-AmalNeural
+Gender: Female
+Name: ar-QA-MoazNeural
+Gender: Male
+Name: ar-SA-HamedNeural
+Gender: Male
+Name: ar-SA-ZariyahNeural
+Gender: Female
+Name: ar-SY-AmanyNeural
+Gender: Female
+Name: ar-SY-LaithNeural
+Gender: Male
+Name: ar-TN-HediNeural
+Gender: Male
+Name: ar-TN-ReemNeural
+Gender: Female
+Name: ar-YE-MaryamNeural
+Gender: Female
+Name: ar-YE-SalehNeural
+Gender: Male
+Name: az-AZ-BabekNeural
+Gender: Male
+Name: az-AZ-BanuNeural
+Gender: Female
+Name: bg-BG-BorislavNeural
+Gender: Male
+Name: bg-BG-KalinaNeural
+Gender: Female
+Name: bn-BD-NabanitaNeural
+Gender: Female
+Name: bn-BD-PradeepNeural
+Gender: Male
+Name: bn-IN-BashkarNeural
+Gender: Male
+Name: bn-IN-TanishaaNeural
+Gender: Female
+Name: bs-BA-GoranNeural
+Gender: Male
+Name: bs-BA-VesnaNeural
+Gender: Female
+Name: ca-ES-EnricNeural
+Gender: Male
+Name: ca-ES-JoanaNeural
+Gender: Female
+Name: cs-CZ-AntoninNeural
+Gender: Male
+Name: cs-CZ-VlastaNeural
+Gender: Female
+Name: cy-GB-AledNeural
+Gender: Male
+Name: cy-GB-NiaNeural
+Gender: Female
+Name: da-DK-ChristelNeural
+Gender: Female
+Name: da-DK-JeppeNeural
+Gender: Male
+Name: de-AT-IngridNeural
+Gender: Female
+Name: de-AT-JonasNeural
+Gender: Male
+Name: de-CH-JanNeural
+Gender: Male
+Name: de-CH-LeniNeural
+Gender: Female
+Name: de-DE-AmalaNeural
+Gender: Female
+Name: de-DE-ConradNeural
+Gender: Male
+Name: de-DE-FlorianMultilingualNeural
+Gender: Male
+Name: de-DE-KatjaNeural
+Gender: Female
+Name: de-DE-KillianNeural
+Gender: Male
+Name: de-DE-SeraphinaMultilingualNeural
+Gender: Female
+Name: el-GR-AthinaNeural
+Gender: Female
+Name: el-GR-NestorasNeural
+Gender: Male
+Name: en-AU-NatashaNeural
+Gender: Female
+Name: en-AU-WilliamNeural
+Gender: Male
+Name: en-CA-ClaraNeural
+Gender: Female
+Name: en-CA-LiamNeural
+Gender: Male
+Name: en-GB-LibbyNeural
+Gender: Female
+Name: en-GB-MaisieNeural
+Gender: Female
+Name: en-GB-RyanNeural
+Gender: Male
+Name: en-GB-SoniaNeural
+Gender: Female
+Name: en-GB-ThomasNeural
+Gender: Male
+Name: en-HK-SamNeural
+Gender: Male
+Name: en-HK-YanNeural
+Gender: Female
+Name: en-IE-ConnorNeural
+Gender: Male
+Name: en-IE-EmilyNeural
+Gender: Female
+Name: en-IN-NeerjaExpressiveNeural
+Gender: Female
+Name: en-IN-NeerjaNeural
+Gender: Female
+Name: en-IN-PrabhatNeural
+Gender: Male
+Name: en-KE-AsiliaNeural
+Gender: Female
+Name: en-KE-ChilembaNeural
+Gender: Male
+Name: en-NG-AbeoNeural
+Gender: Male
+Name: en-NG-EzinneNeural
+Gender: Female
+Name: en-NZ-MitchellNeural
+Gender: Male
+Name: en-NZ-MollyNeural
+Gender: Female
+Name: en-PH-JamesNeural
+Gender: Male
+Name: en-PH-RosaNeural
+Gender: Female
+Name: en-SG-LunaNeural
+Gender: Female
+Name: en-SG-WayneNeural
+Gender: Male
+Name: en-TZ-ElimuNeural
+Gender: Male
+Name: en-TZ-ImaniNeural
+Gender: Female
+Name: en-US-AnaNeural
+Gender: Female
+Name: en-US-AndrewNeural
+Gender: Male
+Name: en-US-AriaNeural
+Gender: Female
+Name: en-US-AvaNeural
+Gender: Female
+Name: en-US-BrianNeural
+Gender: Male
+Name: en-US-ChristopherNeural
+Gender: Male
+Name: en-US-EmmaNeural
+Gender: Female
+Name: en-US-EricNeural
+Gender: Male
+Name: en-US-GuyNeural
+Gender: Male
+Name: en-US-JennyNeural
+Gender: Female
+Name: en-US-MichelleNeural
+Gender: Female
+Name: en-US-RogerNeural
+Gender: Male
+Name: en-US-SteffanNeural
+Gender: Male
+Name: en-ZA-LeahNeural
+Gender: Female
+Name: en-ZA-LukeNeural
+Gender: Male
+Name: es-AR-ElenaNeural
+Gender: Female
+Name: es-AR-TomasNeural
+Gender: Male
+Name: es-BO-MarceloNeural
+Gender: Male
+Name: es-BO-SofiaNeural
+Gender: Female
+Name: es-CL-CatalinaNeural
+Gender: Female
+Name: es-CL-LorenzoNeural
+Gender: Male
+Name: es-CO-GonzaloNeural
+Gender: Male
+Name: es-CO-SalomeNeural
+Gender: Female
+Name: es-CR-JuanNeural
+Gender: Male
+Name: es-CR-MariaNeural
+Gender: Female
+Name: es-CU-BelkysNeural
+Gender: Female
+Name: es-CU-ManuelNeural
+Gender: Male
+Name: es-DO-EmilioNeural
+Gender: Male
+Name: es-DO-RamonaNeural
+Gender: Female
+Name: es-EC-AndreaNeural
+Gender: Female
+Name: es-EC-LuisNeural
+Gender: Male
+Name: es-ES-AlvaroNeural
+Gender: Male
+Name: es-ES-ElviraNeural
+Gender: Female
+Name: es-ES-XimenaNeural
+Gender: Female
+Name: es-GQ-JavierNeural
+Gender: Male
+Name: es-GQ-TeresaNeural
+Gender: Female
+Name: es-GT-AndresNeural
+Gender: Male
+Name: es-GT-MartaNeural
+Gender: Female
+Name: es-HN-CarlosNeural
+Gender: Male
+Name: es-HN-KarlaNeural
+Gender: Female
+Name: es-MX-DaliaNeural
+Gender: Female
+Name: es-MX-JorgeNeural
+Gender: Male
+Name: es-NI-FedericoNeural
+Gender: Male
+Name: es-NI-YolandaNeural
+Gender: Female
+Name: es-PA-MargaritaNeural
+Gender: Female
+Name: es-PA-RobertoNeural
+Gender: Male
+Name: es-PE-AlexNeural
+Gender: Male
+Name: es-PE-CamilaNeural
+Gender: Female
+Name: es-PR-KarinaNeural
+Gender: Female
+Name: es-PR-VictorNeural
+Gender: Male
+Name: es-PY-MarioNeural
+Gender: Male
+Name: es-PY-TaniaNeural
+Gender: Female
+Name: es-SV-LorenaNeural
+Gender: Female
+Name: es-SV-RodrigoNeural
+Gender: Male
+Name: es-US-AlonsoNeural
+Gender: Male
+Name: es-US-PalomaNeural
+Gender: Female
+Name: es-UY-MateoNeural
+Gender: Male
+Name: es-UY-ValentinaNeural
+Gender: Female
+Name: es-VE-PaolaNeural
+Gender: Female
+Name: es-VE-SebastianNeural
+Gender: Male
+Name: et-EE-AnuNeural
+Gender: Female
+Name: et-EE-KertNeural
+Gender: Male
+Name: fa-IR-DilaraNeural
+Gender: Female
+Name: fa-IR-FaridNeural
+Gender: Male
+Name: fi-FI-HarriNeural
+Gender: Male
+Name: fi-FI-NooraNeural
+Gender: Female
+Name: fil-PH-AngeloNeural
+Gender: Male
+Name: fil-PH-BlessicaNeural
+Gender: Female
+Name: fr-BE-CharlineNeural
+Gender: Female
+Name: fr-BE-GerardNeural
+Gender: Male
+Name: fr-CA-AntoineNeural
+Gender: Male
+Name: fr-CA-JeanNeural
+Gender: Male
+Name: fr-CA-SylvieNeural
+Gender: Female
+Name: fr-CA-ThierryNeural
+Gender: Male
+Name: fr-CH-ArianeNeural
+Gender: Female
+Name: fr-CH-FabriceNeural
+Gender: Male
+Name: fr-FR-DeniseNeural
+Gender: Female
+Name: fr-FR-EloiseNeural
+Gender: Female
+Name: fr-FR-HenriNeural
+Gender: Male
+Name: fr-FR-RemyMultilingualNeural
+Gender: Male
+Name: fr-FR-VivienneMultilingualNeural
+Gender: Female
+Name: ga-IE-ColmNeural
+Gender: Male
+Name: ga-IE-OrlaNeural
+Gender: Female
+Name: gl-ES-RoiNeural
+Gender: Male
+Name: gl-ES-SabelaNeural
+Gender: Female
+Name: gu-IN-DhwaniNeural
+Gender: Female
+Name: gu-IN-NiranjanNeural
+Gender: Male
+Name: he-IL-AvriNeural
+Gender: Male
+Name: he-IL-HilaNeural
+Gender: Female
+Name: hi-IN-MadhurNeural
+Gender: Male
+Name: hi-IN-SwaraNeural
+Gender: Female
+Name: hr-HR-GabrijelaNeural
+Gender: Female
+Name: hr-HR-SreckoNeural
+Gender: Male
+Name: hu-HU-NoemiNeural
+Gender: Female
+Name: hu-HU-TamasNeural
+Gender: Male
+Name: id-ID-ArdiNeural
+Gender: Male
+Name: id-ID-GadisNeural
+Gender: Female
+Name: is-IS-GudrunNeural
+Gender: Female
+Name: is-IS-GunnarNeural
+Gender: Male
+Name: it-IT-DiegoNeural
+Gender: Male
+Name: it-IT-ElsaNeural
+Gender: Female
+Name: it-IT-GiuseppeNeural
+Gender: Male
+Name: it-IT-IsabellaNeural
+Gender: Female
+Name: ja-JP-KeitaNeural
+Gender: Male
+Name: ja-JP-NanamiNeural
+Gender: Female
+Name: jv-ID-DimasNeural
+Gender: Male
+Name: jv-ID-SitiNeural
+Gender: Female
+Name: ka-GE-EkaNeural
+Gender: Female
+Name: ka-GE-GiorgiNeural
+Gender: Male
+Name: kk-KZ-AigulNeural
+Gender: Female
+Name: kk-KZ-DauletNeural
+Gender: Male
+Name: km-KH-PisethNeural
+Gender: Male
+Name: km-KH-SreymomNeural
+Gender: Female
+Name: kn-IN-GaganNeural
+Gender: Male
+Name: kn-IN-SapnaNeural
+Gender: Female
+Name: ko-KR-HyunsuNeural
+Gender: Male
+Name: ko-KR-InJoonNeural
+Gender: Male
+Name: ko-KR-SunHiNeural
+Gender: Female
+Name: lo-LA-ChanthavongNeural
+Gender: Male
+Name: lo-LA-KeomanyNeural
+Gender: Female
+Name: lt-LT-LeonasNeural
+Gender: Male
+Name: lt-LT-OnaNeural
+Gender: Female
+Name: lv-LV-EveritaNeural
+Gender: Female
+Name: lv-LV-NilsNeural
+Gender: Male
+Name: mk-MK-AleksandarNeural
+Gender: Male
+Name: mk-MK-MarijaNeural
+Gender: Female
+Name: ml-IN-MidhunNeural
+Gender: Male
+Name: ml-IN-SobhanaNeural
+Gender: Female
+Name: mn-MN-BataaNeural
+Gender: Male
+Name: mn-MN-YesuiNeural
+Gender: Female
+Name: mr-IN-AarohiNeural
+Gender: Female
+Name: mr-IN-ManoharNeural
+Gender: Male
+Name: ms-MY-OsmanNeural
+Gender: Male
+Name: ms-MY-YasminNeural
+Gender: Female
+Name: mt-MT-GraceNeural
+Gender: Female
+Name: mt-MT-JosephNeural
+Gender: Male
+Name: my-MM-NilarNeural
+Gender: Female
+Name: my-MM-ThihaNeural
+Gender: Male
+Name: nb-NO-FinnNeural
+Gender: Male
+Name: nb-NO-PernilleNeural
+Gender: Female
+Name: ne-NP-HemkalaNeural
+Gender: Female
+Name: ne-NP-SagarNeural
+Gender: Male
+Name: nl-BE-ArnaudNeural
+Gender: Male
+Name: nl-BE-DenaNeural
+Gender: Female
+Name: nl-NL-ColetteNeural
+Gender: Female
+Name: nl-NL-FennaNeural
+Gender: Female
+Name: nl-NL-MaartenNeural
+Gender: Male
+Name: pl-PL-MarekNeural
+Gender: Male
+Name: pl-PL-ZofiaNeural
+Gender: Female
+Name: ps-AF-GulNawazNeural
+Gender: Male
+Name: ps-AF-LatifaNeural
+Gender: Female
+Name: pt-BR-AntonioNeural
+Gender: Male
+Name: pt-BR-FranciscaNeural
+Gender: Female
+Name: pt-BR-ThalitaNeural
+Gender: Female
+Name: pt-PT-DuarteNeural
+Gender: Male
+Name: pt-PT-RaquelNeural
+Gender: Female
+Name: ro-RO-AlinaNeural
+Gender: Female
+Name: ro-RO-EmilNeural
+Gender: Male
+Name: ru-RU-DmitryNeural
+Gender: Male
+Name: ru-RU-SvetlanaNeural
+Gender: Female
+Name: si-LK-SameeraNeural
+Gender: Male
+Name: si-LK-ThiliniNeural
+Gender: Female
+Name: sk-SK-LukasNeural
+Gender: Male
+Name: sk-SK-ViktoriaNeural
+Gender: Female
+Name: sl-SI-PetraNeural
+Gender: Female
+Name: sl-SI-RokNeural
+Gender: Male
+Name: so-SO-MuuseNeural
+Gender: Male
+Name: so-SO-UbaxNeural
+Gender: Female
+Name: sq-AL-AnilaNeural
+Gender: Female
+Name: sq-AL-IlirNeural
+Gender: Male
+Name: sr-RS-NicholasNeural
+Gender: Male
+Name: sr-RS-SophieNeural
+Gender: Female
+Name: su-ID-JajangNeural
+Gender: Male
+Name: su-ID-TutiNeural
+Gender: Female
+Name: sv-SE-MattiasNeural
+Gender: Male
+Name: sv-SE-SofieNeural
+Gender: Female
+Name: sw-KE-RafikiNeural
+Gender: Male
+Name: sw-KE-ZuriNeural
+Gender: Female
+Name: sw-TZ-DaudiNeural
+Gender: Male
+Name: sw-TZ-RehemaNeural
+Gender: Female
+Name: ta-IN-PallaviNeural
+Gender: Female
+Name: ta-IN-ValluvarNeural
+Gender: Male
+Name: ta-LK-KumarNeural
+Gender: Male
+Name: ta-LK-SaranyaNeural
+Gender: Female
+Name: ta-MY-KaniNeural
+Gender: Female
+Name: ta-MY-SuryaNeural
+Gender: Male
+Name: ta-SG-AnbuNeural
+Gender: Male
+Name: ta-SG-VenbaNeural
+Gender: Female
+Name: te-IN-MohanNeural
+Gender: Male
+Name: te-IN-ShrutiNeural
+Gender: Female
+Name: th-TH-NiwatNeural
+Gender: Male
+Name: th-TH-PremwadeeNeural
+Gender: Female
+Name: tr-TR-AhmetNeural
+Gender: Male
+Name: tr-TR-EmelNeural
+Gender: Female
+Name: uk-UA-OstapNeural
+Gender: Male
+Name: uk-UA-PolinaNeural
+Gender: Female
+Name: ur-IN-GulNeural
+Gender: Female
+Name: ur-IN-SalmanNeural
+Gender: Male
+Name: ur-PK-AsadNeural
+Gender: Male
+Name: ur-PK-UzmaNeural
+Gender: Female
+Name: uz-UZ-MadinaNeural
+Gender: Female
+Name: uz-UZ-SardorNeural
+Gender: Male
+Name: vi-VN-HoaiMyNeural
+Gender: Female
+Name: vi-VN-NamMinhNeural
+Gender: Male
+Name: zh-CN-XiaoxiaoNeural
+Gender: Female
+Name: zh-CN-XiaoyiNeural
+Gender: Female
+Name: zh-CN-YunjianNeural
+Gender: Male
+Name: zh-CN-YunxiNeural
+Gender: Male
+Name: zh-CN-YunxiaNeural
+Gender: Male
+Name: zh-CN-YunyangNeural
+Gender: Male
+Name: zh-CN-liaoning-XiaobeiNeural
+Gender: Female
+Name: zh-CN-shaanxi-XiaoniNeural
+Gender: Female
+Name: zh-HK-HiuGaaiNeural
+Gender: Female
+Name: zh-HK-HiuMaanNeural
+Gender: Female
+Name: zh-HK-WanLungNeural
+Gender: Male
+Name: zh-TW-HsiaoChenNeural
+Gender: Female
+Name: zh-TW-HsiaoYuNeural
+Gender: Female
+Name: zh-TW-YunJheNeural
+Gender: Male
+Name: zu-ZA-ThandoNeural
+Gender: Female
+Name: zu-ZA-ThembaNeural
+Gender: Male
+Name: en-US-AvaMultilingualNeural-V2
+Gender: Female
+Name: en-US-AndrewMultilingualNeural-V2
+Gender: Male
+Name: en-US-EmmaMultilingualNeural-V2
+Gender: Female
+Name: en-US-BrianMultilingualNeural-V2
+Gender: Male
+Name: de-DE-FlorianMultilingualNeural-V2
+Gender: Male
+Name: de-DE-SeraphinaMultilingualNeural-V2
+Gender: Female
+Name: fr-FR-RemyMultilingualNeural-V2
+Gender: Male
+Name: fr-FR-VivienneMultilingualNeural-V2
+Gender: Female
+Name: zh-CN-XiaoxiaoMultilingualNeural-V2
+Gender: Female
+Name: zh-CN-YunxiNeural-V2
+Gender: Male
+    """.strip()
+    voices = []
+    name = ""
+    for line in voices_str.split("\n"):
+        line = line.strip()
+        if not line:
+            continue
+        if line.startswith("Name: "):
+            name = line[6:].strip()
+        if line.startswith("Gender: "):
+            gender = line[8:].strip()
+            if name and gender:
+                # voices.append({
+                #     "name": name,
+                #     "gender": gender,
+                # })
+                if filter_locals:
+                    for filter_local in filter_locals:
+                        if name.lower().startswith(filter_local.lower()):
+                            voices.append(f"{name}-{gender}")
+                else:
+                    voices.append(f"{name}-{gender}")
+                name = ""
+    voices.sort()
+    return voices
+def parse_voice_name(name: str):
+    # zh-CN-XiaoyiNeural-Female
+    # zh-CN-YunxiNeural-Male
+    # zh-CN-XiaoxiaoMultilingualNeural-V2-Female
+    name = name.replace("-Female", "").replace("-Male", "").strip()
+    return name
+def is_azure_v2_voice(voice_name: str):
+    voice_name = parse_voice_name(voice_name)
+    if voice_name.endswith("-V2"):
+        return voice_name.replace("-V2", "").strip()
+    return ""
+def tts(
+    text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str
+) -> Union[SubMaker, None]:
+    if is_azure_v2_voice(voice_name):
+        return azure_tts_v2(text, voice_name, voice_file)
+    return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file)
+def convert_rate_to_percent(rate: float) -> str:
+    if rate == 1.0:
+        return "+0%"
+    percent = round((rate - 1.0) * 100)
+    if percent > 0:
+        return f"+{percent}%"
+    else:
+        return f"{percent}%"
+def convert_pitch_to_percent(rate: float) -> str:
+    if rate == 1.0:
+        return "+0Hz"
+    percent = round((rate - 1.0) * 100)
+    if percent > 0:
+        return f"+{percent}Hz"
+    else:
+        return f"{percent}Hz"
+def azure_tts_v1(
+    text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str
+) -> Union[SubMaker, None]:
+    voice_name = parse_voice_name(voice_name)
+    text = text.strip()
+    rate_str = convert_rate_to_percent(voice_rate)
+    pitch_str = convert_pitch_to_percent(voice_pitch)
+    for i in range(3):
+        try:
+            logger.info(f"第 {i+1} 次使用 edge_tts 生成音频")
+            async def _do() -> tuple[SubMaker, bytes]:
+                communicate = edge_tts.Communicate(text, voice_name, rate=rate_str, pitch=pitch_str, proxy=config.proxy.get("http"))
+                sub_maker = edge_tts.SubMaker()
+                audio_data = bytes()  # 用于存储音频数据
+                async for chunk in communicate.stream():
+                    if chunk["type"] == "audio":
+                        audio_data += chunk["data"]
+                    elif chunk["type"] == "WordBoundary":
+                        sub_maker.create_sub(
+                            (chunk["offset"], chunk["duration"]), chunk["text"]
+                        )
+                return sub_maker, audio_data
+            # 获取音频数据和字幕信息
+            sub_maker, audio_data = asyncio.run(_do())
+            # 验证数据是否有效
+            if not sub_maker or not sub_maker.subs or not audio_data:
+                logger.warning(f"failed, invalid data generated")
+                if i < 2:
+                    time.sleep(1)
+                continue
+            # 数据有效，写入文件
+            with open(voice_file, "wb") as file:
+                file.write(audio_data)
+            return sub_maker
+        except Exception as e:
+            logger.error(f"生成音频文件时出错: {str(e)}")
+            if i < 2:
+                time.sleep(1)
+    return None
+def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker, None]:
+    voice_name = is_azure_v2_voice(voice_name)
+    if not voice_name:
+        logger.error(f"invalid voice name: {voice_name}")
+        raise ValueError(f"invalid voice name: {voice_name}")
+    text = text.strip()
+    def _format_duration_to_offset(duration) -> int:
+        if isinstance(duration, str):
+            time_obj = datetime.strptime(duration, "%H:%M:%S.%f")
+            milliseconds = (
+                (time_obj.hour * 3600000)
+                + (time_obj.minute * 60000)
+                + (time_obj.second * 1000)
+                + (time_obj.microsecond // 1000)
+            )
+            return milliseconds * 10000
+        if isinstance(duration, int):
+            return duration
+        return 0
+    for i in range(3):
+        try:
+            logger.info(f"start, voice name: {voice_name}, try: {i + 1}")
+            import azure.cognitiveservices.speech as speechsdk
+            sub_maker = SubMaker()
+            def speech_synthesizer_word_boundary_cb(evt: speechsdk.SessionEventArgs):
+                duration = _format_duration_to_offset(str(evt.duration))
+                offset = _format_duration_to_offset(evt.audio_offset)
+                sub_maker.subs.append(evt.text)
+                sub_maker.offset.append((offset, offset + duration))
+            # Creates an instance of a speech config with specified subscription key and service region.
+            speech_key = config.azure.get("speech_key", "")
+            service_region = config.azure.get("speech_region", "")
+            audio_config = speechsdk.audio.AudioOutputConfig(
+                filename=voice_file, use_default_speaker=True
+            )
+            speech_config = speechsdk.SpeechConfig(
+                subscription=speech_key, region=service_region
+            )
+            speech_config.speech_synthesis_voice_name = voice_name
+            # speech_config.set_property(property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestSentenceBoundary,
+            #                            value='true')
+            speech_config.set_property(
+                property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestWordBoundary,
+                value="true",
+            )
+            speech_config.set_speech_synthesis_output_format(
+                speechsdk.SpeechSynthesisOutputFormat.Audio48Khz192KBitRateMonoMp3
+            )
+            speech_synthesizer = speechsdk.SpeechSynthesizer(
+                audio_config=audio_config, speech_config=speech_config
+            )
+            speech_synthesizer.synthesis_word_boundary.connect(
+                speech_synthesizer_word_boundary_cb
+            )
+            result = speech_synthesizer.speak_text_async(text).get()
+            if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
+                logger.success(f"azure v2 speech synthesis succeeded: {voice_file}")
+                return sub_maker
+            elif result.reason == speechsdk.ResultReason.Canceled:
+                cancellation_details = result.cancellation_details
+                logger.error(
+                    f"azure v2 speech synthesis canceled: {cancellation_details.reason}"
+                )
+                if cancellation_details.reason == speechsdk.CancellationReason.Error:
+                    logger.error(
+                        f"azure v2 speech synthesis error: {cancellation_details.error_details}"
+                    )
+            if i < 2:  # 如果不是最后一次重试，则等待1秒
+                time.sleep(1)
+            logger.info(f"completed, output file: {voice_file}")
+        except Exception as e:
+            logger.error(f"failed, error: {str(e)}")
+            if i < 2:  # 如果不是最后一次重试，则等待1秒
+                time.sleep(3)
+    return None
+def _format_text(text: str) -> str:
+    text = text.replace("\n", " ")
+    text = text.replace("\"", " ")
+    text = text.replace("[", " ")
+    text = text.replace("]", " ")
+    text = text.replace("(", " ")
+    text = text.replace(")", " ")
+    text = text.replace("）", " ")
+    text = text.replace("（", " ")
+    text = text.replace("{", " ")
+    text = text.replace("}", " ")
+    text = text.strip()
+    return text
+def create_subtitle_from_multiple(text: str, sub_maker_list: List[SubMaker], list_script: List[dict],
+                                  subtitle_file: str):
+    """
+    Build an optimized subtitle file from several SubMaker objects, the full text and the original script.
+    1. Use the timestamps from the original script
+    2. Skip the items whose OST value is truthy
+    3. Split the subtitle text into multiple lines by punctuation
+    4. Segment according to the full text, keeping the original sentence structure
+    5. Write the new subtitle file; timestamps include the hour unit
+    :param text: full narration text; normalised with ``_format_text`` and split into sentences
+    :param sub_maker_list: one SubMaker per non-skipped script item, consumed in order
+    :param list_script: script items; this function reads the 'OST' and 'timestamp' keys
+    :param subtitle_file: path of the subtitle file to write
+    :return: None. Nothing is written when no subtitle item was produced; any
+             exception is logged and swallowed.
+    """
+    text = _format_text(text)
+    sentences = utils.split_string_by_punctuations(text)
+    def formatter(idx: int, start_time: str, end_time: str, sub_text: str) -> str:
+        # One SRT-style entry: index line, "start --> end" line (with "." replaced by ","), text line.
+        return f"{idx}\n{start_time.replace('.', ',')} --> {end_time.replace('.', ',')}\n{sub_text}\n"
+    sub_items = []
+    sub_index = 0
+    # Index into `sentences`; shared across all script items, so matching continues where the previous item stopped.
+    sentence_index = 0
+    try:
+        sub_maker_index = 0
+        for script_item in list_script:
+            if script_item['OST']:
+                continue
+            # 'timestamp' is split on "-" into exactly a start and an end part.
+            start_time, end_time = script_item['timestamp'].split('-')
+            if sub_maker_index >= len(sub_maker_list):
+                logger.error(f"Sub maker list index out of range: {sub_maker_index}")
+                break
+            sub_maker = sub_maker_list[sub_maker_index]
+            sub_maker_index += 1
+            script_duration = utils.time_to_seconds(end_time) - utils.time_to_seconds(start_time)
+            audio_duration = get_audio_duration(sub_maker)
+            # Scale factor that stretches/compresses the audio offsets to fit the script's time slot.
+            time_ratio = script_duration / audio_duration if audio_duration > 0 else 1
+            current_sub = ""
+            current_start = None
+            current_end = None
+            for offset, sub in zip(sub_maker.offset, sub_maker.subs):
+                sub = unescape(sub).strip()
+                # Offsets are divided by 10,000,000 to get seconds (presumably 100 ns ticks - confirm against the TTS engine).
+                sub_start = utils.seconds_to_time(utils.time_to_seconds(start_time) + offset[0] / 10000000 * time_ratio)
+                sub_end = utils.seconds_to_time(utils.time_to_seconds(start_time) + offset[1] / 10000000 * time_ratio)
+                if current_start is None:
+                    current_start = sub_start
+                current_end = sub_end
+                current_sub += sub
+                # Check whether the accumulated subtitle text contains the next sentence
+                while sentence_index < len(sentences) and sentences[sentence_index] in current_sub:
+                    sub_index += 1
+                    line = formatter(
+                        idx=sub_index,
+                        start_time=current_start,
+                        end_time=current_end,
+                        sub_text=sentences[sentence_index].strip(),
+                    )
+                    sub_items.append(line)
+                    # Drop the first occurrence of the matched sentence; the next entry starts where this one ended.
+                    current_sub = current_sub.replace(sentences[sentence_index], "", 1).strip()
+                    current_start = current_end
+                    sentence_index += 1
+                # If the current subtitle is longer than 15 characters, also emit a new subtitle item
+                if len(current_sub) > 15:
+                    sub_index += 1
+                    line = formatter(
+                        idx=sub_index,
+                        start_time=current_start,
+                        end_time=current_end,
+                        sub_text=current_sub.strip(),
+                    )
+                    sub_items.append(line)
+                    current_sub = ""
+                    current_start = current_end
+            # Handle the remaining text
+            if current_sub.strip():
+                sub_index += 1
+                line = formatter(
+                    idx=sub_index,
+                    start_time=current_start,
+                    end_time=current_end,
+                    sub_text=current_sub.strip(),
+                )
+                sub_items.append(line)
+        if len(sub_items) == 0:
+            logger.error("No subtitle items generated")
+            return
+        with open(subtitle_file, "w", encoding="utf-8") as file:
+            file.write("\n".join(sub_items))
+        logger.info(f"completed, subtitle file created: {subtitle_file}")
+    except Exception as e:
+        logger.error(f"failed, error: {str(e)}")
+        traceback.print_exc()
+def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str):
+    """
+    Optimize the subtitle file
+    1. Split the text into multiple lines by punctuation
+    2. Match the SubMaker text against those lines one by one
+    3. Write the new subtitle file
+    :param sub_maker: SubMaker whose ``offset`` and ``subs`` lists are iterated in parallel
+    :param text: narration text; normalised with ``_format_text`` before splitting
+    :param subtitle_file: path of the subtitle file to write
+    :return: ``(subtitle_file, duration)`` when the file was written and parsed
+             successfully; None in every other case (line-count mismatch,
+             parse failure, or any exception, all of which are only logged)
+    """
+    text = _format_text(text)
+    def formatter(idx: int, start_time: float, end_time: float, sub_text: str) -> str:
+        """
+        Produce one entry of the form:
+        1
+        00:00:00,000 --> 00:00:02,360
+        Running is a simple and easy exercise
+        """
+        start_t = mktimestamp(start_time).replace(".", ",")
+        end_t = mktimestamp(end_time).replace(".", ",")
+        return f"{idx}\n" f"{start_t} --> {end_t}\n" f"{sub_text}\n"
+    # -1.0 marks "no start recorded yet for the line being accumulated".
+    start_time = -1.0
+    sub_items = []
+    sub_index = 0
+    script_lines = utils.split_string_by_punctuations(text)
+    def match_line(_sub_line: str, _sub_index: int):
+        # Return the text to emit when the accumulated words equal script line
+        # `_sub_index`, or "" when they do not (yet) match.
+        if len(script_lines) <= _sub_index:
+            return ""
+        _line = script_lines[_sub_index]
+        # 1) exact match
+        if _sub_line == _line:
+            return script_lines[_sub_index].strip()
+        # 2) match after removing everything that is neither a word character
+        #    nor whitespace; note this branch returns the stripped-down text
+        _sub_line_ = re.sub(r"[^\w\s]", "", _sub_line)
+        _line_ = re.sub(r"[^\w\s]", "", _line)
+        if _sub_line_ == _line_:
+            return _line_.strip()
+        # 3) match after removing all non-word characters (whitespace included);
+        #    this branch returns the original script line
+        _sub_line_ = re.sub(r"\W+", "", _sub_line)
+        _line_ = re.sub(r"\W+", "", _line)
+        if _sub_line_ == _line_:
+            return _line.strip()
+        return ""
+    sub_line = ""
+    try:
+        for _, (offset, sub) in enumerate(zip(sub_maker.offset, sub_maker.subs)):
+            _start_time, end_time = offset
+            # Remember the start of the first word of the line being accumulated.
+            if start_time < 0:
+                start_time = _start_time
+            sub = unescape(sub)
+            sub_line += sub
+            sub_text = match_line(sub_line, sub_index)
+            if sub_text:
+                sub_index += 1
+                line = formatter(
+                    idx=sub_index,
+                    start_time=start_time,
+                    end_time=end_time,
+                    sub_text=sub_text,
+                )
+                sub_items.append(line)
+                # Reset the accumulator for the next script line.
+                start_time = -1.0
+                sub_line = ""
+        # The file is only written when every script line was matched.
+        if len(sub_items) == len(script_lines):
+            with open(subtitle_file, "w", encoding="utf-8") as file:
+                file.write("\n".join(sub_items) + "\n")
+            try:
+                # Re-read the written file and take the largest end time as the duration.
+                sbs = subtitles.file_to_subtitles(subtitle_file, encoding="utf-8")
+                duration = max([tb for ((ta, tb), txt) in sbs])
+                logger.info(
+                    f"已创建字幕文件: {subtitle_file}, duration: {duration}"
+                )
+                return subtitle_file, duration
+            except Exception as e:
+                # The written file could not be parsed: log it and delete the file.
+                logger.error(f"failed, error: {str(e)}")
+                os.remove(subtitle_file)
+        else:
+            logger.error(
+                f"字幕创建失败, 字幕长度: {len(sub_items)}, script_lines len: {len(script_lines)}"
+                f"\nsub_items:{json.dumps(sub_items, indent=4, ensure_ascii=False)}"
+                f"\nscript_lines:{json.dumps(script_lines, indent=4, ensure_ascii=False)}"
+            )
+    except Exception as e:
+        logger.error(f"failed, error: {str(e)}")
+def get_audio_duration(sub_maker: submaker.SubMaker):
+    """
+    获取��频时长
+    """
+    if not sub_maker.offset:
+        return 0.0
+    return sub_maker.offset[-1][1] / 10000000
+def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: float, voice_pitch: float):
+    """
+    根据JSON文件中的多段文本进行TTS转换
+    :param task_id: 任务ID
+    :param list_script: 脚本列表
+    :param voice_name: 语音名称
+    :param voice_rate: 语音速率
+    :return: 生成的音频文件列表
+    """
+    voice_name = parse_voice_name(voice_name)
+    output_dir = utils.task_dir(task_id)
+    tts_results = []
+    for item in list_script:
+        if item['OST'] != 1:
+            # 将时间戳中的冒号替换为下划线
+            timestamp = item['timestamp'].replace(':', '_')
+            audio_file = os.path.join(output_dir, f"audio_{timestamp}.mp3")
+            subtitle_file = os.path.join(output_dir, f"subtitle_{timestamp}.srt")
+            text = item['narration']
+            sub_maker = tts(
+                text=text,
+                voice_name=voice_name,
+                voice_rate=voice_rate,
+                voice_pitch=voice_pitch,
+                voice_file=audio_file,
+            )
+            if sub_maker is None:
+                logger.error(f"无法为时间戳 {timestamp} 生成音频; "
+                             f"如果您在中国，请使用VPN; "
+                             f"或者使用其他 tts 引擎")
+                continue
+            else:
+                # 为当前片段生成字幕文件
+                _, duration = create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)
+            tts_results.append({
+                "_id": item['_id'],
+                "timestamp": item['timestamp'],
+                "audio_file": audio_file,
+                "subtitle_file": subtitle_file,
+                "duration": duration,
+                "text": text,
+            })
+            logger.info(f"已生成音频文件: {audio_file}")
+    return tts_results