mp4 / app /services /llm.py
gitdeem's picture
Upload 34 files
a9837a2 verified
raw
history blame
14.7 kB
from openai import OpenAI
from app.config import get_settings
from loguru import logger
from typing import List, Dict, Any
import json
from http import HTTPStatus
from pathlib import PurePosixPath
import requests
from urllib.parse import urlparse, unquote
import random
from app.models.const import LANGUAGE_NAMES, Language
from app.exceptions import LLMResponseValidationError
import dashscope
from dashscope import ImageSynthesis
from app.schemas.llm import (
StoryGenerationRequest,
)
settings = get_settings()

# Provider clients are created once at import time, only when the matching
# API key is configured. All names are pre-initialized to None so that code
# referencing an unconfigured provider hits an explicit None check instead of
# a NameError (deepseek/ollama/siliconflow used to exist only conditionally).
openai_client = None
aliyun_text_client = None
deepseek_client = None
ollama_client = None
siliconflow_client = None

if settings.openai_api_key:
    openai_client = OpenAI(api_key=settings.openai_api_key, base_url=settings.openai_base_url or "https://api.openai.com/v1")
if settings.aliyun_api_key:
    # DashScope's native SDK (used for image synthesis) needs the key set
    # globally, while text goes through the OpenAI-compatible endpoint.
    dashscope.api_key = settings.aliyun_api_key
    aliyun_text_client = OpenAI(base_url=settings.aliyun_base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1", api_key=settings.aliyun_api_key)
if settings.deepseek_api_key:
    deepseek_client = OpenAI(api_key=settings.deepseek_api_key, base_url=settings.deepseek_base_url or "https://api.deepseek.com/v1")
if settings.ollama_api_key:
    ollama_client = OpenAI(api_key=settings.ollama_api_key, base_url=settings.ollama_base_url or "http://localhost:11434/v1")
if settings.siliconflow_api_key:
    siliconflow_client = OpenAI(api_key=settings.siliconflow_api_key, base_url=settings.siliconflow_base_url or "https://api.siliconflow.cn/v1")
class LLMService:
    """Facade over the configured text and image LLM providers.

    Text generation goes through OpenAI-compatible chat-completion clients
    (OpenAI, Aliyun/DashScope, DeepSeek, Ollama, SiliconFlow); image
    generation calls each provider's image API directly.
    """

    def __init__(self):
        # Clients and default models are resolved once from module-level config.
        self.openai_client = openai_client
        self.aliyun_text_client = aliyun_text_client
        self.text_llm_model = settings.text_llm_model
        self.image_llm_model = settings.image_llm_model

    async def generate_story(self, request: StoryGenerationRequest) -> List[Dict[str, Any]]:
        """Generate story scenes.

        Args:
            request: carries the story prompt, language, segment count and
                optional text-LLM provider/model overrides.

        Returns:
            List[Dict[str, Any]]: one dict per scene with ``text`` and
            ``image_prompt`` keys.

        Raises:
            LLMResponseValidationError: if the LLM reply has the wrong shape.
        """
        messages = [
            {"role": "system", "content": "你是一个专业的故事创作者,善于创作引人入胜的故事。请只返回JSON格式的内容。"},
            {"role": "user", "content": await self._get_story_prompt(request.story_prompt, request.language, request.segments)},
        ]
        logger.info(f"prompt messages: {json.dumps(messages, indent=4, ensure_ascii=False)}")
        response = await self._generate_response(
            text_llm_provider=request.text_llm_provider or None,
            text_llm_model=request.text_llm_model or None,
            messages=messages,
            response_format="json_object",
        )
        # The model is instructed to wrap the scene array in a top-level "list" key.
        scenes = self.normalize_keys(response["list"])
        logger.info(f"Generated story: {json.dumps(scenes, indent=4, ensure_ascii=False)}")
        self._validate_story_response(scenes)
        return scenes

    def normalize_keys(self, data):
        """Normalize provider-specific scene keys to ``image_prompt``.

        Aliyun and OpenAI models name the prompt key differently, so any
        single non-``text`` key in a scene dict is renamed to
        ``image_prompt``. Lists are processed recursively; dicts without a
        ``text`` key are returned unchanged.

        Raises:
            ValueError: if a scene carries more than one non-``text`` key.
            TypeError: if *data* is neither a dict nor a list.
        """
        if isinstance(data, dict):
            if "text" in data:
                other_keys = [key for key in data if key != "text"]
                if len(other_keys) == 1:
                    data["image_prompt"] = data.pop(other_keys[0])
                elif len(other_keys) > 1:
                    raise ValueError(f"Unexpected extra keys: {other_keys}. Only one non-'text' key is allowed.")
            return data
        if isinstance(data, list):
            return [self.normalize_keys(item) for item in data]
        raise TypeError("Input must be a dict or list of dicts")

    def generate_image(self, *, prompt: str, image_llm_provider: str = None, image_llm_model: str = None, resolution: str = "1024x1024") -> str:
        """Generate an image for *prompt* and return its URL.

        Args:
            prompt: image description.
            image_llm_provider: provider override; defaults to settings.image_provider.
            image_llm_model: model override; defaults to settings.image_llm_model.
            resolution: image size, e.g. "1024x1024" ("*" separators are
                normalized to "x" for the OpenAI/SiliconFlow APIs).

        Returns:
            str: the generated image URL, or "" on any failure.
        """
        image_llm_provider = image_llm_provider or settings.image_provider
        image_llm_model = image_llm_model or settings.image_llm_model
        try:
            # Wrap the user prompt with safety framing for every provider.
            safe_prompt = f"Create a safe, family-friendly illustration. {prompt} The image should be appropriate for all ages, non-violent, and non-controversial."
            if image_llm_provider == "aliyun":
                # Bug fix: safe_prompt was built but the raw prompt was sent
                # to Aliyun; use the safety-wrapped prompt consistently.
                rsp = ImageSynthesis.call(model=image_llm_model, prompt=safe_prompt, size=resolution)
                if rsp.status_code != HTTPStatus.OK:
                    error_message = f'Failed, status_code: {rsp.status_code}, code: {rsp.code}, message: {rsp.message}'
                    logger.error(error_message)
                    raise Exception(error_message)
                for result in rsp.output.results:
                    return result.url
                raise Exception("Aliyun returned an empty result list")
            elif image_llm_provider == "openai":
                if resolution is not None:
                    resolution = resolution.replace("*", "x")
                response = self.openai_client.images.generate(
                    model=image_llm_model,
                    prompt=safe_prompt,
                    size=resolution,
                    quality="standard",
                    n=1,
                )
                # Bug fix: loguru takes no bare positional message args, so
                # the URL was silently dropped from the original log call.
                logger.info(f"image generate res: {response.data[0].url}")
                return response.data[0].url
            elif image_llm_provider == "siliconflow":
                if resolution is not None:
                    resolution = resolution.replace("*", "x")
                payload = {
                    "model": image_llm_model,
                    "prompt": safe_prompt,
                    "seed": random.randint(1000000, 4999999999),
                    "image_size": resolution,
                    "guidance_scale": 7.5,
                    "batch_size": 1,
                }
                headers = {
                    "Authorization": "Bearer " + settings.siliconflow_api_key,
                    "Content-Type": "application/json",
                }
                response = requests.post("https://api.siliconflow.cn/v1/images/generations", json=payload, headers=headers)
                # Bug fix: response.text is never None, so the old check
                # always parsed the body even for HTTP error responses.
                if not response.ok:
                    raise Exception(response.text)
                return response.json()["images"][0]["url"]
        except Exception as e:
            logger.error(f"Failed to generate image: {e}")
            return ""

    async def generate_story_with_images(self, request: StoryGenerationRequest) -> List[Dict[str, Any]]:
        """Generate story scenes, then an illustration for each scene.

        Args:
            request: story prompt, language, segment count, resolution and
                optional provider/model overrides.

        Returns:
            List[Dict[str, Any]]: scene dicts with ``text``, ``image_prompt``
            and ``url`` keys; ``url`` is None when image generation failed.
        """
        story_segments = await self.generate_story(request)
        for segment in story_segments:
            try:
                segment["url"] = self.generate_image(
                    prompt=segment["image_prompt"],
                    resolution=request.resolution,
                    image_llm_provider=request.image_llm_provider,
                    image_llm_model=request.image_llm_model,
                )
            except Exception as e:
                # Best effort: a failed image must not lose the whole story.
                logger.error(f"Failed to generate image for segment: {e}")
                segment["url"] = None
        return story_segments

    def get_llm_providers(self) -> Dict[str, List[str]]:
        """Return the provider names usable for text and image generation,
        based on which API keys are configured in settings."""
        text_providers: List[str] = []
        image_providers: List[str] = []
        if settings.openai_api_key:
            text_providers.append("openai")
            image_providers.append("openai")
        if settings.aliyun_api_key:
            text_providers.append("aliyun")
            image_providers.append("aliyun")
        if settings.deepseek_api_key:
            text_providers.append("deepseek")
        if settings.ollama_api_key:
            text_providers.append("ollama")
        if settings.siliconflow_api_key:
            text_providers.append("siliconflow")
        return {"textLLMProviders": text_providers, "imageLLMProviders": image_providers}

    def _validate_story_response(self, response: any) -> None:
        """Validate the shape of a generated story.

        Args:
            response: normalized LLM output; must be a list of dicts, each
                with string ``text`` and ``image_prompt`` fields.

        Raises:
            LLMResponseValidationError: on any shape violation.
        """
        if not isinstance(response, list):
            raise LLMResponseValidationError("Response must be an array")
        for i, scene in enumerate(response):
            if not isinstance(scene, dict):
                raise LLMResponseValidationError(f"story item {i} must be an object")
            if "text" not in scene:
                raise LLMResponseValidationError(f"Scene {i} missing 'text' field")
            if "image_prompt" not in scene:
                raise LLMResponseValidationError(f"Scene {i} missing 'image_prompt' field")
            if not isinstance(scene["text"], str):
                raise LLMResponseValidationError(f"Scene {i} 'text' must be a string")
            if not isinstance(scene["image_prompt"], str):
                raise LLMResponseValidationError(f"Scene {i} 'image_prompt' must be a string")

    async def _generate_response(self, *, text_llm_provider: str = None, text_llm_model: str = None, messages: List[Dict[str, str]], response_format: str = "json_object") -> any:
        """Run a chat completion and parse the reply as JSON.

        Args:
            text_llm_provider: provider name; defaults to settings.text_llm_provider.
            text_llm_model: model name; defaults to settings.text_llm_model.
            messages: chat messages to send.
            response_format: OpenAI response_format type, default "json_object".

        Returns:
            Dict[str, Any]: the parsed JSON response.

        Raises:
            ValueError: unknown or unconfigured provider.
            Exception: request failure or unparseable JSON reply.
        """
        if text_llm_provider is None:
            text_llm_provider = settings.text_llm_provider
        if text_llm_provider == "aliyun":
            text_client = self.aliyun_text_client
        elif text_llm_provider == "openai":
            text_client = self.openai_client
        elif text_llm_provider == "deepseek":
            text_client = deepseek_client
        elif text_llm_provider == "ollama":
            text_client = ollama_client
        elif text_llm_provider == "siliconflow":
            text_client = siliconflow_client
        else:
            # Bug fix: an unknown provider previously fell through and raised
            # an opaque UnboundLocalError on text_client.
            raise ValueError(f"Unsupported text LLM provider: {text_llm_provider}")
        if text_client is None:
            raise ValueError(f"Text LLM provider '{text_llm_provider}' is not configured (missing API key)")
        if text_llm_model is None:
            text_llm_model = settings.text_llm_model
        response = text_client.chat.completions.create(
            model=text_llm_model,
            response_format={"type": response_format},
            messages=messages,
        )
        try:
            return json.loads(response.choices[0].message.content)
        except Exception as e:
            logger.error(f"Failed to parse response: {e}")
            raise

    async def _get_story_prompt(self, story_prompt: str = None, language: Language = Language.CHINESE_CN, segments: int = 3) -> str:
        """Build the full story-generation prompt.

        Args:
            story_prompt: optional story theme; a generic prompt is used when
                absent (the original code raised NameError on an empty prompt).
            language: output language for scene text.
            segments: number of scenes to request.

        Returns:
            str: the complete prompt including the expected-JSON contract.
        """
        languageValue = LANGUAGE_NAMES[language]
        if story_prompt:
            base_prompt = f"讲一个故事,主题是:{story_prompt}"
        else:
            # Bug fix: base_prompt was undefined when story_prompt was falsy.
            base_prompt = "讲一个故事"
        return f"""
        {base_prompt}. The story needs to be divided into {segments} scenes, and each scene must include descriptive text and an image prompt.
        Please return the result in the following JSON format, where the key `list` contains an array of objects:
        **Expected JSON format**:
        {{
            "list": [
                {{
                    "text": "Descriptive text for the scene",
                    "image_prompt": "Detailed image generation prompt, described in English"
                }},
                {{
                    "text": "Another scene description text",
                    "image_prompt": "Another detailed image generation prompt in English"
                }}
            ]
        }}
        **Requirements**:
        1. The root object must contain a key named `list`, and its value must be an array of scene objects.
        2. Each object in the `list` array must include:
            - `text`: A descriptive text for the scene, written in {languageValue}.
            - `image_prompt`: A detailed prompt for generating an image, written in English.
        3. Ensure the JSON format matches the above example exactly. Avoid extra fields or incorrect key names like `cimage_prompt` or `inage_prompt`.
        **Important**:
        - If there is only one scene, the array under `list` should contain a single object.
        - The output must be a valid JSON object. Do not include explanations, comments, or additional content outside the JSON.
        Example output:
        {{
            "list": [
                {{
                    "text": "Scene description text",
                    "image_prompt": "Detailed image generation prompt in English"
                }}
            ]
        }}
        """
# Create the shared module-level service instance.
llm_service = LLMService()