import spaces  # Hugging Face Spaces helper (provides the @spaces.GPU decorator on ZeroGPU hardware)
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory, BasicChatMessageStore, BasicChatHistoryStrategy
# Download the GGUF model weights from the Hugging Face Hub
repo_id = "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.3-Q5_K_M-GGUF"
filename = "nqlsg-qwen2.5-14b-megafusion-v9.3-q5_k_m.gguf"
model_path = hf_hub_download(repo_id=repo_id, filename=filename)
# The Llama instance is created lazily on the first request so the Space starts quickly
llm = None
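# Assumption: this Space runs on ZeroGPU hardware, where a GPU is only attached
# inside functions decorated with @spaces.GPU (the `spaces` import suggests this
# was intended). Without the decorator, n_gpu_layers offloading can fail at runtime.
@spaces.GPU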
def generate_response(
    message,
    history: list[dict],  # with type="messages", Gradio passes a list of {"role", "content"} dicts
    system_message: str = (
        "You are the NQLSG-Qwen2.5-14B-MegaFusion large model trained by Lunzima. "
        "Strictly follow the user's instructions and reply in Markdown. "
        "When handling math problems, derive step by step and verify intermediate results; "
        "when writing code, prioritize readability and best practices; "
        "when doing creative writing, use imagination and metaphorical expression."
    ),
    max_tokens: int = 2048,
    temperature: float = 0.6,
    top_k: int = 30,
    top_p: float = 0.9,
    min_p: float = 0.05,
    repeat_penalty: float = 1.2,
):
    try:
        # Qwen2.5 models use the ChatML prompt format, not OpenChat's
        chat_template = MessagesFormatterType.CHATML
        global llm
        if llm is None:
            llm = Llama(
                model_path=model_path,
                n_ctx=32768,      # context window size
                n_batch=512,
                flash_attn=True,
                n_gpu_layers=50,  # offload most layers to the GPU
            )
        provider = LlamaCppPythonProvider(llm)
        settings = provider.get_provider_default_settings()
        settings.stream = True
        settings.max_tokens = max_tokens
        settings.temperature = temperature
        settings.top_k = top_k
        settings.top_p = top_p
        settings.min_p = min_p
        settings.repeat_penalty = repeat_penalty
        # Keep at most the 20 most recent messages in the prompt. Note that the
        # history is rebuilt fresh on every call; earlier turns from Gradio's
        # `history` argument are not replayed into it.
        chat_history_store = BasicChatMessageStore()
        chat_history = BasicChatHistory(
            message_store=chat_history_store,
            chat_history_strategy=BasicChatHistoryStrategy.last_k_messages,
            k=20,
            llm_provider=provider,
        )
        agent = LlamaCppAgent(
            provider,
            system_prompt=system_message,
            chat_history=chat_history,
            predefined_messages_formatter_type=chat_template,
            debug_output=True,
        )
        stream = agent.get_chat_response(
            message,
            system_prompt=system_message,
            chat_history=chat_history,
            llm_sampling_settings=settings,
            returns_streaming_generator=True,
            print_output=False,
        )
        # Accumulate tokens and yield the growing string so Gradio streams the reply
        outputs = ""
        for output in stream:
            outputs += output
            yield outputs
    except Exception as e:
        error_str = f"Error occurred: {e}"
        print(error_str)
        # ChatInterface expects the generator to yield the reply text only,
        # not a (history, message) tuple
        yield error_str
demo = gr.ChatInterface(
    generate_response,
    type="messages",
    title="NQLSG Qwen2.5 14B MegaFusion v9.3 Playground",
)
if __name__ == "__main__":
    # Launch the Gradio app
    demo.launch()
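# A minimal sketch of querying the running app from another process with
# gradio_client, assuming the default local address and gr.ChatInterface's
# standard "/chat" endpoint:
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   reply = client.predict(message="Hello!", api_name="/chat")
#   print(reply)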