Spaces:
Running
Running
File size: 3,200 Bytes
4e937f5 da03023 021e3cd da03023 021e3cd 12d1dd5 021e3cd e308ec0 12d1dd5 021e3cd 4e937f5 e6d4457 4e937f5 e6d4457 4e937f5 da03023 4e937f5 da03023 e6d4457 da03023 001ce47 da03023 4e937f5 e308ec0 ad44100 ed9d3e7 12d1dd5 2d0b3a0 1f1d286 12d1dd5 ed9d3e7 4e937f5 79facd7 77d21ca 4e937f5 77d21ca 4e937f5 e6d4457 4e937f5 79facd7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import gradio as gr
import requests
import json
import os
from dotenv import load_dotenv
# Load environment variables from a local .env file, if one is present.
load_dotenv()

# Read the endpoint configuration from the environment.
API_URL = os.getenv("API_URL")
API_TOKEN = os.getenv("API_TOKEN")

# Fail fast at import time when either required setting is missing or empty.
if not API_URL or not API_TOKEN:
    raise ValueError("make sure API_URL & API_TOKEN")

# Never write a usable portion of the bearer token to the logs: reveal only a
# short prefix/suffix, and only when the token is long enough that those eight
# characters are a small fraction of it. Shorter tokens are masked entirely
# (slicing a short string from both ends would otherwise print all of it).
if len(API_TOKEN) >= 24:
    _masked_token = f"{API_TOKEN[:4]}...{API_TOKEN[-4:]}"
else:
    _masked_token = "***"

print("[INFO] starting:")
print(f"[INFO] API_URL: {API_URL[:6]}...{API_URL[-12:]}")
print(f"[INFO] API_TOKEN: {_masked_token}")
"""
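NOTE: the code in this file does not call `huggingface_hub`; `respond` posts the chat request to API_URL directly with `requests`.
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference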
"""
def respond(
    message,
    history: list[dict],  # prior turns as role/content dicts ("messages" format)
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Send the conversation to the chat endpoint and return the reply text.

    Args:
        message: The user's newest message.
        history: Earlier turns of the conversation; each entry is appended
            to the request's ``messages`` list as-is.
        system_message: Text sent as the ``system`` message.
        max_tokens: Upper bound on generated tokens, forwarded to the API.
            A falsy value falls back to 16384.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling threshold forwarded to the API.

    Returns:
        The reply content with any ``<think>...</think>`` section removed,
        ``"Service temporarily unavailable"`` when the endpoint answers with
        a non-200 status or without usable content, or
        ``"Service error occurred"`` when the request itself fails.
    """
    # Conversation order: system prompt, previous turns, then the new message.
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_TOKEN}",
    }
    data = {
        "model": "/data/DMind-1-mini",
        "stream": False,
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": 20,
        "min_p": 0.1,
        # Honor the caller's limit instead of a hard-coded value; sliders may
        # deliver floats, so coerce to int. 16384 remains the fallback.
        "max_tokens": int(max_tokens) if max_tokens else 16384,
    }

    print("[INFO] process user msg...")
    print(f"[INFO] sysMsg: {system_message}")
    print(f"[INFO] userMsg: {message}")
    print(f"[INFO] modelParam: temperature={temperature}, top_p={top_p}")
    print(f"[INFO] reqData: {data}")

    # This is the UI boundary: any failure is logged and turned into a
    # fallback string rather than propagated to the caller.
    try:
        # (connect, read) timeout in seconds. Without one, a stalled upstream
        # blocks this call forever; the read limit is generous because the
        # request is non-streaming and long generations take a while.
        with requests.post(
            API_URL, headers=headers, json=data, timeout=(10, 600)
        ) as r:
            if r.status_code != 200:
                print(f"[ERROR] API returned HTTP {r.status_code}")
                return "Service temporarily unavailable"

            json_response = r.json()
            if 'choices' not in json_response or len(json_response['choices']) == 0:
                print("[ERROR] API response contains no choices")
                return "Service temporarily unavailable"

            content = json_response['choices'][0].get('message', {}).get('content', '')
            if not content:
                print("[ERROR] API response contains no message content")
                return "Service temporarily unavailable"

            # Drop the model's reasoning section; keep only the text that
            # follows the last closing tag.
            if '<think>' in content and '</think>' in content:
                content = content.split('</think>')[-1].strip()
            print(f"[INFO] response: {content}")
            return content
    except Exception as e:
        print(f"[ERROR] Request error: {e}")
        return "Service error occurred"
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Extra controls shown with the chat box. Their values are passed to
# `respond` after (message, history), in this order.
_additional_inputs = [
    gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
    gr.Slider(minimum=1, maximum=32768, value=16384, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.6, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
]

# type="messages" selects the role/content dict format for the chat history.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=_additional_inputs,
    type="messages",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|