Spaces:
Running
Running
File size: 3,227 Bytes
4e937f5 da03023 021e3cd da03023 021e3cd 12d1dd5 021e3cd e308ec0 12d1dd5 021e3cd 4e937f5 da03023 4e937f5 da03023 12d1dd5 da03023 4e937f5 e308ec0 ad44100 ed9d3e7 12d1dd5 1f1d286 12d1dd5 ed9d3e7 4e937f5 365ab23 4e937f5 365ab23 4e937f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import gradio as gr
import requests
import json
import os
from dotenv import load_dotenv
def _mask_secret(value, visible=4):
    """Return *value* with only its first and last *visible* characters shown.

    Values too short to mask meaningfully are hidden completely, so the head
    and tail slices can never overlap and reveal the whole secret.
    """
    if len(value) < visible * 4:
        return "*" * 8
    return f"{value[:visible]}...{value[-visible:]}"


# Load environment variables from the .env file
load_dotenv()
# Read the configuration from environment variables
API_URL = os.getenv("API_URL")
API_TOKEN = os.getenv("API_TOKEN")
# Validate the required environment variables
if not API_URL or not API_TOKEN:
    raise ValueError("make sure API_URL & API_TOKEN")
print("[INFO] starting:")
print(f"[INFO] API_URL: {API_URL[:6]}...{API_URL[-12:]}")
# Only a short prefix/suffix of the token is logged; short tokens are fully hidden
print(f"[INFO] API_TOKEN: {_mask_secret(API_TOKEN)}")
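"""
Chat backend: `respond` sends the conversation to the HTTP endpoint configured through API_URL / API_TOKEN using `requests`.
For more information on `huggingface_hub` Inference API support (not used by this file), please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""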
# (connect, read) timeout in seconds for the upstream call. The read timeout is
# generous because the request is non-streaming: the endpoint only answers once
# the whole completion has been generated.
_REQUEST_TIMEOUT = (10, 300)


def _build_messages(message, history, system_message):
    """Return the transcript: system prompt, prior turns, then the new user message."""
    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        # Each turn is a (user, assistant) pair; either side may be empty.
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": message})
    return messages


def _extract_reply(payload):
    """Return the assistant text from a response payload, or None when there is none."""
    if "choices" not in payload or not payload["choices"]:
        return None
    content = payload["choices"][0].get("message", {}).get("content", "")
    if not content:
        return None
    # Drop the model's reasoning section and keep only the text after it.
    if "<think>" in content and "</think>" in content:
        content = content.split("</think>")[-1].strip()
    return content


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Send the conversation to the configured endpoint and return the reply text.

    Args:
        message: The new user message.
        history: Previous turns as (user, assistant) pairs.
        system_message: Content of the leading system message.
        max_tokens: Accepted from the UI but not included in the request payload.
        temperature: Sampling temperature forwarded to the endpoint.
        top_p: Nucleus-sampling value forwarded to the endpoint.

    Returns:
        The reply text with any ``<think>...</think>`` section removed,
        "Service temporarily unavailable" when the endpoint answers without
        usable content or with a non-200 status, or "Service error occurred"
        when the request or response parsing raises (including a timeout).
    """
    messages = _build_messages(message, history, system_message)
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_TOKEN}",
    }
    # NOTE(review): max_tokens is not forwarded, so the "Max new tokens" slider
    # has no effect. Confirm whether that is intentional before adding it here.
    data = {
        "model": "/data/DMind-1-mini",
        "stream": False,  # non-streaming mode
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": 20,
        "min_p": 0.1,
    }
    print("[INFO] process user msg...")
    print(f"[INFO] sysMsg: {system_message}")
    print(f"[INFO] userMsg: {message}")
    print(f"[INFO] modelParam: temperature={temperature}, top_p={top_p}")
    print(f"[INFO] reqData: {data}")
    try:
        # Without a timeout a stalled upstream would block this handler forever.
        with requests.post(
            API_URL, headers=headers, json=data, timeout=_REQUEST_TIMEOUT
        ) as r:
            if r.status_code != 200:
                print(f"[ERROR] Unexpected status code: {r.status_code}")
            else:
                reply = _extract_reply(r.json())
                if reply is not None:
                    print(f"[INFO] response: {reply}")
                    return reply
        return "Service temporarily unavailable"
    except Exception as e:
        # UI boundary: report a generic failure instead of surfacing a traceback.
        print(f"[ERROR] Request error: {e}")
        return "Service error occurred"
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Extra controls for the chat UI, listed in the order respond() declares its
# system_message, max_tokens, temperature and top_p parameters.
_extra_controls = [
    gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
    gr.Slider(
        minimum=1,
        maximum=2048,
        value=512,
        step=1,
        label="Max new tokens",
    ),
    gr.Slider(
        minimum=0.1,
        maximum=4.0,
        value=0.7,
        step=0.1,
        label="Temperature",
    ),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.96, step=0.05, label="Top-p (nucleus sampling)"),
]

# Chat interface that uses respond() as its callback.
demo = gr.ChatInterface(fn=respond, additional_inputs=_extra_controls)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|