Spaces:
Running
Running
File size: 3,757 Bytes
4e937f5 da03023 021e3cd da03023 021e3cd ebfa154 021e3cd e308ec0 63850fe 021e3cd 4e937f5 da03023 4e937f5 da03023 63850fe da03023 4e937f5 e308ec0 ad44100 ed9d3e7 9cb8eea 8709f65 9cb8eea 8709f65 ed9d3e7 4e937f5 365ab23 4e937f5 365ab23 4e937f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
import gradio as gr
import requests
import json
import os
from dotenv import load_dotenv
# Load environment variables from the .env file
load_dotenv()


def _mask_secret(value, head, tail):
    """Return ``value`` showing only its first ``head`` and last ``tail`` characters.

    When the value is too short for the two visible slices to stay disjoint,
    printing both would reveal the entire value, so it is masked completely.
    """
    if len(value) <= head + tail:
        return "***"
    return f"{value[:head]}...{value[-tail:]}"


# Read the configuration from environment variables
API_URL = os.getenv("API_URL")
API_TOKEN = os.getenv("API_TOKEN")

# Both settings are required; the message below asks the operator to set
# the API_URL and API_TOKEN environment variables.
if not API_URL or not API_TOKEN:
    raise ValueError("请确保设置了环境变量 API_URL 和 API_TOKEN")

print("[INFO] starting:")
print(f"[INFO] API_URL: {_mask_secret(API_URL, 6, 13)}")
# Only the first and last 10 characters of the token are ever shown
print(f"[INFO] API_TOKEN: {_mask_secret(API_TOKEN, 10, 10)}")
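"""
The chat handler below sends requests directly to the endpoint configured in API_URL using `requests`; it does not go through `huggingface_hub`.
For background on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""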
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Send the conversation to the chat endpoint and return the reply text.

    Args:
        message: The newest user message.
        history: Earlier turns. Each entry is either a ``(user, assistant)``
            pair or a ``{"role": ..., "content": ...}`` dict; empty parts
            are skipped.
        system_message: Text sent as the leading ``system`` message.
        max_tokens: Upper bound on generated tokens, forwarded to the API as
            ``max_tokens`` (left out of the request when ``None``).
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.

    Returns:
        The assistant reply with every ``<think>...</think>`` section
        removed, or a short fixed error string when the status code is not
        200, the response has no usable content, or sending/parsing fails
        (those failures are logged and reported as "Service error occurred").
    """
    import re  # function-scope import, as in the original block

    # Without a timeout `requests` waits forever on a stalled server.
    # (connect, read) in seconds; the read side is generous because the
    # whole completion is produced before the non-streaming response returns.
    request_timeout = (10, 300)

    messages = [{"role": "system", "content": system_message}]
    for turn in history or []:
        if isinstance(turn, dict):
            # NOTE(review): role/content dicts are what gradio's "messages"
            # history format produces - confirm against the gradio version
            # in use.
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and content:
                messages.append({"role": role, "content": content})
            continue
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": message})

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_TOKEN}",
    }
    data = {
        "model": "/data/DMind-1-mini",
        "stream": False,  # non-streaming: the reply arrives as one JSON body
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": 20,
        "min_p": 0.1,
    }
    # The caller-supplied token limit was previously accepted but never sent.
    if max_tokens is not None:
        data["max_tokens"] = int(max_tokens)

    print(f"[INFO] process user msg...")
    print(f"[INFO] sysMsg: {system_message}")
    print(f"[INFO] userMsg: {message}")
    print(f"[INFO] modelParam: temperature={temperature}, top_p={top_p}")
    print(f"[INFO] reqData: {data}")

    # Broad catch on purpose: this is the UI boundary, and any failure should
    # become a chat message rather than an unhandled exception.
    try:
        with requests.post(
            API_URL, headers=headers, json=data, timeout=request_timeout
        ) as r:
            if r.status_code != 200:
                print(f"[ERROR] Unexpected status code: {r.status_code}")
                return "Unexpected status code from API"

            json_response = r.json()
            choices = (
                json_response.get("choices")
                if isinstance(json_response, dict)
                else None
            )
            if not choices:
                print("[ERROR] No choices found in API response")
                return "No choices in the response"

            # `message` or `content` may be present but null; treat as empty.
            first_message = choices[0].get("message") or {}
            response_content = first_message.get("content") or ""

            # Strip <think> tags together with their content. DOTALL lets
            # `.` cross newlines, so multi-line reasoning is removed too.
            clean_response = re.sub(
                r"<think>.*?</think>", "", response_content, flags=re.DOTALL
            ).strip()
            if not clean_response:
                print("[ERROR] No cleaned content found in API response")
                return "No content in the response after cleaning"

            print(f"[INFO] Cleaned API response content: {clean_response}")
            return clean_response
    except Exception as e:
        print(f"[ERROR] Request error: {e}")
        return "Service error occurred"
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Extra controls for the chat UI. They are listed in the same order as the
# trailing parameters of `respond`: system_message, max_tokens, temperature,
# top_p.
_system_message_box = gr.Textbox(
    value="You are a friendly Chatbot.",
    label="System message",
)
_max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=2048,
    value=512,
    step=1,
    label="Max new tokens",
)
_temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=4.0,
    value=0.7,
    step=0.1,
    label="Temperature",
)
_top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.96,
    step=0.05,
    label="Top-p (nucleus sampling)",
)

# Chat interface wired to `respond` with the extra controls above.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|