File size: 3,757 Bytes
4e937f5
da03023
 
021e3cd
 
da03023
021e3cd
 
 
 
 
 
 
 
 
ebfa154
021e3cd
e308ec0
63850fe
021e3cd
4e937f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da03023
 
 
 
4e937f5
da03023
 
63850fe
da03023
 
 
 
 
 
4e937f5
e308ec0
 
 
 
 
ad44100
ed9d3e7
 
 
 
 
9cb8eea
 
 
 
 
 
 
8709f65
9cb8eea
 
8709f65
 
 
 
 
 
ed9d3e7
 
 
4e937f5
 
 
 
 
 
 
 
 
 
365ab23
4e937f5
 
 
365ab23
4e937f5
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import json
import os
import re

import gradio as gr
import requests
from dotenv import load_dotenv

# Load environment variables from the .env file.
load_dotenv()

# Read the endpoint configuration from the environment.
API_URL = os.getenv("API_URL")
API_TOKEN = os.getenv("API_TOKEN")

# Fail fast at import time when required configuration is missing.
if not API_URL or not API_TOKEN:
    raise ValueError("请确保设置了环境变量 API_URL 和 API_TOKEN")


def _masked(value: str, head: int, tail: int) -> str:
    """Return *value* with its middle elided, for safe logging.

    Only the first ``head`` and last ``tail`` characters are shown. When the
    value is too short for the two slices to be disjoint, showing both would
    reveal the whole value, so it is replaced entirely by asterisks.
    """
    if len(value) <= head + tail:
        return "*" * len(value)
    return f"{value[:head]}...{value[-tail:]}"


print("[INFO] starting:")
print(f"[INFO] API_URL: {_masked(API_URL, 6, 13)}")
# Show only the first and last 10 characters of the token.
print(f"[INFO] API_TOKEN: {_masked(API_TOKEN, 10, 10)}")

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

# (connect, read) timeouts in seconds, so a stalled upstream cannot hang the
# chat handler forever. The read timeout is generous because the request is
# non-streaming and the whole completion must be generated before any reply.
_REQUEST_TIMEOUT = (10, 300)

# Matches a model "thinking" section: a complete <think>...</think> pair, or
# an unterminated <think> that runs to the end of the text (output cut off by
# the token limit). DOTALL is required because the section spans newlines.
_THINK_BLOCK_RE = re.compile(r"<think>.*?(?:</think>|\Z)", re.DOTALL)


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Send the conversation to the configured API and return the reply text.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs;
            empty entries in a pair are skipped.
        system_message: Text sent as the leading ``system`` message.
        max_tokens: Upper bound on generated tokens, forwarded to the API
            as ``max_tokens``.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.

    Returns:
        The assistant reply with any ``<think>`` section removed, or a short
        fixed error string when the request fails, the status is not 200,
        the response has no choices, or nothing remains after cleaning.
    """
    messages = [{"role": "system", "content": system_message}]

    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_TOKEN}",
    }

    data = {
        "model": "/data/DMind-1-mini",
        "stream": False,  # non-streaming: the full reply arrives in one response
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": 20,
        "min_p": 0.1,
    }

    print("[INFO] process user msg...")
    print(f"[INFO] sysMsg: {system_message}")
    print(f"[INFO] userMsg: {message}")
    print(f"[INFO] modelParam: temperature={temperature}, top_p={top_p}")
    print(f"[INFO] reqData: {data}")

    try:
        with requests.post(
            API_URL, headers=headers, json=data, timeout=_REQUEST_TIMEOUT
        ) as r:
            if r.status_code != 200:
                print(f"[ERROR] Unexpected status code: {r.status_code}")
                return "Unexpected status code from API"

            json_response = r.json()
            choices = json_response.get("choices")
            if not choices:
                print("[ERROR] No choices found in API response")
                return "No choices in the response"

            # `message` or `content` may be absent or null; treat both as
            # empty text so the "no content" branch below handles them.
            response_content = (choices[0].get("message") or {}).get("content") or ""

            # Drop the <think> section and its contents.
            clean_response = _THINK_BLOCK_RE.sub("", response_content).strip()
            if not clean_response:
                print("[ERROR] No cleaned content found in API response")
                return "No content in the response after cleaning"

            print(f"[INFO] Cleaned API response content: {clean_response}")
            return clean_response
    except Exception as e:
        # UI boundary: report any failure (network, timeout, malformed JSON)
        # as a chat message instead of crashing the handler.
        print(f"[ERROR] Request error: {e}")
        return "Service error occurred"


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Extra controls for the chat UI. Their order matches the parameters that
# `respond` takes after (message, history): system_message, max_tokens,
# temperature, top_p.
_extra_controls = [
    gr.Textbox(label="System message", value="You are a friendly Chatbot."),
    gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
    gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        minimum=0.1,
        maximum=1.0,
        step=0.05,
        value=0.96,
    ),
]

demo = gr.ChatInterface(fn=respond, additional_inputs=_extra_controls)


# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()