File size: 3,227 Bytes
4e937f5
da03023
 
021e3cd
 
da03023
021e3cd
 
 
 
 
 
 
 
 
12d1dd5
021e3cd
e308ec0
12d1dd5
021e3cd
4e937f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da03023
 
 
 
4e937f5
da03023
 
12d1dd5
da03023
 
 
 
 
 
4e937f5
e308ec0
 
 
 
 
ad44100
ed9d3e7
 
12d1dd5
 
 
 
 
1f1d286
 
 
12d1dd5
 
ed9d3e7
 
 
4e937f5
 
 
 
 
 
 
 
 
 
365ab23
4e937f5
 
 
365ab23
4e937f5
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import gradio as gr
import requests
import json
import os
from dotenv import load_dotenv

# Load environment variables from the .env file
load_dotenv()

# Read the endpoint configuration from the environment
API_URL = os.getenv("API_URL")
API_TOKEN = os.getenv("API_TOKEN")

# Fail fast when a required environment variable is missing or empty
if not API_URL or not API_TOKEN:
    raise ValueError("make sure API_URL & API_TOKEN")


def _mask(value, head, tail):
    """Return ``value`` with its middle elided, for safe logging.

    Only the first ``head`` and last ``tail`` characters are kept. A value
    too short for those slices to hide at least half of it is replaced
    entirely by asterisks, so the slices can never overlap and print the
    whole value.
    """
    if len(value) < 2 * (head + tail):
        return "*" * len(value)
    return f"{value[:head]}...{value[-tail:]}"


print("[INFO] starting:")
print(f"[INFO] API_URL: {_mask(API_URL, 6, 12)}")
# Show only a few characters of the token: enough to tell keys apart
# without writing a usable part of the secret to the logs.
print(f"[INFO] API_TOKEN: {_mask(API_TOKEN, 4, 4)}")

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Send the conversation to the configured chat endpoint and return the reply.

    Args:
        message: The newest user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs;
            empty entries inside a pair are skipped. ``None`` is treated as
            an empty history.
        system_message: Text sent as the leading system message.
        max_tokens: Upper bound on generated tokens, forwarded to the
            endpoint as ``max_tokens``; left out of the request when ``None``.
        temperature: Sampling temperature forwarded to the endpoint.
        top_p: Nucleus-sampling value forwarded to the endpoint.

    Returns:
        The assistant's reply text, with everything up to the last
        ``</think>`` removed when the reply contains a ``<think>`` block.
        Returns ``"Service temporarily unavailable"`` when the endpoint
        answers with a non-200 status or without usable content, and
        ``"Service error occurred"`` when the request or parsing raises.
    """
    messages = [{"role": "system", "content": system_message}]

    for user_text, assistant_text in (history or []):
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_TOKEN}",
    }

    data = {
        "model": "/data/DMind-1-mini",
        "stream": False,  # non-streaming: the full reply arrives in one response
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": 20,
        "min_p": 0.1,
    }
    # Forward the UI's token limit; previously the value was accepted but
    # never sent, so the "Max new tokens" control had no effect.
    if max_tokens is not None:
        data["max_tokens"] = int(max_tokens)

    print(f"[INFO] process user msg...")
    print(f"[INFO] sysMsg: {system_message}")
    print(f"[INFO] userMsg: {message}")
    print(f"[INFO] modelParam: temperature={temperature}, top_p={top_p}")
    print(f"[INFO] reqData: {data}")

    try:
        # (connect, read) timeout in seconds: without one, a stalled upstream
        # would block this handler forever. The read timeout is generous
        # because a non-streaming reply only arrives once generation is done.
        with requests.post(
            API_URL, headers=headers, json=data, timeout=(10, 300)
        ) as r:
            if r.status_code != 200:
                # Record why the request failed instead of dropping it silently.
                print(f"[ERROR] Unexpected status {r.status_code}: {r.text[:200]}")
                return "Service temporarily unavailable"

            json_response = r.json()
            if 'choices' in json_response and len(json_response['choices']) > 0:
                content = json_response['choices'][0].get('message', {}).get('content', '')
                if content:
                    # Drop the model's reasoning preamble; keep only the text
                    # after the final closing tag.
                    if '<think>' in content and '</think>' in content:
                        content = content.split('</think>')[-1].strip()
                    print(f"[INFO] response: {content}")
                    return content
            return "Service temporarily unavailable"
    except Exception as e:
        # UI boundary: report a generic message to the user and log the cause.
        print(f"[ERROR] Request error: {e}")
        return "Service error occurred"


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Chat UI. The additional inputs are listed in the same order as the extra
# parameters of `respond` (system_message, max_tokens, temperature, top_p).
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(label="System message", value="You are a friendly Chatbot."),
        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.96,
        ),
    ],
)


# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()