import gradio as gr
import requests
import json
import os
from dotenv import load_dotenv

# Load environment variables from the .env file
load_dotenv()

# Read the API configuration from environment variables
API_URL = os.getenv("API_URL")
API_TOKEN = os.getenv("API_TOKEN")

# Validate that the required environment variables are set
if not API_URL or not API_TOKEN:
    raise ValueError("Please set the API_URL and API_TOKEN environment variables")

print(f"[INFO] starting:")
print(f"[INFO] API_URL: {API_URL[:6]}...{API_URL[-13:]}")
print(f"[INFO] API_TOKEN: {API_TOKEN[:10]}...{API_TOKEN[-10:]}")  # 只显示token的前10位和后10位

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    messages = [{"role": "system", "content": system_message}]

    # Rebuild the transcript from Gradio's (user, assistant) history pairs
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_TOKEN}"
    }

    data = {
        "model": "/data/DMind-1-mini",
        "stream": False,
        "messages": messages,
        "max_tokens": max_tokens,  # wire the "Max new tokens" slider into the request
        "temperature": temperature,
        "top_p": top_p,
        "top_k": 20,   # top_k and min_p are backend-specific sampling extensions
        "min_p": 0.1   # (not part of the base OpenAI schema)
    }

    print(f"[INFO] process user msg...")
    print(f"[INFO] sysMsg: {system_message}")
    print(f"[INFO] userMsg: {message}")
    print(f"[INFO] modelParam: temperature={temperature}, top_p={top_p}")
    print(f"[INFO] reqData: {data}")

    try:
        # 60-second timeout so a stalled backend doesn't hang the UI
        with requests.post(API_URL, headers=headers, json=data, timeout=60) as r:
            if r.status_code != 200:
                print(f"[ERROR] API Error: {r.status_code} - {r.text}")
                return "Service error"
            json_response = r.json()
            # print(f"[DEBUG] API Response: {json.dumps(json_response, indent=2)}")
            if 'choices' in json_response and len(json_response['choices']) > 0:
                response = json_response['choices'][0].get('message', {}).get('content', '')
                if response:
                    return "hello"
            return "No response from model"
    except Exception as e:
        print(f"[ERROR] Request error: {e}")
        return "Service error occurred"


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
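
# Quick smoke test without the UI (hypothetical invocation; the configured
# backend must be reachable):
#
#   print(respond("Hello", [], "You are a friendly Chatbot.", 512, 0.7, 0.95))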


if __name__ == "__main__":
    demo.launch()