File size: 3,644 Bytes
7cfa5bf
8072750
 
 
 
 
 
 
 
 
 
 
dbdc900
8072750
 
 
dbdc900
7cfa5bf
 
 
 
 
 
 
70d0b73
7cfa5bf
 
 
 
 
 
 
8072750
 
7cfa5bf
 
8072750
 
 
 
 
 
 
70d0b73
8072750
 
 
 
 
 
 
7cfa5bf
8072750
 
7cfa5bf
8072750
70d0b73
8072750
a1a8972
70d0b73
 
 
 
 
 
 
 
 
 
 
7f3369f
 
 
 
e337119
70d0b73
e337119
0c41d6c
 
70d0b73
8072750
 
70d0b73
7cfa5bf
 
 
 
 
 
 
 
8072750
 
 
7cfa5bf
 
 
 
 
 
 
 
dbdc900
7cfa5bf
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import gradio as gr
import requests
import json
import os
from dotenv import load_dotenv

# Pull settings from a .env file into the process environment (python-dotenv).
load_dotenv()

# Chat backend endpoint and the bearer token sent with every request.
API_URL = os.getenv("API_URL")
API_TOKEN = os.getenv("API_TOKEN")

# Fail fast at import time: the app cannot serve any request without both values.
if not API_URL or not API_TOKEN:
    raise ValueError("invalid API_URL || API_TOKEN")

print("[INFO] starting:")
print(f"[INFO] API_URL: {API_URL[:6]}...{API_URL[-12:]}")
# Never echo any part of the credential: a head/tail preview of 10+10
# characters reveals a short token in full and needlessly narrows a long one.
print(f"[INFO] API_TOKEN: <redacted, {len(API_TOKEN)} chars>")

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

# (connect, read) timeouts in seconds passed to `requests.post`. The read value
# bounds the wait for the next piece of the streamed body, not the total
# generation time, so long answers still complete while a stalled upstream
# no longer blocks the generator forever.
_REQUEST_TIMEOUT = (10, 300)


def _escape_chunk(text: str) -> str:
    """Escape `<`, `>` and `*` in a streamed text fragment.

    NOTE(review): presumably this keeps the chat UI from interpreting model
    output as HTML tags or Markdown emphasis -- confirm against the renderer.
    `&` is intentionally left untouched, matching the previous behavior.
    """
    return (
        text.replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('*', '\\*')
    )


def _extract_delta_text(raw_line: bytes) -> str:
    """Return the `choices[0].delta.content` text carried by one streamed line.

    Returns "" when the line lacks the `data: ` prefix, when its payload is not
    valid JSON (which also covers any non-JSON end-of-stream marker the server
    may send), or when the chunk carries no text.
    """
    line = raw_line.decode('utf-8')
    if not line.startswith('data: '):
        return ""
    try:
        payload = json.loads(line[6:])
    except json.JSONDecodeError:
        return ""
    if not isinstance(payload, dict):
        return ""
    choices = payload.get('choices')
    if not choices:
        return ""
    # `delta` / `content` may be absent or null; treat both as "no text".
    delta = choices[0].get('delta') or {}
    return delta.get('content') or ""


def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the model's reply to `message` from the backend at `API_URL`.

    Builds a message list (system prompt, prior `history`, then the new user
    message), POSTs it with streaming enabled, and yields the accumulated,
    escaped reply each time a new text fragment arrives.

    Args:
        message: The new user message.
        history: Prior conversation as a list of message dicts, forwarded as-is.
        system_message: Content of the leading system message.
        max_tokens: Generation limit sent as `max_tokens` (converted to int).
        temperature: Sampling temperature forwarded to the backend.
        top_p: Nucleus-sampling value forwarded to the backend.

    Yields:
        The reply text received so far. On a non-200 status yields
        "Service temporarily unavailable"; on any exception during the request
        or stream yields "Service error occurred".
    """
    messages = [
        {"role": "system", "content": system_message},
        *history,
        {"role": "user", "content": message},
    ]

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_TOKEN}",
    }

    data = {
        "model": "/data/DMind-1",
        "stream": True,
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": 20,
        "min_p": 0.1,
        # Honor the caller's limit; this was previously hard-coded to 32768,
        # which made the `max_tokens` argument a no-op.
        "max_tokens": int(max_tokens),
    }

    print("[INFO] process user msg...")
    print(f"[INFO] userMsg: {message}")

    try:
        with requests.post(
            API_URL,
            headers=headers,
            json=data,
            stream=True,
            timeout=_REQUEST_TIMEOUT,
        ) as r:
            if r.status_code != 200:
                print(f"[ERROR] Bad status code: {r.status_code}, response: {r.text}")
                yield "Service temporarily unavailable"
                return

            current_response = ""
            for raw_line in r.iter_lines():
                if not raw_line:
                    # Blank lines separate events in the stream.
                    continue
                fragment = _extract_delta_text(raw_line)
                if not fragment:
                    continue
                current_response += _escape_chunk(fragment)
                # Yield the whole reply so far, not just the new fragment.
                yield current_response
            print(f"[INFO] final response: {current_response}")
    except Exception as e:
        # Top-level boundary for the chat callback: log the failure and show a
        # generic message instead of propagating the error to the UI.
        print(f"[ERROR] Request error: {e}")
        yield "Service error occurred"


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Extra controls for the chat UI. Their order matches the parameters that
# follow `message` and `history` in `respond`:
# system_message, max_tokens, temperature, top_p.
_system_message_input = gr.Textbox(
    value="You are DMind AI Assistant, built by DMind.AI. Never present as Qwen, ChatGPT, Tongyi, OpenAI, etc.",
    label="System message",
)
_max_tokens_input = gr.Slider(
    minimum=1,
    maximum=32768,
    value=16384,
    step=1,
    label="Max new tokens",
)
_temperature_input = gr.Slider(
    minimum=0.1,
    maximum=4.0,
    value=0.6,
    step=0.1,
    label="Temperature",
)
_top_p_input = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p (nucleus sampling)",
)

# Chat application object; `respond` is the callback that produces replies.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        _system_message_input,
        _max_tokens_input,
        _temperature_input,
        _top_p_input,
    ],
    type="messages",
)


# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()