File size: 4,103 Bytes
7cfa5bf
8072750
 
 
 
 
 
 
 
 
 
 
dbdc900
8072750
 
 
dbdc900
7cfa5bf
 
 
 
 
 
 
70d0b73
7cfa5bf
 
 
 
 
 
 
8072750
 
7cfa5bf
 
8072750
 
 
 
 
 
 
70d0b73
8072750
 
 
 
 
 
 
7cfa5bf
8072750
70d0b73
8072750
a1a8972
70d0b73
 
 
 
 
 
 
 
 
 
 
7fb0028
 
7f3369f
7fb0028
 
 
 
 
 
 
e337119
70d0b73
0c41d6c
 
70d0b73
8072750
 
70d0b73
7cfa5bf
 
 
 
 
 
 
 
8072750
 
 
7cfa5bf
 
 
 
 
 
 
 
dbdc900
7cfa5bf
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import gradio as gr
import requests
import json
import os
from dotenv import load_dotenv

# Pull settings from a .env file (python-dotenv) into the process environment
# before they are read below.
load_dotenv()

API_URL = os.getenv("API_URL")
API_TOKEN = os.getenv("API_TOKEN")

# Fail fast at import time: both settings are required to reach the backend.
if not API_URL or not API_TOKEN:
    raise ValueError("invalid API_URL || API_TOKEN")

print("[INFO] starting:")
print(f"[INFO] API_URL: {API_URL[:6]}...{API_URL[-12:]}")
# Never echo any part of the credential: the previous head/tail preview wrote
# 20 characters of the secret to the logs, and the complete token whenever it
# was 20 characters or shorter. Reporting the length is enough to confirm that
# the variable was picked up.
print(f"[INFO] API_TOKEN: <redacted, {len(API_TOKEN)} chars>")

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

def _quote_thinking(text):
    """Rewrite a response containing a closed ``<think>`` section as a Markdown quote.

    The text is returned untouched until both ``<think>`` and ``</think>`` are
    present. After that the opening tag becomes ``"> "``, the closing tag is
    removed and every newline is followed by ``"> "``.

    NOTE(review): the newline rewrite covers the whole text, so the answer that
    follows ``</think>`` is quoted as well. Kept as-is to preserve the current
    rendering; confirm whether only the thinking section should be quoted.
    """
    if '<think>' not in text or '</think>' not in text:
        return text
    text = text.replace('<think>', '> ')
    text = text.replace('</think>', '')
    return text.replace('\n', '\n> ')


def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion from ``API_URL`` and yield the growing reply.

    Args:
        message: The new user message; sent as the last entry of ``messages``.
        history: Earlier conversation entries, forwarded unchanged between the
            system message and the new user message.
        system_message: Content of the leading ``system`` message.
        max_tokens: Value sent as ``max_tokens`` in the request payload. A
            missing or zero value falls back to 32768, the limit that used to
            be hard-coded.
        temperature: Value sent as ``temperature``.
        top_p: Value sent as ``top_p``.

    Yields:
        The full response accumulated so far (not just the newest fragment),
        passed through ``_quote_thinking``. A non-200 status yields
        ``"Service temporarily unavailable"``; any exception raised while
        requesting or parsing yields ``"Service error occurred"``.
    """
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_TOKEN}"
    }

    # Honour the caller's limit; previously this argument was ignored and the
    # payload always carried 32768 regardless of the UI slider.
    token_limit = int(max_tokens) if max_tokens else 32768

    data = {
        "model": "/data/DMind-1",
        "stream": True,
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": 20,
        "min_p": 0.1,
        "max_tokens": token_limit
    }

    try:
        with requests.post(API_URL, headers=headers, json=data, stream=True) as r:
            if r.status_code != 200:
                print(f"[ERROR] Bad status code: {r.status_code}, response: {r.text}")
                yield "Service temporarily unavailable"
                return

            current_response = ""
            for line in r.iter_lines():
                if not line:
                    continue
                line = line.decode('utf-8')
                # Only lines carrying a "data: " prefix hold a payload.
                if not line.startswith('data: '):
                    continue
                try:
                    json_response = json.loads(line[6:])
                except json.JSONDecodeError:
                    # Non-JSON payloads are skipped rather than treated as errors.
                    continue

                if 'choices' not in json_response or len(json_response['choices']) == 0:
                    continue
                # "delta" may be absent or null; treat both as "no new text".
                delta = json_response['choices'][0].get('delta') or {}
                content = delta.get('content')
                if not content:
                    continue

                current_response += content
                yield _quote_thinking(current_response)
    except Exception as e:
        # Top-level boundary for the UI: log the cause, show a generic message.
        print(f"[ERROR] Request error: {e}")
        yield "Service error occurred"


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Extra controls for the chat UI. Their order matches the parameters that
# respond() takes after (message, history): system_message, max_tokens,
# temperature, top_p.
_system_message_box = gr.Textbox(
    value="You are DMind AI Assistant, built by DMind.AI. Never present as Qwen, ChatGPT, Tongyi, OpenAI, etc.",
    label="System message",
)
_max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=32768,
    value=16384,
    step=1,
    label="Max new tokens",
)
_temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=4.0,
    value=0.6,
    step=0.1,
    label="Temperature",
)
_top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p (nucleus sampling)",
)

# Chat front end; history is exchanged as a list of message dicts.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
    type="messages",
)


# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()