Spaces:

DMindAI
/

DMind-1-mini

Running

File size: 3,200 Bytes

4e937f5
da03023
 
021e3cd
 
da03023
021e3cd
 
 
 
 
 
 
 
 
12d1dd5
021e3cd
e308ec0
12d1dd5
021e3cd
4e937f5
 
 
 
 
 
 
e6d4457
4e937f5
 
 
 
 
 
 
e6d4457
 
 
 
4e937f5
 
da03023
 
 
 
4e937f5
da03023
 
e6d4457
da03023
 
 
 
001ce47
 
da03023
4e937f5
e308ec0
 
 
 
 
ad44100
ed9d3e7
 
12d1dd5
 
 
 
 
2d0b3a0
 
1f1d286
12d1dd5
 
ed9d3e7
 
 
4e937f5
 
 
 
 
 
 
 
 
79facd7
77d21ca
4e937f5
 
 
77d21ca
4e937f5
 
 
 
e6d4457
4e937f5
 
 
 
79facd7

import gradio as gr
import requests
import json
import os
from dotenv import load_dotenv

# Load environment variables from the .env file.
load_dotenv()

# Read the upstream endpoint and its credential from the environment.
API_URL = os.getenv("API_URL")
API_TOKEN = os.getenv("API_TOKEN")

# Fail fast at import time when either required setting is missing or empty.
if not API_URL or not API_TOKEN:
    raise ValueError("make sure API_URL & API_TOKEN")

print("[INFO] starting:")
print(f"[INFO] API_URL: {API_URL[:6]}...{API_URL[-12:]}")
# Do not echo credential material into the logs. The previous output printed
# the first and last 10 characters of the token, which is the entire token
# whenever it is 20 characters or shorter. Log only presence and length.
print(f"[INFO] API_TOKEN: <set, {len(API_TOKEN)} chars>")

"""
NOTE: `respond` below does not use `huggingface_hub`; it POSTs directly to API_URL with `requests`.
For background on `huggingface_hub` Inference API support, see the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

# (connect, read) timeout in seconds for the upstream call. The read budget is
# generous because the request is non-streaming, so the whole completion has to
# be generated before any bytes come back. Without a timeout, a stalled
# upstream would block this handler indefinitely.
_REQUEST_TIMEOUT = (10, 600)


def respond(
    message,
    history: list[dict],  # prior turns in the "messages" (role/content) format
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Send the conversation to the upstream chat API and return its reply.

    Args:
        message: The current user message.
        history: Earlier turns as dicts carrying "role" and "content" keys.
        system_message: Text placed first in the conversation as the system turn.
        max_tokens: Upper bound on generated tokens, forwarded as "max_tokens".
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling threshold forwarded to the API.

    Returns:
        The assistant's reply text. When the reply contains both "<think>" and
        "</think>", only the text after the last "</think>" is returned.
        Returns "Service temporarily unavailable" when the API answers with a
        non-200 status or a response without usable content, and
        "Service error occurred" when the request or response parsing raises.
    """
    messages = [{"role": "system", "content": system_message}]

    # Forward only the role/content pair of each earlier turn so that any other
    # keys present on the history dicts are not sent to the upstream API.
    # NOTE(review): assumes every history entry has "role" and "content" keys.
    messages.extend(
        {"role": turn["role"], "content": turn["content"]}
        for turn in (history or [])
    )

    # Append the current user message last.
    messages.append({"role": "user", "content": message})

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_TOKEN}",
    }

    data = {
        "model": "/data/DMind-1-mini",
        "stream": False,
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": 20,
        "min_p": 0.1,
        # Honour the caller-supplied limit; this used to be hard-coded to
        # 16384, which made the "Max new tokens" control a no-op.
        "max_tokens": int(max_tokens),
    }

    print("[INFO] process user msg...")
    print(f"[INFO] sysMsg: {system_message}")
    print(f"[INFO] userMsg: {message}")
    print(f"[INFO] modelParam: temperature={temperature}, top_p={top_p}")
    print(f"[INFO] reqData: {data}")

    try:
        with requests.post(
            API_URL, headers=headers, json=data, timeout=_REQUEST_TIMEOUT
        ) as r:
            if r.status_code != 200:
                # Record the failure so it can be diagnosed from the logs.
                print(f"[ERROR] upstream returned HTTP {r.status_code}")
                return "Service temporarily unavailable"

            payload = r.json()
            choices = payload.get("choices") if isinstance(payload, dict) else None
            if not choices:
                return "Service temporarily unavailable"

            content = choices[0].get("message", {}).get("content", "")
            if not content:
                return "Service temporarily unavailable"

            # Drop the reasoning section: keep only what follows the last
            # closing tag when both tags are present.
            if "<think>" in content and "</think>" in content:
                content = content.rpartition("</think>")[2].strip()
            print(f"[INFO] response: {content}")
            return content
    except Exception as e:
        # Top-level boundary for the UI handler: log the error and return a
        # friendly message instead of surfacing a traceback in the chat.
        print(f"[ERROR] Request error: {e}")
        return "Service error occurred"


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Extra controls rendered with the chat box. Their values are handed to
# `respond` after `message` and `history`, in the order listed here:
# system_message, max_tokens, temperature, top_p.
_extra_controls = [
    gr.Textbox(label="System message", value="You are a friendly Chatbot."),
    gr.Slider(
        label="Max new tokens",
        minimum=1,
        maximum=32768,
        step=1,
        value=16384,
    ),
    gr.Slider(
        label="Temperature",
        minimum=0.1,
        maximum=4.0,
        step=0.1,
        value=0.6,
    ),
    gr.Slider(label="Top-p (nucleus sampling)", minimum=0.1, maximum=1.0, step=0.05, value=0.95),
]

# Chat UI wired to `respond`; type="messages" selects the role/content
# dict format for the history passed to the handler.
demo = gr.ChatInterface(
    fn=respond,
    type="messages",
    additional_inputs=_extra_controls,
)


# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()