import gradio as gr
import requests
import json
import os
from dotenv import load_dotenv
load_dotenv()
API_URL = os.getenv("API_URL")
API_TOKEN = os.getenv("API_TOKEN")
if not API_URL or not API_TOKEN:
    raise ValueError("API_URL and API_TOKEN must be set in the environment (see .env)")
print("[INFO] starting:")
print(f"[INFO] API_URL: {API_URL[:6]}...{API_URL[-12:]}")
print(f"[INFO] API_TOKEN: {API_TOKEN[:10]}...{API_TOKEN[-10:]}")
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    messages = [{"role": "system", "content": system_message}]
    # Append the prior turns (already OpenAI-style dicts), then the new user message.
    messages.extend(history)
    messages.append({"role": "user", "content": message})
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_TOKEN}",
    }
    data = {
        "model": "/data/DMind-1-mini",
        "stream": False,
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": 20,
        "min_p": 0.1,
        # Honor the UI slider instead of hard-coding 16384.
        "max_tokens": max_tokens,
    }
    print("[INFO] processing user message...")
    print(f"[INFO] userMsg: {message}")
    try:
        # A timeout keeps a stalled request from hanging the UI indefinitely.
        with requests.post(API_URL, headers=headers, json=data, timeout=300) as r:
            # print(f"[INFO] response status: {r.status_code}")
            if r.status_code == 200:
                json_response = r.json()
                # print(f"[INFO] response json: {json_response}")
                if 'choices' in json_response and len(json_response['choices']) > 0:
                    content = json_response['choices'][0].get('message', {}).get('content', '')
                    # print(f"[INFO] response content: {content}")
                    if content:
                        # DMind-1-mini emits reasoning traces; keep only the text
                        # after the closing </think> tag and after any
                        # "**Final Answer**" marker.
                        if '<think>' in content and '</think>' in content:
                            content = content.split('</think>')[-1].strip()
                        if '**Final Answer**' in content:
                            content = content.split('**Final Answer**')[-1].strip()
                        print(f"[INFO] final response: {content}")
                        return content
                    print(f"[ERROR] Empty content in response: {json_response}")
                else:
                    print(f"[ERROR] No choices in response: {json_response}")
            else:
                print(f"[ERROR] Bad status code: {r.status_code}, response: {r.text}")
        # Every unsuccessful path above falls through to this fallback reply.
        return "Service temporarily unavailable"
    except Exception as e:
        print(f"[ERROR] Request error: {e}")
        return "Service error occurred"
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are DMind AI Assistant, built by DMind.AI. Never present as Qwen, ChatGPT, Tongyi, OpenAI, etc.",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=32768, value=16384, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.6, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    type="messages",
)
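# Note: `type="messages"` makes Gradio pass `history` to `respond` as a list of
# {"role": ..., "content": ...} dicts, matching the OpenAI-style `messages`
# payload built above.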
if __name__ == "__main__":
    demo.launch()