# Spaces: Running
import os | |
import gradio as gr | |
from openai import OpenAI | |
# Heading and Markdown blurb passed to the chat interface as its title and
# description.
title = "ERNIE X1 Turbo: BAIDU's Reasoning LLM"
description = """
- Official Website: <https://yiyan.baidu.com/> (UI in Chinese)
- API services: [Qianfan Large Model Platform](https://cloud.baidu.com/product-s/qianfan_home) (cloud platform providing LLM services, UI in Chinese)
- [ERNIE 4.5 Turbo Demo](https://huggingface.co/spaces/PaddlePaddle/ernie_4.5_turbo_demo) | [ERNIE X1 Turbo Demo](https://huggingface.co/spaces/PaddlePaddle/ernie_x1_turbo_demo)
"""

# Model identifier sent with every completion request.
qianfan_model = "ernie-x1-turbo-32k"

# The API key is read from the environment; it is None when QIANFAN_TOKEN is
# not set.
qianfan_api_key = os.environ.get("QIANFAN_TOKEN")

# OpenAI SDK client pointed at the Qianfan v2 base URL.
client = OpenAI(
    api_key=qianfan_api_key,
    base_url="https://qianfan.baidubce.com/v2",
)
def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
):
    """Stream the model's reasoning and answer for one chat turn.

    Builds the request from the system message, the prior turns and the new
    user message, sends it to ``client`` with streaming enabled, and yields
    the accumulated Markdown after every non-empty token.

    Args:
        message: The new user message.
        history: Prior turns as dicts with ``role`` and ``content`` keys
            (the interface is created with ``type='messages'``).
        system_message: Content of the leading system message.
        max_tokens: Passed to the API as ``max_completion_tokens``.

    Yields:
        The full text so far (not a delta): the "Thinking" section alone
        until the first answer token arrives, then the "Thinking" section
        followed by the "Answer" section.
    """
    messages = [{"role": "system", "content": system_message}]
    # Forward only role/content. NOTE(review): Gradio may attach extra keys
    # (e.g. metadata) to history entries; those are not part of the
    # chat-completions message schema, so they are dropped here.
    messages.extend(
        {"role": turn["role"], "content": turn["content"]} for turn in history
    )
    messages.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model=qianfan_model,
        messages=messages,
        max_completion_tokens=max_tokens,
        stream=True,
    )

    reasoning_content = "**Thinking**:\n"
    content = "\n\n**Answer**: \n"
    answer_started = False

    for chunk in response:
        # The streaming schema allows a chunk with an empty choices list;
        # indexing it unconditionally would raise IndexError.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta

        # Read both fields independently. The delta can expose
        # reasoning_content (possibly None/empty) on the same chunk that
        # carries answer text, so an if/elif on attribute presence would
        # silently drop the answer.
        reasoning_token = getattr(delta, "reasoning_content", None)
        answer_token = getattr(delta, "content", None)

        if reasoning_token:
            reasoning_content += reasoning_token
        if answer_token:
            content += answer_token
            answer_started = True

        if reasoning_token or answer_token:
            # Once the answer has begun, keep it in every yield so a late
            # reasoning token cannot make the answer disappear.
            if answer_started:
                yield reasoning_content + content
            else:
                yield reasoning_content
# Extra controls shown with the chat box; their values are passed to
# respond() after (message, history), in this order.
system_message_box = gr.Textbox(label="System message", value="")
max_tokens_slider = gr.Slider(
    label="Max new tokens",
    minimum=2,
    maximum=16384,
    step=1,
    value=10240,
)

# Chat UI wired to respond(); history is exchanged in the 'messages' format.
demo = gr.ChatInterface(
    fn=respond,
    type='messages',
    title=title,
    description=description,
    additional_inputs=[system_message_box, max_tokens_slider],
    concurrency_limit=50,
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()