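"""Gradio WebUI for Qwen2-Audio-Instruct, served through DashScope's MultiModalConversation API."""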
import gradio as gr
import modelscope_studio as mgr
from http import HTTPStatus
import os
from dashscope import MultiModalConversation
import dashscope
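
# The DashScope API key is read from the YOUR_API_TOKEN environment variable.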
YOUR_API_TOKEN = os.getenv('YOUR_API_TOKEN')
dashscope.api_key = YOUR_API_TOKEN

def add_text(chatbot, task_history, input):
    """Add the user's text and any attached audio to the chat history."""
    if input.text:
        task_history.append({"role": "user", "content": [{"text": input.text}]})
    for file in input.files:
        task_history.append({"role": "user", "content": [{"audio": file.path}]})
    chatbot.append([{
        "text": input.text,
        "files": input.files,
    }, None])
    return chatbot, task_history, None


# def add_mic(chatbot, task_history, mic):
#     """Add audio to the chat history."""
#     task_history.append({"role": "user", "content": [{"audio": mic}]})
#     chatbot.append((f"[Audio input: {mic}]", None))
#     return chatbot, task_history

def add_file(chatbot, task_history, audio_file):
    """Add audio file to the chat history."""
    task_history.append({"role": "user", "content": [{"audio": audio_file.name}]})
    chatbot.append((f"[Audio file: {audio_file.name}]", None))
    return chatbot, task_history


def reset_user_input():
    """Reset the user input field (only referenced by the commented-out textbox flow)."""
    return gr.update(value='')

def reset_state():
    """Clear the chat display and the model conversation history."""
    # Wired with no inputs (see empty_bin.click below), so it takes no arguments.
    return [], []

def regenerate(chatbot, task_history):
    """Regenerate the last bot response."""
    if task_history and task_history[-1]['role'] == 'assistant':
        task_history.pop()
        chatbot.pop()
    if task_history:
        chatbot, task_history = predict(chatbot, task_history)
    return chatbot, task_history


def predict(chatbot, task_history):
    """Generate a response from the model via DashScope. The returned message content
    may be a string, a dict with a 'text' field, or a list of such dicts; it is
    normalized to plain text before being appended to the histories."""
    response = MultiModalConversation.call(model='qwen2-audio-instruct',
                                           messages=task_history)
    if response.status_code == HTTPStatus.OK:
        output_text = response.output.choices[0].message.content
        if isinstance(output_text, list):
            output_text = next((item.get('text') for item in output_text if 'text' in item), '')
        elif isinstance(output_text, dict):
            output_text = output_text.get('text', '')
        task_history.append({'role': response.output.choices[0].message.role,
                             'content': [{'text': output_text}]})
        chatbot.append((None, output_text))  # Add the response to chatbot
        return chatbot, task_history
    else:
        error_message = f"Failed to get a response: {response.code} - {response.message}"
        chatbot.append((None, error_message))  # Add the error message to chatbot
        return chatbot, task_history



with gr.Blocks() as demo:
    gr.Markdown("""<p align="center"><img src="https://modelscope.oss-cn-beijing.aliyuncs.com/resource/qwen.png" style="height: 80px"/><p>""")  ## todo
    gr.Markdown("""<center><font size=8>Qwen2-Audio-Instruct Bot</center>""")
    gr.Markdown(
        """\
<center><font size=3>This WebUI is based on Qwen2-Audio-Instruct, developed by Alibaba Cloud. \
(本WebUI基于Qwen2-Audio-Instruct打造,实现聊天机器人功能。)</center>""")
    gr.Markdown("""\
<center><font size=4>Qwen2-Audio <a href="https://modelscope.cn/models/qwen/Qwen2-Audio-7B">🤖 </a> 
| <a href="https://huggingface.co/Qwen/Qwen2-Audio-7B">🤗</a>&nbsp; | 
Qwen2-Audio-Instruct <a href="https://modelscope.cn/models/qwen/Qwen2-Audio-7B-Instruct">🤖 </a> | 
<a href="https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct">🤗</a>&nbsp; | 
&nbsp;<a href="https://github.com/QwenLM/Qwen2-Audio">GitHub</a></center>""")
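    # mgr.Chatbot renders the multimodal conversation; mgr.MultimodalInput accepts typed text plus microphone audio.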
    chatbot = mgr.Chatbot(label='Qwen2-Audio-7B-Instruct', elem_classes="control-height", height=750)
    # query = gr.Textbox(lines=2, label='Input')
    # mic = gr.Audio(source="microphone", type="filepath")
    user_input = mgr.MultimodalInput(
        interactive=True,
        sources=['microphone'],
        submit_button_props=dict(value="🚀 Submit (发送)")
    )
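    # task_history keeps the conversation in DashScope message format, separate from what the chatbot displays.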
    task_history = gr.State([])

    with gr.Row():
        empty_bin = gr.Button("🧹 Clear History (清除历史)")
        # submit_btn = gr.Button("🚀 Submit (发送)")
        regen_btn = gr.Button("🤔️ Regenerate (重试)")
        addfile_btn = gr.UploadButton("📁 Upload (上传文件)", file_types=["audio"])

    # mic.change(add_mic, [chatbot, task_history, mic], [chatbot, task_history])
    # submit_btn.click(add_text, [chatbot, task_history, query], [chatbot, task_history]).then(
    #     predict, [chatbot, task_history], [chatbot, task_history], show_progress=True
    # )

    # submit_btn.click(reset_user_input, [], [query])
    user_input.submit(fn=add_text,
                      inputs=[chatbot, task_history, user_input],
                      outputs=[chatbot, task_history, user_input]).then(
        predict, [chatbot, task_history], [chatbot, task_history], show_progress=True
    )
    empty_bin.click(reset_state, outputs=[chatbot, task_history], show_progress=True)
    regen_btn.click(regenerate, [chatbot, task_history], [chatbot, task_history], show_progress=True)
    addfile_btn.upload(add_file, [chatbot, task_history, addfile_btn], [chatbot, task_history], show_progress=True)

demo.queue().launch(
    share=False,
    inbrowser=True,
    server_port=7860,
    server_name="0.0.0.0",
)