File size: 3,433 Bytes
47fcff2
18d6e67
79cade0
18d6e67
0cce7a0
79cade0
0c5007d
61abdf6
79cade0
 
 
 
 
18d6e67
0cce7a0
79cade0
f23b3ba
79cade0
 
 
 
 
18d6e67
 
79cade0
18d6e67
 
 
 
 
79cade0
18d6e67
79cade0
0cce7a0
efe1573
5d4663f
47fcff2
18d6e67
0cce7a0
18d6e67
 
 
5d4663f
efe1573
 
 
 
3ba965f
18d6e67
 
 
 
 
 
 
 
 
 
 
 
 
3ba965f
18d6e67
 
3ba965f
18d6e67
47fcff2
 
3ba965f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47fcff2
 
 
 
61abdf6
47fcff2
e8c6912
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import gradio as gr
from openai import OpenAI, APIError
import os
import tenacity
import asyncio

# Hugging Face API token, read from the environment; None if HF_TOKEN is unset
# (the client is still constructed, but requests will fail auth — TODO confirm
# desired behavior when the variable is missing).
ACCESS_TOKEN = os.getenv("HF_TOKEN")

# OpenAI-compatible client pointed at the Hugging Face serverless inference API.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)

@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10))
async def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Send the conversation to the inference endpoint and return the full reply.

    Args:
        message: The new user message (str).
        history: List of [user_text, assistant_text] pairs; empty halves are skipped.
        system_message: System prompt placed first in the message list.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The accumulated assistant reply, or a human-readable error string.

    NOTE(review): every exception is caught and returned as a string, so the
    tenacity retry decorator never sees a failure and never retries — confirm
    whether retries are actually wanted. Also, the streaming call below is
    synchronous, so this ``async def`` blocks the event loop while streaming.
    """
    try:
        # Build the OpenAI-style message list: system prompt first, then the
        # prior turns, then the new user message.
        messages = [{"role": "system", "content": system_message}]
        for user_turn, assistant_turn in history:
            if user_turn:
                messages.append({"role": "user", "content": user_turn})
            if assistant_turn:
                messages.append({"role": "assistant", "content": assistant_turn})
        messages.append({"role": "user", "content": message})

        response = ""
        # Stream chat completions and accumulate the text deltas.
        stream = client.chat.completions.create(
            model="NousResearch/Hermes-3-Llama-3.1-8B",
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
        )
        for chunk in stream:
            # BUGFIX: delta.content can exist but be None (role/terminal chunks);
            # the original hasattr() check let None through and `response += None`
            # raised TypeError. Guard on the value, not just the attribute.
            token = getattr(chunk.choices[0].delta, "content", None)
            if token:
                response += token
        return response
    except APIError as e:
        # BUGFIX: e.body may be None or a non-dict; guard before calling .get()
        # so the error handler itself cannot raise AttributeError.
        error_details = e.body if isinstance(e.body, dict) else {}
        error_type = error_details.get("type")
        error_code = error_details.get("code")
        error_param = error_details.get("param")
        error_message = error_details.get("message")

        if error_type:
            error_str = f"{error_type}: {error_message} (code: {error_code}, param: {error_param})"
        else:
            error_str = "An error occurred during streaming"

        print(f"Error: {error_str}")
        return error_str
    except Exception as e:
        # Last-resort guard so the UI gets a message instead of a traceback.
        print(f"Error: {e}")
        return "Error occurred. Please try again."

def launch_app():
    """Assemble the Gradio Blocks chat UI and start the web server.

    Wires a single "Generate Response" button to the ``respond`` coroutine,
    keeping the running conversation in a ``gr.State`` list of
    ``[user_text, assistant_text]`` pairs.
    """
    try:
        with gr.Blocks() as demo:
            gr.Markdown("# Chatbot")
            message = gr.Textbox(label="Message")
            history = gr.State([["", ""]])
            system_message = gr.Textbox(label="System message")
            max_tokens = gr.Slider(minimum=1, maximum=2048, value=2048, step=1, label="Max new tokens")
            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
            response = gr.Text(label="Response")

            def generate_response(msg, hist, sys_msg, max_tok, temp, nucleus_p):
                # Append the new turn, run the (async) model call to completion,
                # then fill in the assistant half of the freshly added pair.
                updated_hist = hist + [[msg, ""]]
                reply = asyncio.run(respond(msg, hist, sys_msg, max_tok, temp, nucleus_p))
                updated_hist[-1][1] = reply
                return reply, updated_hist

            gr.Button("Generate Response").click(
                generate_response,
                inputs=[message, history, system_message, max_tokens, temperature, top_p],
                outputs=[response, history],
                show_progress=False,
            )

        demo.launch(show_error=True)
    except KeyError as e:
        # NOTE(review): only KeyError is handled here, mirroring the original;
        # other launch-time failures will propagate.
        print(f"Error: {e}")
        print("Please try again.")

# Script entry point: start the Gradio app only when run directly, not on import.
if __name__ == "__main__":
    launch_app()