File size: 2,793 Bytes
47fcff2
18d6e67
79cade0
18d6e67
79cade0
0c5007d
61abdf6
79cade0
 
 
 
 
18d6e67
79cade0
 
f23b3ba
79cade0
 
 
 
 
18d6e67
 
79cade0
18d6e67
 
 
 
 
79cade0
18d6e67
79cade0
18d6e67
04ec251
18d6e67
47fcff2
18d6e67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47fcff2
 
f23b3ba
47fcff2
f23b3ba
 
 
 
47fcff2
 
 
 
 
 
 
 
 
 
f23b3ba
 
 
47fcff2
 
 
 
 
61abdf6
47fcff2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import gradio as gr
from openai import OpenAI, APIError
import os
import tenacity

# Hugging Face access token, read from the environment (set HF_TOKEN before launch).
ACCESS_TOKEN = os.getenv("HF_TOKEN")

# OpenAI-compatible client pointed at the Hugging Face Inference API endpoint.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)

@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10))
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *message*, yielding the growing reply text.

    Args:
        message: Latest user message.
        history: Pairs of (user, assistant) texts from earlier turns; either
            slot may be falsy and is then skipped.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The accumulated assistant response after each streamed token, or a
        human-readable error string if the API call fails.

    NOTE(review): the tenacity decorator wraps *generator creation*, which
    never raises (and all exceptions are caught below), so retries can never
    actually trigger; kept unchanged for interface compatibility.
    """
    try:
        messages = [{"role": "system", "content": system_message}]

        for turn in history:
            if turn[0]:
                messages.append({"role": "user", "content": turn[0]})
            if turn[1]:
                messages.append({"role": "assistant", "content": turn[1]})

        messages.append({"role": "user", "content": message})

        response = ""

        # Loop name `chunk` avoids shadowing the `message` parameter
        # (the original reused `message` as the loop variable).
        for chunk in client.chat.completions.create(
            model="NousResearch/Hermes-3-Llama-3.1-8B",
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
        ):
            token = chunk.choices[0].delta.content

            # The final streamed delta carries content=None; the original
            # crashed there with `TypeError: can only concatenate str ...`.
            if token is not None:
                response += token
                yield response
    except APIError as e:
        # e.body is not guaranteed to be a dict (it may be None or a plain
        # string); the original's .get() calls would raise inside the handler.
        error_details = e.body if isinstance(e.body, dict) else {}
        error_type = error_details.get("type")
        error_code = error_details.get("code")
        error_param = error_details.get("param")
        error_message = error_details.get("message")

        if error_type:
            error_str = f"{error_type}: {error_message} (code: {error_code}, param: {error_param})"
        else:
            error_str = "An error occurred during streaming"

        print(f"Error: {error_str}")
        yield error_str
    except Exception as e:
        print(f"Error: {e}")
        yield "Error occurred. Please try again."

def launch_app():
    """Assemble the Gradio chat interface and start the web server."""
    try:
        # Input widgets, in the order respond() expects its arguments.
        input_widgets = [
            gr.Textbox(label="Message"),
            gr.Dataframe(label="History", headers=["User", "Assistant"]),
            gr.Textbox(label="System message"),
            gr.Slider(minimum=1, maximum=2048, value=2048, step=1, label="Max new tokens"),
            gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
        ]

        demo = gr.Interface(
            respond,
            input_widgets,
            "text",
            title="Chatbot",
            description="A chatbot that responds to user input",
        )
        demo.launch(show_error=True)
    except KeyError as e:
        print(f"Error: {e}")
        print("Please try again.")

# Start the app only when run as a script, not when imported as a module.
if __name__ == "__main__":
    launch_app()