import os

import gradio as gr
import tenacity
from openai import OpenAI, APIError, RateLimitError

# Hugging Face token used to authenticate against the inference endpoint
ACCESS_TOKEN = os.getenv("HF_TOKEN")

# OpenAI-compatible client. The base_url assumes the Hugging Face serverless
# Inference API endpoint; adjust it if the model is hosted elsewhere.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)

# Retry with exponential backoff (4-10 s, at most 5 attempts) to ride out
# transient API rate limits; rate-limit errors are re-raised inside respond()
# so the retry actually triggers
@tenacity.retry(
    wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
    stop=tenacity.stop_after_attempt(5),
)
def respond(
    message,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    try:
        # Only use the system message and the current message for the response
        messages = [{"role": "system", "content": system_message},
                    {"role": "user", "content": message}]

        response = ""
        # Stream the chat completion; the v1 openai client returns typed
        # objects, so chunks are read with dot notation below
        stream = client.chat.completions.create(
            model="NousResearch/Hermes-3-Llama-3.1-8B",
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
            stream=True,
        )

        # Concatenate streamed tokens into the full response
        for chunk in stream:
            delta = chunk.choices[0].delta
            if delta.content is not None:
                response += delta.content

        return response

    except RateLimitError:
        # Re-raise rate-limit errors so tenacity can retry them; the broader
        # handler below would otherwise swallow them and defeat the decorator
        raise

    except APIError as e:
        # e.body may be a structured dict or a plain string
        error_details = e.body
        if isinstance(error_details, dict):
            error_type = error_details.get("type", "Unknown")
            error_code = error_details.get("code", "Unknown")
            error_param = error_details.get("param", "Unknown")
            error_message = error_details.get("message", "An error occurred.")
            error_str = f"{error_type}: {error_message} (code: {error_code}, param: {error_param})"
        else:
            error_str = f"Error: {error_details}"

        print(f"APIError: {error_str}")
        return error_str

    except Exception as e:
        print(f"Exception: {e}")
        return "Error occurred. Please try again."
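
# Quick smoke test: respond() can be exercised directly from a REPL, without
# the UI. The argument values here are illustrative, not required defaults:
#   print(respond("Hello!", "You are a helpful assistant.", 256, 0.7, 0.95))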


# Gradio handler: respond() is synchronous, so it can be called directly with
# no event-loop plumbing
def generate_response(message, system_message, max_tokens, temperature, top_p):
    return respond(message, system_message, max_tokens, temperature, top_p)
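
# Design note: this handler is deliberately stateless; each click sends only
# the system message plus the current user message. Multi-turn chat would
# mean accumulating prior user/assistant pairs into the messages list, and
# streaming into the UI would mean turning this handler into a generator that
# yields partial text; both are possible extensions, not part of this app.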


def launch_app():
    try:
        demo = gr.Blocks()
        with demo:
            gr.Markdown("# Chatbot")
            message = gr.Textbox(label="Message")
            system_message = gr.Textbox(label="System message")
            max_tokens = gr.Slider(minimum=1, maximum=2048, value=2048, step=1, label="Max new tokens")
            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
            response = gr.Text(label="Response")

            # Use the generate_response function without history
            gr.Button("Generate Response").click(
                generate_response,
                inputs=[message, system_message, max_tokens, temperature, top_p],
                outputs=[response],
                show_progress=False,
            )
        demo.launch(show_error=True)
    except Exception as e:
        # Report any failure during UI construction or launch
        print(f"Error: {e}")
        print("Please try again.")

if __name__ == "__main__":
    launch_app()
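
# Setup note (illustrative): the HF_TOKEN environment variable must be set
# before launching, e.g.
#   export HF_TOKEN=<your Hugging Face access token>
#   python app.py          # assuming the file is saved as app.py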