File size: 3,401 Bytes
c4b7c58
f3846d0
 
c4b7c58
f3846d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4b7c58
f3846d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4b7c58
 
 
 
 
 
 
a5ba795
c4b7c58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a5ba795
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4b7c58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f3846d0
dbd5476
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
"""import gradio as gr
from huggingface_hub import InferenceClient


client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    for message in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = message.choices[0].delta.content

        response += token
        yield response


demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()
"""

import os
import gradio as gr
from huggingface_hub import InferenceClient
import json

# Access token for the Hugging Face API, read from the HF_READ_TOKEN
# environment variable (None when the variable is not set).
API_TOKEN = os.environ.get("HF_READ_TOKEN")

# Shared inference client bound to the Mistral-Nemo instruct model
client = InferenceClient(model="mistralai/Mistral-Nemo-Instruct-2407", token=API_TOKEN)

# System message sent ahead of every user message
system_prompt = (
    "You are a helpful assistant that provides concise and accurate answers."
)

# Function to handle the chat completion
def hf_chat(user_input):
    """Send one user message to the chat model and return the complete reply.

    The reply is requested as a stream from ``client.chat_completion`` and the
    streamed text fragments are concatenated into a single string.

    Args:
        user_input: Text of the user's message.

    Returns:
        The concatenated reply text. If the request or the stream raises, a
        string of the form ``"Error occurred: <details>"`` is returned
        instead.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]
    fragments = []

    try:
        # Stream the response
        for chunk in client.chat_completion(
            messages=messages,
            max_tokens=500,
            stream=True,
        ):
            try:
                content = chunk.choices[0].delta.content
            except (KeyError, IndexError, AttributeError, json.JSONDecodeError) as e:
                # A malformed or choice-less chunk must not abort the stream
                print(f"Error while parsing response: {e}")
                continue  # Continue receiving the stream

            # Some chunks (e.g. role-only or final chunks) carry no text;
            # concatenating None would raise and discard the whole reply.
            if content:
                fragments.append(content)

    except Exception as e:
        # Report any failure of the request or the stream to the caller as text
        return f"Error occurred: {str(e)}"

    return "".join(fragments)

# Gradio interface: a heading, then one row holding an input column
# (textbox + button) and an output column (response textbox).
with gr.Blocks() as demo:
    gr.Markdown(value="# Hugging Face Chat Completion")
    with gr.Row():
        with gr.Column():
            user_input = gr.Textbox(placeholder="Ask me anything...", label="Enter your message")
            submit_btn = gr.Button(value="Submit")
        with gr.Column():
            output = gr.Textbox(label="Response")

    # Clicking the button passes the textbox content to hf_chat and writes
    # the returned string into the response box.
    submit_btn.click(hf_chat, inputs=user_input, outputs=output)

# Start the app only when this file is run as a script
if __name__ == "__main__":
    demo.launch(share=False, show_api=True)