import os
import gradio as gr
from huggingface_hub import InferenceClient

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
client = InferenceClient("meta-llama/Meta-Llama-3-8B", token=os.getenv("HF_API_TOKEN"))
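# Note: HF_API_TOKEN must be available in the environment (e.g. as a Space
# secret), otherwise the client cannot authenticate against the Inference API.
# meta-llama/Meta-Llama-3-8B is the base (non-instruct) model, so the active
# code below builds a plain text prompt rather than chat-formatted messages.
# Quick sanity check (a sketch; run manually if needed):
#   print(client.text_generation("Hello, world", max_new_tokens=16))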

# ## Earlier attempt: chat_completion streaming with a None-check on the deltas (kept for reference)
# def respond(
#     message: str,
#     history: list[tuple[str, str]],  # This will not be used
#     system_message: str,
#     max_tokens: int,
#     temperature: float,
#     top_p: float,
# ):
#     messages = [{"role": "system", "content": system_message}]
    
#     # Append only the latest user message
#     messages.append({"role": "user", "content": message})

#     response = ""

#     try:
#         # Generate response from the model
#         for message in client.chat_completion(
#             messages,
#             max_tokens=max_tokens,
#             stream=True,
#             temperature=temperature,
#             top_p=top_p,
#         ):
#             if message.choices[0].delta.content is not None:
#                 token = message.choices[0].delta.content
#                 response += token
#             yield response
#     except Exception as e:
#         yield f"An error occurred: {e}"

# """
# For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
# """
# demo = gr.ChatInterface(
#     respond,
#     additional_inputs=[
#         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#         gr.Slider(
#             minimum=0.1,
#             maximum=1.0,
#             value=0.95,
#             step=0.05,
#             label="Top-p (nucleus sampling)",
#         ),
#     ],
# )

# if __name__ == "__main__":
#     demo.launch()


#### 19: active respond() using text_generation
def respond(
    message: str,
    history: list[tuple[str, str]],  # This will not be used
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    # Combine the system message and user input into a single prompt
    prompt = f"{system_message}\n{message}"

    response = ""

    try:
        # Stream the response token by token from the text-generation endpoint
        for token in client.text_generation(
            prompt=prompt,
            max_new_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # With stream=True (and details left False), each item is a token string
            response += token
            yield response
    except Exception as e:
        yield f"An error occurred: {e}"

# Define the Gradio interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
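# The values of the additional_inputs components are passed to respond() after
# the (message, history) pair, i.e. as system_message, max_tokens, temperature
# and top_p, in that order.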

if __name__ == "__main__":
    demo.launch()
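    # Optional: demo.launch(share=True) exposes a temporary public URL when the
    # app is run outside a Hugging Face Space.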

# import os
# import gradio as gr
# from huggingface_hub import InferenceClient

# # Initialize the Hugging Face Inference Client
# client = InferenceClient(
#     "meta-llama/Meta-Llama-3.1-8B-Instruct",
#     token=os.getenv("HF_API_TOKEN"),  # token is read from the environment
# )

# # Define a function to handle the chat input and get a response from the model
# def chat_with_model(user_input):
#     # Call the client to get the model's response
#     response = ""
#     for message in client.chat_completion(
#         messages=[{"role": "user", "content": user_input}],
#         max_tokens=500,
#         stream=True,
#     ):
#         response += message.choices[0].delta.content
#     return response

# # Create a Gradio interface with a chat component
# with gr.Blocks() as demo:
#     chatbot = gr.Chatbot()
#     with gr.Row():
#         txt = gr.Textbox(show_label=False, placeholder="Type your message here...")
#         txt.submit(chat_with_model, inputs=txt, outputs=chatbot)

# demo.launch()