MadsGalsgaard committed on
Commit 9ca7283 · verified
1 Parent(s): a1d16cc
Files changed (1)
  1. app.py +81 -80
app.py CHANGED
@@ -1,93 +1,94 @@
- # import gradio as gr
- # from huggingface_hub import InferenceClient
-
- # """
- # For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- # """
- # client = InferenceClient("meta-llama/Meta-Llama-3-8B")
-
- # ## None type
- # def respond(
- #     message: str,
- #     history: list[tuple[str, str]], # This will not be used
- #     system_message: str,
- #     max_tokens: int,
- #     temperature: float,
- #     top_p: float,
- # ):
- #     messages = [{"role": "system", "content": system_message}]
-
- #     # Append only the latest user message
- #     messages.append({"role": "user", "content": message})
-
- #     response = ""
-
- #     try:
- #         # Generate response from the model
- #         for message in client.chat_completion(
- #             messages,
- #             max_tokens=max_tokens,
- #             stream=True,
- #             temperature=temperature,
- #             top_p=top_p,
- #         ):
- #             if message.choices[0].delta.content is not None:
- #                 token = message.choices[0].delta.content
- #                 response += token
- #                 yield response
- #     except Exception as e:
- #         yield f"An error occurred: {e}"
-
- # """
- # For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- # """
- # demo = gr.ChatInterface(
- #     respond,
- #     additional_inputs=[
- #         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
- #         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
- #         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
- #         gr.Slider(
- #             minimum=0.1,
- #             maximum=1.0,
- #             value=0.95,
- #             step=0.05,
- #             label="Top-p (nucleus sampling)",
- #         ),
- #     ],
- # )
-
- # if __name__ == "__main__":
- #     demo.launch()
-
-
- import os
- import gradio as gr
- from huggingface_hub import InferenceClient
-
- # Initialize the Hugging Face Inference Client
- client = InferenceClient(
-     "meta-llama/Meta-Llama-3.1-8B-Instruct",
-     token= os.getenv("HF_API_TOKEN"),# Replace with your actual token
- )
-
- # Define a function to handle the chat input and get a response from the model
- def chat_with_model(user_input):
-     # Call the client to get the model's response
-     response = ""
-     for message in client.chat_completion(
-         messages=[{"role": "user", "content": user_input}],
-         max_tokens=500,
-         stream=True,
-     ):
-         response += message.choices[0].delta.content
-     return response
-
- # Create a Gradio interface with a chat component
- with gr.Blocks() as demo:
-     chatbot = gr.Chatbot()
-     with gr.Row():
-         txt = gr.Textbox(show_label=False, placeholder="Type your message here...")
-         txt.submit(chat_with_model, inputs=txt, outputs=chatbot)
-
- demo.launch()
+ import os
+ import gradio as gr
+ from huggingface_hub import InferenceClient
+
+ """
+ For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+ """
+ client = InferenceClient("meta-llama/Meta-Llama-3-8B",token=os.getenv('HF_API_TOKEN'))
+
+ ## None type
+ def respond(
+     message: str,
+     history: list[tuple[str, str]], # This will not be used
+     system_message: str,
+     max_tokens: int,
+     temperature: float,
+     top_p: float,
+ ):
+     messages = [{"role": "system", "content": system_message}]
+
+     # Append only the latest user message
+     messages.append({"role": "user", "content": message})
+
+     response = ""
+
+     try:
+         # Generate response from the model
+         for message in client.chat_completion(
+             messages,
+             max_tokens=max_tokens,
+             stream=True,
+             temperature=temperature,
+             top_p=top_p,
+         ):
+             if message.choices[0].delta.content is not None:
+                 token = message.choices[0].delta.content
+                 response += token
+                 yield response
+     except Exception as e:
+         yield f"An error occurred: {e}"
+
+ """
+ For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+ """
+ demo = gr.ChatInterface(
+     respond,
+     additional_inputs=[
+         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(
+             minimum=0.1,
+             maximum=1.0,
+             value=0.95,
+             step=0.05,
+             label="Top-p (nucleus sampling)",
+         ),
+     ],
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
+
+
+ # import os
+ # import gradio as gr
+ # from huggingface_hub import InferenceClient
+
+ # # Initialize the Hugging Face Inference Client
+ # client = InferenceClient(
+ #     "meta-llama/Meta-Llama-3.1-8B-Instruct",
+ #     token= os.getenv("HF_API_TOKEN"),# Replace with your actual token
+ # )
+
+ # # Define a function to handle the chat input and get a response from the model
+ # def chat_with_model(user_input):
+ #     # Call the client to get the model's response
+ #     response = ""
+ #     for message in client.chat_completion(
+ #         messages=[{"role": "user", "content": user_input}],
+ #         max_tokens=500,
+ #         stream=True,
+ #     ):
+ #         response += message.choices[0].delta.content
+ #     return response
+
+ # # Create a Gradio interface with a chat component
+ # with gr.Blocks() as demo:
+ #     chatbot = gr.Chatbot()
+ #     with gr.Row():
+ #         txt = gr.Textbox(show_label=False, placeholder="Type your message here...")
+ #         txt.submit(chat_with_model, inputs=txt, outputs=chatbot)
+
+ # demo.launch()
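
Note: the commented-out gr.Blocks variant at the bottom of the new file carries two latent bugs that the active respond() already avoids: message.choices[0].delta.content can be None on some streamed chunks, and chat_with_model returns a bare string while gr.Chatbot renders a list of (user, assistant) pairs. A minimal sketch of how that variant could be wired correctly, assuming the same model ID and HF_API_TOKEN environment variable as above (not part of this commit):

import os

import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient(
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    token=os.getenv("HF_API_TOKEN"),
)

def chat_with_model(user_input, history):
    # Accumulate streamed deltas, skipping None chunks just as the
    # active respond() function above does.
    response = ""
    for chunk in client.chat_completion(
        messages=[{"role": "user", "content": user_input}],
        max_tokens=500,
        stream=True,
    ):
        if chunk.choices[0].delta.content is not None:
            response += chunk.choices[0].delta.content
    # gr.Chatbot expects a list of (user, assistant) pairs, so append
    # to the running history rather than returning a bare string.
    history = history + [(user_input, response)]
    # Return "" as the second output to clear the textbox after submit.
    return history, ""

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    with gr.Row():
        txt = gr.Textbox(show_label=False, placeholder="Type your message here...")
    txt.submit(chat_with_model, inputs=[txt, chatbot], outputs=[chatbot, txt])

demo.launch()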