MadsGalsgaard committed on
Commit
8d5090f
·
verified ·
1 Parent(s): fa89eeb
Files changed (1) hide show
  1. app.py +23 -112
app.py CHANGED
@@ -1,68 +1,12 @@
1
- import os
2
  import gradio as gr
3
  from huggingface_hub import InferenceClient
4
 
5
  """
6
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
7
  """
8
- client = InferenceClient("meta-llama/Meta-Llama-3-8B",token=os.getenv('HF_API_TOKEN'))
9
 
10
- # ## None type
11
- # def respond(
12
- # message: str,
13
- # history: list[tuple[str, str]], # This will not be used
14
- # system_message: str,
15
- # max_tokens: int,
16
- # temperature: float,
17
- # top_p: float,
18
- # ):
19
- # messages = [{"role": "system", "content": system_message}]
20
-
21
- # # Append only the latest user message
22
- # messages.append({"role": "user", "content": message})
23
-
24
- # response = ""
25
-
26
- # try:
27
- # # Generate response from the model
28
- # for message in client.chat_completion(
29
- # messages,
30
- # max_tokens=max_tokens,
31
- # stream=True,
32
- # temperature=temperature,
33
- # top_p=top_p,
34
- # ):
35
- # if message.choices[0].delta.content is not None:
36
- # token = message.choices[0].delta.content
37
- # response += token
38
- # yield response
39
- # except Exception as e:
40
- # yield f"An error occurred: {e}"
41
-
42
- # """
43
- # For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- # """
45
- # demo = gr.ChatInterface(
46
- # respond,
47
- # additional_inputs=[
48
- # gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
49
- # gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
- # gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
51
- # gr.Slider(
52
- # minimum=0.1,
53
- # maximum=1.0,
54
- # value=0.95,
55
- # step=0.05,
56
- # label="Top-p (nucleus sampling)",
57
- # ),
58
- # ],
59
- # )
60
-
61
- # if __name__ == "__main__":
62
- # demo.launch()
63
-
64
-
65
- ####19
66
  def respond(
67
  message: str,
68
  history: list[tuple[str, str]], # This will not be used
@@ -71,72 +15,39 @@ def respond(
71
  temperature: float,
72
  top_p: float,
73
  ):
74
- # Combine the system message and user input into a single prompt
75
- prompt = f"{system_message}\n{message}"
 
 
 
 
 
 
 
76
 
77
  response = ""
78
 
79
  try:
80
- # Generate response from the model using text generation method
81
- for message in client.text_generation(
82
- prompt=prompt,
83
- max_new_tokens=max_tokens,
 
84
  temperature=temperature,
85
  top_p=top_p,
86
  ):
87
- if message.token is not None:
88
- response += message.token
 
89
  yield response
90
  except Exception as e:
91
  yield f"An error occurred: {e}"
92
 
93
- # Define the Gradio interface
94
- demo = gr.ChatInterface(
95
- respond,
96
- additional_inputs=[
97
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
98
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
99
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
100
- gr.Slider(
101
- minimum=0.1,
102
- maximum=1.0,
103
- value=0.95,
104
- step=0.05,
105
- label="Top-p (nucleus sampling)",
106
- ),
107
  ],
108
  )
109
 
110
- if __name__ == "__main__":
111
- demo.launch()
112
 
113
- # import os
114
- # import gradio as gr
115
- # from huggingface_hub import InferenceClient
116
-
117
- # # Initialize the Hugging Face Inference Client
118
- # client = InferenceClient(
119
- # "meta-llama/Meta-Llama-3.1-8B-Instruct",
120
- # token= os.getenv("HF_API_TOKEN"),# Replace with your actual token
121
- # )
122
-
123
- # # Define a function to handle the chat input and get a response from the model
124
- # def chat_with_model(user_input):
125
- # # Call the client to get the model's response
126
- # response = ""
127
- # for message in client.chat_completion(
128
- # messages=[{"role": "user", "content": user_input}],
129
- # max_tokens=500,
130
- # stream=True,
131
- # ):
132
- # response += message.choices[0].delta.content
133
- # return response
134
-
135
- # # Create a Gradio interface with a chat component
136
- # with gr.Blocks() as demo:
137
- # chatbot = gr.Chatbot()
138
- # with gr.Row():
139
- # txt = gr.Textbox(show_label=False, placeholder="Type your message here...")
140
- # txt.submit(chat_with_model, inputs=txt, outputs=chatbot)
141
-
142
- # demo.launch()
 
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
+ client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
8
 
9
+ ## None type
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def respond(
11
  message: str,
12
  history: list[tuple[str, str]], # This will not be used
 
15
  temperature: float,
16
  top_p: float,
17
  ):
18
+ messages = [{"role": "system", "content": system_message}]
19
+
20
+ # Append only the latest user message
21
+
22
+
23
+
24
+
25
+
26
+ messages.append({"role": "user", "content": message})
27
 
28
  response = ""
29
 
30
  try:
31
+ # Generate response from the model
32
+ for message in client.chat_completion(
33
+ messages,
34
+ max_tokens=max_tokens,
35
+ stream=True,
36
  temperature=temperature,
37
  top_p=top_p,
38
  ):
39
+ if message.choices[0].delta.content is not None:
40
+ token = message.choices[0].delta.content
41
+ response += token
42
  yield response
43
  except Exception as e:
44
  yield f"An error occurred: {e}"
45
 
46
+ """
47
+ # For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 
 
 
 
 
 
 
 
 
 
 
 
48
  ],
49
  )
50
 
 
 
51
 
52
+ if __name__ == "__main__":
53
+ demo.launch()