xyizko committed
Commit 6a1d4c8 · verified · 1 Parent(s): 87569bb

Update app.py

Files changed (1)
  1. app.py +61 -42
app.py CHANGED
@@ -2,60 +2,80 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 
 """
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+For more information on `huggingface_hub` Inference API support, please check the docs:
+https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 
-# Getting the HF Token and Well as the model
-token_input = gr.Textbox(type="password", label="HF API Token")
-
-if not token:
-    return "Please provide an HF API Token."
-
-model_input = gr.Textbox(label="Model Name", value="HuggingFaceH4/zephyr-7b-beta")
-
-if not model_input:
-    model_input = "meta-llama/Llama-3.1-8B-Instruct"
-
-client = InferenceClient(model_input)
-
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
+def respond(message, history, token, model, system_message, max_tokens, temperature, top_p):
+    """
+    Handle chat responses using the Hugging Face Inference API.
+
+    Parameters:
+    - message: The user's current message.
+    - history: List of previous user-assistant message pairs.
+    - token: HF API token for authentication.
+    - model: Model name to use for inference.
+    - system_message: System prompt to initialize the chat.
+    - max_tokens: Maximum number of tokens to generate.
+    - temperature: Sampling temperature.
+    - top_p: Top-p (nucleus) sampling parameter.
+
+    Yields:
+    - Incremental responses for streaming in the chat interface.
+    """
+    # Check for missing token
+    if not token:
+        yield "Please provide an HF API Token."
+        return
+
+    # Use default model if none provided
+    if not model:
+        model = "meta-llama/Llama-3.1-8B-Instruct"
+
+    # Initialize the InferenceClient
+    try:
+        client = InferenceClient(model=model, token=token)
+    except Exception as e:
+        yield f"Error initializing client: {str(e)}"
+        return
+
+    # Build the message history, starting with the system message
     messages = [{"role": "system", "content": system_message}]
 
     for val in history:
-        if val[0]:
+        if val[0]:  # User message
             messages.append({"role": "user", "content": val[0]})
-        if val[1]:
+        if val[1]:  # Assistant message
             messages.append({"role": "assistant", "content": val[1]})
 
     messages.append({"role": "user", "content": message})
 
     response = ""
 
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
-
-
-
+    # Make the API call with streaming
+    try:
+        for message in client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+        ):
+            # Check for non-empty content in the delta
+            if message.choices and message.choices[0].delta.content is not None:
+                token = message.choices[0].delta.content
+                response += token
+                yield response
+    except Exception as e:
+        yield f"Error during API call: {str(e)}"
+
+# Define input components
+token_input = gr.Textbox(type="password", label="HF API Token")
+model_input = gr.Textbox(label="Model Name", value="HuggingFaceH4/zephyr-7b-beta")
 
 """
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+For information on how to customize the ChatInterface, peruse the gradio docs:
+https://www.gradio.app/docs/chatinterface
 """
 demo = gr.ChatInterface(
     respond,
@@ -75,6 +95,5 @@ demo = gr.ChatInterface(
     ],
 )
 
-
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
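
The hunks above elide the `gr.ChatInterface` arguments (new lines 82-94), so the commit does not show how the new `token` and `model` parameters reach `respond`. Below is a minimal sketch of the usual wiring, assuming the elided block passes the commit's `token_input` and `model_input` components through `additional_inputs` in the same order as the `respond` signature; the system-message default and the slider ranges are illustrative assumptions, not taken from the commit.

# Hypothetical wiring for the elided additional_inputs block.
# token_input and model_input are the components the commit defines above;
# the remaining components and their defaults are assumptions.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        token_input,  # -> token
        model_input,  # -> model
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),  # -> system_message
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),  # -> max_tokens
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),  # -> temperature
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),  # -> top_p
    ],
)

Gradio passes `additional_inputs` values to the handler after `(message, history)`, in list order, which is why the list must match the parameter order of `respond`. Note the design change in this commit: constructing the `InferenceClient` inside `respond` (rather than at module level) means each request authenticates with whatever token the user supplied, so the Space never has to store a token of its own.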