xyizko committed (verified)
Commit 5bacd80 · 1 parent: 40a939a

Update app.py


- add more fixes

Files changed (1)
1. app.py +52 -68
app.py CHANGED
@@ -1,41 +1,27 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs:
-https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-
 def respond(message, history, token, model, system_message, max_tokens, temperature, top_p):
     """
     Handle chat responses using the Hugging Face Inference API.
-
-    Parameters:
-    - message: The user's current message.
-    - history: List of previous user-assistant message pairs.
-    - token: HF API token for authentication.
-    - model: Model name to use for inference.
-    - system_message: System prompt to initialize the chat.
-    - max_tokens: Maximum number of tokens to generate.
-    - temperature: Sampling temperature.
-    - top_p: Top-p (nucleus) sampling parameter.
-
-    Yields:
-    - Incremental responses for streaming in the chat interface.
     """
-    # Check for missing token
+    # Handle token and model defaults
+    token = token.strip()
+    model = model.strip()
+
+    # Default model selection logic
     if not token:
-        yield "Please provide an HF API Token."
-        return
-
-    # Use default model if none provided
-    if not model:
-        model = "meta-llama/Llama-3.1-8B-Instruct"
-
-    # Initialize the InferenceClient
-    try:
-        client = InferenceClient(model=model, token=token)
-    except Exception as e:
-        yield f"Error initializing client: {str(e)}"
-        return
+        model = "gpt2"  # Default public model that doesn't require token
+        try:
+            client = InferenceClient(model=model)
+        except Exception as e:
+            yield f"Error initializing client: {str(e)}"
+            return
+    else:
+        model = model or "meta-llama/Llama-3.1-8B-Instruct"  # Default private model
+        try:
+            client = InferenceClient(model=model, token=token)
+        except Exception as e:
+            yield f"Error initializing client: {str(e)}"
+            return
 
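The first hunk replaces the hard token requirement with a branch: no token routes to the public gpt2 default, a token routes to meta-llama/Llama-3.1-8B-Instruct. One hedge worth noting: token.strip() assumes both Textbox values always arrive as strings; if either is ever None, .strip() raises AttributeError before the branch is reached. A minimal defensive sketch (hypothetical, not part of this commit):

# Hypothetical guard: coerce None to "" before stripping so respond()
# cannot crash on a missing input value.
token = (token or "").strip()
model = (model or "").strip()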
@@ -42,14 +28,11 @@
-    # Build the message history, starting with the system message
+    # Build message history
     messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:  # User message
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:  # Assistant message
-            messages.append({"role": "assistant", "content": val[1]})
-
+    for user_msg, assistant_msg in history:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if assistant_msg:
+            messages.append({"role": "assistant", "content": assistant_msg})
     messages.append({"role": "user", "content": message})
 
+    # Generate response
     response = ""
-
-    # Make the API call with streaming
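The history loop now unpacks each turn as a (user_msg, assistant_msg) pair, which matches Gradio's classic tuple-format chat history. Newer Gradio releases can instead deliver history as OpenAI-style dicts when the ChatInterface is created with type="messages"; a sketch that would tolerate either shape, offered as an assumption rather than something this commit does:

# Sketch only: accept history as (user, assistant) pairs or as
# {"role": ..., "content": ...} dicts and emit chat-completion messages.
def history_to_messages(history):
    messages = []
    for turn in history:
        if isinstance(turn, dict):  # messages format
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:  # legacy pair format
            user_msg, assistant_msg = turn
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    return messages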
@@ -56,16 +39,14 @@
     try:
-        for message in client.chat_completion(
+        for chunk in client.chat_completion(
             messages,
             max_tokens=max_tokens,
             stream=True,
             temperature=temperature,
             top_p=top_p,
         ):
-            # Check for non-empty content in the delta
-            if message.choices and message.choices[0].delta.content is not None:
-                token = message.choices[0].delta.content
-                response += token
+            if chunk.choices and chunk.choices[0].delta.content:
+                response += chunk.choices[0].delta.content
                 yield response
     except Exception as e:
-        yield f"Error during API call: {str(e)}"
+        yield f"API Error: {str(e)}"
 
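Renaming the loop variable from message to chunk also untangles the old code, where the loop shadowed the message parameter and the delta text was assigned to token, reusing the API-token name mid-stream. For reference, the same streaming pattern works standalone outside Gradio (the model name and hf_... token below are placeholders):

# Standalone streaming sketch with placeholder credentials.
from huggingface_hub import InferenceClient

client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta", token="hf_...")
for chunk in client.chat_completion(
    [{"role": "user", "content": "Say hello"}],
    max_tokens=32,
    stream=True,
):
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)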
@@ -72,28 +53,31 @@
-# Define input components
-token_input = gr.Textbox(type="password", label="HF API Token")
-model_input = gr.Textbox(label="Model Name", value="HuggingFaceH4/zephyr-7b-beta")
+# Input components
+token_input = gr.Textbox(
+    type="password",
+    label="HF API Token (leave empty for public models)",
+    placeholder="hf_XXXXXXXXXXXX"
+)
+model_input = gr.Dropdown(
+    label="Model Name",
+    choices=[
+        "gpt2",
+        "HuggingFaceH4/zephyr-7b-beta",
+        "meta-llama/Llama-3.1-8B-Instruct"
+    ],
+    value="gpt2"
+)
 
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs:
-https://www.gradio.app/docs/chatinterface
-"""
+# Chat interface
 demo = gr.ChatInterface(
-    title="Chat Interface to test HF Models with a HF TOKEN",
-    description="Enter your HF Token To Acces the Models",
-    respond,
+    fn=respond,
+    title="HF Model Chat Interface",
+    description="Enter token for private models or use public models without token",
     additional_inputs=[
         token_input,
         model_input,
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
+        gr.Textbox(value="You are helpful AI.", label="System Message"),
+        gr.Slider(1, 2048, value=512, label="Max Tokens"),
+        gr.Slider(0.1, 4.0, value=0.7, label="Temperature"),
+        gr.Slider(0.1, 1.0, value=0.95, label="Top-p"),
     ],
 )
 
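The free-text model field becomes a Dropdown defaulting to gpt2. One caveat to flag: gpt2 is a base text-generation model without a chat template, so the chat_completion call above may be rejected for it even though the model itself is public. A fallback sketch, under the assumption that base models should be routed through plain text generation instead (this commit does not implement it):

# Assumed fallback: use plain text_generation for base models
# that reject chat_completion.
from huggingface_hub import InferenceClient

client = InferenceClient(model="gpt2")
prompt = "You are helpful AI.\nUser: Say hello\nAssistant:"
for piece in client.text_generation(prompt, max_new_tokens=32, stream=True):
    print(piece, end="", flush=True)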
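Last, neither side of the diff shows a launch call, and the hunks cover the whole file, so app.py as committed defines demo without serving it. Run locally with python app.py, the script would exit silently. The conventional closing lines, which this commit does not add, would be:

# Conventional Gradio entry point; not present in the committed file.
if __name__ == "__main__":
    demo.launch()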