SaisExperiments committed on
Commit
a8e97ac
·
verified ·
1 Parent(s): 5576714

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -33
app.py CHANGED
@@ -1,64 +1,146 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
3
 
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
 
10
  def respond(
11
- message,
12
  history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  messages = [{"role": "system", "content": system_message}]
19
 
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
25
 
26
  messages.append({"role": "user", "content": message})
27
 
28
- response = ""
29
 
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
 
 
 
38
 
39
- response += token
40
- yield response
 
 
41
 
 
 
 
 
42
 
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
  demo = gr.ChatInterface(
47
  respond,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  additional_inputs=[
49
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
50
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  gr.Slider(
53
  minimum=0.1,
54
  maximum=1.0,
55
- value=0.95,
56
  step=0.05,
57
  label="Top-p (nucleus sampling)",
 
58
  ),
59
  ],
 
60
  )
61
 
62
 
63
  if __name__ == "__main__":
64
- demo.launch()
 
 
 
 
 
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
+ import os
4
 
5
+ # --- Installation Note ---
6
+ # Ensure you have the necessary libraries installed:
7
+ # pip install gradio huggingface_hub
8
+
9
+ # --- Hugging Face Hub Token ---
10
+ # The InferenceClient might require a Hugging Face Hub token for certain models or usage.
11
+ # Set it as an environment variable HUGGING_FACE_HUB_TOKEN, or log in via `huggingface-cli login`.
12
+ # If the model is public and doesn't require login, this might work without a token.
13
+ # HUGGING_FACE_HUB_TOKEN = os.getenv("HUGGING_FACE_HUB_TOKEN") # Optional: explicitly get token if needed
# Build the shared inference client once, at import time. A construction
# failure is reported on stdout and leaves `client` as None so the app can
# still start; `respond` checks for None and tells the user instead of crashing.
try:
    client = InferenceClient(
        "HuggingFaceH4/zephyr-7b-beta",
    )
except Exception as e:
    client = None
    print(f"Error initializing InferenceClient: {e}")
    print("Please ensure the model identifier is correct and you have necessary permissions/token.")
else:
    print("InferenceClient initialized successfully.")
26
 
27
 
def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str = "You are a friendly Chatbot.",  # Fallback when called without the UI inputs
    max_tokens: int = 512,
    temperature: float = 0.7,
    top_p: float = 0.95,
):
    """Stream a chat reply for the Gradio chat interface.

    Args:
        message: The user's newest message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
        system_message: System prompt; an empty value is replaced by a
            generic fallback prompt.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature passed to the API.
        top_p: Nucleus-sampling threshold passed to the API.

    Yields:
        The accumulated reply text after each streamed token, or a single
        error string when the input is empty, the client is unavailable, or
        the API call fails.
    """
    # Validate the input first: an empty or whitespace-only message is a user
    # error regardless of backend state and is never worth an API call.
    if not message or not message.strip():
        yield "Error: Please enter a message."
        return

    if client is None:
        yield "Error: InferenceClient could not be initialized. Please check server logs."
        return

    if not system_message:
        system_message = "You are a helpful assistant."  # Fallback system message

    # Rebuild the conversation in the role/content format the API expects.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history or []:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response_text = ""
    try:
        for message_chunk in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Some stream chunks carry no choices at all; indexing [0] on
            # them would raise IndexError and abort the whole reply.
            if not message_chunk.choices:
                continue
            token = message_chunk.choices[0].delta.content
            # The content of a delta can be None (e.g. a role-only chunk).
            if token is not None:
                response_text += token
                yield response_text
    except Exception as e:
        print(f"Error during API call: {e}")
        error_note = f"An error occurred while generating the response: {e}"
        # Keep whatever was already streamed so a mid-stream failure does not
        # wipe the partial answer from the chat window.
        yield f"{response_text}\n\n{error_note}" if response_text else error_note
84
 
85
+
86
+ # --- Gradio Interface Definition ---
 
# Chat UI: wires `respond` to a chatbot widget plus four tunable generation
# inputs. Gradio passes the additional inputs to `respond` positionally, in
# list order, after (message, history).
demo = gr.ChatInterface(
    respond,
    chatbot=gr.Chatbot(
        height=500,
        label="Zephyr 7B Beta",
        show_label=True,
        # NOTE(review): recent Gradio releases mark this flag as deprecated;
        # confirm against the installed version before relying on it.
        bubble_full_width=False,
    ),
    title="🤖 Zephyr 7B Beta Chat",
    description="Chat with the Zephyr 7B Beta model using the Hugging Face Inference API. \nEnter your message and adjust settings below.",
    examples=[
        ["Hello, how are you today?"],
        ["What is the capital of France?"],
        ["Explain the concept of large language models in simple terms."],
        ["Write a short poem about the rain."],
    ],
    cache_examples=False,  # Examples are run live rather than precomputed
    additional_inputs=[
        gr.Textbox(
            value="You are a friendly and helpful chatbot.",
            label="System Message",
            info="The instruction given to the chatbot to guide its behavior.",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max New Tokens",
            info="Maximum number of tokens to generate.",
        ),
        gr.Slider(
            minimum=0.1,
            # Capped at 1.0: values above that often degrade output quality.
            maximum=1.0,
            value=0.7,
            step=0.1,
            label="Temperature",
            info="Controls randomness. Lower values make output more focused, higher values make it more diverse.",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
            info="Considers only the most probable tokens with cumulative probability p. Helps prevent low-probability tokens.",
        ),
    ],
    # `additional_inputs_accordion_name` is the deprecated spelling of this
    # option; `additional_inputs_accordion` accepts the same label string.
    additional_inputs_accordion="⚙️ Advanced Settings",
)
138
 
139
 
if __name__ == "__main__":
    # Start the Gradio server with default settings. Optional launch()
    # keyword arguments noted by the author, all currently disabled:
    #   share=True                 temporary public link (use with caution)
    #   server_name="0.0.0.0"      allow access from the local network
    #   auth=("user", "password")  basic authentication
    demo.launch()