ajsbsd committed
Commit ee2d859 · verified · 1 Parent(s): 9656c26

Update app.py

Files changed (1):
  1. app.py +28 -34
app.py CHANGED
@@ -54,7 +54,7 @@ def load_model_for_zerocpu():
         print(f"Falling back to standard Hugging Face model '{ORIGINAL_MODEL_ID}' for CPU (will be slower without GGUF quantization).")
     else:
         print("WARNING: ctransformers is not available. Will load standard Hugging Face model directly.")
-
+
     print(f"Loading standard Hugging Face model '{ORIGINAL_MODEL_ID}' for CPU...")
     try:
         model = AutoModelForCausalLM.from_pretrained(ORIGINAL_MODEL_ID)
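
Note: for readers skimming the hunk above, the surrounding function follows a try-GGUF-first, fall-back-to-transformers pattern. Below is a minimal sketch of that pattern; the model IDs and filename are assumptions standing in for the constants defined near the top of app.py, not values confirmed by this diff.

    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Placeholder values; the real constants live elsewhere in app.py.
    GGUF_MODEL_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"      # assumed
    GGUF_MODEL_FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"  # assumed
    ORIGINAL_MODEL_ID = "HuggingFaceTB/SmolLM2-360M-Instruct"     # assumed

    def load_model_for_zerocpu():
        try:
            # Prefer a GGUF-quantized build via ctransformers: far faster on CPU.
            from ctransformers import AutoModelForCausalLM as CAutoModelForCausalLM
            model = CAutoModelForCausalLM.from_pretrained(
                GGUF_MODEL_ID, model_file=GGUF_MODEL_FILENAME, model_type="llama"
            )
            return model, None  # ctransformers models handle tokenization internally
        except ImportError:
            print("WARNING: ctransformers is not available. Will load standard Hugging Face model directly.")
        print(f"Loading standard Hugging Face model '{ORIGINAL_MODEL_ID}' for CPU...")
        model = AutoModelForCausalLM.from_pretrained(ORIGINAL_MODEL_ID)
        tokenizer = AutoTokenizer.from_pretrained(ORIGINAL_MODEL_ID)
        return model, tokenizer
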
@@ -123,7 +123,7 @@ def predict_chat(message: str, history: list):
     )
     generated_text = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True).strip()
     yield generated_text
-
+
     end_time = time.time()
     print(f"Inference Time for this turn: {end_time - start_time:.2f} seconds")
 
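
Note: the `yield generated_text` in the hunk above is what lets gr.ChatInterface treat predict_chat as a generator. The app yields once after generation finishes, but the same contract supports token-by-token streaming, where each successive yield repaints the in-progress bot reply. A minimal sketch with a stubbed token loop (illustrative only, not the app's real inference):

    import time

    def predict_chat(message: str, history: list):
        start_time = time.time()
        partial = ""
        # Stand-in for real decoding: each yield replaces the bot's current turn.
        for token in ["Streaming ", "replies ", "arrive ", "chunk ", "by ", "chunk."]:
            partial += token
            yield partial
        print(f"Inference Time for this turn: {time.time() - start_time:.2f} seconds")
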
@@ -136,41 +136,35 @@ if __name__ == "__main__":
         "environment for efficient demonstration. How can I help you today?"
     )
 
-    chatbot_component = gr.Chatbot(height=500, type='messages')
-
-    with gr.Blocks(theme="soft") as demo:
-        gr.Markdown(
-            f"# SmolLM2-360M-Instruct (or TinyLlama GGUF) on ZeroCPU\n"
+    # Use gr.ChatInterface directly without gr.Blocks wrapper for simplicity
+    # This often works better when ChatInterface is the sole component
+    demo = gr.ChatInterface(
+        fn=predict_chat,
+        # Define the chatbot here, with type='messages'
+        chatbot=gr.Chatbot(height=500, type='messages',
+                           value=[[None, initial_chatbot_message]]),  # Set initial message directly here
+        textbox=gr.Textbox(
+            placeholder="Ask me a question...",
+            container=False,
+            scale=7
+        ),
+        title="SmolLM2-360M-Instruct (or TinyLlama GGUF) on ZeroCPU",
+        description=(
             f"This Space demonstrates an LLM for efficient CPU-only inference. "
             f"**Note:** For ZeroCPU, this app prioritizes `{GGUF_MODEL_ID}` (a GGUF-quantized model "
             f"like TinyLlama) due to better CPU performance than `{ORIGINAL_MODEL_ID}` "
             f"without GGUF. Expect varied responses each run due to randomized generation."
-        )
-
-        # This is the key change: explicitly placing the chat_interface component
-        chat_interface = gr.ChatInterface(
-            fn=predict_chat,
-            chatbot=chatbot_component,
-            textbox=gr.Textbox(
-                placeholder="Ask me a question...",
-                container=False,
-                scale=7
-            ),
-            examples=[
-                ["What is the capital of France?"],
-                ["Can you tell me a fun fact about outer space?"],
-                ["What's the best way to stay motivated?"],
-            ],
-            cache_examples=False,
-        )
-
-        # Now explicitly place the chat_interface component into the Blocks layout
-        chat_interface.render()
-
-        # The clear button is typically below the chat interface
-        gr.ClearButton(components=[chatbot_component])
-
-        chatbot_component.value = [[None, initial_chatbot_message]]
-
+        ),
+        theme="soft",
+        examples=[
+            ["What is the capital of France?"],
+            ["Can you tell me a fun fact about outer space?"],
+            ["What's the best way to stay motivated?"],
+        ],
+        cache_examples=False,
+        # Gradio 4.x has `clear_btn` directly on ChatInterface again
+        # but if this causes issues, you might need to revert to a gr.ClearButton() below
+        clear_btn="Clear Chat"  # Re-added clear_btn as it seems to be supported again in latest Gradio versions
+    )
 
     demo.launch()
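
Note: the hunk above replaces the gr.Blocks layout, chat_interface.render(), and gr.ClearButton with a single top-level gr.ChatInterface. One caveat: with type='messages', recent Gradio versions expect the Chatbot value as role/content dicts rather than [user, bot] pairs, so the committed value=[[None, initial_chatbot_message]] may need the dict form. A minimal sketch of the new layout in that format (the stub reply function and greeting text are illustrative assumptions, not the app's code):

    import gradio as gr

    initial_chatbot_message = "Hello! How can I help you today?"  # assumed greeting

    def predict_chat(message, history):
        # Stub; the real app runs model inference and yields the decoded text.
        yield f"You said: {message}"

    demo = gr.ChatInterface(
        fn=predict_chat,
        chatbot=gr.Chatbot(
            height=500,
            type="messages",
            # Dict form matching type='messages' (vs. the pair form in the commit).
            value=[{"role": "assistant", "content": initial_chatbot_message}],
        ),
        textbox=gr.Textbox(placeholder="Ask me a question...", container=False, scale=7),
        title="SmolLM2-360M-Instruct (or TinyLlama GGUF) on ZeroCPU",
        theme="soft",
        cache_examples=False,
    )

    if __name__ == "__main__":
        demo.launch()

clear_btn is omitted from this sketch because, as the commit's own comment notes, its availability varies across Gradio versions.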