Spaces:
Runtime error
Update app.py
app.py CHANGED
@@ -141,109 +141,3 @@ with gr.Blocks(fill_height=True, css=css) as demo:
 if __name__ == "__main__":
     demo.launch()
 
-
-
-
-import spaces
-import os
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-from transformers import TextIteratorStreamer
-from threading import Thread
-
-import gradio as gr
-
-text_generator = None
-is_hugging_face = True
-model_id = "AXCXEPT/phi-4-deepseek-R1K-RL-EZO"
-model_id = "AXCXEPT/phi-4-open-R1-Distill-EZOv1"
-
-huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
-huggingface_token = None
-device = "auto"  # torch.device("cuda" if torch.cuda.is_available() else "cpu")
-device = "cuda"
-dtype = torch.bfloat16
-dtype = torch.float16
-
-if not huggingface_token:
-    pass
-    print("no HUGGINGFACE_TOKEN; set the secret if you need one")
-    #raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
-
-
-
-
-
-
-
-
-tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)
-
-print(model_id, device, dtype)
-histories = []
-#model = None
-
-
-
-if not is_hugging_face:
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id, token=huggingface_token, torch_dtype=dtype, device_map=device
-    )
-    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, torch_dtype=dtype, device_map=device, stream=True)  # pipeline has no to(device)
-
-    if next(model.parameters()).is_cuda:
-        print("The model is on a GPU")
-    else:
-        print("The model is on a CPU")
-
-    #print(f"text_generator.device='{text_generator.device}'")
-    if str(text_generator.device).strip() == 'cuda':
-        print("The pipeline is using a GPU")
-    else:
-        print("The pipeline is using a CPU")
-
-print("initialized")
-
-
-def generate_text(messages):
-    if is_hugging_face:  # needs reinitializing on every call for ZeroGPU
-        model = AutoModelForCausalLM.from_pretrained(
-            model_id, token=huggingface_token, torch_dtype=dtype, device_map=device
-        )
-        model.to(device)
-    question = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    question = tokenizer(question, return_tensors="pt").to(device)
-
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
-    generation_kwargs = dict(question, streamer=streamer, max_new_tokens=200)
-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-
-    generated_output = ""
-    thread.start()
-    for new_text in streamer:
-        generated_output += new_text
-        yield generated_output
-generate_text.zerogpu = True
-
-
-
-@spaces.GPU(duration=60)
-def call_generate_text(message, history):
-    # history.append({"role": "user", "content": message})
-    #print(message)
-    #print(history)
-
-    messages = history + [{"role": "user", "content": message}]
-    try:
-
-        for text in generate_text(messages):
-            yield text
-    except RuntimeError as e:
-        print(f"An unexpected error occurred: {e}")
-        yield ""
-
-demo = gr.ChatInterface(call_generate_text, type="messages")
-
-#if __name__ == "__main__":
-demo.queue()
-demo.launch()
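
The removed code also shows the ZeroGPU idiom this Space depends on: on ZeroGPU hardware, CUDA is only available inside functions decorated with @spaces.GPU, which is why the model is reloaded on every call instead of at import time. A sketch of the decorator usage (the function name and body here are illustrative, not from the Space):

import spaces

@spaces.GPU(duration=60)  # hold a ZeroGPU slot for up to 60 seconds per call
def run_on_gpu(prompt):
    # On ZeroGPU Spaces, CUDA is initialized only inside this decorated call,
    # so models must be loaded and moved to "cuda" here, not at module import.
    ...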