Update app.py
app.py CHANGED
@@ -217,11 +217,11 @@ def simple_chat(message: dict, temperature: float = 0.8, max_length: int = 4096,
     try:
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_ID,
-            torch_dtype=torch.bfloat16,
+            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
             low_cpu_mem_usage=True,
             trust_remote_code=True
         )
-
+
         tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

         conversation = []
@@ -230,14 +230,14 @@ def simple_chat(message: dict, temperature: float = 0.8, max_length: int = 4096,
         if "file_content" in message and message["file_content"]:
             file_content = message["file_content"]
             file_name = message["file_name"]
-
+
             # Save the file to a temporary file
             with open(file_name, "wb") as f:
                 f.write(file_content.read())
-
+
             # Call `mode_load` with the file name
             choice, contents = mode_load(file_name)
-
+
             if choice == "image":
                 conversation.append({"role": "user", "image": contents, "content": message['text']})
             elif choice == "doc":
@@ -267,31 +267,31 @@ def simple_chat(message: dict, temperature: float = 0.8, max_length: int = 4096,
             eos_token_id=[151329, 151336, 151338],
         )

-    # for new_text in streamer:
-    #     buffer += new_text
-    #     yield buffer
-    # print(buffer)
-    # print(" ")
-    # print("--------------")
-        with torch.no_grad():
-            generated_ids = model.generate(input_ids['input_ids'], **generate_kwargs)
-            generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
-
-        return PlainTextResponse(generated_text)
+        gen_kwargs = {**input_ids, **generate_kwargs}
+
+        # Define the function to run generation
+        def generate_text():
+            with torch.no_grad():
+                model.generate(**gen_kwargs, streamer=streamer)
+
+        # Start the generation in a separate thread
+        thread = Thread(target=generate_text)
+        thread.start()
+
+        def stream_response():
+            buffer = ""
+            for new_text in streamer:
+                buffer += new_text
+                yield new_text
+            print("--------------")
+            print("Buffer: ")
+            print(" ")
+            print(buffer)
+            print(" ")
+            print("--------------")
+
+        return StreamingResponse(stream_response(), media_type="text/plain")
     except Exception as e:
         return PlainTextResponse(f"Error: {str(e)}")
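
The file-handling hunk branches on the pair returned by `mode_load`, which is defined elsewhere in app.py and is not part of this diff. Purely to illustrate the contract the call site expects, here is a hypothetical sketch; the extension lists and loading logic are guesses, not the Space's actual implementation:

```python
# Hypothetical mode_load, consistent only with how the diff calls it.
# The real app.py defines its own version; everything below is a guess.
from PIL import Image

def mode_load(file_name: str):
    ext = file_name.rsplit(".", 1)[-1].lower()
    if ext in ("png", "jpg", "jpeg", "webp"):
        # Image branch: the caller attaches this under the "image" key.
        return "image", Image.open(file_name).convert("RGB")
    # Document branch: the caller treats the contents as text.
    with open(file_name, "r", encoding="utf-8", errors="ignore") as f:
        return "doc", f.read()
```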
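
The new generation path assumes a `streamer` (a transformers `TextIteratorStreamer`) plus the `Thread` and `StreamingResponse` imports are set up earlier in app.py, outside this diff. A minimal self-contained sketch of the same pattern, using gpt2 as a stand-in for MODEL_ID and an illustrative `/chat` endpoint:

```python
from threading import Thread

import torch
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

app = FastAPI()

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained(
    "gpt2",
    # Same dtype guard as the commit: bfloat16 on GPU, float32 on CPU.
    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
)

@app.post("/chat")
def chat(prompt: str):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    gen_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": 256}

    # generate() blocks until decoding finishes, so it runs on a worker
    # thread while the response generator below drains the streamer.
    def generate_text():
        with torch.no_grad():
            model.generate(**gen_kwargs)

    Thread(target=generate_text).start()

    def stream_response():
        for new_text in streamer:  # blocks until the worker emits text
            yield new_text

    return StreamingResponse(stream_response(), media_type="text/plain")
```

This is what lets `StreamingResponse` replace the old blocking `return PlainTextResponse(generated_text)`: the handler returns immediately and yields chunks to the client as the worker thread pushes decoded text into the streamer, instead of waiting for the full completion.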