mateoluksenberg committed on
Commit f265caa · verified · 1 Parent(s): 29e870f

Update app.py

Files changed (1)
  1. app.py +127 -127
app.py CHANGED
@@ -211,157 +211,157 @@ EXAMPLES = [
     [{"text": "Quiero armar un JSON, solo el JSON sin texto, que contenga los datos de la primera mitad de la tabla de la imagen (las primeras 10 jurisdicciones 901-910). Ten en cuenta que los valores numéricos son decimales de cuatro dígitos. La tabla contiene las siguientes columnas: Codigo, Nombre, Fecha Inicio, Fecha Cese, Coeficiente Ingresos, Coeficiente Gastos y Coeficiente Unificado. La tabla puede contener valores vacíos, en ese caso dejarlos como null. Cada fila de la tabla representa una jurisdicción con sus respectivos valores.", }]
 ]
 
-@spaces.GPU()
-def simple_chat(message, history: list, temperature: float = 0.8, max_length: int = 4096, top_p: float = 1, top_k: int = 10, penalty: float = 1.0):
-
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_ID,
-        torch_dtype=torch.bfloat16,
-        low_cpu_mem_usage=True,
-        trust_remote_code=True
-    )
-
-    print(f'message is - {message}')
-    print(f'history is - {history}')
-    conversation = []
-    prompt_files = []
-    if message["files"]:
-        choice, contents = mode_load(message["files"][-1])
-        if choice == "image":
-            conversation.append({"role": "user", "image": contents, "content": message['text']})
-        elif choice == "doc":
-            format_msg = contents + "\n\n\n" + "{} files uploaded.\n" + message['text']
-            conversation.append({"role": "user", "content": format_msg})
-    else:
-        if len(history) == 0:
-            # raise gr.Error("Please upload an image first.")
-            contents = None
-            conversation.append({"role": "user", "content": message['text']})
-        else:
-            # image = Image.open(history[0][0][0])
-            for prompt, answer in history:
-                if answer is None:
-                    prompt_files.append(prompt[0])
-                    conversation.extend([{"role": "user", "content": ""}, {"role": "assistant", "content": ""}])
-                else:
-                    conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
-            if len(prompt_files) > 0:
-                choice, contents = mode_load(prompt_files[-1])
-            else:
-                choice = ""
-                conversation.append({"role": "user", "image": "", "content": message['text']})
-
-            if choice == "image":
-                conversation.append({"role": "user", "image": contents, "content": message['text']})
-            elif choice == "doc":
-                format_msg = contents + "\n\n\n" + "{} files uploaded.\n" + message['text']
-                conversation.append({"role": "user", "content": format_msg})
-    print(f"Conversation is -\n{conversation}")
-
-    input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True,
-                                              return_tensors="pt", return_dict=True).to(model.device)
-    streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
-
-    generate_kwargs = dict(
-        max_length=max_length,
-        streamer=streamer,
-        do_sample=True,
-        top_p=top_p,
-        top_k=top_k,
-        temperature=temperature,
-        repetition_penalty=penalty,
-        eos_token_id=[151329, 151336, 151338],
-    )
-    gen_kwargs = {**input_ids, **generate_kwargs}
-
-    with torch.no_grad():
-        thread = Thread(target=model.generate, kwargs=gen_kwargs)
-        thread.start()
-        buffer = ""
-        for new_text in streamer:
-            buffer += new_text
-            yield buffer
-        print(" ")
-        print("---------")
-        print("Text: ")
-        print(" ")
-        print(buffer)
-        print(" ")
-        print("---------")
-
-# @spaces.GPU()
-# def simple_chat(message: dict, temperature: float = 0.8, max_length: int = 4096, top_p: float = 1, top_k: int = 10, penalty: float = 1.0):
-#     try:
-#         model = AutoModelForCausalLM.from_pretrained(
-#             MODEL_ID,
-#             torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
-#             low_cpu_mem_usage=True,
-#             trust_remote_code=True
-#         )
-
-#         #tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
-
-#         conversation = []
-
-#         if "file_content" in message and message["file_content"]:
-#             file_content = message["file_content"]
-#             file_name = message["file_name"]
-
-#             with open(file_name, "wb") as f:
-#                 f.write(file_content.read())
-
-#             choice, contents = mode_load(file_name)
-
-#             if choice == "image":
-#                 conversation.append({"role": "user", "image": contents, "content": message['text']})
-#             elif choice == "doc":
-#                 message['text'] = contents + "\n\n\n" + "{} files uploaded.\n".format(1) + message['text']
-#                 conversation.append({"role": "user", "content": message['text']})
-#             # format_msg = contents + "\n\n\n" + "{} files uploaded.\n".format(1) + message['text']
-#             # conversation.append({"role": "user", "content": format_msg})
-#         else:
-#             conversation.append({"role": "user", "content": message['text']})
-
-#         input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True).to(model.device)
-
-#         streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
-
-#         generate_kwargs = dict(
-#             max_length=max_length,
-#             do_sample=True,
-#             top_p=top_p,
-#             top_k=top_k,
-#             temperature=temperature,
-#             repetition_penalty=penalty,
-#             eos_token_id=[151329, 151336, 151338],
-#         )
-
-#         gen_kwargs = {**input_ids, **generate_kwargs}
-
-#         for entry in conversation:
-#             print(f"Role: {entry['role']}, Content: {entry.get('content', '')}")
-
-#         with torch.no_grad():
-#             generated_ids = model.generate(input_ids['input_ids'], **generate_kwargs)
-#             generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
-
-#         text_original = message['text'].strip()
-#         generated_text_cleaned = generated_text.replace(text_original, "").strip()
-
-#         print(" ")
-#         print("---------")
-#         print("Text: ")
-#         print(" ")
-#         print(generated_text_cleaned)
-
-#         return PlainTextResponse(generated_text_cleaned)
-#     except Exception as e:
-#         return PlainTextResponse(f"Error: {str(e)}")
 
+# @spaces.GPU()
+# def simple_chat(message, history: list, temperature: float = 0.8, max_length: int = 4096, top_p: float = 1, top_k: int = 10, penalty: float = 1.0):
+
+#     model = AutoModelForCausalLM.from_pretrained(
+#         MODEL_ID,
+#         torch_dtype=torch.bfloat16,
+#         low_cpu_mem_usage=True,
+#         trust_remote_code=True
+#     )
+
+#     print(f'message is - {message}')
+#     print(f'history is - {history}')
+#     conversation = []
+#     prompt_files = []
+#     if message["files"]:
+#         choice, contents = mode_load(message["files"][-1])
+#         if choice == "image":
+#             conversation.append({"role": "user", "image": contents, "content": message['text']})
+#         elif choice == "doc":
+#             format_msg = contents + "\n\n\n" + "{} files uploaded.\n" + message['text']
+#             conversation.append({"role": "user", "content": format_msg})
+#     else:
+#         if len(history) == 0:
+#             # raise gr.Error("Please upload an image first.")
+#             contents = None
+#             conversation.append({"role": "user", "content": message['text']})
+#         else:
+#             # image = Image.open(history[0][0][0])
+#             for prompt, answer in history:
+#                 if answer is None:
+#                     prompt_files.append(prompt[0])
+#                     conversation.extend([{"role": "user", "content": ""}, {"role": "assistant", "content": ""}])
+#                 else:
+#                     conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
+#             if len(prompt_files) > 0:
+#                 choice, contents = mode_load(prompt_files[-1])
+#             else:
+#                 choice = ""
+#                 conversation.append({"role": "user", "image": "", "content": message['text']})
+
+#             if choice == "image":
+#                 conversation.append({"role": "user", "image": contents, "content": message['text']})
+#             elif choice == "doc":
+#                 format_msg = contents + "\n\n\n" + "{} files uploaded.\n" + message['text']
+#                 conversation.append({"role": "user", "content": format_msg})
+#     print(f"Conversation is -\n{conversation}")
+
+#     input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True,
+#                                               return_tensors="pt", return_dict=True).to(model.device)
+#     streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
+
+#     generate_kwargs = dict(
+#         max_length=max_length,
+#         streamer=streamer,
+#         do_sample=True,
+#         top_p=top_p,
+#         top_k=top_k,
+#         temperature=temperature,
+#         repetition_penalty=penalty,
+#         eos_token_id=[151329, 151336, 151338],
+#     )
+#     gen_kwargs = {**input_ids, **generate_kwargs}
+
+#     with torch.no_grad():
+#         thread = Thread(target=model.generate, kwargs=gen_kwargs)
+#         thread.start()
+#         buffer = ""
+#         for new_text in streamer:
+#             buffer += new_text
+#             yield buffer
+#         print(" ")
+#         print("---------")
+#         print("Text: ")
+#         print(" ")
+#         print(buffer)
+#         print(" ")
+#         print("---------")
 
+@spaces.GPU()
+def simple_chat(message: dict, temperature: float = 0.8, max_length: int = 4096, top_p: float = 1, top_k: int = 10, penalty: float = 1.0):
+    try:
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID,
+            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
+            low_cpu_mem_usage=True,
+            trust_remote_code=True
+        )
+
+        #tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+
+        conversation = []
+
+        if "file_content" in message and message["file_content"]:
+            file_content = message["file_content"]
+            file_name = message["file_name"]
+
+            with open(file_name, "wb") as f:
+                f.write(file_content.read())
+
+            choice, contents = mode_load(file_name)
+
+            if choice == "image":
+                conversation.append({"role": "user", "image": contents, "content": message['text']})
+            elif choice == "doc":
+                message['text'] = contents + "\n\n\n" + "{} files uploaded.\n".format(1) + message['text']
+                conversation.append({"role": "user", "content": message['text']})
+            # format_msg = contents + "\n\n\n" + "{} files uploaded.\n".format(1) + message['text']
+            # conversation.append({"role": "user", "content": format_msg})
+        else:
+            conversation.append({"role": "user", "content": message['text']})
+
+        input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True).to(model.device)
+
+        streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
+
+        generate_kwargs = dict(
+            max_length=max_length,
+            do_sample=True,
+            top_p=top_p,
+            top_k=top_k,
+            temperature=temperature,
+            repetition_penalty=penalty,
+            eos_token_id=[151329, 151336, 151338],
+        )
+
+        gen_kwargs = {**input_ids, **generate_kwargs}
+
+        for entry in conversation:
+            print(f"Role: {entry['role']}, Content: {entry.get('content', '')}")
+
+        with torch.no_grad():
+            generated_ids = model.generate(input_ids['input_ids'], **generate_kwargs)
+            generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
+
+        text_original = message['text'].strip()
+        generated_text_cleaned = generated_text.replace(text_original, "").strip()
+
+        print(" ")
+        print("---------")
+        print("Text: ")
+        print(" ")
+        print(generated_text_cleaned)
+
+        return PlainTextResponse(generated_text_cleaned)
+    except Exception as e:
+        return PlainTextResponse(f"Error: {str(e)}")
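
Net effect of the commit: the streaming handler (model.generate on a background Thread feeding a TextIteratorStreamer, with partial text yielded back to the UI) is commented out, and a blocking handler that decodes the finished sequence once and returns it as a PlainTextResponse becomes active. Below is a minimal sketch contrasting the two generation styles. It is not the Space's code: "gpt2" is a stand-in for the Space's MODEL_ID, and the chat template, file handling, and sampling parameters are omitted.

# Minimal sketch of the two generation paths this commit switches between.
# Assumption: "gpt2" stands in for MODEL_ID; no chat template or file logic.
from threading import Thread

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
inputs = tokenizer("The table contains", return_tensors="pt").to(model.device)

# Streaming path (the version now commented out): generate() runs in a
# background thread and the streamer yields decoded text chunks, which a
# generator-based handler can re-yield to the UI as they arrive.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
Thread(target=model.generate, kwargs={**inputs, "max_new_tokens": 32, "streamer": streamer}).start()
buffer = ""
for chunk in streamer:
    buffer += chunk  # partial text, available immediately

# Blocking path (the version now active): generate() returns the full
# sequence, which is decoded once; nothing is visible until it finishes.
with torch.no_grad():
    ids = model.generate(inputs["input_ids"], max_new_tokens=32)
text = tokenizer.decode(ids[0], skip_special_tokens=True)

Note the prompt-removal difference: the streamer drops the prompt at the token level via skip_prompt=True, while the new blocking handler strips it afterwards by string replacement (generated_text.replace(text_original, "")).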
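Because the active simple_chat returns a single PlainTextResponse instead of yielding chunks, a caller receives the whole reply in one response body. A hypothetical client call could look like the sketch below; the POST /chat route, the URL, and the request shape are assumptions, since the routing code is outside this diff.

# Hypothetical client sketch: assumes the Space mounts simple_chat on a
# POST /chat route; neither the route nor the URL appears in this diff.
import requests

resp = requests.post(
    "https://example-space.hf.space/chat",  # placeholder URL
    json={"text": "Extract the first ten rows as JSON."},  # no file_content: text-only branch
)
print(resp.text)  # full generated text, or "Error: ..." on failure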