import datetime

import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Only the last assignment takes effect; the earlier prompt variants
# (translated from Portuguese) are kept as comments.
# question = "Name the planets in the solar system? A: "
# question = "What are the planets of the solar system?"
question = "What is the largest planet in the solar system?"

before = datetime.datetime.now()
# Pre-quantized 4-bit (NF4, bitsandbytes) build of Meta-Llama-3.1-8B-Instruct.
model_id = "hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4"

prompt = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": question},
]

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Render the chat messages with the model's template and move the ids to GPU.
inputs = tokenizer.apply_chat_template(
    prompt, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).cuda()
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    device_map="auto",
)
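# Note (an assumption, not shown in the original script): the checkpoint above
# ships already quantized to 4-bit NF4 with bitsandbytes. To quantize a
# full-precision checkpoint at load time instead, a BitsAndBytesConfig could
# be passed, e.g.:
# from transformers import BitsAndBytesConfig
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.bfloat16,
# )
# model = AutoModelForCausalLM.from_pretrained(
#     model_id, quantization_config=bnb_config, device_map="auto"
# )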
# Sample up to 256 new tokens; the decoded text includes the prompt tokens.
outputs = model.generate(inputs, do_sample=True, max_new_tokens=256)
response = tokenizer.batch_decode(outputs, skip_special_tokens=True)
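# A minimal sketch (assumption, not in the original app): decode only the
# newly generated tokens, so the displayed answer does not repeat the prompt.
# answer = tokenizer.batch_decode(
#     outputs[:, inputs.shape[-1]:], skip_special_tokens=True
# )[0]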
with st.container():
    st.write('\n\n')
    st.write('LLM-LANAChat\n\n')
    st.write(response[0])

print('\noutput generated.')
print('\n\n')

after = datetime.datetime.now()
elapsed = after - before  # a datetime.timedelta; it has no strftime()
print("\nTime Elapsed: ", elapsed)
st.write("\nTime Elapsed: ", elapsed)
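# Optional formatting sketch (assumption, not in the original): report the
# elapsed wall-clock time in seconds instead of the raw timedelta repr.
# st.write(f"Time Elapsed: {elapsed.total_seconds():.1f}s")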