import datetime

import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Only the last assignment takes effect; the earlier prompt variants
# (translated from Portuguese) are kept as comments.
# question = "Name the planets in the solar system? A: "
# question = "What are the planets of the solar system?"
question = "What is the largest planet in the solar system?"

before = datetime.datetime.now()
# Pre-quantized 4-bit (NF4, bitsandbytes) build of Meta-Llama-3.1-8B-Instruct.
model_id = "hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4"

prompt = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": question},
]

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Render the chat messages with the model's template and move the ids to GPU.
inputs = tokenizer.apply_chat_template(
    prompt, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).cuda()
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    device_map="auto",
)
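# Note (an assumption, not shown in the original script): the checkpoint above
# ships already quantized to 4-bit NF4 with bitsandbytes. To quantize a
# full-precision checkpoint at load time instead, a BitsAndBytesConfig could
# be passed, e.g.:
# from transformers import BitsAndBytesConfig
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.bfloat16,
# )
# model = AutoModelForCausalLM.from_pretrained(
#     model_id, quantization_config=bnb_config, device_map="auto"
# )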
# Sample up to 256 new tokens; the decoded text includes the prompt tokens.
outputs = model.generate(inputs, do_sample=True, max_new_tokens=256)
response = tokenizer.batch_decode(outputs, skip_special_tokens=True)
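# A minimal sketch (assumption, not in the original app): decode only the
# newly generated tokens, so the displayed answer does not repeat the prompt.
# answer = tokenizer.batch_decode(
#     outputs[:, inputs.shape[-1]:], skip_special_tokens=True
# )[0]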
with st.container():
    st.write('\n\n')
    st.write('LLM-LANAChat\n\n')
    st.write(response[0])

print('\noutput generated.')
print('\n\n')

after = datetime.datetime.now()
elapsed = after - before  # a datetime.timedelta; it has no strftime()
print("\nTime Elapsed: ", elapsed)
st.write("\nTime Elapsed: ", elapsed)
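# Optional formatting sketch (assumption, not in the original): report the
# elapsed wall-clock time in seconds instead of the raw timedelta repr.
# st.write(f"Time Elapsed: {elapsed.total_seconds():.1f}s")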