import datetime
import streamlit as st
question = "Name the planets in the solar system? A: "
question = "Quais são os planetas do sistema solar?"
question = "Qual é o maior planeta do sistema solar?"
before = datetime.datetime.now()
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
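
# Pre-quantized 4-bit (bitsandbytes NF4) build of Llama 3.1 8B Instruct.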
model_id = "hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4"
prompt = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": question},
]
tokenizer = AutoTokenizer.from_pretrained(model_id)
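
# Build the chat-formatted input ids and move them to the GPU
# (.cuda() assumes a CUDA device is available).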
inputs = tokenizer.apply_chat_template(
    prompt, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).cuda()
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    device_map="auto",
)
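
# Sample up to 256 new tokens; do_sample=True makes the output non-deterministic.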
outputs = model.generate(inputs, do_sample=True, max_new_tokens=256)
response = tokenizer.batch_decode(outputs, skip_special_tokens=True)
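
# Render the decoded response in the Streamlit app.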
with st.container():
    st.write('\n\n')
    st.write('LLM-LANAChat\n\n')
    st.write(response)
print('\noutput generated.')
print('\n\n')
after = datetime.datetime.now()
current_time = after - before  # a datetime.timedelta, printed as H:MM:SS.ffffff
print("\nTime Elapsed: ", current_time)
st.write("\nTime Elapsed: ", current_time)