import streamlit as st
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import os

# Hugging Face access token (required because the Llama 3.1 repository is gated)
hf_token = os.getenv("HF_TOKEN")

# Load the tokenizer
model_name = "meta-llama/Meta-Llama-3.1-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)

# Load the model
model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token)

# Apply dynamic quantization for CPU inference (quantize the Linear layers to int8)
model = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)

# Move the model to CPU
device = torch.device("cpu")
model = model.to(device)

# Set the padding token to the end-of-sequence token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the anomalies data
df = pd.read_csv('anomalies.csv', sep=',', decimal='.')

# Function to generate a response
def response(question):
    # Build a prompt (in Portuguese) that embeds the anomalies table and the user's question:
    # 'ds' is the DateTime column, 'real' is the expense value, 'group' is the expense group
    prompt = f"Considerando os dados: {df.to_string(index=False)}, onde a coluna 'ds' está em formato DateTime, a coluna 'real' é o valor da despesa e a coluna 'group' é o grupo da despesa. Pergunta: {question}"

    inputs = tokenizer(prompt, return_tensors='pt', padding='max_length', truncation=True, max_length=256).to(device)

    generated_ids = model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_length=inputs['input_ids'].shape[1] + 50,
        temperature=0.7,
        top_p=0.9,
        no_repeat_ngram_size=2,
        num_beams=3,
    )

    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    # Keep only the first sentence after "Resposta:" as the final answer
    final_response = generated_text.split("Resposta:")[-1].split(".")[0] + "."

    return final_response

# Streamlit interface
st.markdown("""