import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
import torch

# Load the adapter config
config = PeftConfig.from_pretrained("lambdaindie/lambda-1v-1B")

# Load the base model and apply the LoRA adapter
base_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(base_model, "lambdaindie/lambda-1v-1B")

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Run on CPU
device = torch.device("cpu")
model.to(device)
model.eval()  # inference only


def responder(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=50,
            do_sample=True,  # sampling must be enabled for temperature/top_p to take effect
            temperature=0.8,
            top_p=0.95,
            pad_token_id=tokenizer.eos_token_id,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


iface = gr.Interface(
    fn=responder,
    inputs=gr.Textbox(lines=2, placeholder="Write something..."),
    outputs="text",
    title="Lambda-1v-1B (LoRA)",
    description="LoRA model fine-tuned by Marius Jabami.",
)

iface.launch()