"""Gradio chat UI that streams step-by-step answers from a causal language model."""

import threading

import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    StoppingCriteria,
    StoppingCriteriaList,
    TextIteratorStreamer,
)

# Pick the device automatically (GPU when available, otherwise CPU).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the model and its tokenizer.
model_name = "lambdaindie/lambda-1v-1B"
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Module-level stop flag: written by stop_generation(), read by respond() and
# by the stopping criterion below. Being module-level, it is shared by every
# caller of respond().
stop_flag = {"stop": False}


class _StopOnFlag(StoppingCriteria):
    """Stopping criterion that ends generation once ``stop_flag["stop"]`` is set."""

    def __call__(self, input_ids, scores, **kwargs):
        # Checked by generate() between decoding steps; returning True halts
        # generation instead of letting it run on to max_new_tokens.
        return stop_flag["stop"]


def respond(prompt, history):
    """Stream the model's answer to ``prompt`` into the chat history.

    This is a generator: each yielded value is a ``(textbox_value, chat_history)``
    pair, where ``textbox_value`` is always ``""`` and ``chat_history`` is
    ``history`` plus one ``(prompt, partial_answer)`` entry holding the text
    generated so far.

    Args:
        prompt: The question typed by the user.
        history: Existing list of chat entries, or ``None`` / empty.

    Yields:
        ``("", updated_history)`` after every chunk of text produced by the
        streamer, until generation finishes or the stop flag is set.
    """
    stop_flag["stop"] = False
    full_prompt = f"\nThink a bit step-by-step before answering. \nQuestion: {prompt} \nAnswer:"
    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # Run generation in a background thread so this generator can consume the
    # streamer while tokens are still being produced.
    generation_thread = threading.Thread(
        target=model.generate,
        kwargs={
            "input_ids": inputs["input_ids"],
            "attention_mask": inputs["attention_mask"],
            "max_new_tokens": 512,
            "do_sample": True,
            "temperature": 0.7,
            "top_p": 0.9,
            "pad_token_id": tokenizer.eos_token_id,
            "streamer": streamer,
            # Lets the stop button actually interrupt model.generate rather
            # than only silencing the UI updates.
            "stopping_criteria": StoppingCriteriaList([_StopOnFlag()]),
        },
    )
    generation_thread.start()

    base_history = history or []
    reasoning = ""
    for new_text in streamer:
        if stop_flag["stop"]:
            # Only yielded values are streamed to the outputs, so there is
            # nothing useful to return here; the last yielded state stays shown.
            break
        reasoning += new_text
        # NOTE(review): the `.final-answer` CSS class defined below is not
        # referenced anywhere in this file; wrapper markup around the answer
        # may have been lost here -- confirm against the intended design.
        yield "", base_history + [(prompt, f"\n{reasoning}\n")]

    # Wait for the worker so the model is idle before the next request starts.
    # After a stop request the criterion above makes generate() return promptly.
    generation_thread.join()


def stop_generation():
    """Request that the generation currently in progress be stopped."""
    stop_flag["stop"] = True


# Gradio interface.
with gr.Blocks(css="""
#chatbot, .gr-markdown, .gr-button, .gr-textbox {
    font-family: 'JetBrains Mono', monospace !important;
    font-size: 11px !important;
}
.final-answer {
    background-color: #1e1e1e;
    color: #ffffff;
    padding: 10px;
    border-left: 4px solid #4caf50;
    font-family: 'JetBrains Mono', monospace !important;
    white-space: pre-wrap;
    font-size: 11px !important;
}
""") as demo:
    gr.Markdown('')
    gr.Markdown("## λambdAI — Reasoning Chat")
    chatbot = gr.Chatbot(elem_id="chatbot")
    with gr.Row():
        txt = gr.Textbox(placeholder="Digite sua pergunta...", show_label=False)
        send_btn = gr.Button("Enviar")
        stop_btn = gr.Button("Parar")

    # Both the send button and pressing Enter in the textbox trigger respond();
    # its first output clears the textbox, its second updates the chat.
    send_btn.click(respond, [txt, chatbot], [txt, chatbot])
    txt.submit(respond, [txt, chatbot], [txt, chatbot])
    stop_btn.click(stop_generation, None, None)

demo.launch(share=True)