import threading

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_name = "lambdaindie/lambda-1v-1B"

# Load the model with a reduced memory footprint on CPU
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # or torch.bfloat16 if supported
    low_cpu_mem_usage=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

stop_flag = {"stop": False}


def respond(prompt, history):
    stop_flag["stop"] = False
    history = history[-3:]  # keep only the last 3 (question, answer) pairs

    full_prompt = f"\nThink a bit step-by-step before answering.\nQuestion: {prompt}\nAnswer:"
    inputs = tokenizer(full_prompt, return_tensors="pt")

    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    # Run generation in a background thread so tokens can be streamed to the UI
    generation_thread = threading.Thread(
        target=model.generate,
        kwargs={
            "input_ids": inputs["input_ids"],
            "attention_mask": inputs["attention_mask"],
            "max_new_tokens": 512,
            "do_sample": True,
            "temperature": 0.7,
            "top_p": 0.9,
            "pad_token_id": tokenizer.eos_token_id,
            "streamer": streamer,
        },
    )
    generation_thread.start()

    reasoning = ""
    for new_text in streamer:
        # Stop streaming to the UI if the user pressed "Stop"
        # (the background generation thread still runs to completion)
        if stop_flag["stop"]:
            return "", history
        reasoning += new_text
        yield "", history + [(prompt, reasoning)]


def stop_generation():
    stop_flag["stop"] = True


# Gradio interface
with gr.Blocks(css="""
    #chatbot, .gr-markdown, .gr-button, .gr-textbox {
        font-family: 'JetBrains Mono', monospace !important;
        font-size: 11px !important;
    }
    .final-answer {
        background-color: #1e1e1e;
        color: #ffffff;
        padding: 10px;
        border-left: 4px solid #4caf50;
        white-space: pre-wrap;
        font-size: 11px !important;
    }
""") as demo:
    gr.Markdown("## λambdAI — Reasoning Chat")
    chatbot = gr.Chatbot(elem_id="chatbot")

    with gr.Row():
        txt = gr.Textbox(placeholder="Type your question...", show_label=False)
        send_btn = gr.Button("Send")
        stop_btn = gr.Button("Stop")

    send_btn.click(respond, [txt, chatbot], [txt, chatbot])
    txt.submit(respond, [txt, chatbot], [txt, chatbot])
    stop_btn.click(stop_generation, None, None)

demo.launch(share=True)