# Spaces: Running
import html
import threading

import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    StoppingCriteria,
    StoppingCriteriaList,
    TextIteratorStreamer,
)
# Pick the compute device automatically: GPU when CUDA is available, else CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the tokenizer and the model, then place the model on the chosen device.
model_name = "lambdaindie/lambda-1v-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model = model.to(device)

# Mutable flag shared between the stop handler and the streaming loop.
stop_flag = dict(stop=False)
# Response function
def _build_stop_criteria(cancelled):
    """Return stopping criteria that end generation once a stop is requested.

    Generation halts at the next decoding step when either the shared
    ``stop_flag`` is set (the stop button) or ``cancelled`` is set (the
    consumer of the stream has gone away), so the background thread does not
    keep decoding up to ``max_new_tokens`` with nobody reading the output.

    Args:
        cancelled: ``threading.Event`` owned by a single ``respond`` call.

    Returns:
        A ``StoppingCriteriaList`` suitable for ``model.generate``.
    """

    class _StopRequested(StoppingCriteria):
        def __call__(self, input_ids, scores, **kwargs):
            should_stop = stop_flag["stop"] or cancelled.is_set()
            # One boolean per sequence in the batch, on the same device as
            # the ids so it can be combined with the other criteria.
            return torch.full(
                (input_ids.shape[0],),
                should_stop,
                dtype=torch.bool,
                device=input_ids.device,
            )

    return StoppingCriteriaList([_StopRequested()])


def respond(prompt, history):
    """Stream the model's answer to ``prompt`` into the chat history.

    This is a generator: every yield is a ``(textbox_value, chat_history)``
    pair where the textbox value is always ``""`` (clearing the input) and the
    chat history is the previous history plus one ``(prompt, answer_html)``
    entry holding the text generated so far.

    Args:
        prompt: The user's question.
        history: Existing chat history as a list of ``(user, bot)`` pairs,
            or ``None`` when the chat is empty.

    Yields:
        ``("", updated_history)`` after each chunk of generated text.
    """
    history = history or []

    # Nothing to answer: leave the chat untouched instead of running the model.
    if not prompt or not prompt.strip():
        yield "", history
        return

    stop_flag["stop"] = False
    full_prompt = f"\nThink a bit step-by-step before answering. \nQuestion: {prompt} \nAnswer:"
    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    cancelled = threading.Event()

    # Run generation in a background thread so tokens can be streamed as they
    # are produced.
    generation_thread = threading.Thread(
        target=model.generate,
        kwargs={
            "input_ids": inputs["input_ids"],
            "attention_mask": inputs["attention_mask"],
            "max_new_tokens": 512,
            "do_sample": True,
            "temperature": 0.7,
            "top_p": 0.9,
            "pad_token_id": tokenizer.eos_token_id,
            "streamer": streamer,
            "stopping_criteria": _build_stop_criteria(cancelled),
        },
    )
    generation_thread.start()

    reasoning = ""
    try:
        for new_text in streamer:
            if stop_flag["stop"]:
                # Keep whatever has already been shown; a generator's return
                # value would be discarded anyway.
                break
            reasoning += new_text
            # Escape the model text so stray "<" or "&" cannot break out of
            # the wrapping <div>.
            safe_text = html.escape(reasoning, quote=False)
            yield "", history + [(prompt, f"<div class='final-answer'>{safe_text}</div>")]
    finally:
        # Runs on normal completion, on stop, and when the generator is closed
        # early: make sure the model stops decoding and the thread is reaped.
        cancelled.set()
        generation_thread.join()
# Stop the current generation
def stop_generation():
    """Raise the shared stop flag that the streaming loop in ``respond`` checks."""
    stop_flag.update(stop=True)
# Gradio interface
# Monospace styling for the chat widgets plus the highlighted answer box that
# respond() wraps each reply in (class "final-answer").
CUSTOM_CSS = """
#chatbot, .gr-markdown, .gr-button, .gr-textbox {
    font-family: 'JetBrains Mono', monospace !important;
    font-size: 11px !important;
}
.final-answer {
    background-color: #1e1e1e;
    color: #ffffff;
    padding: 10px;
    border-left: 4px solid #4caf50;
    font-family: 'JetBrains Mono', monospace !important;
    white-space: pre-wrap;
    font-size: 11px !important;
}
"""

with gr.Blocks(css=CUSTOM_CSS) as demo:
    # Load the JetBrains Mono web font referenced by the CSS.
    gr.Markdown('<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono&display=swap" rel="stylesheet">')
    gr.Markdown("## λambdAI — Reasoning Chat")
    chatbot = gr.Chatbot(elem_id="chatbot")
    with gr.Row():
        txt = gr.Textbox(placeholder="Digite sua pergunta...", show_label=False)
        send_btn = gr.Button("Enviar")
        stop_btn = gr.Button("Parar")

    # Clicking the send button and pressing Enter in the textbox both clear
    # the textbox and stream the answer into the chatbot.
    send_btn.click(respond, [txt, chatbot], [txt, chatbot])
    txt.submit(respond, [txt, chatbot], [txt, chatbot])
    # The stop request must not wait in the queue behind the very generation
    # it is supposed to interrupt, so it bypasses the queue.
    stop_btn.click(stop_generation, None, None, queue=False)

# respond() is a generator handler; streaming its yields needs the queue, so
# enable it explicitly rather than relying on the version-specific default.
demo.queue()
demo.launch(share=True)