|
import threading

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from transformers import StoppingCriteria, StoppingCriteriaList
|
|
|
# Identifier of the checkpoint served by this demo.
model_name = "lambdaindie/lambda-1v-1B"

# The tokenizer and the weights are loaded once, at import time, and are
# shared by every request handled by this process.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,  # weights are loaded in half precision
)

# Mutable module-level flag: `stop_generation` sets it, `respond` clears it at
# the start of each request and polls it while streaming.
stop_flag = dict(stop=False)
|
|
|
class _StopFlagCriteria(StoppingCriteria):
    """Stopping criterion that ends generation once ``stop_flag["stop"]`` is set."""

    def __call__(self, input_ids, scores, **kwargs):
        # One boolean per sequence in the batch; every sequence is marked
        # finished as soon as the shared flag has been raised.
        return torch.full(
            (input_ids.shape[0],),
            stop_flag["stop"],
            dtype=torch.bool,
            device=input_ids.device,
        )


def respond(prompt, history):
    """Stream the model's answer to *prompt* into the chat history.

    This is a generator meant to be used as a Gradio event handler wired to
    ``[textbox, chatbot]`` outputs.

    Args:
        prompt: The user's question, inserted into a fixed prompt template.
        history: The chatbot's current list of ``(user, bot)`` pairs; ``None``
            is treated as an empty list.

    Yields:
        ``("", updated_history)`` after every streamed chunk: the empty string
        clears the textbox, and ``updated_history`` is the trimmed history plus
        the current exchange with the text generated so far.
    """
    stop_flag["stop"] = False
    # Only the three most recent exchanges stay on screen. The history is not
    # fed to the model: each question is answered independently.
    history = (history or [])[-3:]

    full_prompt = f"\nThink a bit step-by-step before answering.\nQuestion: {prompt}\nAnswer:"
    # Keep the input tensors on the same device as the model weights.
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "max_new_tokens": 512,
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.9,
        "pad_token_id": tokenizer.eos_token_id,
        "streamer": streamer,
        # Without this, a stop request would only stop the UI loop below while
        # the model kept generating in the background thread.
        "stopping_criteria": StoppingCriteriaList([_StopFlagCriteria()]),
    }

    def _run_generation():
        """Run ``model.generate`` and make sure the streamer is always closed."""
        try:
            model.generate(**generation_kwargs)
        except Exception:
            # If generation fails, the streamer never receives its end signal
            # and the consuming loop below would block forever.
            streamer.end()
            raise

    # `generate` blocks until it is done, so it runs in a worker thread while
    # this generator consumes the streamer.
    generation_thread = threading.Thread(target=_run_generation)
    generation_thread.start()

    reasoning = ""
    for new_text in streamer:
        if stop_flag["stop"]:
            # A value returned from a generator is never delivered to the UI,
            # so just stop; the last yielded state stays on screen.
            return
        reasoning += new_text
        # NOTE(review): model text is inserted into the HTML wrapper unescaped;
        # this presumably relies on the Chatbot component's own sanitization.
        yield "", history + [(prompt, f"<div class='final-answer'>{reasoning}</div>")]
|
|
|
def stop_generation():
    """Raise the shared stop flag so that `respond` stops streaming."""
    stop_flag.update(stop=True)
|
|
|
|
|
# Build the chat UI: a title, the chatbot pane, and one row holding the input
# textbox plus the send ("Enviar") and stop ("Parar") buttons.
with gr.Blocks(css="""
#chatbot, .gr-markdown, .gr-button, .gr-textbox {
    font-family: 'JetBrains Mono', monospace !important;
    font-size: 11px !important;
}
.final-answer {
    background-color: #1e1e1e;
    color: #ffffff;
    padding: 10px;
    border-left: 4px solid #4caf50;
    white-space: pre-wrap;
    font-size: 11px !important;
}
""") as demo:
    gr.Markdown("## λambdAI — Reasoning Chat")

    chatbot = gr.Chatbot(elem_id="chatbot")
    with gr.Row():
        txt = gr.Textbox(placeholder="Digite sua pergunta...", show_label=False)
        send_btn = gr.Button("Enviar")
        stop_btn = gr.Button("Parar")

    # Both the button and the Enter key run the same streaming handler, which
    # clears the textbox and updates the chatbot.
    send_btn.click(respond, [txt, chatbot], [txt, chatbot])
    txt.submit(respond, [txt, chatbot], [txt, chatbot])
    # queue=False lets the stop request bypass the queue; otherwise it could
    # wait behind the very generation it is supposed to interrupt.
    stop_btn.click(stop_generation, None, None, queue=False)

# `respond` is a generator; older Gradio releases only stream generator
# handlers when the queue is enabled, and enabling it is harmless on newer ones.
demo.queue()
demo.launch(share=True)