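# Gradio chat app for the lambdaindie/lambda-1v-1B model: streams generated
# tokens into a Chatbot UI, with a stop button and basic sampling controls.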
import os
import time
import threading
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import torch
# Load the local model
model_id = "lambdaindie/lambda-1v-1B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
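# float16 halves GPU memory use; fall back to float32 on CPU, where half precision is poorly supported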
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
)
model.to("cuda" if torch.cuda.is_available() else "cpu")
model.eval()
# Style
css = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono&display=swap');
* {
    font-family: 'JetBrains Mono', monospace !important;
}
html, body, .gradio-container {
    background-color: #111 !important;
    color: #e0e0e0 !important;
}
textarea, input, button, select {
    background-color: transparent !important;
    color: #e0e0e0 !important;
    border: 1px solid #444 !important;
}
.markdown-think {
    background-color: #1e1e1e;
    border-left: 4px solid #555;
    padding: 10px;
    margin-bottom: 8px;
    font-style: italic;
    white-space: pre-wrap;
    animation: pulse 1.5s infinite ease-in-out;
}
@keyframes pulse {
    0% { opacity: 0.6; }
    50% { opacity: 1.0; }
    100% { opacity: 0.6; }
}
"""
theme = gr.themes.Base(
    primary_hue="gray",
    font=[gr.themes.GoogleFont("JetBrains Mono"), "monospace"]
).set(
    body_background_fill="#111",
    body_text_color="#e0e0e0",
    button_primary_background_fill="#333",
    button_primary_text_color="#e0e0e0",
    input_background_fill="#222",
    input_border_color="#444",
    block_title_text_color="#fff"
)
# Stop flag shared between the UI thread and the streaming loop
stop_signal = False
def stop_stream():
    global stop_signal
    stop_signal = True
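# Note: this flag only breaks out of the token-consuming loop in respond();
# the model.generate call keeps running in its thread until it finishes on
# its own. A minimal sketch of true cancellation (untested here) would pass
# a custom StoppingCriteria to generate(), e.g.:
#
#   from transformers import StoppingCriteria, StoppingCriteriaList
#
#   class StopOnFlag(StoppingCriteria):
#       def __call__(self, input_ids, scores, **kwargs):
#           return stop_signal
#
#   generation_kwargs["stopping_criteria"] = StoppingCriteriaList([StopOnFlag()])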
def respond(history, system_message, max_tokens, temperature, top_p):
    global stop_signal
    stop_signal = False
    # Build a plain-text prompt from the system message and chat history
    prompt = ""
    if system_message:
        prompt += system_message + "\n\n"
    for msg in history:
        role = msg["role"]
        content = msg["content"]
        if role == "user":
            prompt += f"User: {content}\n"
        elif role == "assistant":
            prompt += f"Assistant: {content}\n"
    prompt += "Assistant:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
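    # TextIteratorStreamer yields decoded text chunks as generate() produces
    # tokens, skipping the prompt and special tokens in the output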
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
    )
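    # Run generate() in a background thread so this generator can stream partial output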
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    output = ""
    start = time.time()
    for token in streamer:
        if stop_signal:
            break
        output += token
        # Yield the same updated history to both outputs (chatbot and state)
        updated = history + [{"role": "assistant", "content": output}]
        yield updated, updated
    end = time.time()
    # Show the elapsed time in the chat, but keep only the real reply in state;
    # Chatbot with type="messages" accepts only "user" and "assistant" roles
    final = history + [{"role": "assistant", "content": output}]
    display = final + [{"role": "assistant", "content": f"Thought for {end - start:.1f} seconds"}]
    yield display, final
# Interface
with gr.Blocks(css=css, theme=theme) as app:
    chatbot = gr.Chatbot(label="λ", type="messages")
    # Shared history lives in State so respond() can rebuild the prompt each turn
    state = gr.State([])
    with gr.Row():
        msg = gr.Textbox(label="Message")
        send_btn = gr.Button("Send")
        stop_btn = gr.Button("Stop")
    with gr.Accordion("Advanced Settings", open=False):
        system_message = gr.Textbox(label="System Message", value="")
        max_tokens = gr.Slider(64, 2048, value=256, step=1, label="Max Tokens")
        temperature = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")
    def handle_user_msg(user_msg, chat_history):
        # Append the user turn to the shared history and clear the textbox
        if user_msg:
            chat_history = chat_history + [{"role": "user", "content": user_msg}]
        return "", chat_history
    send_btn.click(fn=handle_user_msg, inputs=[msg, state], outputs=[msg, state])\
        .then(fn=respond, inputs=[state, system_message, max_tokens, temperature, top_p], outputs=[chatbot, state])
    stop_btn.click(fn=stop_stream, inputs=[], outputs=[])
app.launch(share=True)
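# Note: on Hugging Face Spaces the app is already publicly served; Gradio ignores share=True there.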