mariusjabami committed on
Commit 9faf370 · verified · 1 Parent(s): b77cd37

Update app.py

Files changed (1)
  1. app.py +74 -34
app.py CHANGED
@@ -1,36 +1,76 @@
  import gradio as gr
- from transformers import AutoTokenizer, AutoModelForCausalLM
- from peft import PeftModel, PeftConfig
- import torch
-
- # Load the adapter config
- config = PeftConfig.from_pretrained("lambdaindie/lambda-1v-1B")
-
- # Load the base model and apply the LoRA adapter
- base_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
- model = PeftModel.from_pretrained(base_model, "lambdaindie/lambda-1v-1B")
-
- # Tokenizer
- tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
-
- # Run on CPU
- device = torch.device("cpu")
- model.to(device)
-
- def responder(prompt):
-     inputs = tokenizer(prompt, return_tensors="pt").to(device)
-     outputs = model.generate(
-         **inputs,
-         max_new_tokens=50,
-         temperature=0.8,
-         top_p=0.95,
-         pad_token_id=tokenizer.eos_token_id,
      )
-     return tokenizer.decode(outputs[0], skip_special_tokens=True)
-
- iface = gr.Interface(fn=responder,
-                      inputs=gr.Textbox(lines=2, placeholder="Write something..."),
-                      outputs="text",
-                      title="Lambda-1v-1B (LoRA)",
-                      description="LoRA model fine-tuned by Marius Jabami.")
- iface.launch()
  import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+ import threading
+ import torch
+
+ # Initialize the model and tokenizer
+ model_name = "lambdaindie/lambda-1v-1B"  # Replace with the model name you are using
+ device = "cuda" if torch.cuda.is_available() else "cpu"  # use the GPU when available, otherwise fall back to CPU
+ model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ # Shared flag that the Stop button flips to interrupt streaming
+ stop_flag = {"stop": False}
+
+ def respond(prompt, history):
+     stop_flag["stop"] = False
+
+     # Prompt template: ask the model to reason step by step before answering
+     full_prompt = f"\nThink a bit step-by-step before answering. \nQuestion: {prompt} \nAnswer:"
+
+     inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
+
+     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+
+     # Run generation on a background thread so tokens can be streamed to the UI
+     generation_thread = threading.Thread(
+         target=model.generate,
+         kwargs={
+             "input_ids": inputs["input_ids"],
+             "attention_mask": inputs["attention_mask"],
+             "max_new_tokens": 512,
+             "do_sample": True,
+             "temperature": 0.7,
+             "top_p": 0.9,
+             "pad_token_id": tokenizer.eos_token_id,
+             "streamer": streamer,
+         }
      )
+     generation_thread.start()
+
+     reasoning = ""
+     for new_text in streamer:
+         if stop_flag["stop"]:
+             return "", history  # stop updating the UI; the worker thread finishes on its own
+         reasoning += new_text
+         # Append the in-progress answer as a new turn instead of dropping the previous one
+         yield "", history + [(prompt, f"<div class='final-answer'>{reasoning}</div>")]
+
+ def stop_generation():
+     stop_flag["stop"] = True
+
+ # Define the Gradio interface
+ with gr.Blocks(css="""
+     #chatbot, .gr-markdown, .gr-button, .gr-textbox {
+         font-family: 'JetBrains Mono', monospace !important;
+         font-size: 11px !important;
+     }
+     .final-answer {
+         background-color: #1e1e1e;
+         color: #ffffff;
+         padding: 10px;
+         border-left: 4px solid #4caf50;
+         font-family: 'JetBrains Mono', monospace !important;
+         white-space: pre-wrap;
+         font-size: 11px !important;
+     }
+ """) as demo:
+     gr.Markdown("## λambdAI — Reasoning Chat")
+
+     chatbot = gr.Chatbot(elem_id="chatbot")
+     with gr.Row():
+         txt = gr.Textbox(placeholder="Type your question...", show_label=False)
+         send_btn = gr.Button("Send")
+         stop_btn = gr.Button("Stop")
+
+     send_btn.click(respond, [txt, chatbot], [txt, chatbot])
+     txt.submit(respond, [txt, chatbot], [txt, chatbot])
+     stop_btn.click(stop_generation, None, None)
+
+ demo.launch(share=True)
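
Note that flipping stop_flag only breaks the streaming loop in respond(); the model.generate() call keeps running on its worker thread until it finishes on its own. A minimal sketch of a hard stop, assuming the same shared stop_flag dict, using transformers' StoppingCriteria (the FlagStop class name is illustrative, not part of this commit):

from transformers import StoppingCriteria, StoppingCriteriaList

class FlagStop(StoppingCriteria):
    # Returning True tells generate() to stop producing tokens.
    def __call__(self, input_ids, scores, **kwargs):
        return stop_flag["stop"]

# Passed alongside the other generate() kwargs inside the thread:
#     "stopping_criteria": StoppingCriteriaList([FlagStop()]),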