mariusjabami committed
Commit b77cd37 · verified · 1 Parent(s): 71282da

Update app.py

Files changed (1): app.py (+18 -17)
app.py CHANGED
@@ -1,35 +1,36 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
+from peft import PeftModel, PeftConfig
 import torch
 
-# Load tokenizer and model manually
-tokenizer = AutoTokenizer.from_pretrained("lambdaindie/lambda-1v-1B")
-model = AutoModelForCausalLM.from_pretrained("lambdaindie/lambda-1v-1B")
+# Load the adapter config
+config = PeftConfig.from_pretrained("lambdaindie/lambda-1v-1B")
 
-# Ensure CPU usage
+# Load base model + LoRA adapter
+base_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
+model = PeftModel.from_pretrained(base_model, "lambdaindie/lambda-1v-1B")
+
+# Tokenizer
+tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
+
+# Move to CPU
 device = torch.device("cpu")
 model.to(device)
 
-# Generation function
 def responder(prompt):
     inputs = tokenizer(prompt, return_tensors="pt").to(device)
     outputs = model.generate(
         **inputs,
         max_new_tokens=50,
-        do_sample=True,
-        top_p=0.95,
         temperature=0.8,
-        pad_token_id=tokenizer.eos_token_id  # avoids a warning for causal models
+        top_p=0.95,
+        pad_token_id=tokenizer.eos_token_id,
     )
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-# Gradio interface
-iface = gr.Interface(
-    fn=responder,
-    inputs=gr.Textbox(lines=2, placeholder="Write something..."),
-    outputs="text",
-    title="Lambda-1v-1B",
-    description="Local text-generation model created by Marius Jabami.",
-)
-
+iface = gr.Interface(fn=responder,
+                     inputs=gr.Textbox(lines=2, placeholder="Write something..."),
+                     outputs="text",
+                     title="Lambda-1v-1B (LoRA)",
+                     description="LoRA fine-tuned model by Marius Jabami.")
 iface.launch()
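
A note on the updated generate() call: the new version drops do_sample=True while still passing temperature and top_p. In transformers, do_sample defaults to False, so generation falls back to greedy decoding and both sampling parameters are ignored (recent versions emit a warning about exactly this). A minimal sketch of the generation block with sampling re-enabled, assuming the same model, tokenizer, and device objects as in the file above and a prompt string:

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        do_sample=True,   # required for temperature/top_p to take effect
        temperature=0.8,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
    )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)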
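
Since inference runs on CPU here, the LoRA indirection can optionally be removed after loading: peft's PeftModel exposes merge_and_unload(), which folds the adapter weights into the base model and returns a plain transformers model. A sketch, assuming a recent peft version; outputs are unchanged, only the per-forward adapter overhead goes away:

    # Fold LoRA weights into the base model and drop the PEFT wrapper.
    model = model.merge_and_unload()
    model.to(device)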