omeryentur committed
Commit 8b66fe4 · verified · 1 Parent(s): 3ffc7cc

Update app.py

Files changed (1)
  1. app.py +49 -88
app.py CHANGED
@@ -1,96 +1,57 @@
  import gradio as gr
- from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch
  from peft import PeftModel
- from typing import Dict, Any

- class LlamaInterface:
-     def __init__(
-         self,
-         base_model_name: str = "meta-llama/Llama-3.2-1B",
-         lora_model_name: str = "Anlam-Lab/Llama-3.2-1B-it-anlamlab-SA-Chatgpt4mini"
-     ):
-         self.device = "cuda" if torch.cuda.is_available() else "cpu"

-         self.tokenizer = AutoTokenizer.from_pretrained(base_model_name)
-         # Set the padding token
-         self.tokenizer.pad_token = self.tokenizer.eos_token
-
-         self.model = AutoModelForCausalLM.from_pretrained(
-             base_model_name,
-             device_map="auto",
-             torch_dtype=torch.float16
-         )
-         self.model = PeftModel.from_pretrained(self.model, lora_model_name)
-         self.model.eval()
-
-     def generate_response(self, input_text: str) -> str:
-         if not input_text or not input_text.strip():
-             return "Error: Please provide valid input text."
-
-         try:
-             inputs = self.tokenizer(
-                 input_text,
-                 return_tensors="pt",
-                 padding=True,
-                 truncation=True,
-                 max_length=512
-             ).to(self.device)
-
-             generation_config: Dict[str, Any] = {
-                 "max_length": 512,
-                 "temperature": 0.01,
-                 "do_sample": True,
-                 "pad_token_id": self.tokenizer.pad_token_id,
-                 "eos_token_id": self.tokenizer.eos_token_id,
-                 "num_return_sequences": 1,
-                 "top_k": 50,
-                 "top_p": 0.95,
-             }
-
-             with torch.no_grad():
-                 outputs = self.model.generate(**inputs, **generation_config)
-
-             response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-             return response.split("<|end_header_id|>")[-1].split("<|eot_id|>")[0].strip()
-
-         except Exception as e:
-             return f"Error generating response: {str(e)}"
-
-     def create_interface(self) -> gr.Interface:
-         return gr.Interface(
-             fn=self.generate_response,
-             inputs=gr.Textbox(
-                 lines=5,
-                 placeholder="Metninizi buraya girin...",
-                 label="Giriş Metni"
-             ),
-             outputs=gr.Textbox(
-                 lines=5,
-                 label="Model Çıktısı"
-             ),
-             title="Anlam-Lab Duygu Analizi",
-             description="Metin girişi yaparak duygu analizi sonucunu alabilirsiniz.",
-             examples=[
-                 ["Akıllı saati uzun süre kullandım ve şık tasarımı, harika sağlık takibi özellikleri ve uzun pil ömrüyle çok memnun kaldım."],
-                 ["Ürünü aldım ama pil ömrü kısa, ekran parlaklığı yetersiz ve sağlık takibi doğru sonuçlar vermedi."],
-             ],
-             theme="default"
-         )

- def main():
-     try:
-         llama_interface = LlamaInterface()
-         interface = llama_interface.create_interface()
-         interface.launch(
-             share=False,
-             debug=True,
-             server_name="0.0.0.0",
-             server_port=7860
-         )
-     except Exception as e:
-         print(f"Error launching interface: {str(e)}")
-         raise

  if __name__ == "__main__":
-     main()
 
  import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
  import torch
  from peft import PeftModel

+ # Model and tokenizer names
+ model_name = "google/gemma-2-2b-it"
+ lora_model_name = "Anlam-Lab/gemma-2-2b-it-anlamlab-SA-Chatgpt4mini"
+
+ # Configure 4-bit quantization
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.float16,
+     bnb_4bit_use_double_quant=True,
+ )
+
+ # Initialize tokenizer
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ # Load the base model with 4-bit quantization
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     device_map="auto",
+     quantization_config=bnb_config
+ )
+
+ # Load the LoRA adapter
+ model = PeftModel.from_pretrained(model, lora_model_name)
+
+ def generate_response(input_text):
+     inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
+
+     generation_config = {
+         "max_length": 512,
+         "temperature": 0.01,
+         "do_sample": True,
+         "pad_token_id": tokenizer.pad_token_id,
+         "eos_token_id": tokenizer.eos_token_id,
+     }
+
+     with torch.no_grad():
+         outputs = model.generate(**inputs, **generation_config)

+     response = tokenizer.decode(outputs[0])
+     return response.split("<start_of_turn>model\n")[1].split("<end_of_turn>")[0]

+ # Create Gradio interface
+ iface = gr.Interface(
+     fn=generate_response,
+     inputs=gr.Textbox(lines=5, placeholder="Metninizi buraya girin..."),
+     outputs=gr.Textbox(lines=5, label="Model Çıktısı"),
+     title="Anlam-Lab"
+ )

  if __name__ == "__main__":
+     iface.launch()
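
Note on the new post-processing: generate_response splits the decoded output on Gemma's "<start_of_turn>model" turn marker, which is only present when the prompt is formatted with the tokenizer's chat template. Below is a minimal sketch of how the input could be wrapped before calling the function; build_prompt is a hypothetical helper rather than part of this commit, and the sample review is taken from the previous version's example inputs.

# Minimal sketch (not part of the commit): format the prompt with the Gemma chat
# template so the <start_of_turn>model marker that generate_response splits on
# actually appears in the decoded output. Reuses tokenizer and generate_response
# from app.py above; build_prompt is a hypothetical helper.
def build_prompt(user_text):
    messages = [{"role": "user", "content": user_text}]
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

# Example usage with a review sentence from the previous version's examples:
print(generate_response(build_prompt("Ürünü aldım ama pil ömrü kısa, ekran parlaklığı yetersiz ve sağlık takibi doğru sonuçlar vermedi.")))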