omeryentur committed
Commit 8b66fe4 · verified · 1 Parent(s): 3ffc7cc

Update app.py

Files changed (1)
  1. app.py +49 -88
app.py CHANGED
@@ -1,96 +1,57 @@
  import gradio as gr
- from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch
  from peft import PeftModel
- from typing import Dict, Any

- class LlamaInterface:
-     def __init__(
-         self,
-         base_model_name: str = "meta-llama/Llama-3.2-1B",
-         lora_model_name: str = "Anlam-Lab/Llama-3.2-1B-it-anlamlab-SA-Chatgpt4mini"
-     ):
-         self.device = "cuda" if torch.cuda.is_available() else "cpu"

-         self.tokenizer = AutoTokenizer.from_pretrained(base_model_name)
-         # Set the padding token
-         self.tokenizer.pad_token = self.tokenizer.eos_token
-
-         self.model = AutoModelForCausalLM.from_pretrained(
-             base_model_name,
-             device_map="auto",
-             torch_dtype=torch.float16
-         )
-         self.model = PeftModel.from_pretrained(self.model, lora_model_name)
-         self.model.eval()
-
-     def generate_response(self, input_text: str) -> str:
-         if not input_text or not input_text.strip():
-             return "Error: Please provide valid input text."
-
-         try:
-             inputs = self.tokenizer(
-                 input_text,
-                 return_tensors="pt",
-                 padding=True,
-                 truncation=True,
-                 max_length=512
-             ).to(self.device)
-
-             generation_config: Dict[str, Any] = {
-                 "max_length": 512,
-                 "temperature": 0.01,
-                 "do_sample": True,
-                 "pad_token_id": self.tokenizer.pad_token_id,
-                 "eos_token_id": self.tokenizer.eos_token_id,
-                 "num_return_sequences": 1,
-                 "top_k": 50,
-                 "top_p": 0.95,
-             }
-
-             with torch.no_grad():
-                 outputs = self.model.generate(**inputs, **generation_config)
-
-             response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-             return response.split("<|end_header_id|>")[-1].split("<|eot_id|>")[0].strip()
-
-         except Exception as e:
-             return f"Error generating response: {str(e)}"
-
-     def create_interface(self) -> gr.Interface:
-         return gr.Interface(
-             fn=self.generate_response,
-             inputs=gr.Textbox(
-                 lines=5,
-                 placeholder="Metninizi buraya girin...",
-                 label="Giriş Metni"
-             ),
-             outputs=gr.Textbox(
-                 lines=5,
-                 label="Model Çıktısı"
-             ),
-             title="Anlam-Lab Duygu Analizi",
-             description="Metin girişi yaparak duygu analizi sonucunu alabilirsiniz.",
-             examples=[
-                 ["Akıllı saati uzun süre kullandım ve şık tasarımı, harika sağlık takibi özellikleri ve uzun pil ömrüyle çok memnun kaldım."],
-                 ["Ürünü aldım ama pil ömrü kısa, ekran parlaklığı yetersiz ve sağlık takibi doğru sonuçlar vermedi."],
-             ],
-             theme="default"
-         )

- def main():
-     try:
-         llama_interface = LlamaInterface()
-         interface = llama_interface.create_interface()
-         interface.launch(
-             share=False,
-             debug=True,
-             server_name="0.0.0.0",
-             server_port=7860
-         )
-     except Exception as e:
-         print(f"Error launching interface: {str(e)}")
-         raise

  if __name__ == "__main__":
-     main()
 
  import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
  import torch
  from peft import PeftModel

+ # Model and tokenizer names
+ model_name = "google/gemma-2-2b-it"
+ lora_model_name = "Anlam-Lab/gemma-2-2b-it-anlamlab-SA-Chatgpt4mini"
+
+ # Configure 4-bit quantization
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.float16,
+     bnb_4bit_use_double_quant=True,
+ )
+
+ # Initialize tokenizer
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ # Load the base model with 4-bit quantization
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     device_map="auto",
+     quantization_config=bnb_config
+ )
+
+ # Load the LoRA adapter
+ model = PeftModel.from_pretrained(model, lora_model_name)
+
+ def generate_response(input_text):
+     inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
+
+     generation_config = {
+         "max_length": 512,
+         "temperature": 0.01,
+         "do_sample": True,
+         "pad_token_id": tokenizer.pad_token_id,
+         "eos_token_id": tokenizer.eos_token_id,
+     }
+
+     with torch.no_grad():
+         outputs = model.generate(**inputs, **generation_config)

+     response = tokenizer.decode(outputs[0])
+     return response.split("<start_of_turn>model\n")[1].split("<end_of_turn>")[0]

+ # Create Gradio interface
+ iface = gr.Interface(
+     fn=generate_response,
+     inputs=gr.Textbox(lines=5, placeholder="Metninizi buraya girin..."),
+     outputs=gr.Textbox(lines=5, label="Model Çıktısı"),
+     title="Anlam-Lab"
+ )

  if __name__ == "__main__":
+     iface.launch()
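
Note on the new post-processing: generate_response splits the decoded output on Gemma's "<start_of_turn>model" turn marker, which is only present when the prompt is formatted with the tokenizer's chat template. Below is a minimal sketch of how the input could be wrapped before calling the function; build_prompt is a hypothetical helper rather than part of this commit, and the sample review is taken from the previous version's example inputs.

# Minimal sketch (not part of the commit): format the prompt with the Gemma chat
# template so the <start_of_turn>model marker that generate_response splits on
# actually appears in the decoded output. Reuses tokenizer and generate_response
# from app.py above; build_prompt is a hypothetical helper.
def build_prompt(user_text):
    messages = [{"role": "user", "content": user_text}]
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

# Example usage with a review sentence from the previous version's examples:
print(generate_response(build_prompt("Ürünü aldım ama pil ömrü kısa, ekran parlaklığı yetersiz ve sağlık takibi doğru sonuçlar vermedi.")))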