Gokulram2710
/

phi-3-vision-128k-instruct-new

Text Generation

Model card · Files and versions

Gokulram2710 committed on Aug 3, 2024

Commit

dd96f01

·

verified ·

1 Parent(s): 528b129

Update handler.py

Files changed (1) hide show

handler.py +26 -25

handler.py CHANGED Viewed

@@ -1,26 +1,27 @@
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
-class CustomModelHandler:
-    def __init__(self, model_name_or_path: str):
-        self.model_name_or_path = model_name_or_path
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.load_model()
-    def load_model(self):
-        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, trust_remote_code=True)
-        self.model = AutoModelForCausalLM.from_pretrained(self.model_name_or_path, trust_remote_code=True)
-        self.model.to(self.device)
-    def predict(self, inputs):
-        inputs = self.tokenizer(inputs, return_tensors="pt").to(self.device)
-        outputs = self.model.generate(**inputs)
-        predictions = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
-        return predictions
-# Initialize the handler with the model path
-handler = CustomModelHandler("microsoft/Phi-3-vision-128k-instruct")
-# Example prediction function
-def predict(input_text):
     return handler.predict(input_text)

+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+class CustomModelHandler:
+    def __init__(self, model_name_or_path: str):
+        self.model_name_or_path = model_name_or_path
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.load_model()
+    def load_model(self):
+        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, trust_remote_code=True)
+        self.model = AutoModelForCausalLM.from_pretrained(self.model_name_or_path, trust_remote_code=True, torch_dtype="auto",
+            use_flash_attn=False)
+        self.model.to(self.device)
+    def predict(self, inputs):
+        inputs = self.tokenizer(inputs, return_tensors="pt").to(self.device)
+        outputs = self.model.generate(**inputs)
+        predictions = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
+        return predictions
+# Initialize the handler with the model path
+handler = CustomModelHandler("microsoft/Phi-3-vision-128k-instruct")
+# Example prediction function
+def predict(input_text):
     return handler.predict(input_text)