from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import subprocess
import pkg_resources
# Verify and print the transformers version
try:
    transformers_version = pkg_resources.get_distribution("transformers").version
    print(f"Transformers version: {transformers_version}")
except pkg_resources.DistributionNotFound:
    transformers_version = None
    print("Transformers not installed")
# Install the development build of transformers if the pinned version is not already present
if transformers_version != "4.40.2":
    try:
        subprocess.run('pip uninstall -y transformers', shell=True, check=True)
        subprocess.run('pip install git+https://github.com/huggingface/transformers', shell=True, check=True)
        # Note: pkg_resources caches distribution metadata at import time, so this
        # re-read may still report the pre-upgrade version within the same process.
        transformers_version = pkg_resources.get_distribution("transformers").version
        print(f"Updated Transformers version: {transformers_version}")
    except subprocess.CalledProcessError as e:
        print(f"Error occurred while updating transformers: {e}")
class CustomModelHandler:
    def __init__(self, model_name_or_path: str):
        self.model_name_or_path = model_name_or_path
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")
        self.load_model()
    def load_model(self):
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, trust_remote_code=True)
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name_or_path,
                trust_remote_code=True,
                torch_dtype="auto"
            )
            self.model.to(self.device)
            print(f"Model loaded and moved to {self.device}")
            # On a CUDA device, verify that every parameter actually moved to the GPU
            # (skip the check on CPU-only machines, where the warning would be misleading)
            if self.device.type == "cuda":
                all_on_gpu = all(param.device.type == "cuda" for param in self.model.parameters())
                if not all_on_gpu:
                    print("Warning: Not all model parameters are on the GPU!")
                else:
                    print("All model parameters are on the GPU.")
            # Confirm model device
            print(f"Model is on device: {self.model.device}")
        except Exception as e:
            print(f"An error occurred while loading the model: {e}")
            raise
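    # A hedged alternative sketch: with the optional `accelerate` package
    # installed, from_pretrained can place the weights itself via
    # device_map="auto", making the manual .to(self.device) call above
    # unnecessary. `load_model_with_device_map` is a hypothetical method and
    # is not called by the rest of this handler.
    def load_model_with_device_map(self):
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, trust_remote_code=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name_or_path,
            trust_remote_code=True,
            torch_dtype="auto",
            device_map="auto",  # requires `pip install accelerate`
        )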
    def predict(self, inputs):
        try:
            inputs = self.tokenizer(inputs, return_tensors="pt").to(self.device)
            outputs = self.model.generate(**inputs)
            predictions = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
            return predictions
        except Exception as e:
            print(f"An error occurred during prediction: {e}")
            raise
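    # A minimal sketch of explicit decoding control: generate() falls back to a
    # short default output length when max_new_tokens is not set, so longer
    # completions need it passed explicitly. `predict_with_params` is a
    # hypothetical variant and is not used by the rest of this script.
    def predict_with_params(self, text: str, max_new_tokens: int = 128):
        inputs = self.tokenizer(text, return_tensors="pt").to(self.device)
        outputs = self.model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)
        return self.tokenizer.batch_decode(outputs, skip_special_tokens=True)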
# Initialize the handler with the model path
handler = CustomModelHandler("microsoft/Phi-3-vision-128k-instruct")
# Example prediction function
def predict(input_text):
    return handler.predict(input_text)
# Example usage
if __name__ == "__main__":
input_text = "Hello, how are you?"
predictions = predict(input_text)
print("Predictions:", predictions)