from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import subprocess
import pkg_resources

# Verify and print the installed transformers version
try:
    transformers_version = pkg_resources.get_distribution("transformers").version
    print(f"Transformers version: {transformers_version}")
except pkg_resources.DistributionNotFound:
    transformers_version = None
    print("Transformers not installed")

# If the expected release is not installed, replace it with the development
# version from GitHub. Note: pkg_resources caches distribution metadata at
# import time and the transformers module imported above stays loaded, so the
# version printed below may be stale until the interpreter is restarted.
if transformers_version != "4.40.2":
    try:
        subprocess.run('pip uninstall -y transformers', shell=True, check=True)
        subprocess.run('pip install git+https://github.com/huggingface/transformers', shell=True, check=True)
        transformers_version = pkg_resources.get_distribution("transformers").version
        print(f"Updated Transformers version: {transformers_version}")
    except subprocess.CalledProcessError as e:
        print(f"Error occurred while updating transformers: {e}")


class CustomModelHandler:
    def __init__(self, model_name_or_path: str):
        self.model_name_or_path = model_name_or_path
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")
        self.load_model()

    def load_model(self):
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, trust_remote_code=True)
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name_or_path,
                trust_remote_code=True,
                torch_dtype="auto"
            )
            self.model.to(self.device)
            print(f"Model loaded and moved to {self.device}")

            # Check whether every model parameter landed on the GPU
            all_on_gpu = all(param.device.type == 'cuda' for param in self.model.parameters())
            if not all_on_gpu:
                print("Warning: Not all model parameters are on the GPU!")
            else:
                print("All model parameters are on the GPU.")

            # Confirm model device
            print(f"Model is on device: {self.model.device}")
        except Exception as e:
            print(f"An error occurred while loading the model: {e}")
            raise

    def predict(self, inputs):
        try:
            inputs = self.tokenizer(inputs, return_tensors="pt").to(self.device)
            outputs = self.model.generate(**inputs)
            predictions = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
            return predictions
        except Exception as e:
            print(f"An error occurred during prediction: {e}")
            raise


# Initialize the handler with the model path
handler = CustomModelHandler("microsoft/Phi-3-vision-128k-instruct")


# Example prediction function
def predict(input_text):
    return handler.predict(input_text)


# Example usage
if __name__ == "__main__":
    input_text = "Hello, how are you?"
    predictions = predict(input_text)
    print("Predictions:", predictions)
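

# --- Optional sketch: image + text inference ---------------------------------
# The handler above only sends text through AutoTokenizer, but
# microsoft/Phi-3-vision-128k-instruct also accepts images. The function below
# is a sketch of how a multimodal request could be handled via AutoProcessor;
# the prompt template, the helper name, and max_new_tokens=256 are assumptions
# (based on the model card's processor interface), not part of the original
# handler, so verify them before use.
def predict_with_image(image_path: str, prompt_text: str):
    from PIL import Image
    from transformers import AutoProcessor

    processor = AutoProcessor.from_pretrained(
        "microsoft/Phi-3-vision-128k-instruct", trust_remote_code=True
    )
    image = Image.open(image_path)
    # Phi-3-vision expects an <|image_1|> placeholder in the chat-style prompt.
    prompt = f"<|user|>\n<|image_1|>\n{prompt_text}<|end|>\n<|assistant|>\n"
    inputs = processor(prompt, [image], return_tensors="pt").to(handler.device)
    outputs = handler.model.generate(**inputs, max_new_tokens=256)
    return processor.batch_decode(outputs, skip_special_tokens=True)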