from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import subprocess
import pkg_resources

# Report the currently installed transformers version, if any.
try:
    transformers_version = pkg_resources.get_distribution("transformers").version
    print(f"Transformers version: {transformers_version}")
except pkg_resources.DistributionNotFound:
    transformers_version = None
    print("Transformers not installed")

# The handler below is pinned to transformers 4.40.2; if any other version
# (or none) is installed, replace it with that exact release so this check
# stays consistent across runs.
if transformers_version != "4.40.2":
    try:
        subprocess.run("pip uninstall -y transformers", shell=True, check=True)
        subprocess.run("pip install transformers==4.40.2", shell=True, check=True)
        transformers_version = pkg_resources.get_distribution("transformers").version
        print(f"Updated Transformers version: {transformers_version}")
    except subprocess.CalledProcessError as e:
        print(f"Error occurred while updating transformers: {e}")


class CustomModelHandler:
    """Loads a causal LM and its tokenizer and runs generation on GPU or CPU."""

    def __init__(self, model_name_or_path: str):
        self.model_name_or_path = model_name_or_path
        # Prefer a GPU when one is available, otherwise fall back to CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")
        self.load_model()

    def load_model(self):
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_name_or_path, trust_remote_code=True
            )
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name_or_path,
                trust_remote_code=True,
                torch_dtype="auto",
            )
            self.model.to(self.device)
            print(f"Model loaded and moved to {self.device}")

            # Sanity check: warn if any parameters were left off the GPU.
            all_on_gpu = all(param.device.type == "cuda" for param in self.model.parameters())
            if not all_on_gpu:
                print("Warning: Not all model parameters are on the GPU!")
            else:
                print("All model parameters are on the GPU.")

            print(f"Model is on device: {self.model.device}")
        except Exception as e:
            print(f"An error occurred while loading the model: {e}")
            raise
|
    def predict(self, inputs):
        try:
            # Tokenize the prompt, move it to the model's device, generate,
            # and decode the output ids back into text.
            inputs = self.tokenizer(inputs, return_tensors="pt").to(self.device)
            outputs = self.model.generate(**inputs)
            predictions = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
            return predictions
        except Exception as e:
            print(f"An error occurred during prediction: {e}")
            raise


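# Optional sketch, not part of the original handler: instruct-tuned Phi-3
# checkpoints usually expect chat-formatted prompts. Assuming the loaded
# tokenizer ships a chat template, a prompt could be built like this and then
# passed to CustomModelHandler.predict().
def build_chat_prompt(handler: CustomModelHandler, user_message: str) -> str:
    messages = [{"role": "user", "content": user_message}]
    # With tokenize=False the template is rendered to a plain prompt string.
    return handler.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

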
handler = CustomModelHandler("microsoft/Phi-3-vision-128k-instruct")


def predict(input_text):
    return handler.predict(input_text)


if __name__ == "__main__":
    input_text = "Hello, how are you?"
    predictions = predict(input_text)
    print("Predictions:", predictions)