"""
File: model_llm.py
Description: Load a Large Language Model (LLM)
Author: Didier Guillevic
Date: 2024-03-16

Importing this module loads the tokenizer and the model as a side effect and
exposes them through the module-level names ``model_name``, ``tokenizer`` and
``model``.
"""

from threading import Thread

import torch
import transformers
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TextIteratorStreamer,
)

# NOTE(review): `Thread`, `transformers`, `BitsAndBytesConfig` and
# `TextIteratorStreamer` are not used in the statements below; they are kept
# because other code may rely on them being importable from this module.

# Hugging Face Hub identifier of the checkpoint to load.
model_name = "mistralai/Mistral-7B-Instruct-v0.3"

# Auto-regressive model for language completion: padding left
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")

# Left padding is only usable if the tokenizer actually has a padding token.
# When the checkpoint does not define one, fall back to the end-of-sequence
# token so that padded (batched) tokenization does not raise. This is a no-op
# for tokenizers that already define a padding token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the weights in half precision. `device_map="auto"` delegates device
# placement to the library, and `low_cpu_mem_usage=True` asks it to limit
# host-memory use while loading.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)

# Rebind `model` to the compiled wrapper returned by torch.compile.
model = torch.compile(model)