sksameermujahid committed
Commit 21b4828 · verified · 1 Parent(s): b8b1be8

Update app.py

Files changed (1): app.py (+17 −5)
app.py CHANGED

@@ -11,6 +11,7 @@ import cloudinary
 import cloudinary.uploader
 import cloudinary.api
 from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
+from peft import PeftModel, PeftConfig
 import speech_recognition as sr
 from pydub import AudioSegment
 from happytransformer import HappyTextToText, TTSettings

@@ -188,18 +189,29 @@ retriever = CustomRagRetriever(index, model_embedding)
 def load_tokenizer_and_model():
     print("Loading tokenizer...")
     try:
-        tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
+        # Load base model first
+        base_model_name = "unsloth/llama-3.2-3b-instruct-unsloth-bnb-4bit"
+
+        tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
         print("Tokenizer loaded successfully.")
 
         print("Loading LLM model...")
-        model_config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True)
-        model_llm = AutoModelForCausalLM.from_pretrained(
-            model_dir,
-            config=model_config,
+        # Load the base model
+        base_model = AutoModelForCausalLM.from_pretrained(
+            base_model_name,
             trust_remote_code=True,
             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
             device_map="auto"
+        )
+
+        # Load the PEFT adapter
+        model_llm = PeftModel.from_pretrained(
+            base_model,
+            model_dir,
+            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+            device_map="auto"
         ).to(device)
+
         print("LLM model loaded successfully.")
         return tokenizer, model_llm
     except Exception as e:
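
The net effect of the change: instead of loading a full model checkpoint from model_dir, app.py now loads the 4-bit quantized base model unsloth/llama-3.2-3b-instruct-unsloth-bnb-4bit and attaches the fine-tuned weights in model_dir as a PEFT adapter, so the memory footprint stays close to that of the quantized base. Below is a minimal usage sketch of the loader's return values, assuming load_tokenizer_and_model() and device are defined as in app.py; the prompt text and generation parameters are illustrative assumptions, not part of this commit:

import torch

tokenizer, model_llm = load_tokenizer_and_model()

# Illustrative prompt; a PeftModel exposes generate() just like the base model.
prompt = "Summarize this property listing in one sentence."
inputs = tokenizer(prompt, return_tensors="pt").to(device)

with torch.no_grad():
    output_ids = model_llm.generate(
        **inputs,
        max_new_tokens=128,
        do_sample=False,
    )

print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

One caveat on the committed code itself: with device_map="auto", accelerate has already placed the weights, so the trailing .to(device) is redundant and can raise an error on bitsandbytes-quantized models; the sketch above simply uses the model as returned.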