wangzerui committed
Commit cd77bd3 · 1 Parent(s): f2040a9
Files changed (1):
  1. app.py +4 -13
app.py CHANGED
@@ -1,6 +1,6 @@
 import os
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 from peft import PeftModel
 import spaces  # Ensure spaces is imported
@@ -8,14 +8,6 @@ import spaces  # Ensure spaces is imported
 # Define the base model ID
 base_model_id = "meta-llama/Llama-2-13b-hf"
 
-# Create a BitsAndBytesConfig object with the corrected settings
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.bfloat16
-)
-
 # Ensure you have the Hugging Face token set as an environment variable
 huggingface_token = os.getenv('HUGGINGFACE_TOKEN')
 if not huggingface_token:
@@ -24,14 +16,13 @@ if not huggingface_token:
 # Define cache directory
 cache_dir = "./cache"
 
-# Load the base model with the updated quantization configuration
+# Load the base model without quantization configuration
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_id,
-    quantization_config=quantization_config,
     trust_remote_code=True,
     token=huggingface_token,  # Use the token parameter
     cache_dir=cache_dir  # Specify cache directory
-)
+).to("cuda")  # Move model to CUDA
 
 # Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained(
@@ -43,7 +34,7 @@ tokenizer = AutoTokenizer.from_pretrained(
 )
 
 # Load the fine-tuned model
-ft_model = PeftModel.from_pretrained(base_model, "checkpoint-2800", cache_dir=cache_dir)
+ft_model = PeftModel.from_pretrained(base_model, "checkpoint-2800", cache_dir=cache_dir).to("cuda")  # Move model to CUDA
 
 def formatting_func(job_description):
     text = f"### The job description: {job_description}\n ### The skills: "
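In effect, the commit drops the 4-bit BitsAndBytesConfig quantization path and loads the base model and its LoRA adapter unquantized, moving both to CUDA explicitly. For reference, below is a minimal sketch of the loading path as it stands after this commit, assuming a CUDA-capable GPU with enough memory for the unquantized 13B weights, HUGGINGFACE_TOKEN set in the environment, and the adapter available at "checkpoint-2800"; the tokenizer arguments elided in the diff are filled in with plausible defaults and are an assumption, not the file's actual contents.

import os
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

base_model_id = "meta-llama/Llama-2-13b-hf"
cache_dir = "./cache"
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")

# Load the base model without quantization and move it to the GPU.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    trust_remote_code=True,
    token=huggingface_token,
    cache_dir=cache_dir,
).to("cuda")

# The tokenizer arguments are elided in the diff; these are assumed defaults.
tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    token=huggingface_token,
    cache_dir=cache_dir,
)

# Attach the fine-tuned LoRA adapter and move the wrapped model to the GPU.
ft_model = PeftModel.from_pretrained(base_model, "checkpoint-2800", cache_dir=cache_dir).to("cuda")

One trade-off worth noting: without quantization or an explicit torch_dtype, from_pretrained materializes the weights in fp32, roughly 52 GB for a 13B model versus on the order of 7 GB under the removed nf4 4-bit config; passing torch_dtype=torch.float16 would roughly halve the fp32 footprint.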