wangzerui committed
Commit f2b26e1 · 1 Parent(s): d3b3e87
Files changed (1)
  1. app.py +8 -5
app.py CHANGED
@@ -2,7 +2,7 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import gradio as gr
 from peft import PeftModel
-import spaces
+import os
 
 # Define the base model ID
 base_model_id = "meta-llama/Llama-2-13b-hf"
@@ -15,17 +15,21 @@ quantization_config = BitsAndBytesConfig(
     bnb_4bit_compute_dtype=torch.bfloat16
 )
 
+# Ensure you have the Hugging Face token set as an environment variable
+huggingface_token = os.getenv('HUGGINGFACE_TOKEN')
+if not huggingface_token:
+    raise Exception("Hugging Face token not found. Please set it as an environment variable 'HUGGINGFACE_TOKEN'.")
+
 # Load the base model with the updated quantization configuration
-# Adjust 'device_map' based on your system's GPU configuration
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_id,
     quantization_config=quantization_config,
     trust_remote_code=True,
-    token=True  # Update this to use the token parameter
+    token=huggingface_token  # Use the token parameter
 )
 
 # Load the tokenizer
-tokenizer = AutoTokenizer.from_pretrained(base_model_id, add_bos_token=True, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(base_model_id, add_bos_token=True, trust_remote_code=True, token=huggingface_token)
 
 # Load the fine-tuned model
 ft_model = PeftModel.from_pretrained(base_model, "checkpoint-2800")
@@ -34,7 +38,6 @@ def formatting_func(job_description):
     text = f"### The job description: {job_description}\n ### The skills: "
     return text
 
-@spaces.GPU
 def generate_skills(job_description):
    formatted_text = formatting_func(job_description)
    model_input = tokenizer(formatted_text, return_tensors="pt").to("cuda")