app.py CHANGED
@@ -1,6 +1,6 @@
 import os
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 from peft import PeftModel
 import spaces  # Ensure spaces is imported
@@ -8,14 +8,6 @@ import spaces  # Ensure spaces is imported
 # Define the base model ID
 base_model_id = "meta-llama/Llama-2-13b-hf"
 
-# Create a BitsAndBytesConfig object with the corrected settings
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.bfloat16
-)
-
 # Ensure you have the Hugging Face token set as an environment variable
 huggingface_token = os.getenv('HUGGINGFACE_TOKEN')
 if not huggingface_token:
@@ -24,14 +16,13 @@ if not huggingface_token:
 # Define cache directory
 cache_dir = "./cache"
 
-# Load the base model
+# Load the base model without quantization configuration
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_id,
-    quantization_config=quantization_config,
     trust_remote_code=True,
     token=huggingface_token,  # Use the token parameter
     cache_dir=cache_dir  # Specify cache directory
-)
+).to("cuda")  # Move model to CUDA
 
 # Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained(
@@ -43,7 +34,7 @@ tokenizer = AutoTokenizer.from_pretrained(
 )
 
 # Load the fine-tuned model
-ft_model = PeftModel.from_pretrained(base_model, "checkpoint-2800", cache_dir=cache_dir)
+ft_model = PeftModel.from_pretrained(base_model, "checkpoint-2800", cache_dir=cache_dir).to("cuda")  # Move model to CUDA
 
 def formatting_func(job_description):
     text = f"### The job description: {job_description}\n ### The skills: "
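The diff truncates the body of the if not huggingface_token: guard. A hypothetical fail-fast body (not the author's actual code) would look like this:

import os

huggingface_token = os.getenv('HUGGINGFACE_TOKEN')
if not huggingface_token:
    # Hypothetical guard body: the original is truncated in this view.
    raise ValueError("HUGGINGFACE_TOKEN is not set; add it as a Space secret.")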