Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -34,10 +34,12 @@ import spaces
|
|
34 |
|
35 |
load_dotenv()
|
36 |
|
|
|
37 |
MODEL_NAME = "hatakeyama-llm-team/Tanuki-8B-Instruct"
|
38 |
PREFERENCE_API_URL = os.getenv("PREFERENCE_API_URL")
|
39 |
assert PREFERENCE_API_URL, "PREFERENCE_API_URL is not set"
|
40 |
|
|
|
41 |
quantization_config = BitsAndBytesConfig(
|
42 |
load_in_4bit=True,
|
43 |
bnb_4bit_compute_dtype=torch.bfloat16,
|
@@ -45,9 +47,9 @@ quantization_config = BitsAndBytesConfig(
|
|
45 |
bnb_4bit_use_double_quant=True,
|
46 |
)
|
47 |
model = AutoModelForCausalLM.from_pretrained(
|
48 |
-
MODEL_NAME, quantization_config=quantization_config, device_map="auto"
|
49 |
)
|
50 |
-
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
51 |
|
52 |
print("Compiling model...")
|
53 |
model = torch.compile(model)
|
|
|
34 |
|
35 |
load_dotenv()
|
36 |
|
37 |
+
HF_API_KEY = os.getenv("HF_API_KEY")
|
38 |
MODEL_NAME = "hatakeyama-llm-team/Tanuki-8B-Instruct"
|
39 |
PREFERENCE_API_URL = os.getenv("PREFERENCE_API_URL")
|
40 |
assert PREFERENCE_API_URL, "PREFERENCE_API_URL is not set"
|
41 |
|
42 |
+
|
43 |
quantization_config = BitsAndBytesConfig(
|
44 |
load_in_4bit=True,
|
45 |
bnb_4bit_compute_dtype=torch.bfloat16,
|
|
|
47 |
bnb_4bit_use_double_quant=True,
|
48 |
)
|
49 |
model = AutoModelForCausalLM.from_pretrained(
|
50 |
+
MODEL_NAME, quantization_config=quantization_config, device_map="auto", token=HF_API_KEY
|
51 |
)
|
52 |
+
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_API_KEY)
|
53 |
|
54 |
print("Compiling model...")
|
55 |
model = torch.compile(model)
|