p1atdev committed on
Commit
bbf7f96
·
verified ·
1 Parent(s): 5ec70fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -34,10 +34,12 @@ import spaces
34
 
35
  load_dotenv()
36
 
 
37
  MODEL_NAME = "hatakeyama-llm-team/Tanuki-8B-Instruct"
38
  PREFERENCE_API_URL = os.getenv("PREFERENCE_API_URL")
39
  assert PREFERENCE_API_URL, "PREFERENCE_API_URL is not set"
40
 
 
41
  quantization_config = BitsAndBytesConfig(
42
  load_in_4bit=True,
43
  bnb_4bit_compute_dtype=torch.bfloat16,
@@ -45,9 +47,9 @@ quantization_config = BitsAndBytesConfig(
45
  bnb_4bit_use_double_quant=True,
46
  )
47
  model = AutoModelForCausalLM.from_pretrained(
48
- MODEL_NAME, quantization_config=quantization_config, device_map="auto"
49
  )
50
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
51
 
52
  print("Compiling model...")
53
  model = torch.compile(model)
 
34
 
35
  load_dotenv()
36
 
37
+ HF_API_KEY = os.getenv("HF_API_KEY")
38
  MODEL_NAME = "hatakeyama-llm-team/Tanuki-8B-Instruct"
39
  PREFERENCE_API_URL = os.getenv("PREFERENCE_API_URL")
40
  assert PREFERENCE_API_URL, "PREFERENCE_API_URL is not set"
41
 
42
+
43
  quantization_config = BitsAndBytesConfig(
44
  load_in_4bit=True,
45
  bnb_4bit_compute_dtype=torch.bfloat16,
 
47
  bnb_4bit_use_double_quant=True,
48
  )
49
  model = AutoModelForCausalLM.from_pretrained(
50
+ MODEL_NAME, quantization_config=quantization_config, device_map="auto", token=HF_API_KEY
51
  )
52
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_API_KEY)
53
 
54
  print("Compiling model...")
55
  model = torch.compile(model)