joaogante (HF Staff) committed
Commit 1ef9900 · verified · 1 Parent(s): bbc624e

Update app.py

Files changed (1):
  app.py (+2, -10)
app.py CHANGED

@@ -8,17 +8,9 @@ import time
 
 model_id = "google/gemma-2-27b-it"
 assistant_id = "google/gemma-2-2b-it"
-torch_device = "cuda" if torch.cuda.is_available() else "cpu"
-print("Running on device:", torch_device)
-print("CPU threads:", torch.get_num_threads())
 
-
-if torch_device == "cuda":
-    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
-    assistant_model = AutoModelForCausalLM.from_pretrained(assistant_id, torch_dtype=torch.bfloat16, device_map="auto")
-else:
-    model = AutoModelForCausalLM.from_pretrained(model_id)
-    assistant_model = AutoModelForCausalLM.from_pretrained(assistant_id)
+model = AutoModelForCausalLM.from_pretrained(model_id).to(dtype=torch.bfloat16, device="cuda")
+assistant_model = AutoModelForCausalLM.from_pretrained(assistant_id).to(dtype=torch.bfloat16, device="cuda")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 @spaces.GPU
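For context, a minimal sketch of how the two models loaded above are typically combined for assisted generation in a ZeroGPU Space. The loading lines mirror the new code in this commit; the function name, prompt handling, and generation parameters are assumptions, not taken from this commit.

```python
# Sketch (assumed usage, not part of this commit): assisted generation with the
# large Gemma model and the small Gemma assistant model loaded in app.py.
import torch
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "google/gemma-2-27b-it"
assistant_id = "google/gemma-2-2b-it"

model = AutoModelForCausalLM.from_pretrained(model_id).to(dtype=torch.bfloat16, device="cuda")
assistant_model = AutoModelForCausalLM.from_pretrained(assistant_id).to(dtype=torch.bfloat16, device="cuda")
tokenizer = AutoTokenizer.from_pretrained(model_id)

@spaces.GPU
def generate(prompt: str) -> str:
    # Tokenize the prompt and move it to the same device as the model.
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    # Passing `assistant_model` enables assisted generation: the 2B model drafts
    # candidate tokens that the 27B model then verifies in parallel.
    outputs = model.generate(**inputs, assistant_model=assistant_model, max_new_tokens=128)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
```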