Spaces:

joaogante
/

assisted_generation_demo

Running on Zero

joaogante HF Staff committed on Aug 21, 2024

Commit

1ef9900

verified ·

1 Parent(s): bbc624e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,17 +8,9 @@ import time
 model_id = "google/gemma-2-27b-it"
 assistant_id = "google/gemma-2-2b-it"
-torch_device = "cuda" if torch.cuda.is_available() else "cpu"
-print("Running on device:", torch_device)
-print("CPU threads:", torch.get_num_threads())
-if torch_device == "cuda":
-    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
-    assistant_model = AutoModelForCausalLM.from_pretrained(assistant_id, torch_dtype=torch.bfloat16, device_map="auto")
-else:
-    model = AutoModelForCausalLM.from_pretrained(model_id)
-    assistant_model = AutoModelForCausalLM.from_pretrained(assistant_id)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 @spaces.GPU

 model_id = "google/gemma-2-27b-it"
 assistant_id = "google/gemma-2-2b-it"
+model = AutoModelForCausalLM.from_pretrained(model_id).to(dtype=torch.bfloat16, device="cuda")
+assistant_model = AutoModelForCausalLM.from_pretrained(assistant_id).to(dtype=torch.bfloat16, device="cuda")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 @spaces.GPU