Sarath Shekkizhar committed: Update app.py
app.py CHANGED

@@ -15,7 +15,7 @@ DESCRIPTION = """
 Llama3-TenyxChat-70B is part of the TenyxChat series, models trained to function as useful assistants.
 The model is obtained via direct preference tuning using Tenyx's fine-tuning technology. Model details available at our model page.
 
-**The model is currently loaded in
+**The model is currently loaded in 8-bit**.
 """
 
 
@@ -29,8 +29,8 @@ if not torch.cuda.is_available():
 
 if torch.cuda.is_available():
     model_id = "tenyx/Llama3-TenyxChat-70B"
-    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16)
-
+    # model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16)
+    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_8bit=True)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
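For context (an explanatory note, not part of the commit): load_in_8bit=True asks transformers to quantize the model weights to int8 through bitsandbytes at load time, roughly halving GPU memory use compared with the previous torch_dtype=torch.bfloat16 load. Below is a minimal sketch of the same load written against the BitsAndBytesConfig / quantization_config API, assuming transformers, accelerate, and bitsandbytes are installed; the config-object form is an equivalent spelling, not what the commit itself uses.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "tenyx/Llama3-TenyxChat-70B"

if torch.cuda.is_available():
    # 8-bit weight quantization via bitsandbytes (same effect as load_in_8bit=True above)
    quant_config = BitsAndBytesConfig(load_in_8bit=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",                 # shard layers across the available GPUs
        quantization_config=quant_config,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.use_default_system_prompt = False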