Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ from typing import Iterator
|
|
5 |
import gradio as gr
|
6 |
import torch
|
7 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
|
|
8 |
|
9 |
MAX_MAX_NEW_TOKENS = 4096
|
10 |
DEFAULT_MAX_NEW_TOKENS = 1024
|
@@ -16,7 +17,7 @@ DESCRIPTION = """\
|
|
16 |
|
17 |
# Load model with appropriate device configuration
|
18 |
def load_model():
|
19 |
-
model_id = "CreitinGameplays/
|
20 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
21 |
|
22 |
# If using CPU, load in 32-bit to avoid potential issues with 16-bit operations
|
@@ -30,7 +31,8 @@ def load_model():
|
|
30 |
model = AutoModelForCausalLM.from_pretrained(
|
31 |
model_id,
|
32 |
torch_dtype=torch.float16,
|
33 |
-
device_map="auto"
|
|
|
34 |
)
|
35 |
tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side='left')
|
36 |
tokenizer.use_default_system_prompt = False
|
|
|
5 |
import gradio as gr
|
6 |
import torch
|
7 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
8 |
+
import bitsandbytes
|
9 |
|
10 |
MAX_MAX_NEW_TOKENS = 4096
|
11 |
DEFAULT_MAX_NEW_TOKENS = 1024
|
|
|
17 |
|
18 |
# Load model with appropriate device configuration
|
19 |
def load_model():
|
20 |
+
model_id = "CreitinGameplays/Mistral-Nemo-12B-R1-v0.1"
|
21 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
22 |
|
23 |
# If using CPU, load in 32-bit to avoid potential issues with 16-bit operations
|
|
|
31 |
model = AutoModelForCausalLM.from_pretrained(
|
32 |
model_id,
|
33 |
torch_dtype=torch.float16,
|
34 |
+
device_map="auto",
|
35 |
+
load_in_8bit=True
|
36 |
)
|
37 |
tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side='left')
|
38 |
tokenizer.use_default_system_prompt = False
|