CreitinGameplays committed on
Commit
835ba85
·
verified ·
1 Parent(s): d9b9a34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -5,6 +5,7 @@ from typing import Iterator
5
  import gradio as gr
6
  import torch
7
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
8
 
9
  MAX_MAX_NEW_TOKENS = 4096
10
  DEFAULT_MAX_NEW_TOKENS = 1024
@@ -16,7 +17,7 @@ DESCRIPTION = """\
16
 
17
  # Load model with appropriate device configuration
18
  def load_model():
19
- model_id = "CreitinGameplays/Llama-3.1-8B-R1-v0.1"
20
  device = "cuda" if torch.cuda.is_available() else "cpu"
21
 
22
  # If using CPU, load in 32-bit to avoid potential issues with 16-bit operations
@@ -30,7 +31,8 @@ def load_model():
30
  model = AutoModelForCausalLM.from_pretrained(
31
  model_id,
32
  torch_dtype=torch.float16,
33
- device_map="auto"
 
34
  )
35
  tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side='left')
36
  tokenizer.use_default_system_prompt = False
 
5
  import gradio as gr
6
  import torch
7
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
8
+ import bitsandbytes
9
 
10
  MAX_MAX_NEW_TOKENS = 4096
11
  DEFAULT_MAX_NEW_TOKENS = 1024
 
17
 
18
  # Load model with appropriate device configuration
19
  def load_model():
20
+ model_id = "CreitinGameplays/Mistral-Nemo-12B-R1-v0.1"
21
  device = "cuda" if torch.cuda.is_available() else "cpu"
22
 
23
  # If using CPU, load in 32-bit to avoid potential issues with 16-bit operations
 
31
  model = AutoModelForCausalLM.from_pretrained(
32
  model_id,
33
  torch_dtype=torch.float16,
34
+ device_map="auto",
35
+ load_in_8bit=True
36
  )
37
  tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side='left')
38
  tokenizer.use_default_system_prompt = False