CreitinGameplays committed
Commit 0488844 · verified · 1 parent: d021845

Update app.py

Files changed (1):
  1. app.py (+12, -3)
app.py CHANGED
@@ -1,13 +1,22 @@
 import gradio as gr
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
+import bitsandbytes as bnb
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
-# Define the BLOOM model name
+# Define the model name
 model_name = "CreitinGameplays/ConvAI-9b"
 
+# Quantization configuration with bitsandbytes settings
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16
+)
+
 # Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, low_cpu_mem_usage=True)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
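
For reference, a minimal sketch of how the model configured in this commit would typically be loaded and queried. Everything up to the load call mirrors the diff above; the prompt text and generation parameters are illustrative assumptions, not part of the commit. One hedged caveat: bitsandbytes 4-bit weights are placed on the GPU during loading, and recent transformers versions reject an explicit model.to(device) on a 4-bit model, so that call is omitted here even though the committed app.py keeps it.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_name = "CreitinGameplays/ConvAI-9b"

# 4-bit NF4 quantization with double quantization, as in the commit above
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    low_cpu_mem_usage=True,
)
# No model.to(device) here: the quantized weights are already placed on the GPU,
# and .to() is not supported for 4-bit bitsandbytes models in recent transformers.

# Illustrative prompt and generation settings (assumptions, not from the commit)
prompt = "Hello, how are you?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))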