Didier committed on
Commit
9f5045e
·
1 Parent(s): 1ce396b

Choosing the 10b model loaded in 8 bits

Browse files
Files changed (2) hide show
  1. app.py +6 -3
  2. requirements.txt +1 -0
app.py CHANGED
@@ -12,12 +12,15 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
12
  import gradio as gr
13
 
14
  #
15
- # Load the "small" MADLAD400 model (3 billion parameters)
16
  #
17
- model_name = "google/madlad400-3b-mt"
18
  tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
19
  model = AutoModelForSeq2SeqLM.from_pretrained(
20
- model_name, device_map="auto", torch_dtype=torch.float16)
 
 
 
21
  model = torch.compile(model)
22
 
23
  #
 
12
  import gradio as gr
13
 
14
  #
15
+ # Load the MADLAD400 10B model (loaded in 8-bit)
16
  #
17
+ model_name = "google/madlad400-10b-mt"
18
  tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
19
  model = AutoModelForSeq2SeqLM.from_pretrained(
20
+ model_name,
21
+ device_map="auto",
22
+ torch_dtype=torch.float16,
23
+ load_in_8bit=True)
24
  model = torch.compile(model)
25
 
26
  #
requirements.txt CHANGED
@@ -2,6 +2,7 @@
2
  torch
3
  transformers
4
  accelerate
 
5
  sentencepiece
6
  tokenizers
7
  optimum
 
2
  torch
3
  transformers
4
  accelerate
5
+ bitsandbytes
6
  sentencepiece
7
  tokenizers
8
  optimum