Plat committed
Commit 2b7f2f4 · 1 parent: 1a3a89b

fix: use quantization_config

Files changed (1): app.py (+8 -2)
app.py CHANGED
@@ -1,5 +1,10 @@
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    TextIteratorStreamer,
+    BitsAndBytesConfig,
+)
 from threading import Thread
 
 import gradio as gr
@@ -16,8 +21,9 @@ except:
 
 MODEL_NAME = "hatakeyama-llm-team/Tanuki-8B-Instruct"
 
+quantization_config = BitsAndBytesConfig(load_in_8bit=True)
 model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME, load_in_8bit=True, device_map="auto"
+    MODEL_NAME, quantization_config=quantization_config, device_map="auto"
 )
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 
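For context, the resulting load path looks like the minimal sketch below. It assumes transformers with bitsandbytes and accelerate installed and a CUDA GPU available; recent transformers releases deprecate passing load_in_8bit=True directly to from_pretrained in favor of a BitsAndBytesConfig, which is what this commit switches to.

# Minimal standalone sketch of the pattern this commit adopts
# (assumes transformers, accelerate, and bitsandbytes are
# installed and a CUDA GPU is available for 8-bit loading).
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_NAME = "hatakeyama-llm-team/Tanuki-8B-Instruct"

# 8-bit weight quantization is requested via a config object
# rather than the deprecated load_in_8bit=True kwarg.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=quantization_config,
    device_map="auto",  # let accelerate place layers on available devices
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)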