Chris STC committed on
Commit
8a7e86d
·
1 Parent(s): 16236e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -2
app.py CHANGED
@@ -1,5 +1,37 @@
1
- import gradio as gr
2
  import torch
3
  from transformers import BitsAndBytesConfig
4
 
5
- print("E")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import torch
2
  from transformers import BitsAndBytesConfig
3
 
4
+ quantization_config = BitsAndBytesConfig(
5
+ load_in_4bit=True,
6
+ bnb_4bit_compute_dtype=torch.float16,
7
+ bnb_4bit_quant_type="nf4",
8
+ bnb_4bit_use_double_quant=True,
9
+ )
10
+
11
+ # My version with smaller chunks on safetensors for low RAM environments
12
+ model_id = "vilsonrodrigues/falcon-7b-instruct-sharded"
13
+
14
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
15
+ model_4bit = AutoModelForCausalLM.from_pretrained(
16
+ model_id,
17
+ device_map="auto",
18
+ quantization_config=quantization_config,
19
+ trust_remote_code=True)
20
+
21
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
22
+
23
+ pipeline = pipeline(
24
+ "text-generation",
25
+ model=model_4bit,
26
+ tokenizer=tokenizer,
27
+ use_cache=True,
28
+ device_map="auto",
29
+ max_length=296,
30
+ do_sample=True,
31
+ top_k=10,
32
+ num_return_sequences=1,
33
+ eos_token_id=tokenizer.eos_token_id,
34
+ pad_token_id=tokenizer.eos_token_id,
35
+ )
36
+
37
+ print(pipeline("Hello"))