ganchengguang committed on
Commit
e2009a9
·
verified ·
1 Parent(s): f14d9a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -2
app.py CHANGED
@@ -1,10 +1,19 @@
1
  import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
3
 
4
  # 加载本地模型和tokenizer
5
  model_name = "ganchengguang/OIELLM-8B-Instruction" # 替换为你的模型名称
6
  tokenizer = AutoTokenizer.from_pretrained(model_name)
7
- model = AutoModelForCausalLM.from_pretrained(model_name)
 
 
 
 
 
 
 
 
8
 
9
  # 定义语言和选项的映射
10
  options = {
@@ -20,7 +29,7 @@ def respond(message, language, task, system_message, max_tokens, temperature, to
20
  messages.append({"role": "user", "content": message + " " + options[language][task]})
21
 
22
  # 编码输入
23
- inputs = tokenizer(messages, return_tensors="pt", padding=True, truncation=True)
24
  # 生成回复
25
  outputs = model.generate(
26
  inputs["input_ids"],
 
1
  import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import bitsandbytes as bnb
4
 
5
  # 加载本地模型和tokenizer
6
  model_name = "ganchengguang/OIELLM-8B-Instruction" # 替换为你的模型名称
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
+ model = AutoModelForCausalLM.from_pretrained(
9
+ model_name,
10
+ device_map="auto",
11
+ load_in_8bit=True,
12
+ quantization_config=bnb.configs.BitsAndBytesConfig(
13
+ load_in_8bit=True,
14
+ load_in_8bit_fp32_cpu_offload=True
15
+ )
16
+ )
17
 
18
  # 定义语言和选项的映射
19
  options = {
 
29
  messages.append({"role": "user", "content": message + " " + options[language][task]})
30
 
31
  # 编码输入
32
+ inputs = tokenizer(message + " " + options[language][task], return_tensors="pt", padding=True, truncation=True)
33
  # 生成回复
34
  outputs = model.generate(
35
  inputs["input_ids"],