abobonbobo13 committed
Commit 8dd9d83 · verified · 1 Parent(s): e8fc194

Update app.py: move 8-bit loading to a BitsAndBytesConfig and fix the user_prerix typo

Files changed (1): app.py (+17 -15)
app.py CHANGED
@@ -1,20 +1,25 @@
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 
 MODEL_ID = "rinna/bilingual-gpt-neox-4b-instruction-ppo"
+
+# Create the 8-bit quantization config
+quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
-    load_in_8bit=True,
+    quantization_config=quantization_config,
     device_map="auto"
 )
+
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False)
+
 def generate_response(user_question,
-                      chat_history,
-                      temperature=0.3,
-                      top_p=0.85,
-                      max_new_tokens=2048,
-                      repetition_penalty=1.05
-                      ):
+                      chat_history,
+                      temperature=0.3,
+                      top_p=0.85,
+                      max_new_tokens=2048,
+                      repetition_penalty=1.05):
 
     user_prompt_template = "ユーザー: Hello, you are an assistant that helps me learn Japanese. I am going to ask you a question, so please answer *briefly*."
     system_prompt_template = "システム: Sure, I will answer briefly. What can I do for you?"
@@ -22,7 +27,7 @@ def generate_response(user_question,
     user_sample = "ユーザー: 日本で一番高い山は何ですか?"
     system_sample = "システム: 富士山です。高さは3776メートルです。"
 
-    user_prerix = "ユーザー: "
+    user_prefix = "ユーザー: "
     system_prefix = "システム: "
 
     prompt = user_prompt_template + "\n" + system_prompt_template + "\n"
@@ -32,9 +37,9 @@
     else:
         u = chat_history[-1][0]
         s = chat_history[-1][1]
-        prompt += user_prerix + u + "\n" + system_prefix + s + "\n"
+        prompt += user_prefix + u + "\n" + system_prefix + s + "\n"
 
-    prompt += user_prerix + user_question + "\n" + system_prefix
+    prompt += user_prefix + user_question + "\n" + system_prefix
 
     inputs = tokenizer(prompt, add_special_tokens=False, return_tensors="pt")
     inputs = inputs.to(model.device)
@@ -53,9 +58,8 @@
     output = tokenizer.decode(tokens[0], skip_special_tokens=True)
     return output[len(prompt):]
 
-import gradio as gr  # conventionally abbreviated as gr
+import gradio as gr
 
-
 with gr.Blocks() as demo:
     chat_history = gr.Chatbot()
     user_message = gr.Textbox(label="Question:", placeholder="人工知能とは何ですか?")
@@ -70,5 +74,3 @@ with gr.Blocks() as demo:
 
 if __name__ == "__main__":
     demo.launch()
-
-
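
For context: the substantive change here is the 8-bit loading call. Passing load_in_8bit=True directly to from_pretrained is deprecated in recent transformers releases; quantization options are now grouped in a BitsAndBytesConfig object passed as quantization_config. A minimal, self-contained sketch of the resulting pattern (it assumes the bitsandbytes package is installed and a CUDA GPU is available):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_ID = "rinna/bilingual-gpt-neox-4b-instruction-ppo"

# Quantization options now live in a config object instead of a bare kwarg.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=quantization_config,
    device_map="auto",  # let accelerate place layers on available devices
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False)

The model's runtime behavior should be unchanged; only the API surface for requesting 8-bit quantization moved.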