Spaces:

abobonbobo13
/

sample

Sleeping

App Files Community

abobonbobo13 committed on Jan 15

Commit

8dd9d83

verified ·

1 Parent(s): e8fc194

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -15

app.py CHANGED Viewed

@@ -1,20 +1,25 @@
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
 MODEL_ID = "rinna/bilingual-gpt-neox-4b-instruction-ppo"
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
-    load_in_8bit=True,
     device_map="auto"
 )
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False)
 def generate_response(user_question,
-             chat_history,
-             temperature=0.3,
-             top_p=0.85,
-             max_new_tokens=2048,
-             repetition_penalty=1.05
-            ):
     user_prompt_template = "ユーザー: Hello, you are an assistant that helps me learn Japanese. I am going to ask you a question, so please answer *briefly*."
     system_prompt_template = "システム: Sure, I will answer briefly. What can I do for you?"
@@ -22,7 +27,7 @@ def generate_response(user_question,
     user_sample = "ユーザー: 日本で一番高い山は何ですか？"
     system_sample = "システム: 富士山です。高さは3776メートルです。"
-    user_prerix = "ユーザー: "
     system_prefix = "システム: "
     prompt = user_prompt_template + "\n" + system_prompt_template + "\n"
@@ -32,9 +37,9 @@ def generate_response(user_question,
     else:
         u = chat_history[-1][0]
         s = chat_history[-1][1]
-        prompt += user_prerix + u + "\n" + system_prefix + s + "\n"
-    prompt += user_prerix + user_question + "\n" + system_prefix
     inputs = tokenizer(prompt, add_special_tokens=False, return_tensors="pt")
     inputs = inputs.to(model.device)
@@ -53,9 +58,8 @@ def generate_response(user_question,
     output = tokenizer.decode(tokens[0], skip_special_tokens=True)
     return output[len(prompt):]
-    import gradio as gr # 慣習としてgrと略記
 with gr.Blocks() as demo:
     chat_history = gr.Chatbot()
     user_message = gr.Textbox(label="Question:", placeholder="人工知能とは何ですか？")
@@ -70,5 +74,3 @@ with gr.Blocks() as demo:
 if __name__ == "__main__":
     demo.launch()

 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 MODEL_ID = "rinna/bilingual-gpt-neox-4b-instruction-ppo"
+# 8ビット量子化の設定を作成
+quantization_config = BitsAndBytesConfig(load_in_8bit=True)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
+    quantization_config=quantization_config,
     device_map="auto"
 )
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False)
 def generate_response(user_question,
+                      chat_history,
+                      temperature=0.3,
+                      top_p=0.85,
+                      max_new_tokens=2048,
+                      repetition_penalty=1.05):
     user_prompt_template = "ユーザー: Hello, you are an assistant that helps me learn Japanese. I am going to ask you a question, so please answer *briefly*."
     system_prompt_template = "システム: Sure, I will answer briefly. What can I do for you?"
     user_sample = "ユーザー: 日本で一番高い山は何ですか？"
     system_sample = "システム: 富士山です。高さは3776メートルです。"
+    user_prefix = "ユーザー: "
     system_prefix = "システム: "
     prompt = user_prompt_template + "\n" + system_prompt_template + "\n"
     else:
         u = chat_history[-1][0]
         s = chat_history[-1][1]
+        prompt += user_prefix + u + "\n" + system_prefix + s + "\n"
+    prompt += user_prefix + user_question + "\n" + system_prefix
     inputs = tokenizer(prompt, add_special_tokens=False, return_tensors="pt")
     inputs = inputs.to(model.device)
     output = tokenizer.decode(tokens[0], skip_special_tokens=True)
     return output[len(prompt):]
+import gradio as gr
 with gr.Blocks() as demo:
     chat_history = gr.Chatbot()
     user_message = gr.Textbox(label="Question:", placeholder="人工知能とは何ですか？")
 if __name__ == "__main__":
     demo.launch()