Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -8,6 +8,8 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 from threading import Thread
 import torch
 
+TOKEN=os.environ["TOKEN"]
+
 DESCRIPTION = '''
 <div>
 <h1 style="text-align: center;">非公式LLM-JP-3-13B-Instruct</h1>
@@ -45,8 +47,8 @@ h1 {
 """
 
 # Load the tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("
-model = AutoModelForCausalLM.from_pretrained("
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-jpn-it", token=TOKEN, torch_dtype=torch.bfloat16)
+model = AutoModelForCausalLM.from_pretrained("google/gemma-2-2b-jpn-it", token=TOKEN, torch_dtype=torch.bfloat16,device_map="auto")
 
 @spaces.GPU()
 def chat_llama3_8b(message: str,
@@ -65,7 +67,6 @@ def chat_llama3_8b(message: str,
         str: The generated response.
     """
     conversation = []
-    conversation.append({"role": "system", "content": "以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。"})
     for user, assistant in history:
         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": message})
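Taken together, the commit points the app at the gated google/gemma-2-2b-jpn-it checkpoint and reads a Hugging Face access token from the Space's environment, presumably a Space secret named TOKEN (the page title still reads 非公式LLM-JP-3-13B-Instruct, "Unofficial LLM-JP-3-13B-Instruct", even though the checkpoint is now Gemma). A minimal standalone sketch of the new loading path, assuming that secret name and that os is imported earlier in app.py:

import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Sketch only: "TOKEN" is assumed to be configured as a Space secret.
TOKEN = os.environ["TOKEN"]

# google/gemma-2-2b-jpn-it is a gated repo, so the token is needed to download it.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-jpn-it", token=TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-2b-jpn-it",
    token=TOKEN,
    torch_dtype=torch.bfloat16,  # bf16 halves memory versus float32
    device_map="auto",           # let accelerate place weights on the GPU
)

One nit the diff carries over: torch_dtype is also passed to AutoTokenizer.from_pretrained, where it is silently ignored (tokenizers hold no weights), so the sketch drops it from that call.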
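The last hunk drops the hard-coded system message (roughly: "The following is an instruction describing a task. Write a response that appropriately fulfills the request."). That is likely necessary rather than cosmetic: Gemma's chat template does not accept a "system" role and raises an error when given one. Below is a hedged sketch of how the conversation list built in chat_llama3_8b is typically consumed with the TextIteratorStreamer the file already imports; the exact generation arguments in app.py may differ, and the function name here is neutral since chat_llama3_8b looks like a leftover from the Llama-3 demo this Space was adapted from.

from threading import Thread
import spaces
from transformers import TextIteratorStreamer

@spaces.GPU()  # ZeroGPU: a GPU is attached only while this call runs
def stream_reply(message, history, max_new_tokens=512):
    # Rebuild the chat as a list of role/content dicts, without a system turn.
    conversation = []
    for user, assistant in history:
        conversation.extend([
            {"role": "user", "content": user},
            {"role": "assistant", "content": assistant},
        ])
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # generate() blocks, so run it in a thread and read tokens as they arrive.
    Thread(target=model.generate, kwargs=dict(
        input_ids=input_ids, streamer=streamer, max_new_tokens=max_new_tokens,
    )).start()

    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial  # Gradio renders each growing partial string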