Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -62,6 +62,28 @@ llm_model = None
|
|
62 |
|
63 |
def trans(text):
|
64 |
text = "こんにちは"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
input_text = f"<2ja>{text}".encode('utf-8')
|
66 |
tokens = llm.tokenize(input_text)
|
67 |
print("Tokens:", tokens)
|
|
|
62 |
|
63 |
def trans(text):
|
64 |
text = "こんにちは"
|
65 |
+
|
66 |
+
# テキストに言語タグを付与し、バイト列に変換
|
67 |
+
input_text = f"<2ja>{text}".encode('utf-8')
|
68 |
+
|
69 |
+
# トークナイズ
|
70 |
+
tokens = llm.tokenize(input_text)
|
71 |
+
print("Tokens:", tokens)
|
72 |
+
|
73 |
+
# BOSトークンを使用(デコーダーのみのモデルを想定)
|
74 |
+
initial_tokens = [llm.token_bos()]
|
75 |
+
|
76 |
+
# 生成
|
77 |
+
buf = ""
|
78 |
+
for token in llm.generate(initial_tokens, top_p=0.95, temperature=0.0, repetition_penalty=1.0):
|
79 |
+
decoded = llm.detokenize([token]).decode('utf-8', errors='ignore')
|
80 |
+
buf += decoded
|
81 |
+
if token == llm.token_eos():
|
82 |
+
break
|
83 |
+
|
84 |
+
return buf
|
85 |
+
|
86 |
+
|
87 |
input_text = f"<2ja>{text}".encode('utf-8')
|
88 |
tokens = llm.tokenize(input_text)
|
89 |
print("Tokens:", tokens)
|