Akjava committed on
Commit
77b3a29
·
verified ·
1 Parent(s): 9c3e34f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -0
app.py CHANGED
@@ -62,6 +62,28 @@ llm_model = None
62
 
63
  def trans(text):
64
  text = "こんにちは"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  input_text = f"<2ja>{text}".encode('utf-8')
66
  tokens = llm.tokenize(input_text)
67
  print("Tokens:", tokens)
 
62
 
63
  def trans(text):
64
  text = "こんにちは"
65
+
66
+ # テキストに言語タグを付与し、バイト列に変換
67
+ input_text = f"<2ja>{text}".encode('utf-8')
68
+
69
+ # トークナイズ
70
+ tokens = llm.tokenize(input_text)
71
+ print("Tokens:", tokens)
72
+
73
+ # BOSトークンを使用(デコーダーのみのモデルを想定)
74
+ initial_tokens = [llm.token_bos()]
75
+
76
+ # 生成
77
+ buf = ""
78
+ for token in llm.generate(initial_tokens, top_p=0.95, temperature=0.0, repetition_penalty=1.0):
79
+ decoded = llm.detokenize([token]).decode('utf-8', errors='ignore')
80
+ buf += decoded
81
+ if token == llm.token_eos():
82
+ break
83
+
84
+ return buf
85
+
86
+
87
  input_text = f"<2ja>{text}".encode('utf-8')
88
  tokens = llm.tokenize(input_text)
89
  print("Tokens:", tokens)