llamacpp-flan-t5-large-grammar-synthesis

Running

Akjava committed on Mar 19

Commit

1c197e3

verified ·

1 Parent(s): 4bdf72f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -62,6 +62,29 @@ llm_model = None
 def trans(text):
     """Build a language-tagged, UTF-8 encoded input from a fixed greeting.

     NOTE(review): the ``text`` argument is overwritten by a hard-coded
     string on the first line, so the caller's value is never used, and
     nothing is returned in the lines visible here. This looks like
     work-in-progress/debug code; confirm before relying on it.
     """
     text = "こんにちは"
     # Prefix the text with the language tag and convert it to bytes.
     input_text = f"<2ja>{text}".encode('utf-8')

 def trans(text):
     text = "こんにちは"
+    # テキストに言語タグを付与し、バイト列に変換
+    input_text = f"<2ja>{text}".encode('utf-8')
+    # トークナイズ
+    tokens = llm.tokenize(input_text)
+    print("Tokens:", tokens)
+    # BOSトークンを取得し、確認
+    bos_token = llm.token_bos()
+    print("BOS Token:", bos_token)
+    initial_tokens = [bos_token]
+    print("Initial Tokens:", initial_tokens)
+    # 生成
+    buf = ""
+    for token in llm.generate(initial_tokens, top_p=0.95, temp=0.0, repeat_penalty=1.0):
+        decoded = llm.detokenize([token]).decode('utf-8', errors='ignore')
+        buf += decoded
+        if token == llm.token_eos():
+            break
+    return buf
     # テキストに言語タグを付与し、バイト列に変換
     input_text = f"<2ja>{text}".encode('utf-8')