InspirationYF committed on
Commit 8f56420 · 1 Parent(s): 1e2dd9b

bugfix: test tokenizer

Files changed (1)
  1. app.py +8 -7
app.py CHANGED
@@ -10,11 +10,11 @@ login(api_token)
 
 
 # You can use this section to suppress warnings generated by your code:
-def warn(*args, **kwargs):
-    pass
-import warnings
-warnings.warn = warn
-warnings.filterwarnings('ignore')
+# def warn(*args, **kwargs):
+#     pass
+# import warnings
+# warnings.warn = warn
+# warnings.filterwarnings('ignore')
 
 def get_llm(model_id):
     model = AutoModelForCausalLM.from_pretrained(model_id)
@@ -42,8 +42,9 @@ def retriever_qa(file, query):
     model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
     print('Start Inference')
     generated_ids = llm.generate(model_inputs, max_new_tokens=50, do_sample=True)
-    print('Start detokenize')
-    response = tokenizer.batch_decode(generated_ids)[0]
+    response = generated_ids
+    # print('Start detokenize')
+    # response = tokenizer.batch_decode(generated_ids)[0]
 
     # # Check if a GPU is available
     # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")