Spaces:

InspirationYF
/

rag_chatbot

Sleeping

App Files Community

InspirationYF committed on Jan 8

Commit

cd2e4d5

1 Parent(s): c674325

feat: add auth

Browse files

Files changed (1) hide show

app.py +12 -19

app.py CHANGED Viewed

@@ -1,14 +1,12 @@
 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer
-# model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2", device_map="auto")
-# tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
-# # Check if a GPU is available
-# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# print(f"Using device: {device}")
-import gradio as gr
 # You can use this section to suppress warnings generated by your code:
 def warn(*args, **kwargs):
@@ -17,15 +15,16 @@ import warnings
 warnings.warn = warn
 warnings.filterwarnings('ignore')
-def get_llm():
-    model_id = 'mistralai/Mistral-7B-Instruct-v0.2'
-    model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto')
     model.to('cuda')
     return model
 @spaces.GPU
 def retriever_qa(file, query):
-    llm = get_llm()
     # retriever_obj = retriever(file)
     # qa = RetrievalQA.from_chain_type(llm=llm,
     #                                 chain_type="stuff",
@@ -38,18 +37,12 @@ def retriever_qa(file, query):
     messages = [
         {"role": "user", "content": first_line}
     ]
-    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
     model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
-    generated_ids = llm.generate(model_inputs, max_new_tokens=100, do_sample=True)
-    # tokenizer.batch_decode(generated_ids)[0]
     response = tokenizer.batch_decode(generated_ids)[0]
     # # Check if a GPU is available
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    # print(f"Using device: {device}")
     response = response + f". Using device: {device}"
     return response
@@ -64,7 +57,7 @@ rag_application = gr.Interface(
     ],
     outputs=gr.Textbox(label="Output"),
     title="RAG Chatbot",
-    description="Upload a PDF document and ask any question. The chatbot will try to answer using the provided document. Using device: {device}"
 )
 rag_application.launch(share=True)

+import os
 import spaces
+import gradio as gr
+from huggingface_hub import login
 from transformers import AutoModelForCausalLM, AutoTokenizer
+api_token = os.environ.get("HF_API_TOKEN")
+login(api_token)
 # You can use this section to suppress warnings generated by your code:
 def warn(*args, **kwargs):
 warnings.warn = warn
 warnings.filterwarnings('ignore')
+def get_llm(model_id):
+    model = AutoModelForCausalLM.from_pretrained(model_id)
     model.to('cuda')
     return model
 @spaces.GPU
 def retriever_qa(file, query):
+    model_id = 'mistralai/Mistral-7B-Instruct-v0.2'
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    llm = get_llm(model_id)
     # retriever_obj = retriever(file)
     # qa = RetrievalQA.from_chain_type(llm=llm,
     #                                 chain_type="stuff",
     messages = [
         {"role": "user", "content": first_line}
     ]
     model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
+    generated_ids = llm.generate(model_inputs, max_new_tokens=512, do_sample=True)
     response = tokenizer.batch_decode(generated_ids)[0]
     # # Check if a GPU is available
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     response = response + f". Using device: {device}"
     return response
     ],
     outputs=gr.Textbox(label="Output"),
     title="RAG Chatbot",
+    description="Upload a TXT document and ask any question. The chatbot will try to answer using the provided document."
 )
 rag_application.launch(share=True)