InspirationYF committed
Commit cd2e4d5 · 1 Parent(s): c674325

feat: add auth

Files changed (1): app.py (+12 -19)
app.py CHANGED
@@ -1,14 +1,12 @@
+import os
 import spaces
+import gradio as gr
+from huggingface_hub import login
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-# model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2", device_map="auto")
-# tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
-
-# # Check if a GPU is available
-# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# print(f"Using device: {device}")
+api_token = os.environ.get("HF_API_TOKEN")
+login(api_token)
 
-import gradio as gr
 
 # You can use this section to suppress warnings generated by your code:
 def warn(*args, **kwargs):
@@ -17,15 +15,16 @@ import warnings
 warnings.warn = warn
 warnings.filterwarnings('ignore')
 
-def get_llm():
-    model_id = 'mistralai/Mistral-7B-Instruct-v0.2'
-    model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto')
+def get_llm(model_id):
+    model = AutoModelForCausalLM.from_pretrained(model_id)
     model.to('cuda')
     return model
 
 @spaces.GPU
 def retriever_qa(file, query):
-    llm = get_llm()
+    model_id = 'mistralai/Mistral-7B-Instruct-v0.2'
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    llm = get_llm(model_id)
     # retriever_obj = retriever(file)
     # qa = RetrievalQA.from_chain_type(llm=llm,
     #                                  chain_type="stuff",
@@ -38,18 +37,12 @@ def retriever_qa(file, query):
     messages = [
         {"role": "user", "content": first_line}
     ]
-
-    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
     model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
-
-    generated_ids = llm.generate(model_inputs, max_new_tokens=100, do_sample=True)
-    # tokenizer.batch_decode(generated_ids)[0]
-
+    generated_ids = llm.generate(model_inputs, max_new_tokens=512, do_sample=True)
     response = tokenizer.batch_decode(generated_ids)[0]
 
     # # Check if a GPU is available
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    # print(f"Using device: {device}")
     response = response + f". Using device: {device}"
 
     return response
@@ -64,7 +57,7 @@ rag_application = gr.Interface(
     ],
     outputs=gr.Textbox(label="Output"),
     title="RAG Chatbot",
-    description="Upload a PDF document and ask any question. The chatbot will try to answer using the provided document. Using device: {device}"
+    description="Upload a TXT document and ask any question. The chatbot will try to answer using the provided document."
 )
 
 rag_application.launch(share=True)
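
For reference, a minimal standalone sketch of the authenticated generation path introduced by this commit, assuming transformers, torch, and huggingface_hub are installed, that HF_API_TOKEN holds a valid Hugging Face token (as a Space secret or shell variable), and that the token's account can access the Mistral checkpoint if it is gated; the prompt string below is a placeholder, not taken from the Space:

import os

import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Authenticate against the Hub with the same environment variable the Space reads.
login(os.environ["HF_API_TOKEN"])  # raises KeyError if the variable is unset

model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Fall back to CPU when no GPU is present (the Space itself assumes CUDA under @spaces.GPU).
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Placeholder prompt; the Space passes its first_line variable here instead.
messages = [{"role": "user", "content": "What is retrieval-augmented generation?"}]
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
generated_ids = model.generate(input_ids, max_new_tokens=512, do_sample=True)
print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0])

Note that skip_special_tokens=True is an addition in this sketch; the Space decodes without it, so its responses still contain the chat-template markers.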