InspirationYF committed on
Commit 1e2dd9b · 1 Parent(s): cd2e4d5
Files changed (2)
  1. app.py +9 -5
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,4 +1,5 @@
 import os
+# import torch
 import spaces
 import gradio as gr
 from huggingface_hub import login
@@ -23,7 +24,7 @@ def get_llm(model_id):
 @spaces.GPU
 def retriever_qa(file, query):
     model_id = 'mistralai/Mistral-7B-Instruct-v0.2'
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
     llm = get_llm(model_id)
     # retriever_obj = retriever(file)
     # qa = RetrievalQA.from_chain_type(llm=llm,
@@ -35,15 +36,18 @@ def retriever_qa(file, query):
     first_line = f.readline()
 
     messages = [
-        {"role": "user", "content": first_line}
+        {"role": "user", "content": first_line + query}
     ]
+    print(messages)
     model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
-    generated_ids = llm.generate(model_inputs, max_new_tokens=512, do_sample=True)
+    print('Start Inference')
+    generated_ids = llm.generate(model_inputs, max_new_tokens=50, do_sample=True)
+    print('Start detokenize')
     response = tokenizer.batch_decode(generated_ids)[0]
 
     # # Check if a GPU is available
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    response = response + f". Using device: {device}"
+    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # response = response + f". Using device: {device}"
 
     return response
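For context, a minimal, self-contained sketch of the generation path app.py converges on after this commit. It assumes get_llm() wraps AutoModelForCausalLM.from_pretrained (its body is not shown in this diff), and placeholder strings stand in for first_line and query:

# Sketch only: assumes get_llm() returns a causal LM on the GPU;
# the diff does not show its implementation.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = 'mistralai/Mistral-7B-Instruct-v0.2'

# use_fast=False selects the slow, SentencePiece-backed tokenizer,
# which is why this commit also adds sentencepiece to requirements.txt.
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
llm = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")

# Placeholder strings stand in for the uploaded file's first line and the user query;
# the commit concatenates them with no separator.
messages = [{"role": "user", "content": "first line of uploaded file" + "user query"}]

# apply_chat_template wraps the message in Mistral's [INST] ... [/INST] format
# and returns the prompt as a tensor of input ids.
model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")

generated_ids = llm.generate(model_inputs, max_new_tokens=50, do_sample=True)
response = tokenizer.batch_decode(generated_ids)[0]
print(response)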
requirements.txt CHANGED
@@ -1 +1,2 @@
 transformers==4.36.0
+sentencepiece
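On the requirements change: with use_fast=False, transformers falls back to the SentencePiece-based slow tokenizer for this model, and loading it errors out if the sentencepiece package is not installed, which is presumably why it is added here. A quick check, as a sketch:

from transformers import AutoTokenizer

# Loading the slow tokenizer fails with an import error if sentencepiece is missing;
# with it installed, this checkpoint resolves to a SentencePiece-backed class.
tok = AutoTokenizer.from_pretrained('mistralai/Mistral-7B-Instruct-v0.2', use_fast=False)
print(type(tok).__name__)  # typically LlamaTokenizer for this checkpoint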