InspirationYF committed on
Commit 8f56420 · 1 Parent(s): 1e2dd9b

bugfix: test tokenizer

Files changed (1)
  1. app.py +8 -7
app.py CHANGED
@@ -10,11 +10,11 @@ login(api_token)
 
 
 # You can use this section to suppress warnings generated by your code:
-def warn(*args, **kwargs):
-    pass
-import warnings
-warnings.warn = warn
-warnings.filterwarnings('ignore')
+# def warn(*args, **kwargs):
+#     pass
+# import warnings
+# warnings.warn = warn
+# warnings.filterwarnings('ignore')
 
 def get_llm(model_id):
     model = AutoModelForCausalLM.from_pretrained(model_id)
@@ -42,8 +42,9 @@ def retriever_qa(file, query):
     model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
     print('Start Inference')
     generated_ids = llm.generate(model_inputs, max_new_tokens=50, do_sample=True)
-    print('Start detokenize')
-    response = tokenizer.batch_decode(generated_ids)[0]
+    response = generated_ids
+    # print('Start detokenize')
+    # response = tokenizer.batch_decode(generated_ids)[0]
 
     # # Check if a GPU is available
     # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")