Commit 8f56420
Parent: 1e2dd9b
bugfix: test tokenizer
app.py (CHANGED)
@@ -10,11 +10,11 @@ login(api_token)
 
 
 # You can use this section to suppress warnings generated by your code:
-def warn(*args, **kwargs):
-    pass
-import warnings
-warnings.warn = warn
-warnings.filterwarnings('ignore')
+# def warn(*args, **kwargs):
+#     pass
+# import warnings
+# warnings.warn = warn
+# warnings.filterwarnings('ignore')
 
 def get_llm(model_id):
     model = AutoModelForCausalLM.from_pretrained(model_id)
@@ -42,8 +42,9 @@ def retriever_qa(file, query):
     model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
     print('Start Inference')
    generated_ids = llm.generate(model_inputs, max_new_tokens=50, do_sample=True)
-
-
+    response = generated_ids
+    # print('Start detokenize')
+    # response = tokenizer.batch_decode(generated_ids)[0]
 
 # # Check if a GPU is available
 # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")