Update app.py
Browse files
app.py
CHANGED
@@ -12,6 +12,11 @@ def load_model_and_tokenizer():
|
|
12 |
revision="main"
|
13 |
)
|
14 |
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
|
|
|
|
|
|
|
|
|
|
|
15 |
return model, tokenizer
|
16 |
|
17 |
model, tokenizer = load_model_and_tokenizer()
|
@@ -33,7 +38,7 @@ def get_response(comment):
|
|
33 |
input_ids=inputs["input_ids"].to("cuda"),
|
34 |
attention_mask=inputs["attention_mask"].to("cuda"),
|
35 |
max_new_tokens=140,
|
36 |
-
pad_token_id=tokenizer.
|
37 |
)
|
38 |
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
39 |
return response.split("[/INST]")[-1].strip()
|
|
|
12 |
revision="main"
|
13 |
)
|
14 |
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
|
15 |
+
|
16 |
+
# If the tokenizer defines no pad token, reuse its EOS token for padding
|
17 |
+
if tokenizer.pad_token is None:
|
18 |
+
tokenizer.pad_token = tokenizer.eos_token # Use eos_token as padding token
|
19 |
+
|
20 |
return model, tokenizer
|
21 |
|
22 |
model, tokenizer = load_model_and_tokenizer()
|
|
|
38 |
input_ids=inputs["input_ids"].to("cuda"),
|
39 |
attention_mask=inputs["attention_mask"].to("cuda"),
|
40 |
max_new_tokens=140,
|
41 |
+
pad_token_id=tokenizer.pad_token_id # Pass the tokenizer's pad token id explicitly (EOS when no pad token was defined)
|
42 |
)
|
43 |
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
44 |
return response.split("[/INST]")[-1].strip()
|