SteveTran
committed on
Commit
·
607cb64
1
Parent(s):
83f8ffa
feat: optimize max tokens
Browse files- handler.py +4 -2
handler.py
CHANGED
@@ -7,7 +7,7 @@ from transformers import AutoTokenizer
|
|
7 |
INSTRUCTION = "rewrite: "
|
8 |
generation_config = {
|
9 |
"max_new_tokens": 16,
|
10 |
-
"use_cache":
|
11 |
"temperature": 0.6,
|
12 |
"do_sample": True,
|
13 |
"top_p": 0.95,
|
@@ -21,7 +21,7 @@ class EndpointHandler:
|
|
21 |
self.model = OVModelForSeq2SeqLM.from_pretrained(
|
22 |
path, use_cache=True, use_io_binding=False
|
23 |
)
|
24 |
-
self.tokenizer = AutoTokenizer.from_pretrained(path)
|
25 |
|
26 |
def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
27 |
"""
|
@@ -37,6 +37,8 @@ class EndpointHandler:
|
|
37 |
["{} {}".format(INSTRUCTION, inputs)],
|
38 |
padding=False,
|
39 |
return_tensors="pt",
|
|
|
|
|
40 |
)
|
41 |
|
42 |
outputs = self.model.generate(**inputs, **parameters)
|
|
|
7 |
INSTRUCTION = "rewrite: "
|
8 |
generation_config = {
|
9 |
"max_new_tokens": 16,
|
10 |
+
"use_cache": True,
|
11 |
"temperature": 0.6,
|
12 |
"do_sample": True,
|
13 |
"top_p": 0.95,
|
|
|
21 |
self.model = OVModelForSeq2SeqLM.from_pretrained(
|
22 |
path, use_cache=True, use_io_binding=False
|
23 |
)
|
24 |
+
self.tokenizer = AutoTokenizer.from_pretrained(path, use_fast=True)
|
25 |
|
26 |
def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
27 |
"""
|
|
|
37 |
["{} {}".format(INSTRUCTION, inputs)],
|
38 |
padding=False,
|
39 |
return_tensors="pt",
|
40 |
+
max_length=20,
|
41 |
+
truncation=True,
|
42 |
)
|
43 |
|
44 |
outputs = self.model.generate(**inputs, **parameters)
|