SteveTran committed on
Commit
607cb64
·
1 Parent(s): 83f8ffa

feat: optimize max tokens

Browse files
Files changed (1) hide show
  1. handler.py +4 -2
handler.py CHANGED
@@ -7,7 +7,7 @@ from transformers import AutoTokenizer
7
  INSTRUCTION = "rewrite: "
8
  generation_config = {
9
  "max_new_tokens": 16,
10
- "use_cache": False,
11
  "temperature": 0.6,
12
  "do_sample": True,
13
  "top_p": 0.95,
@@ -21,7 +21,7 @@ class EndpointHandler:
21
  self.model = OVModelForSeq2SeqLM.from_pretrained(
22
  path, use_cache=True, use_io_binding=False
23
  )
24
- self.tokenizer = AutoTokenizer.from_pretrained(path)
25
 
26
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
27
  """
@@ -37,6 +37,8 @@ class EndpointHandler:
37
  ["{} {}".format(INSTRUCTION, inputs)],
38
  padding=False,
39
  return_tensors="pt",
 
 
40
  )
41
 
42
  outputs = self.model.generate(**inputs, **parameters)
 
7
  INSTRUCTION = "rewrite: "
8
  generation_config = {
9
  "max_new_tokens": 16,
10
+ "use_cache": True,
11
  "temperature": 0.6,
12
  "do_sample": True,
13
  "top_p": 0.95,
 
21
  self.model = OVModelForSeq2SeqLM.from_pretrained(
22
  path, use_cache=True, use_io_binding=False
23
  )
24
+ self.tokenizer = AutoTokenizer.from_pretrained(path, use_fast=True)
25
 
26
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
27
  """
 
37
  ["{} {}".format(INSTRUCTION, inputs)],
38
  padding=False,
39
  return_tensors="pt",
40
+ max_length=20,
41
+ truncation=True,
42
  )
43
 
44
  outputs = self.model.generate(**inputs, **parameters)