kevinpro committed on
Commit
e1185eb
·
verified ·
1 Parent(s): d26407b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -7
app.py CHANGED
@@ -15,7 +15,6 @@ import nltk
15
  from functools import lru_cache
16
 
17
 
18
- print(os.getenv('key'))
19
  code_mapping = dict(sorted(code_mapping.items(), key=lambda item: item[0]))
20
  flores_codes = list(code_mapping.keys())
21
  target_languages = flores_codes # 简化列表
@@ -64,14 +63,10 @@ def _translate(text: str, src_lang: str, tgt_lang: str):
64
  .numpy()
65
  .tolist()
66
  )
67
- translated_chunk = model.generate(
68
  input_ids=torch.tensor([input_tokens]).to(device),
69
- forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_code),
70
- max_length=len(input_tokens) + 50,
71
  num_return_sequences=1,
72
- num_beams=5,
73
- no_repeat_ngram_size=4, # repetition blocking works better if this number is below num_beams
74
- renormalize_logits=True, # recompute token probabilities after banning the repetitions
75
  )
76
  translated_chunk = tokenizer.decode(
77
  translated_chunk[0], skip_special_tokens=True
 
15
  from functools import lru_cache
16
 
17
 
 
18
  code_mapping = dict(sorted(code_mapping.items(), key=lambda item: item[0]))
19
  flores_codes = list(code_mapping.keys())
20
  target_languages = flores_codes # 简化列表
 
63
  .numpy()
64
  .tolist()
65
  )
66
+ translated_chunk = model(
67
  input_ids=torch.tensor([input_tokens]).to(device),
68
+ max_length=len(input_tokens) + 10000,
 
69
  num_return_sequences=1,
 
 
 
70
  )
71
  translated_chunk = tokenizer.decode(
72
  translated_chunk[0], skip_special_tokens=True