Didier committed on
Commit
6b417b7
·
verified ·
1 Parent(s): c153533

Upload model_translation.py

Browse files
Files changed (1) hide show
  1. model_translation.py +18 -1
model_translation.py CHANGED
@@ -58,6 +58,21 @@ def get_tokenizer_model_for_src_lang(src_lang: str) -> (AutoTokenizer, AutoModel
58
  # - e.g. 200 words
59
  max_words_per_chunk = 200
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  #
62
  # Multilingual translation model
63
  #
@@ -68,4 +83,6 @@ model_multilingual = AutoModelForSeq2SeqLM.from_pretrained(
68
  model_MADLAD_name,
69
  device_map="auto",
70
  torch_dtype=torch.float16,
71
- low_cpu_mem_usage=True)
 
 
 
58
  # - e.g. 200 words
59
  max_words_per_chunk = 200
60
 
61
+ #
62
+ # Multilingual language pairs
63
+ #
64
+ from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
65
+
66
+ model_name_m2m100 = "facebook/m2m100_418M"
67
+ tokenizer_m2m100 = M2M100Tokenizer.from_pretrained(model_name_m2m100)
68
+ model_m2m100 = M2M100ForConditionalGeneration.from_pretrained(
69
+ model_name_m2m100,
70
+ device_map="auto",
71
+ torch_dtype=torch.float16,
72
+ low_cpu_mem_usage=True,
73
+ load_in_8bit=True
74
+ )
75
+
76
  #
77
  # Multilingual translation model
78
  #
 
83
  model_MADLAD_name,
84
  device_map="auto",
85
  torch_dtype=torch.float16,
86
+ low_cpu_mem_usage=True,
87
+ load_in_8bit=True
88
+ )