TiberiuCristianLeon committed on
Commit
8010198
·
verified ·
1 Parent(s): 4b5e076

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -14
app.py CHANGED
@@ -28,16 +28,22 @@ def model_to_cuda(model):
28
  print("CUDA not available! Using CPU.")
29
  return model
30
 
31
- def eurollm(model_id, sl, tl, input_text):
32
- model_id = "utter-project/EuroLLM-1.7B"
33
- tokenizer = AutoTokenizer.from_pretrained(model_id)
34
- model = AutoModelForCausalLM.from_pretrained(model_id)
35
- prompt = f"{sl}: {input_text}. {tl}:"
36
  inputs = tokenizer(prompt, return_tensors="pt")
37
  outputs = model.generate(**inputs, max_new_tokens=512)
38
  output = tokenizer.decode(outputs[0], skip_special_tokens=True)
39
- print(output.rsplit(f'{tl}:')[-1])
40
- return output
 
 
 
 
 
 
 
41
 
42
  @spaces.GPU
43
  def translate_text(input_text, sselected_language, tselected_language, model_name):
@@ -61,12 +67,10 @@ def translate_text(input_text, sselected_language, tselected_language, model_nam
61
  translated_text = eurollm(model_name, sselected_language, tselected_language, input_text)
62
  return translated_text, message_text
63
 
64
- if 'nllb' in model_name:
65
- tokenizer = AutoTokenizer.from_pretrained(model_name, src_lang=languagecodes.nllb_language_codes[sselected_language])
66
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto")
67
- translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang=languagecodes.nllb_language_codes[sselected_language], tgt_lang=languagecodes.nllb_language_codes[tselected_language])
68
- translated_text = translator(input_text, max_length=512)
69
- return translated_text[0]['translation_text'], message_text
70
 
71
  if model_name.startswith('facebook/mbart-large'):
72
  from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
@@ -113,7 +117,7 @@ def swap_languages(src_lang, tgt_lang):
113
 
114
  def create_interface():
115
  with gr.Blocks() as interface:
116
- gr.Markdown("## Machine Text Translation")
117
 
118
  with gr.Row():
119
  input_text = gr.Textbox(label="Enter text to translate:", placeholder="Type your text here...")
 
28
  print("CUDA not available! Using CPU.")
29
  return model
30
 
31
def eurollm(model_name, sl, tl, input_text):
    """Translate ``input_text`` with a causal language model checkpoint.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the causal LM.
        sl: Source-language label placed at the start of the prompt.
        tl: Target-language label placed at the end of the prompt.
        input_text: Text to translate.

    Returns:
        The decoded text that follows the last ``"<tl>:"`` marker, with
        surrounding whitespace stripped. If the marker does not occur in
        the decoded output, the whole decoded output (stripped) is returned.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Prompt layout: "<source label>: <text> <target label>:" so the model
    # continues with the translation after the target label.
    prompt = f"{sl}: {input_text} {tl}:"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=512)
    output = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # The decoded sequence is split on the target marker and only the text
    # after its last occurrence is kept.
    # (The previous line had a stray ")" here, which was a SyntaxError.)
    result = output.rsplit(f"{tl}:", 1)[-1].strip()
    return result
40
+
41
def nllb(model_name, sl, tl, input_text):
    """Translate ``input_text`` through a seq2seq translation pipeline.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for the
            tokenizer and the seq2seq model.
        sl: Source-language code handed to the tokenizer and the pipeline.
        tl: Target-language code handed to the pipeline.
        input_text: Text to translate.

    Returns:
        The ``'translation_text'`` field of the first pipeline result.
    """
    seq2seq_tokenizer = AutoTokenizer.from_pretrained(model_name, src_lang=sl)
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(
        model_name,
        device_map="auto",
    )
    translation_pipeline = pipeline(
        'translation',
        model=seq2seq_model,
        tokenizer=seq2seq_tokenizer,
        src_lang=sl,
        tgt_lang=tl,
    )
    # The pipeline returns a list of result dicts; only the first is used.
    results = translation_pipeline(input_text, max_length=512)
    first_result = results[0]
    return first_result['translation_text']
47
 
48
  @spaces.GPU
49
  def translate_text(input_text, sselected_language, tselected_language, model_name):
 
67
  translated_text = eurollm(model_name, sselected_language, tselected_language, input_text)
68
  return translated_text, message_text
69
 
70
+ if 'nllb' in model_name.lower():
71
+ nnlbsl, nnlbtl = languagecodes.nllb_language_codes[sselected_language], languagecodes.nllb_language_codes[tselected_language]
72
+ translated_text = nllb(model_name, nnlbsl, nnlbtl, input_text)
73
+ return translated_text, message_text
 
 
74
 
75
  if model_name.startswith('facebook/mbart-large'):
76
  from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
 
117
 
118
  def create_interface():
119
  with gr.Blocks() as interface:
120
+ gr.Markdown("### Machine Text Translation - maximum 512 tokens")
121
 
122
  with gr.Row():
123
  input_text = gr.Textbox(label="Enter text to translate:", placeholder="Type your text here...")