TiberiuCristianLeon committed · verified
Commit cc8d7bd · Parent(s): e647eeb

Update app.py

Files changed (1): app.py (+35 -1)
app.py CHANGED
@@ -17,7 +17,8 @@ models = ["Helsinki-NLP",
           "facebook/mbart-large-50-many-to-many-mmt",
           "utter-project/EuroLLM-1.7B",
           "Unbabel/TowerInstruct-7B-v0.2",
-          "Unbabel/TowerInstruct-Mistral-7B-v0.2"
+          "Unbabel/TowerInstruct-Mistral-7B-v0.2",
+          "openGPT-X/Teuken-7B-instruct-commercial-v0.4"
           ]
 
 def model_to_cuda(model):
@@ -29,6 +30,35 @@ def model_to_cuda(model):
         print("CUDA not available! Using CPU.")
     return model
 
+def teuken(model_name, sl, tl, input_text):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # model_name = "openGPT-X/Teuken-7B-instruct-commercial-v0.4"
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        trust_remote_code=True,
+        torch_dtype=torch.bfloat16,
+    )
+    model = model.to(device).eval()
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_name,
+        use_fast=False,
+        trust_remote_code=True,
+    )
+    translation_prompt = f"Translate the following text from {sl} into {tl}: {input_text}"
+    messages = [{"role": "User", "content": translation_prompt}]
+    prompt_ids = tokenizer.apply_chat_template(messages, chat_template=sl.upper(), tokenize=True, add_generation_prompt=True, return_tensors="pt")
+    prediction = model.generate(
+        prompt_ids.to(model.device),
+        max_length=512,
+        do_sample=True,
+        top_k=50,
+        top_p=0.95,
+        temperature=0.7,
+        num_return_sequences=1,
+    )
+    prediction_text = tokenizer.decode(prediction[0].tolist())
+    return prediction_text
+
 def eurollm(model_name, sl, tl, input_text):
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForCausalLM.from_pretrained(model_name)
@@ -67,6 +97,10 @@ def translate_text(input_text, sselected_language, tselected_language, model_name):
     if 'eurollm' in model_name.lower():
         translated_text = eurollm(model_name, sselected_language, tselected_language, input_text)
         return translated_text, message_text
+
+    if 'teuken' in model_name.lower():
+        translated_text = teuken(model_name, sselected_language, tselected_language, input_text)
+        return translated_text, message_text
 
     if 'nllb' in model_name.lower():
         nnlbsl, nnlbtl = languagecodes.nllb_language_codes[sselected_language], languagecodes.nllb_language_codes[tselected_language]
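A quick sanity check for the new branch, as a minimal sketch: the input string and language values below are assumptions (the diff does not show what sselected_language holds at runtime). Note that teuken() applies sl.upper() as the chat template key, which matches the language-code-keyed templates ("EN", "DE") that the Teuken instruct tokenizer ships with.

# Hypothetical smoke test for the Teuken path; running it downloads the 7B checkpoint.
translated, message = translate_text(
    "A quick test sentence.",                        # input_text
    "EN",                                            # source language (assumed value)
    "DE",                                            # target language (assumed value)
    "openGPT-X/Teuken-7B-instruct-commercial-v0.4",  # routes to teuken()
)
print(translated)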
 
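One operational note: teuken() reloads both the model and the tokenizer on every call, so each request pays the full load time. A common pattern, sketched here as an assumption rather than as part of this commit (the helper name load_teuken is hypothetical), is to cache the loaded objects:

from functools import lru_cache

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

@lru_cache(maxsize=1)
def load_teuken(model_name: str):
    # Load once and reuse across requests; assumes the 7B weights fit in memory.
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.bfloat16)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return tokenizer, model.to(device).eval()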
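Also worth noting: tokenizer.decode(prediction[0].tolist()) decodes the full sequence, so the string returned to the app still contains the rendered prompt and any special tokens. A possible refinement, again an assumption and not part of this commit, is to decode only the newly generated tokens inside teuken():

# Slice off the prompt so only the model's continuation (the translation) is kept.
new_tokens = prediction[0][prompt_ids.shape[-1]:]
prediction_text = tokenizer.decode(new_tokens, skip_special_tokens=True)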