Coco-18 committed on
Commit
67a7810
Β·
verified Β·
1 Parent(s): a70fb66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -51
app.py CHANGED
@@ -88,7 +88,7 @@ except Exception as e:
88
  # Language-specific configurations
89
  LANGUAGE_CODES = {
90
  "kapampangan": "pam",
91
- "tagalog": "tgl",
92
  "english": "eng"
93
  }
94
 
@@ -127,36 +127,37 @@ TRANSLATION_MODELS = {
127
  "eng-pam": "Coco-18/opus-mt-en-pam",
128
  "tgl-eng": "Helsinki-NLP/opus-mt-tl-en",
129
  "eng-tgl": "Helsinki-NLP/opus-mt-en-tl"
130
- # pam-tgl and tgl-pam will be added later
 
131
  }
132
 
133
  logger.info(f"πŸ”„ Loading Translation model: {TRANSLATION_MODELS}")
134
 
135
- # Replace the single model initialization with:
136
  translation_models = {}
137
  translation_tokenizers = {}
138
 
139
- for lang_pair, model_id in TRANSLATION_MODELS.items():
140
  logger.info(f"πŸ”„ Loading Translation model: {model_id}")
141
 
142
  try:
143
- translation_tokenizers[lang_pair] = MarianTokenizer.from_pretrained(
144
  model_id,
145
  cache_dir=cache_dirs["TRANSFORMERS_CACHE"]
146
  )
147
- logger.info(f"βœ… Translation tokenizer loaded successfully for {lang_pair}")
148
 
149
- translation_models[lang_pair] = MarianMTModel.from_pretrained(
150
  model_id,
151
  cache_dir=cache_dirs["TRANSFORMERS_CACHE"]
152
  )
153
- translation_models[lang_pair].to(device)
154
- logger.info(f"βœ… Translation model loaded successfully on {device} for {lang_pair}")
155
  except Exception as e:
156
- logger.error(f"❌ Error loading Translation model for {lang_pair}: {str(e)}")
157
  logger.debug(f"Stack trace: {traceback.format_exc()}")
158
-
159
-
160
 
161
  # Constants
162
  SAMPLE_RATE = 16000
@@ -173,12 +174,25 @@ def home():
173
 
174
  @app.route("/health", methods=["GET"])
175
  def health_check():
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  health_status = {
177
  "api_status": "online",
178
  "asr_model": "loaded" if asr_model is not None else "failed",
179
  "tts_models": {lang: "loaded" if model is not None else "failed"
180
  for lang, model in tts_models.items()},
181
- "translation_model": "loaded" if translation_model is not None else "failed",
182
  "device": device
183
  }
184
  return jsonify(health_status)
@@ -380,48 +394,88 @@ def translate_text():
380
  source_code = LANGUAGE_CODES.get(source_language, source_language)
381
  target_code = LANGUAGE_CODES.get(target_language, target_language)
382
 
383
- # Create the language pair key
384
- lang_pair = f"{source_code}-{target_code}"
385
-
386
  logger.info(f"πŸ”„ Translating from {source_language} to {target_language}: '{source_text}'")
387
 
388
- # Check if we have a model for this language pair
389
- if lang_pair not in translation_models:
390
- logger.warning(f"⚠️ No translation model available for {lang_pair}")
391
- return jsonify({"error": f"Translation from {source_language} to {target_language} is not supported yet"}), 400
392
-
393
- if translation_models[lang_pair] is None or translation_tokenizers[lang_pair] is None:
394
- logger.error(f"❌ Translation model for {lang_pair} not loaded")
395
- return jsonify({"error": f"Translation model not available"}), 503
396
-
397
- try:
398
- # Get the appropriate model and tokenizer
399
- model = translation_models[lang_pair]
400
- tokenizer = translation_tokenizers[lang_pair]
401
-
402
- # Tokenize the text
403
- tokenized = tokenizer(source_text, return_tensors="pt", padding=True)
404
- tokenized = {k: v.to(device) for k, v in tokenized.items()}
405
 
406
- # Generate translation
407
- with torch.no_grad():
408
- translated = model.generate(**tokenized)
409
-
410
- # Decode the translation
411
- result = tokenizer.decode(translated[0], skip_special_tokens=True)
412
-
413
- logger.info(f"βœ… Translation result: '{result}'")
414
-
415
- return jsonify({
416
- "translated_text": result,
417
- "source_language": source_language,
418
- "target_language": target_language
419
- })
420
- except Exception as e:
421
- logger.error(f"❌ Translation processing failed: {str(e)}")
422
- logger.debug(f"Stack trace: {traceback.format_exc()}")
423
- return jsonify({"error": f"Translation processing failed: {str(e)}"}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
424
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
  except Exception as e:
426
  logger.error(f"❌ Unhandled exception in translation endpoint: {str(e)}")
427
  logger.debug(f"Stack trace: {traceback.format_exc()}")
 
88
# Language-specific configurations.
# Maps user-facing language names (as received by the API) to the ISO 639-3
# codes used as keys into the translation-model registry and as the phi
# model's target-language token.
LANGUAGE_CODES = {
    "kapampangan": "pam",
    "filipino": "fil",  # routed to the special pam<->fil "phi" model
    # Backward-compatible alias: the tgl-eng / eng-tgl models are still
    # registered in TRANSLATION_MODELS, so existing clients sending
    # "tagalog" keep working after the filipino rename.
    "tagalog": "tgl",
    "english": "eng",
}
94
 
 
127
  "eng-pam": "Coco-18/opus-mt-en-pam",
128
  "tgl-eng": "Helsinki-NLP/opus-mt-tl-en",
129
  "eng-tgl": "Helsinki-NLP/opus-mt-en-tl",
130
+ # Special model for pam-fil translations in both directions
131
+ "phi": "Coco-18/opus-mt-phi"
132
  }
133
 
134
# Eagerly load every MT model/tokenizer pair declared in TRANSLATION_MODELS.
logger.info(f"πŸ”„ Loading Translation model: {TRANSLATION_MODELS}")

# One entry per TRANSLATION_MODELS key; a failed load is recorded as None so
# request handlers can report "unavailable" instead of crashing on KeyError.
translation_models = {}
translation_tokenizers = {}

for pair_key, repo_id in TRANSLATION_MODELS.items():
    logger.info(f"πŸ”„ Loading Translation model: {repo_id}")

    try:
        tokenizer = MarianTokenizer.from_pretrained(
            repo_id,
            cache_dir=cache_dirs["TRANSFORMERS_CACHE"],
        )
        translation_tokenizers[pair_key] = tokenizer
        logger.info(f"βœ… Translation tokenizer loaded successfully for {pair_key}")

        model = MarianMTModel.from_pretrained(
            repo_id,
            cache_dir=cache_dirs["TRANSFORMERS_CACHE"],
        )
        model.to(device)
        translation_models[pair_key] = model
        logger.info(f"βœ… Translation model loaded successfully on {device} for {pair_key}")
    except Exception as e:
        logger.error(f"❌ Error loading Translation model for {pair_key}: {str(e)}")
        logger.debug(f"Stack trace: {traceback.format_exc()}")
        # Mark both halves unusable; a tokenizer without its model (or vice
        # versa) is of no use to the endpoints.
        translation_models[pair_key] = None
        translation_tokenizers[pair_key] = None
161
 
162
  # Constants
163
  SAMPLE_RATE = 16000
 
174
 
175
@app.route("/health", methods=["GET"])
def health_check():
    """Report readiness of the ASR, TTS, and translation models."""

    def status_of(key):
        # "loaded" only when the key exists and its model survived startup.
        return "loaded" if translation_models.get(key) is not None else "failed"

    # Direct one-model-per-direction pairs.
    # NOTE(review): TRANSLATION_MODELS currently registers "tgl-eng"/"eng-tgl",
    # so "fil-eng"/"eng-fil" will always report "failed" unless fil-keyed
    # models are added β€” verify key naming against the model registry.
    translation_status = {
        pair: status_of(pair)
        for pair in ("pam-eng", "eng-pam", "fil-eng", "eng-fil")
    }

    # pam<->fil both ride on the single bidirectional "phi" model.
    phi_status = status_of("phi")
    translation_status["pam-fil"] = phi_status
    translation_status["fil-pam"] = phi_status

    health_status = {
        "api_status": "online",
        "asr_model": "loaded" if asr_model is not None else "failed",
        "tts_models": {lang: "loaded" if model is not None else "failed"
                       for lang, model in tts_models.items()},
        "translation_models": translation_status,
        "device": device,
    }
    return jsonify(health_status)
 
394
  source_code = LANGUAGE_CODES.get(source_language, source_language)
395
  target_code = LANGUAGE_CODES.get(target_language, target_language)
396
 
 
 
 
397
  logger.info(f"πŸ”„ Translating from {source_language} to {target_language}: '{source_text}'")
398
 
399
+ # Special handling for pam-fil and fil-pam using the single phi model
400
+ if (source_code == "pam" and target_code == "fil") or (source_code == "fil" and target_code == "pam"):
401
+ model_key = "phi"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
 
403
+ # Check if we have the phi model
404
+ if model_key not in translation_models or translation_models[model_key] is None:
405
+ logger.error(f"❌ Translation model for {model_key} not loaded")
406
+ return jsonify({"error": f"Translation model not available"}), 503
407
+
408
+ try:
409
+ # Get the phi model and tokenizer
410
+ model = translation_models[model_key]
411
+ tokenizer = translation_tokenizers[model_key]
412
+
413
+ # Prepend target language token to input
414
+ input_text = f">>{target_code}<< {source_text}"
415
+
416
+ # Tokenize the text
417
+ tokenized = tokenizer(input_text, return_tensors="pt", padding=True)
418
+ tokenized = {k: v.to(device) for k, v in tokenized.items()}
419
+
420
+ # Generate translation
421
+ with torch.no_grad():
422
+ translated = model.generate(**tokenized)
423
+
424
+ # Decode the translation
425
+ result = tokenizer.decode(translated[0], skip_special_tokens=True)
426
+
427
+ logger.info(f"βœ… Translation result: '{result}'")
428
+
429
+ return jsonify({
430
+ "translated_text": result,
431
+ "source_language": source_language,
432
+ "target_language": target_language
433
+ })
434
+ except Exception as e:
435
+ logger.error(f"❌ Translation processing failed: {str(e)}")
436
+ logger.debug(f"Stack trace: {traceback.format_exc()}")
437
+ return jsonify({"error": f"Translation processing failed: {str(e)}"}), 500
438
+ else:
439
+ # Create the regular language pair key for other language pairs
440
+ lang_pair = f"{source_code}-{target_code}"
441
+
442
+ # Check if we have a model for this language pair
443
+ if lang_pair not in translation_models:
444
+ logger.warning(f"⚠️ No translation model available for {lang_pair}")
445
+ return jsonify({"error": f"Translation from {source_language} to {target_language} is not supported yet"}), 400
446
 
447
+ if translation_models[lang_pair] is None or translation_tokenizers[lang_pair] is None:
448
+ logger.error(f"❌ Translation model for {lang_pair} not loaded")
449
+ return jsonify({"error": f"Translation model not available"}), 503
450
+
451
+ try:
452
+ # Regular translation process for other language pairs
453
+ model = translation_models[lang_pair]
454
+ tokenizer = translation_tokenizers[lang_pair]
455
+
456
+ # Tokenize the text
457
+ tokenized = tokenizer(source_text, return_tensors="pt", padding=True)
458
+ tokenized = {k: v.to(device) for k, v in tokenized.items()}
459
+
460
+ # Generate translation
461
+ with torch.no_grad():
462
+ translated = model.generate(**tokenized)
463
+
464
+ # Decode the translation
465
+ result = tokenizer.decode(translated[0], skip_special_tokens=True)
466
+
467
+ logger.info(f"βœ… Translation result: '{result}'")
468
+
469
+ return jsonify({
470
+ "translated_text": result,
471
+ "source_language": source_language,
472
+ "target_language": target_language
473
+ })
474
+ except Exception as e:
475
+ logger.error(f"❌ Translation processing failed: {str(e)}")
476
+ logger.debug(f"Stack trace: {traceback.format_exc()}")
477
+ return jsonify({"error": f"Translation processing failed: {str(e)}"}), 500
478
+
479
  except Exception as e:
480
  logger.error(f"❌ Unhandled exception in translation endpoint: {str(e)}")
481
  logger.debug(f"Stack trace: {traceback.format_exc()}")