Update app.py
Browse files
app.py
CHANGED
@@ -88,7 +88,7 @@ except Exception as e:
|
|
88 |
# Language-specific configurations
|
89 |
LANGUAGE_CODES = {
|
90 |
"kapampangan": "pam",
|
91 |
-
"
|
92 |
"english": "eng"
|
93 |
}
|
94 |
|
@@ -127,36 +127,37 @@ TRANSLATION_MODELS = {
|
|
127 |
"eng-pam": "Coco-18/opus-mt-en-pam",
|
128 |
"tgl-eng": "Helsinki-NLP/opus-mt-tl-en",
|
129 |
"eng-tgl": "Helsinki-NLP/opus-mt-en-tl"
|
130 |
-
|
|
|
131 |
}
|
132 |
|
133 |
logger.info(f"π Loading Translation model: {TRANSLATION_MODELS}")
|
134 |
|
135 |
-
#
|
136 |
translation_models = {}
|
137 |
translation_tokenizers = {}
|
138 |
|
139 |
-
for
|
140 |
logger.info(f"π Loading Translation model: {model_id}")
|
141 |
|
142 |
try:
|
143 |
-
translation_tokenizers[
|
144 |
model_id,
|
145 |
cache_dir=cache_dirs["TRANSFORMERS_CACHE"]
|
146 |
)
|
147 |
-
logger.info(f"β
Translation tokenizer loaded successfully for {
|
148 |
|
149 |
-
translation_models[
|
150 |
model_id,
|
151 |
cache_dir=cache_dirs["TRANSFORMERS_CACHE"]
|
152 |
)
|
153 |
-
translation_models[
|
154 |
-
logger.info(f"β
Translation model loaded successfully on {device} for {
|
155 |
except Exception as e:
|
156 |
-
logger.error(f"β Error loading Translation model for {
|
157 |
logger.debug(f"Stack trace: {traceback.format_exc()}")
|
158 |
-
|
159 |
-
|
160 |
|
161 |
# Constants
|
162 |
SAMPLE_RATE = 16000
|
@@ -173,12 +174,25 @@ def home():
|
|
173 |
|
174 |
@app.route("/health", methods=["GET"])
|
175 |
def health_check():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
health_status = {
|
177 |
"api_status": "online",
|
178 |
"asr_model": "loaded" if asr_model is not None else "failed",
|
179 |
"tts_models": {lang: "loaded" if model is not None else "failed"
|
180 |
for lang, model in tts_models.items()},
|
181 |
-
"
|
182 |
"device": device
|
183 |
}
|
184 |
return jsonify(health_status)
|
@@ -380,48 +394,88 @@ def translate_text():
|
|
380 |
source_code = LANGUAGE_CODES.get(source_language, source_language)
|
381 |
target_code = LANGUAGE_CODES.get(target_language, target_language)
|
382 |
|
383 |
-
# Create the language pair key
|
384 |
-
lang_pair = f"{source_code}-{target_code}"
|
385 |
-
|
386 |
logger.info(f"π Translating from {source_language} to {target_language}: '{source_text}'")
|
387 |
|
388 |
-
#
|
389 |
-
if
|
390 |
-
|
391 |
-
return jsonify({"error": f"Translation from {source_language} to {target_language} is not supported yet"}), 400
|
392 |
-
|
393 |
-
if translation_models[lang_pair] is None or translation_tokenizers[lang_pair] is None:
|
394 |
-
logger.error(f"❌ Translation model for {lang_pair} not loaded")
|
395 |
-
return jsonify({"error": f"Translation model not available"}), 503
|
396 |
-
|
397 |
-
try:
|
398 |
-
# Get the appropriate model and tokenizer
|
399 |
-
model = translation_models[lang_pair]
|
400 |
-
tokenizer = translation_tokenizers[lang_pair]
|
401 |
-
|
402 |
-
# Tokenize the text
|
403 |
-
tokenized = tokenizer(source_text, return_tensors="pt", padding=True)
|
404 |
-
tokenized = {k: v.to(device) for k, v in tokenized.items()}
|
405 |
|
406 |
-
#
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
"
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
424 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
425 |
except Exception as e:
|
426 |
logger.error(f"❌ Unhandled exception in translation endpoint: {str(e)}")
|
427 |
logger.debug(f"Stack trace: {traceback.format_exc()}")
|
|
|
88 |
# Language-specific configurations
|
89 |
LANGUAGE_CODES = {
|
90 |
"kapampangan": "pam",
|
91 |
+
"filipino": "fil", # Replaced tagalog with filipino
|
92 |
"english": "eng"
|
93 |
}
|
94 |
|
|
|
127 |
"eng-pam": "Coco-18/opus-mt-en-pam",
|
128 |
"tgl-eng": "Helsinki-NLP/opus-mt-tl-en",
|
129 |
"eng-tgl": "Helsinki-NLP/opus-mt-en-tl"
|
130 |
+
# Special model for pam-fil translations in both directions
|
131 |
+
"phi": "Coco-18/opus-mt-phi"
|
132 |
}
|
133 |
|
134 |
logger.info(f"π Loading Translation model: {TRANSLATION_MODELS}")
|
135 |
|
136 |
+
# Initialize translation models and tokenizers
|
137 |
translation_models = {}
|
138 |
translation_tokenizers = {}
|
139 |
|
140 |
+
for model_key, model_id in TRANSLATION_MODELS.items():
|
141 |
logger.info(f"π Loading Translation model: {model_id}")
|
142 |
|
143 |
try:
|
144 |
+
translation_tokenizers[model_key] = MarianTokenizer.from_pretrained(
|
145 |
model_id,
|
146 |
cache_dir=cache_dirs["TRANSFORMERS_CACHE"]
|
147 |
)
|
148 |
+
logger.info(f"✅ Translation tokenizer loaded successfully for {model_key}")
|
149 |
|
150 |
+
translation_models[model_key] = MarianMTModel.from_pretrained(
|
151 |
model_id,
|
152 |
cache_dir=cache_dirs["TRANSFORMERS_CACHE"]
|
153 |
)
|
154 |
+
translation_models[model_key].to(device)
|
155 |
+
logger.info(f"✅ Translation model loaded successfully on {device} for {model_key}")
|
156 |
except Exception as e:
|
157 |
+
logger.error(f"❌ Error loading Translation model for {model_key}: {str(e)}")
|
158 |
logger.debug(f"Stack trace: {traceback.format_exc()}")
|
159 |
+
translation_models[model_key] = None
|
160 |
+
translation_tokenizers[model_key] = None
|
161 |
|
162 |
# Constants
|
163 |
SAMPLE_RATE = 16000
|
|
|
174 |
|
175 |
@app.route("/health", methods=["GET"])
|
176 |
def health_check():
|
177 |
+
# Initialize direct language pair statuses based on loaded models
|
178 |
+
translation_status = {
|
179 |
+
"pam-eng": "loaded" if "pam-eng" in translation_models and translation_models["pam-eng"] is not None else "failed",
|
180 |
+
"eng-pam": "loaded" if "eng-pam" in translation_models and translation_models["eng-pam"] is not None else "failed",
|
181 |
+
"fil-eng": "loaded" if "fil-eng" in translation_models and translation_models["fil-eng"] is not None else "failed",
|
182 |
+
"eng-fil": "loaded" if "eng-fil" in translation_models and translation_models["eng-fil"] is not None else "failed",
|
183 |
+
}
|
184 |
+
|
185 |
+
# Add special phi model status for pam-fil translations
|
186 |
+
phi_status = "loaded" if "phi" in translation_models and translation_models["phi"] is not None else "failed"
|
187 |
+
translation_status["pam-fil"] = phi_status
|
188 |
+
translation_status["fil-pam"] = phi_status
|
189 |
+
|
190 |
health_status = {
|
191 |
"api_status": "online",
|
192 |
"asr_model": "loaded" if asr_model is not None else "failed",
|
193 |
"tts_models": {lang: "loaded" if model is not None else "failed"
|
194 |
for lang, model in tts_models.items()},
|
195 |
+
"translation_models": translation_status,
|
196 |
"device": device
|
197 |
}
|
198 |
return jsonify(health_status)
|
|
|
394 |
source_code = LANGUAGE_CODES.get(source_language, source_language)
|
395 |
target_code = LANGUAGE_CODES.get(target_language, target_language)
|
396 |
|
|
|
|
|
|
|
397 |
logger.info(f"π Translating from {source_language} to {target_language}: '{source_text}'")
|
398 |
|
399 |
+
# Special handling for pam-fil and fil-pam using the single phi model
|
400 |
+
if (source_code == "pam" and target_code == "fil") or (source_code == "fil" and target_code == "pam"):
|
401 |
+
model_key = "phi"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
402 |
|
403 |
+
# Check if we have the phi model
|
404 |
+
if model_key not in translation_models or translation_models[model_key] is None:
|
405 |
+
logger.error(f"❌ Translation model for {model_key} not loaded")
|
406 |
+
return jsonify({"error": f"Translation model not available"}), 503
|
407 |
+
|
408 |
+
try:
|
409 |
+
# Get the phi model and tokenizer
|
410 |
+
model = translation_models[model_key]
|
411 |
+
tokenizer = translation_tokenizers[model_key]
|
412 |
+
|
413 |
+
# Prepend target language token to input
|
414 |
+
input_text = f">>{target_code}<< {source_text}"
|
415 |
+
|
416 |
+
# Tokenize the text
|
417 |
+
tokenized = tokenizer(input_text, return_tensors="pt", padding=True)
|
418 |
+
tokenized = {k: v.to(device) for k, v in tokenized.items()}
|
419 |
+
|
420 |
+
# Generate translation
|
421 |
+
with torch.no_grad():
|
422 |
+
translated = model.generate(**tokenized)
|
423 |
+
|
424 |
+
# Decode the translation
|
425 |
+
result = tokenizer.decode(translated[0], skip_special_tokens=True)
|
426 |
+
|
427 |
+
logger.info(f"✅ Translation result: '{result}'")
|
428 |
+
|
429 |
+
return jsonify({
|
430 |
+
"translated_text": result,
|
431 |
+
"source_language": source_language,
|
432 |
+
"target_language": target_language
|
433 |
+
})
|
434 |
+
except Exception as e:
|
435 |
+
logger.error(f"❌ Translation processing failed: {str(e)}")
|
436 |
+
logger.debug(f"Stack trace: {traceback.format_exc()}")
|
437 |
+
return jsonify({"error": f"Translation processing failed: {str(e)}"}), 500
|
438 |
+
else:
|
439 |
+
# Create the regular language pair key for other language pairs
|
440 |
+
lang_pair = f"{source_code}-{target_code}"
|
441 |
+
|
442 |
+
# Check if we have a model for this language pair
|
443 |
+
if lang_pair not in translation_models:
|
444 |
+
logger.warning(f"⚠️ No translation model available for {lang_pair}")
|
445 |
+
return jsonify({"error": f"Translation from {source_language} to {target_language} is not supported yet"}), 400
|
446 |
|
447 |
+
if translation_models[lang_pair] is None or translation_tokenizers[lang_pair] is None:
|
448 |
+
logger.error(f"❌ Translation model for {lang_pair} not loaded")
|
449 |
+
return jsonify({"error": f"Translation model not available"}), 503
|
450 |
+
|
451 |
+
try:
|
452 |
+
# Regular translation process for other language pairs
|
453 |
+
model = translation_models[lang_pair]
|
454 |
+
tokenizer = translation_tokenizers[lang_pair]
|
455 |
+
|
456 |
+
# Tokenize the text
|
457 |
+
tokenized = tokenizer(source_text, return_tensors="pt", padding=True)
|
458 |
+
tokenized = {k: v.to(device) for k, v in tokenized.items()}
|
459 |
+
|
460 |
+
# Generate translation
|
461 |
+
with torch.no_grad():
|
462 |
+
translated = model.generate(**tokenized)
|
463 |
+
|
464 |
+
# Decode the translation
|
465 |
+
result = tokenizer.decode(translated[0], skip_special_tokens=True)
|
466 |
+
|
467 |
+
logger.info(f"✅ Translation result: '{result}'")
|
468 |
+
|
469 |
+
return jsonify({
|
470 |
+
"translated_text": result,
|
471 |
+
"source_language": source_language,
|
472 |
+
"target_language": target_language
|
473 |
+
})
|
474 |
+
except Exception as e:
|
475 |
+
logger.error(f"❌ Translation processing failed: {str(e)}")
|
476 |
+
logger.debug(f"Stack trace: {traceback.format_exc()}")
|
477 |
+
return jsonify({"error": f"Translation processing failed: {str(e)}"}), 500
|
478 |
+
|
479 |
except Exception as e:
|
480 |
logger.error(f"❌ Unhandled exception in translation endpoint: {str(e)}")
|
481 |
logger.debug(f"Stack trace: {traceback.format_exc()}")
|