Coco-18 committed on
Commit
67a7810
Β·
verified Β·
1 Parent(s): a70fb66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -51
app.py CHANGED
@@ -88,7 +88,7 @@ except Exception as e:
88
  # Language-specific configurations
89
  LANGUAGE_CODES = {
90
  "kapampangan": "pam",
91
- "tagalog": "tgl",
92
  "english": "eng"
93
  }
94
 
@@ -127,36 +127,37 @@ TRANSLATION_MODELS = {
127
  "eng-pam": "Coco-18/opus-mt-en-pam",
128
  "tgl-eng": "Helsinki-NLP/opus-mt-tl-en",
129
  "eng-tgl": "Helsinki-NLP/opus-mt-en-tl"
130
- # pam-tgl and tgl-pam will be added later
 
131
  }
132
 
133
  logger.info(f"πŸ”„ Loading Translation model: {TRANSLATION_MODELS}")
134
 
135
- # Replace the single model initialization with:
136
  translation_models = {}
137
  translation_tokenizers = {}
138
 
139
- for lang_pair, model_id in TRANSLATION_MODELS.items():
140
  logger.info(f"πŸ”„ Loading Translation model: {model_id}")
141
 
142
  try:
143
- translation_tokenizers[lang_pair] = MarianTokenizer.from_pretrained(
144
  model_id,
145
  cache_dir=cache_dirs["TRANSFORMERS_CACHE"]
146
  )
147
- logger.info(f"βœ… Translation tokenizer loaded successfully for {lang_pair}")
148
 
149
- translation_models[lang_pair] = MarianMTModel.from_pretrained(
150
  model_id,
151
  cache_dir=cache_dirs["TRANSFORMERS_CACHE"]
152
  )
153
- translation_models[lang_pair].to(device)
154
- logger.info(f"βœ… Translation model loaded successfully on {device} for {lang_pair}")
155
  except Exception as e:
156
- logger.error(f"❌ Error loading Translation model for {lang_pair}: {str(e)}")
157
  logger.debug(f"Stack trace: {traceback.format_exc()}")
158
-
159
-
160
 
161
  # Constants
162
  SAMPLE_RATE = 16000
@@ -173,12 +174,25 @@ def home():
173
 
174
  @app.route("/health", methods=["GET"])
175
  def health_check():
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  health_status = {
177
  "api_status": "online",
178
  "asr_model": "loaded" if asr_model is not None else "failed",
179
  "tts_models": {lang: "loaded" if model is not None else "failed"
180
  for lang, model in tts_models.items()},
181
- "translation_model": "loaded" if translation_model is not None else "failed",
182
  "device": device
183
  }
184
  return jsonify(health_status)
@@ -380,48 +394,88 @@ def translate_text():
380
  source_code = LANGUAGE_CODES.get(source_language, source_language)
381
  target_code = LANGUAGE_CODES.get(target_language, target_language)
382
 
383
- # Create the language pair key
384
- lang_pair = f"{source_code}-{target_code}"
385
-
386
  logger.info(f"πŸ”„ Translating from {source_language} to {target_language}: '{source_text}'")
387
 
388
- # Check if we have a model for this language pair
389
- if lang_pair not in translation_models:
390
- logger.warning(f"⚠️ No translation model available for {lang_pair}")
391
- return jsonify({"error": f"Translation from {source_language} to {target_language} is not supported yet"}), 400
392
-
393
- if translation_models[lang_pair] is None or translation_tokenizers[lang_pair] is None:
394
- logger.error(f"❌ Translation model for {lang_pair} not loaded")
395
- return jsonify({"error": f"Translation model not available"}), 503
396
-
397
- try:
398
- # Get the appropriate model and tokenizer
399
- model = translation_models[lang_pair]
400
- tokenizer = translation_tokenizers[lang_pair]
401
-
402
- # Tokenize the text
403
- tokenized = tokenizer(source_text, return_tensors="pt", padding=True)
404
- tokenized = {k: v.to(device) for k, v in tokenized.items()}
405
 
406
- # Generate translation
407
- with torch.no_grad():
408
- translated = model.generate(**tokenized)
409
-
410
- # Decode the translation
411
- result = tokenizer.decode(translated[0], skip_special_tokens=True)
412
-
413
- logger.info(f"βœ… Translation result: '{result}'")
414
-
415
- return jsonify({
416
- "translated_text": result,
417
- "source_language": source_language,
418
- "target_language": target_language
419
- })
420
- except Exception as e:
421
- logger.error(f"❌ Translation processing failed: {str(e)}")
422
- logger.debug(f"Stack trace: {traceback.format_exc()}")
423
- return jsonify({"error": f"Translation processing failed: {str(e)}"}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
424
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
  except Exception as e:
426
  logger.error(f"❌ Unhandled exception in translation endpoint: {str(e)}")
427
  logger.debug(f"Stack trace: {traceback.format_exc()}")
 
88
# Language-specific configurations.
# Maps user-facing language names (as received by the API) to the ISO 639-3
# codes used as keys into the translation-model registry and as the phi
# model's target-language token.
LANGUAGE_CODES = {
    "kapampangan": "pam",
    "filipino": "fil",  # routed to the special pam<->fil "phi" model
    # Backward-compatible alias: the tgl-eng / eng-tgl models are still
    # registered in TRANSLATION_MODELS, so existing clients sending
    # "tagalog" keep working after the filipino rename.
    "tagalog": "tgl",
    "english": "eng",
}
94
 
 
127
  "eng-pam": "Coco-18/opus-mt-en-pam",
128
  "tgl-eng": "Helsinki-NLP/opus-mt-tl-en",
129
  "eng-tgl": "Helsinki-NLP/opus-mt-en-tl",
130
+ # Special model for pam-fil translations in both directions
131
+ "phi": "Coco-18/opus-mt-phi"
132
  }
133
 
134
# Eagerly load every MT model/tokenizer pair declared in TRANSLATION_MODELS.
logger.info(f"πŸ”„ Loading Translation model: {TRANSLATION_MODELS}")

# One entry per TRANSLATION_MODELS key; a failed load is recorded as None so
# request handlers can report "unavailable" instead of crashing on KeyError.
translation_models = {}
translation_tokenizers = {}

for pair_key, repo_id in TRANSLATION_MODELS.items():
    logger.info(f"πŸ”„ Loading Translation model: {repo_id}")

    try:
        tokenizer = MarianTokenizer.from_pretrained(
            repo_id,
            cache_dir=cache_dirs["TRANSFORMERS_CACHE"],
        )
        translation_tokenizers[pair_key] = tokenizer
        logger.info(f"βœ… Translation tokenizer loaded successfully for {pair_key}")

        model = MarianMTModel.from_pretrained(
            repo_id,
            cache_dir=cache_dirs["TRANSFORMERS_CACHE"],
        )
        model.to(device)
        translation_models[pair_key] = model
        logger.info(f"βœ… Translation model loaded successfully on {device} for {pair_key}")
    except Exception as e:
        logger.error(f"❌ Error loading Translation model for {pair_key}: {str(e)}")
        logger.debug(f"Stack trace: {traceback.format_exc()}")
        # Mark both halves unusable; a tokenizer without its model (or vice
        # versa) is of no use to the endpoints.
        translation_models[pair_key] = None
        translation_tokenizers[pair_key] = None
161
 
162
  # Constants
163
  SAMPLE_RATE = 16000
 
174
 
175
@app.route("/health", methods=["GET"])
def health_check():
    """Report readiness of the ASR, TTS, and translation models."""

    def status_of(key):
        # "loaded" only when the key exists and its model survived startup.
        return "loaded" if translation_models.get(key) is not None else "failed"

    # Direct one-model-per-direction pairs.
    # NOTE(review): TRANSLATION_MODELS currently registers "tgl-eng"/"eng-tgl",
    # so "fil-eng"/"eng-fil" will always report "failed" unless fil-keyed
    # models are added β€” verify key naming against the model registry.
    translation_status = {
        pair: status_of(pair)
        for pair in ("pam-eng", "eng-pam", "fil-eng", "eng-fil")
    }

    # pam<->fil both ride on the single bidirectional "phi" model.
    phi_status = status_of("phi")
    translation_status["pam-fil"] = phi_status
    translation_status["fil-pam"] = phi_status

    health_status = {
        "api_status": "online",
        "asr_model": "loaded" if asr_model is not None else "failed",
        "tts_models": {lang: "loaded" if model is not None else "failed"
                       for lang, model in tts_models.items()},
        "translation_models": translation_status,
        "device": device,
    }
    return jsonify(health_status)
 
394
  source_code = LANGUAGE_CODES.get(source_language, source_language)
395
  target_code = LANGUAGE_CODES.get(target_language, target_language)
396
 
 
 
 
397
  logger.info(f"πŸ”„ Translating from {source_language} to {target_language}: '{source_text}'")
398
 
399
+ # Special handling for pam-fil and fil-pam using the single phi model
400
+ if (source_code == "pam" and target_code == "fil") or (source_code == "fil" and target_code == "pam"):
401
+ model_key = "phi"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
 
403
+ # Check if we have the phi model
404
+ if model_key not in translation_models or translation_models[model_key] is None:
405
+ logger.error(f"❌ Translation model for {model_key} not loaded")
406
+ return jsonify({"error": f"Translation model not available"}), 503
407
+
408
+ try:
409
+ # Get the phi model and tokenizer
410
+ model = translation_models[model_key]
411
+ tokenizer = translation_tokenizers[model_key]
412
+
413
+ # Prepend target language token to input
414
+ input_text = f">>{target_code}<< {source_text}"
415
+
416
+ # Tokenize the text
417
+ tokenized = tokenizer(input_text, return_tensors="pt", padding=True)
418
+ tokenized = {k: v.to(device) for k, v in tokenized.items()}
419
+
420
+ # Generate translation
421
+ with torch.no_grad():
422
+ translated = model.generate(**tokenized)
423
+
424
+ # Decode the translation
425
+ result = tokenizer.decode(translated[0], skip_special_tokens=True)
426
+
427
+ logger.info(f"βœ… Translation result: '{result}'")
428
+
429
+ return jsonify({
430
+ "translated_text": result,
431
+ "source_language": source_language,
432
+ "target_language": target_language
433
+ })
434
+ except Exception as e:
435
+ logger.error(f"❌ Translation processing failed: {str(e)}")
436
+ logger.debug(f"Stack trace: {traceback.format_exc()}")
437
+ return jsonify({"error": f"Translation processing failed: {str(e)}"}), 500
438
+ else:
439
+ # Create the regular language pair key for other language pairs
440
+ lang_pair = f"{source_code}-{target_code}"
441
+
442
+ # Check if we have a model for this language pair
443
+ if lang_pair not in translation_models:
444
+ logger.warning(f"⚠️ No translation model available for {lang_pair}")
445
+ return jsonify({"error": f"Translation from {source_language} to {target_language} is not supported yet"}), 400
446
 
447
+ if translation_models[lang_pair] is None or translation_tokenizers[lang_pair] is None:
448
+ logger.error(f"❌ Translation model for {lang_pair} not loaded")
449
+ return jsonify({"error": f"Translation model not available"}), 503
450
+
451
+ try:
452
+ # Regular translation process for other language pairs
453
+ model = translation_models[lang_pair]
454
+ tokenizer = translation_tokenizers[lang_pair]
455
+
456
+ # Tokenize the text
457
+ tokenized = tokenizer(source_text, return_tensors="pt", padding=True)
458
+ tokenized = {k: v.to(device) for k, v in tokenized.items()}
459
+
460
+ # Generate translation
461
+ with torch.no_grad():
462
+ translated = model.generate(**tokenized)
463
+
464
+ # Decode the translation
465
+ result = tokenizer.decode(translated[0], skip_special_tokens=True)
466
+
467
+ logger.info(f"βœ… Translation result: '{result}'")
468
+
469
+ return jsonify({
470
+ "translated_text": result,
471
+ "source_language": source_language,
472
+ "target_language": target_language
473
+ })
474
+ except Exception as e:
475
+ logger.error(f"❌ Translation processing failed: {str(e)}")
476
+ logger.debug(f"Stack trace: {traceback.format_exc()}")
477
+ return jsonify({"error": f"Translation processing failed: {str(e)}"}), 500
478
+
479
  except Exception as e:
480
  logger.error(f"❌ Unhandled exception in translation endpoint: {str(e)}")
481
  logger.debug(f"Stack trace: {traceback.format_exc()}")