Update translator.py

translator.py  (+173 −336)  CHANGED
@@ -1,4 +1,4 @@
-# translator.py - Handles ASR, TTS, and translation tasks

 import os
 import sys
@@ -12,11 +12,6 @@ from pydub import AudioSegment
 from flask import jsonify
 from transformers import Wav2Vec2ForCTC, AutoProcessor, VitsModel, AutoTokenizer
 from transformers import MarianMTModel, MarianTokenizer
-import concurrent.futures
-import functools
-import threading
-from concurrent.futures import ThreadPoolExecutor
-from functools import lru_cache

 # Configure logging
 logger = logging.getLogger("speech_api")
@@ -29,16 +24,6 @@ tts_processors = {}
 translation_models = {}
 translation_tokenizers = {}

-# Caching dictionaries
-asr_cache = {}
-tts_cache = {}
-translation_cache = {}
-
-# Mutex locks for thread safety
-asr_lock = threading.Lock()
-tts_lock = threading.Lock()
-translation_lock = threading.Lock()
-
 # Language-specific configurations
 LANGUAGE_CODES = {
     "kapampangan": "pam",
@@ -63,114 +48,74 @@ TRANSLATION_MODELS = {
     "phi": "Coco-18/opus-mt-phi"
 }

-# Cache settings
-MAX_CACHE_SIZE = 100  # Maximum number of items to cache
-CACHE_TTL = 3600  # Time to live in seconds (1 hour)
-
 def init_models(device):
-    """Initialize all models required for the API
     global asr_model, asr_processor, tts_models, tts_processors, translation_models, translation_tokenizers
-    …
-    def init_tts(lang, model_id):
         try:
-            …
                 model_id,
                 cache_dir=os.environ.get("TRANSFORMERS_CACHE")
             )
-            …
                 model_id,
                 cache_dir=os.environ.get("TRANSFORMERS_CACHE")
             )
-            …
             logger.info(f"✅ {lang} TTS model loaded on {device}")
-            return lang, processor, model
         except Exception as e:
             logger.error(f"❌ Failed to load {lang} TTS model: {str(e)}")
             logger.debug(f"Stack trace: {traceback.format_exc()}")
-            …
         try:
-            …
                 model_id,
                 cache_dir=os.environ.get("TRANSFORMERS_CACHE")
             )
-            …
                 model_id,
                 cache_dir=os.environ.get("TRANSFORMERS_CACHE")
             )
-            …
             logger.info(f"✅ Translation model loaded successfully on {device} for {model_key}")
-            return model_key, tokenizer, model
         except Exception as e:
             logger.error(f"❌ Error loading Translation model for {model_key}: {str(e)}")
             logger.debug(f"Stack trace: {traceback.format_exc()}")
-            …
-
-    # Use ThreadPoolExecutor to initialize models in parallel
-    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
-        # Start ASR model initialization
-        asr_future = executor.submit(init_asr)
-
-        # Start TTS model initialization in parallel
-        tts_futures = {
-            executor.submit(init_tts, lang, model_id): lang
-            for lang, model_id in TTS_MODELS.items()
-        }
-
-        # Start translation model initialization in parallel
-        translation_futures = {
-            executor.submit(init_translation, model_key, model_id): model_key
-            for model_key, model_id in TRANSLATION_MODELS.items()
-        }
-
-        # Wait for all futures to complete and process results
-
-        # Process TTS results
-        for future in concurrent.futures.as_completed(tts_futures):
-            lang, processor, model = future.result()
-            if processor is not None and model is not None:
-                tts_processors[lang] = processor
-                tts_models[lang] = model
-
-        # Process translation results
-        for future in concurrent.futures.as_completed(translation_futures):
-            model_key, tokenizer, model = future.result()
-            if tokenizer is not None and model is not None:
-                translation_tokenizers[model_key] = tokenizer
-                translation_models[model_key] = model
-
-    # Log summary of loaded models
-    logger.info("π Model initialization summary:")
-    logger.info(f" - ASR model: {'loaded' if asr_model is not None else 'failed'}")
-    logger.info(f" - TTS models loaded: {sum(1 for m in tts_models.values() if m is not None)}/{len(TTS_MODELS)}")
-    logger.info(f" - Translation models loaded: {sum(1 for m in translation_models.values() if m is not None)}/{len(TRANSLATION_MODELS)}")


 def check_model_status():
@@ -197,50 +142,9 @@ def check_model_status():
         "translation_models": translation_status
     }

-# Cache for ASR results
-@lru_cache(maxsize=MAX_CACHE_SIZE)
-def get_cached_transcription(file_hash, language_code):
-    """Retrieve cached transcription result if available"""
-    return asr_cache.get((file_hash, language_code))
-
-def process_audio_file(audio_data, temp_audio_path, output_dir, sample_rate):
-    """Process audio file for ASR (separate from ASR logic)"""
-    wav_path = temp_audio_path
-
-    if not temp_audio_path.lower().endswith(".wav"):
-        wav_path = os.path.join(output_dir, "converted_audio.wav")
-        logger.info(f"π Converting audio to WAV format: {wav_path}")
-        try:
-            audio = AudioSegment.from_file(temp_audio_path)
-            audio = audio.set_frame_rate(sample_rate).set_channels(1)
-            audio.export(wav_path, format="wav")
-        except Exception as e:
-            logger.error(f"❌ Audio conversion failed: {str(e)}")
-            raise Exception(f"Audio conversion failed: {str(e)}")
-
-    # Load and process the WAV file
-    try:
-        waveform, sr = torchaudio.load(wav_path)
-
-        # Resample if needed
-        if sr != sample_rate:
-            waveform = torchaudio.transforms.Resample(sr, sample_rate)(waveform)
-
-        # Normalize waveform
-        waveform = waveform / torch.max(torch.abs(waveform))
-
-        return waveform.squeeze().numpy(), wav_path
-    except Exception as e:
-        logger.error(f"❌ Failed to load or process audio: {str(e)}")
-        raise Exception(f"Audio processing failed: {str(e)}")
-
-def compute_audio_hash(audio_data):
-    """Compute a hash of audio data for caching purposes"""
-    import hashlib
-    return hashlib.md5(audio_data).hexdigest()

 def handle_asr_request(request, output_dir, sample_rate):
-    """Handle ASR (Automatic Speech Recognition) requests
     if asr_model is None or asr_processor is None:
         logger.error("❌ ASR endpoint called but models aren't loaded")
         return jsonify({"error": "ASR model not available"}), 503
@@ -261,40 +165,44 @@ def handle_asr_request(request, output_dir, sample_rate):
         lang_code = LANGUAGE_CODES[language]
         logger.info(f"π Processing {language} audio for ASR")

-        # Read the file content for hashing
-        audio_content = audio_file.read()
-        audio_hash = compute_audio_hash(audio_content)
-
-        # Check cache first
-        with asr_lock:
-            cached_result = asr_cache.get((audio_hash, lang_code))
-            if cached_result:
-                logger.info(f"✅ Using cached ASR result for {language}")
-                return jsonify({
-                    "transcription": cached_result,
-                    "language": language,
-                    "language_code": lang_code,
-                    "from_cache": True
-                })
-
         # Save the uploaded file temporarily
         with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(audio_file.filename)[-1]) as temp_audio:
-            temp_audio.write(
             temp_audio_path = temp_audio.name
         logger.debug(f"π Temporary audio saved to {temp_audio_path}")

-        # …
         try:
-            …
         except Exception as e:
-            …

         # Process audio for ASR
         try:
             inputs = asr_processor(
-                waveform,
                 sampling_rate=sample_rate,
                 return_tensors="pt",
                 language=lang_code
@@ -312,14 +220,6 @@ def handle_asr_request(request, output_dir, sample_rate):
             transcription = asr_processor.decode(ids)

             logger.info(f"✅ Transcription ({language}): {transcription}")
-
-            # Cache the result
-            with asr_lock:
-                asr_cache[(audio_hash, lang_code)] = transcription
-                # Implement cache size limitation if needed
-                if len(asr_cache) > MAX_CACHE_SIZE:
-                    # Remove oldest entry (simplified approach)
-                    asr_cache.pop(next(iter(asr_cache)))

             # Clean up temp files
             try:
@@ -332,8 +232,7 @@ def handle_asr_request(request, output_dir, sample_rate):
             return jsonify({
                 "transcription": transcription,
                 "language": language,
-                "language_code": lang_code
-                "from_cache": False
             })
         except Exception as e:
             logger.error(f"❌ ASR inference failed: {str(e)}")
@@ -345,14 +244,8 @@ def handle_asr_request(request, output_dir, sample_rate):
         logger.debug(f"Stack trace: {traceback.format_exc()}")
         return jsonify({"error": f"Internal server error: {str(e)}"}), 500

-# Cache key generator for TTS
-def tts_cache_key(text, language):
-    """Generate a cache key for TTS results"""
-    import hashlib
-    return hashlib.md5(f"{text}:{language}".encode()).hexdigest()
-
 def handle_tts_request(request, output_dir):
-    """Handle TTS (Text-to-Speech) requests
     try:
         data = request.get_json()
         if not data:
@@ -375,57 +268,7 @@ def handle_tts_request(request, output_dir):
             return jsonify({"error": f"TTS model for {language} not available"}), 503

         logger.info(f"π Generating TTS for language: {language}, text: '{text_input}'")
-
-        # Generate cache key
-        cache_key = tts_cache_key(text_input, language)
-
-        # Check cache
-        with tts_lock:
-            cached_file = tts_cache.get(cache_key)
-            if cached_file and os.path.exists(cached_file):
-                logger.info(f"✅ Using cached TTS audio for: '{text_input}'")
-                return jsonify({
-                    "message": "TTS audio retrieved from cache",
-                    "file_url": f"/download/{os.path.basename(cached_file)}",
-                    "language": language,
-                    "text_length": len(text_input),
-                    "from_cache": True
-                })

-        # Chunk text if too long (optional optimization for very long texts)
-        MAX_TEXT_LENGTH = 200  # Maximum text length to process in one go
-
-        if len(text_input) > MAX_TEXT_LENGTH:
-            # Simple chunking by splitting on periods
-            chunks = []
-            current_chunk = ""
-
-            for sentence in text_input.split("."):
-                if len(current_chunk) + len(sentence) < MAX_TEXT_LENGTH:
-                    current_chunk += sentence + "."
-                else:
-                    if current_chunk:
-                        chunks.append(current_chunk)
-                    current_chunk = sentence + "."
-
-            if current_chunk:
-                chunks.append(current_chunk)
-
-            logger.info(f"π Text chunked into {len(chunks)} parts for processing")
-
-            # Process chunks and combine results
-            try:
-                processor = tts_processors[language]
-                model = tts_models[language]
-
-                # For simplicity, we'll just use the first chunk in this example
-                # A full implementation would process all chunks and concatenate audio
-                text_input = chunks[0]
-                logger.info(f"⚠️ Using only the first chunk for demonstration: '{text_input}'")
-            except Exception as e:
-                logger.error(f"❌ TTS chunking failed: {str(e)}")
-                return jsonify({"error": f"TTS chunking failed: {str(e)}"}), 500
-
         try:
             processor = tts_processors[language]
             model = tts_models[language]
@@ -447,22 +290,10 @@ def handle_tts_request(request, output_dir):

             # Save to file
             try:
-                output_filename = os.path.join(output_dir, f"{language}
                 sampling_rate = model.config.sampling_rate
                 sf.write(output_filename, waveform, sampling_rate)
                 logger.info(f"✅ Speech generated! File saved: {output_filename}")
-
-                # Cache the result
-                with tts_lock:
-                    tts_cache[cache_key] = output_filename
-                    # Implement cache size limitation if needed
-                    if len(tts_cache) > MAX_CACHE_SIZE:
-                        oldest_key = next(iter(tts_cache))
-                        try:
-                            os.remove(tts_cache[oldest_key])
-                        except:
-                            pass
-                        tts_cache.pop(oldest_key)
             except Exception as e:
                 logger.error(f"❌ Failed to save audio file: {str(e)}")
                 return jsonify({"error": f"Failed to save audio file: {str(e)}"}), 500
@@ -471,22 +302,15 @@ def handle_tts_request(request, output_dir):
                 "message": "TTS audio generated",
                 "file_url": f"/download/{os.path.basename(output_filename)}",
                 "language": language,
-                "text_length": len(text_input)
-                "from_cache": False
             })
         except Exception as e:
             logger.error(f"❌ Unhandled exception in TTS endpoint: {str(e)}")
             logger.debug(f"Stack trace: {traceback.format_exc()}")
             return jsonify({"error": f"Internal server error: {str(e)}"}), 500

-# Cache key generator for translation
-def translation_cache_key(text, source_lang, target_lang):
-    """Generate a cache key for translation results"""
-    import hashlib
-    return hashlib.md5(f"{text}:{source_lang}:{target_lang}".encode()).hexdigest()
-
 def handle_translation_request(request):
-    """Handle translation requests
     try:
         data = request.get_json()
         if not data:
@@ -506,97 +330,110 @@ def handle_translation_request(request):
         target_code = LANGUAGE_CODES.get(target_language, target_language)

         logger.info(f"π Translating from {source_language} to {target_language}: '{source_text}'")
-
-        # Generate cache key
-        cache_key = translation_cache_key(source_text, source_code, target_code)
-
-        # Check cache
-        with translation_lock:
-            cached_result = translation_cache.get(cache_key)
-            if cached_result:
-                logger.info(f"✅ Using cached translation result")
-                return jsonify({
-                    "translated_text": cached_result,
-                    "source_language": source_language,
-                    "target_language": target_language,
-                    "from_cache": True
-                })

-        # …
         actual_source_code = source_code
         actual_target_code = target_code
-        …
-            model_key = f"{source_code}-{target_code}"
-            use_phi_model = False
-        elif (source_code in ["pam", "fil", "tgl"] and target_code in ["pam", "fil", "tgl"]):
-            # Use phi model with appropriate substitutions
-            model_key = "phi"
             use_phi_model = True
-            # Replace tgl with fil for the phi model
-            …
         else:
-            …
-                {"error": f"Translation from {source_language} to {target_language} is not supported yet"}), 400
-
-        # Check if model exists and is loaded
-        if model_key not in translation_models or translation_models[model_key] is None:
-            logger.error(f"❌ Translation model for {model_key} not loaded")
-            return jsonify({"error": f"Translation model not available"}), 503

-        …

-        …
-            translated = model.generate(
-                **tokenized,
-                max_length=max_length,
-                num_beams=4,
-                length_penalty=0.6,
-                early_stopping=True,
-                repetition_penalty=1.5,
-                no_repeat_ngram_size=3
-            )
-
-            # Decode the translation
-            result = tokenizer.decode(translated[0], skip_special_tokens=True)
-
-            logger.info(f"✅ Translation result: '{result}'")
-
-            # Cache the result
-            with translation_lock:
-                translation_cache[cache_key] = result
-                # Implement cache size limitation if needed
-                if len(translation_cache) > MAX_CACHE_SIZE:
-                    translation_cache.pop(next(iter(translation_cache)))

-            …

     except Exception as e:
         logger.error(f"❌ Unhandled exception in translation endpoint: {str(e)}")
+# translator.py - Handles ASR, TTS, and translation tasks

 import os
 import sys
…
 from flask import jsonify
 from transformers import Wav2Vec2ForCTC, AutoProcessor, VitsModel, AutoTokenizer
 from transformers import MarianMTModel, MarianTokenizer

 # Configure logging
 logger = logging.getLogger("speech_api")
…
 translation_models = {}
 translation_tokenizers = {}

 # Language-specific configurations
 LANGUAGE_CODES = {
     "kapampangan": "pam",
…
     "phi": "Coco-18/opus-mt-phi"
 }

 def init_models(device):
+    """Initialize all models required for the API"""
     global asr_model, asr_processor, tts_models, tts_processors, translation_models, translation_tokenizers
+
+    # Initialize ASR model
+    ASR_MODEL_ID = "Coco-18/mms-asr-tgl-en-safetensor"
+    logger.info(f"π Loading ASR model: {ASR_MODEL_ID}")
+
+    try:
+        asr_processor = AutoProcessor.from_pretrained(
+            ASR_MODEL_ID,
+            cache_dir=os.environ.get("TRANSFORMERS_CACHE")
+        )
+        logger.info("✅ ASR processor loaded successfully")
+
+        asr_model = Wav2Vec2ForCTC.from_pretrained(
+            ASR_MODEL_ID,
+            cache_dir=os.environ.get("TRANSFORMERS_CACHE")
+        )
+        asr_model.to(device)
+        logger.info(f"✅ ASR model loaded successfully on {device}")
+    except Exception as e:
+        logger.error(f"❌ Error loading ASR model: {str(e)}")
+        logger.debug(f"Stack trace: {traceback.format_exc()}")
+
+    # Initialize TTS models
+    for lang, model_id in TTS_MODELS.items():
+        logger.info(f"π Loading TTS model for {lang}: {model_id}")
         try:
+            tts_processors[lang] = AutoTokenizer.from_pretrained(
                 model_id,
                 cache_dir=os.environ.get("TRANSFORMERS_CACHE")
             )
+            logger.info(f"✅ {lang} TTS processor loaded")
+
+            tts_models[lang] = VitsModel.from_pretrained(
                 model_id,
                 cache_dir=os.environ.get("TRANSFORMERS_CACHE")
             )
+            tts_models[lang].to(device)
             logger.info(f"✅ {lang} TTS model loaded on {device}")
         except Exception as e:
             logger.error(f"❌ Failed to load {lang} TTS model: {str(e)}")
             logger.debug(f"Stack trace: {traceback.format_exc()}")
+            tts_models[lang] = None
+
+    # Initialize translation models
+    for model_key, model_id in TRANSLATION_MODELS.items():
+        logger.info(f"π Loading Translation model: {model_id}")
+
         try:
+            translation_tokenizers[model_key] = MarianTokenizer.from_pretrained(
                 model_id,
                 cache_dir=os.environ.get("TRANSFORMERS_CACHE")
             )
+            logger.info(f"✅ Translation tokenizer loaded successfully for {model_key}")
+
+            translation_models[model_key] = MarianMTModel.from_pretrained(
                 model_id,
                 cache_dir=os.environ.get("TRANSFORMERS_CACHE")
             )
+            translation_models[model_key].to(device)
             logger.info(f"✅ Translation model loaded successfully on {device} for {model_key}")
         except Exception as e:
             logger.error(f"❌ Error loading Translation model for {model_key}: {str(e)}")
             logger.debug(f"Stack trace: {traceback.format_exc()}")
+            translation_models[model_key] = None
+            translation_tokenizers[model_key] = None


 def check_model_status():
…
         "translation_models": translation_status
     }


 def handle_asr_request(request, output_dir, sample_rate):
+    """Handle ASR (Automatic Speech Recognition) requests"""
     if asr_model is None or asr_processor is None:
         logger.error("❌ ASR endpoint called but models aren't loaded")
         return jsonify({"error": "ASR model not available"}), 503
…
         lang_code = LANGUAGE_CODES[language]
         logger.info(f"π Processing {language} audio for ASR")

         # Save the uploaded file temporarily
         with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(audio_file.filename)[-1]) as temp_audio:
+            temp_audio.write(audio_file.read())
             temp_audio_path = temp_audio.name
         logger.debug(f"π Temporary audio saved to {temp_audio_path}")

+        # Convert to WAV if necessary
+        wav_path = temp_audio_path
+        if not audio_file.filename.lower().endswith(".wav"):
+            wav_path = os.path.join(output_dir, "converted_audio.wav")
+            logger.info(f"π Converting audio to WAV format: {wav_path}")
+            try:
+                audio = AudioSegment.from_file(temp_audio_path)
+                audio = audio.set_frame_rate(sample_rate).set_channels(1)
+                audio.export(wav_path, format="wav")
+            except Exception as e:
+                logger.error(f"❌ Audio conversion failed: {str(e)}")
+                return jsonify({"error": f"Audio conversion failed: {str(e)}"}), 500
+
+        # Load and process the WAV file
         try:
+            waveform, sr = torchaudio.load(wav_path)
+            logger.debug(f"✅ Audio loaded: {wav_path} (Sample rate: {sr}Hz)")
+
+            # Resample if needed
+            if sr != sample_rate:
+                logger.info(f"π Resampling audio from {sr}Hz to {sample_rate}Hz")
+                waveform = torchaudio.transforms.Resample(sr, sample_rate)(waveform)
+
+            waveform = waveform / torch.max(torch.abs(waveform))
         except Exception as e:
+            logger.error(f"❌ Failed to load or process audio: {str(e)}")
+            return jsonify({"error": f"Audio processing failed: {str(e)}"}), 500

         # Process audio for ASR
         try:
             inputs = asr_processor(
+                waveform.squeeze().numpy(),
                 sampling_rate=sample_rate,
                 return_tensors="pt",
                 language=lang_code
…
             transcription = asr_processor.decode(ids)

             logger.info(f"✅ Transcription ({language}): {transcription}")

             # Clean up temp files
             try:
…
             return jsonify({
                 "transcription": transcription,
                 "language": language,
+                "language_code": lang_code
             })
         except Exception as e:
             logger.error(f"❌ ASR inference failed: {str(e)}")
…
         logger.debug(f"Stack trace: {traceback.format_exc()}")
         return jsonify({"error": f"Internal server error: {str(e)}"}), 500

 def handle_tts_request(request, output_dir):
+    """Handle TTS (Text-to-Speech) requests"""
     try:
         data = request.get_json()
         if not data:
…
             return jsonify({"error": f"TTS model for {language} not available"}), 503

         logger.info(f"π Generating TTS for language: {language}, text: '{text_input}'")

         try:
             processor = tts_processors[language]
             model = tts_models[language]
…

             # Save to file
             try:
+                output_filename = os.path.join(output_dir, f"{language}_output.wav")
                 sampling_rate = model.config.sampling_rate
                 sf.write(output_filename, waveform, sampling_rate)
                 logger.info(f"✅ Speech generated! File saved: {output_filename}")
             except Exception as e:
                 logger.error(f"❌ Failed to save audio file: {str(e)}")
                 return jsonify({"error": f"Failed to save audio file: {str(e)}"}), 500
…
                 "message": "TTS audio generated",
                 "file_url": f"/download/{os.path.basename(output_filename)}",
                 "language": language,
+                "text_length": len(text_input)
             })
         except Exception as e:
             logger.error(f"❌ Unhandled exception in TTS endpoint: {str(e)}")
             logger.debug(f"Stack trace: {traceback.format_exc()}")
             return jsonify({"error": f"Internal server error: {str(e)}"}), 500

 def handle_translation_request(request):
+    """Handle translation requests"""
     try:
         data = request.get_json()
         if not data:
…
         target_code = LANGUAGE_CODES.get(target_language, target_language)

         logger.info(f"π Translating from {source_language} to {target_language}: '{source_text}'")

+        # Special handling for pam-fil, fil-pam, pam-tgl and tgl-pam using the phi model
+        use_phi_model = False
         actual_source_code = source_code
         actual_target_code = target_code
+
+        # Check if we need to use the phi model with fil replacement
+        if (source_code == "pam" and target_code == "fil") or (source_code == "fil" and target_code == "pam"):
+            use_phi_model = True
+        elif (source_code == "pam" and target_code == "tgl"):
             use_phi_model = True
+            actual_target_code = "fil"  # Replace tgl with fil for the phi model
+        elif (source_code == "tgl" and target_code == "pam"):
+            use_phi_model = True
+            actual_source_code = "fil"  # Replace tgl with fil for the phi model
+
+        if use_phi_model:
+            model_key = "phi"
+
+            # Check if we have the phi model
+            if model_key not in translation_models or translation_models[model_key] is None:
+                logger.error(f"❌ Translation model for {model_key} not loaded")
+                return jsonify({"error": f"Translation model not available"}), 503
+
+            try:
+                # Get the phi model and tokenizer
+                model = translation_models[model_key]
+                tokenizer = translation_tokenizers[model_key]
+
+                # Prepend target language token to input
+                input_text = f">>{actual_target_code}<< {source_text}"
+
+                logger.info(f"π Using phi model with input: '{input_text}'")
+
+                # Tokenize the text
+                tokenized = tokenizer(input_text, return_tensors="pt", padding=True)
+                tokenized = {k: v.to(model.device) for k, v in tokenized.items()}
+
+                with torch.no_grad():
+                    translated = model.generate(
+                        **tokenized,
+                        max_length=100,  # Reasonable output length
+                        num_beams=4,  # Same as in training
+                        length_penalty=0.6,  # Same as in training
+                        early_stopping=True,  # Same as in training
+                        repetition_penalty=1.5,  # Add this to prevent repetition
+                        no_repeat_ngram_size=3  # Add this to prevent repetition
+                    )
+
+                # Decode the translation
+                result = tokenizer.decode(translated[0], skip_special_tokens=True)
+
+                logger.info(f"✅ Translation result: '{result}'")
+
+                return jsonify({
+                    "translated_text": result,
+                    "source_language": source_language,
+                    "target_language": target_language
+                })
+            except Exception as e:
+                logger.error(f"❌ Translation processing failed: {str(e)}")
+                logger.debug(f"Stack trace: {traceback.format_exc()}")
+                return jsonify({"error": f"Translation processing failed: {str(e)}"}), 500
         else:
+            # Create the regular language pair key for other language pairs
+            lang_pair = f"{source_code}-{target_code}"

+            # Check if we have a model for this language pair
+            if lang_pair not in translation_models:
+                logger.warning(f"⚠️ No translation model available for {lang_pair}")
+                return jsonify(
+                    {"error": f"Translation from {source_language} to {target_language} is not supported yet"}), 400

+            if translation_models[lang_pair] is None or translation_tokenizers[lang_pair] is None:
+                logger.error(f"❌ Translation model for {lang_pair} not loaded")
+                return jsonify({"error": f"Translation model not available"}), 503

+            try:
+                # Regular translation process for other language pairs
+                model = translation_models[lang_pair]
+                tokenizer = translation_tokenizers[lang_pair]

+                # Tokenize the text
+                tokenized = tokenizer(source_text, return_tensors="pt", padding=True)
+                tokenized = {k: v.to(model.device) for k, v in tokenized.items()}
+
+                # Generate translation
+                with torch.no_grad():
+                    translated = model.generate(**tokenized)
+
+                # Decode the translation
+                result = tokenizer.decode(translated[0], skip_special_tokens=True)
+
+                logger.info(f"✅ Translation result: '{result}'")
+
+                return jsonify({
+                    "translated_text": result,
+                    "source_language": source_language,
+                    "target_language": target_language
+                })
+            except Exception as e:
+                logger.error(f"❌ Translation processing failed: {str(e)}")
+                logger.debug(f"Stack trace: {traceback.format_exc()}")
+                return jsonify({"error": f"Translation processing failed: {str(e)}"}), 500

     except Exception as e:
         logger.error(f"❌ Unhandled exception in translation endpoint: {str(e)}")
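For context, a minimal sketch of how the refactored handlers are typically wired into the Flask app. The endpoint paths, OUTPUT_DIR, and SAMPLE_RATE values below are illustrative assumptions; only the translator.py function names come from the change above.

# app.py - hypothetical wiring of translator.py into a Flask app (not part of this commit)
import torch
from flask import Flask, request

import translator

app = Flask(__name__)
OUTPUT_DIR = "/tmp/speech_api_output"  # assumed output directory
SAMPLE_RATE = 16000                    # assumed ASR sample rate

# Load ASR, TTS, and translation models once at startup
# (now loaded sequentially, since the thread-pool initialization was removed)
translator.init_models("cuda" if torch.cuda.is_available() else "cpu")

@app.route("/health", methods=["GET"])
def health():
    # check_model_status() reports which models loaded successfully
    return translator.check_model_status()

@app.route("/asr", methods=["POST"])
def asr():
    return translator.handle_asr_request(request, OUTPUT_DIR, SAMPLE_RATE)

@app.route("/tts", methods=["POST"])
def tts():
    return translator.handle_tts_request(request, OUTPUT_DIR)

@app.route("/translate", methods=["POST"])
def translate():
    return translator.handle_translation_request(request)

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8000)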