Coco-18 committed · verified
Commit d39ccee · Parent(s): 203cc78

Update translator.py

Files changed (1):
  1. translator.py +336 -173

translator.py CHANGED
@@ -1,4 +1,4 @@
-# translator.py - Handles ASR, TTS, and translation tasks
+# translator.py - Handles ASR, TTS, and translation tasks (OPTIMIZED)
 
 import os
 import sys
@@ -12,6 +12,11 @@ from pydub import AudioSegment
 from flask import jsonify
 from transformers import Wav2Vec2ForCTC, AutoProcessor, VitsModel, AutoTokenizer
 from transformers import MarianMTModel, MarianTokenizer
+import concurrent.futures
+import functools
+import threading
+from concurrent.futures import ThreadPoolExecutor
+from functools import lru_cache
 
 # Configure logging
 logger = logging.getLogger("speech_api")
@@ -24,6 +29,16 @@ tts_processors = {}
 translation_models = {}
 translation_tokenizers = {}
 
+# Caching dictionaries
+asr_cache = {}
+tts_cache = {}
+translation_cache = {}
+
+# Mutex locks for thread safety
+asr_lock = threading.Lock()
+tts_lock = threading.Lock()
+translation_lock = threading.Lock()
+
 # Language-specific configurations
 LANGUAGE_CODES = {
     "kapampangan": "pam",
@@ -48,74 +63,114 @@ TRANSLATION_MODELS = {
     "phi": "Coco-18/opus-mt-phi"
 }
 
+# Cache settings
+MAX_CACHE_SIZE = 100  # Maximum number of items to cache
+CACHE_TTL = 3600  # Time to live in seconds (1 hour)
+
 def init_models(device):
-    """Initialize all models required for the API"""
+    """Initialize all models required for the API with parallelization"""
     global asr_model, asr_processor, tts_models, tts_processors, translation_models, translation_tokenizers
-
-    # Initialize ASR model
-    ASR_MODEL_ID = "Coco-18/mms-asr-tgl-en-safetensor"
-    logger.info(f"🔄 Loading ASR model: {ASR_MODEL_ID}")
-
-    try:
-        asr_processor = AutoProcessor.from_pretrained(
-            ASR_MODEL_ID,
-            cache_dir=os.environ.get("TRANSFORMERS_CACHE")
-        )
-        logger.info("✅ ASR processor loaded successfully")
-
-        asr_model = Wav2Vec2ForCTC.from_pretrained(
-            ASR_MODEL_ID,
-            cache_dir=os.environ.get("TRANSFORMERS_CACHE")
-        )
-        asr_model.to(device)
-        logger.info(f"✅ ASR model loaded successfully on {device}")
-    except Exception as e:
-        logger.error(f"❌ Error loading ASR model: {str(e)}")
-        logger.debug(f"Stack trace: {traceback.format_exc()}")
-
-    # Initialize TTS models
-    for lang, model_id in TTS_MODELS.items():
-        logger.info(f"🔄 Loading TTS model for {lang}: {model_id}")
+
+    logger.info("🔄 Starting parallel model initialization")
+
+    # Define model initialization functions
+    def init_asr():
+        global asr_model, asr_processor
+        ASR_MODEL_ID = "Coco-18/mms-asr-tgl-en-safetensor"
+        try:
+            asr_processor = AutoProcessor.from_pretrained(
+                ASR_MODEL_ID,
+                cache_dir=os.environ.get("TRANSFORMERS_CACHE")
+            )
+
+            asr_model = Wav2Vec2ForCTC.from_pretrained(
+                ASR_MODEL_ID,
+                cache_dir=os.environ.get("TRANSFORMERS_CACHE")
+            )
+            asr_model.to(device)
+            logger.info(f"✅ ASR model loaded successfully on {device}")
+            return True
+        except Exception as e:
+            logger.error(f"❌ Error loading ASR model: {str(e)}")
+            logger.debug(f"Stack trace: {traceback.format_exc()}")
+            return False
+
+    def init_tts(lang, model_id):
         try:
-            tts_processors[lang] = AutoTokenizer.from_pretrained(
+            processor = AutoTokenizer.from_pretrained(
                 model_id,
                 cache_dir=os.environ.get("TRANSFORMERS_CACHE")
             )
-            logger.info(f"✅ {lang} TTS processor loaded")
-
-            tts_models[lang] = VitsModel.from_pretrained(
+
+            model = VitsModel.from_pretrained(
                 model_id,
                 cache_dir=os.environ.get("TRANSFORMERS_CACHE")
            )
-            tts_models[lang].to(device)
+            model.to(device)
             logger.info(f"✅ {lang} TTS model loaded on {device}")
+            return lang, processor, model
         except Exception as e:
             logger.error(f"❌ Failed to load {lang} TTS model: {str(e)}")
             logger.debug(f"Stack trace: {traceback.format_exc()}")
-            tts_models[lang] = None
-
-    # Initialize translation models
-    for model_key, model_id in TRANSLATION_MODELS.items():
-        logger.info(f"🔄 Loading Translation model: {model_id}")
-
+            return lang, None, None
+
+    def init_translation(model_key, model_id):
         try:
-            translation_tokenizers[model_key] = MarianTokenizer.from_pretrained(
+            tokenizer = MarianTokenizer.from_pretrained(
                 model_id,
                 cache_dir=os.environ.get("TRANSFORMERS_CACHE")
            )
-            logger.info(f"✅ Translation tokenizer loaded successfully for {model_key}")
-
-            translation_models[model_key] = MarianMTModel.from_pretrained(
+
+            model = MarianMTModel.from_pretrained(
                 model_id,
                 cache_dir=os.environ.get("TRANSFORMERS_CACHE")
            )
-            translation_models[model_key].to(device)
+            model.to(device)
             logger.info(f"✅ Translation model loaded successfully on {device} for {model_key}")
+            return model_key, tokenizer, model
         except Exception as e:
             logger.error(f"❌ Error loading Translation model for {model_key}: {str(e)}")
             logger.debug(f"Stack trace: {traceback.format_exc()}")
-            translation_models[model_key] = None
-            translation_tokenizers[model_key] = None
+            return model_key, None, None
+
+    # Use ThreadPoolExecutor to initialize models in parallel
+    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
+        # Start ASR model initialization
+        asr_future = executor.submit(init_asr)
+
+        # Start TTS model initialization in parallel
+        tts_futures = {
+            executor.submit(init_tts, lang, model_id): lang
+            for lang, model_id in TTS_MODELS.items()
+        }
+
+        # Start translation model initialization in parallel
+        translation_futures = {
+            executor.submit(init_translation, model_key, model_id): model_key
+            for model_key, model_id in TRANSLATION_MODELS.items()
+        }
+
+        # Wait for all futures to complete and process results
+
+        # Process TTS results
+        for future in concurrent.futures.as_completed(tts_futures):
+            lang, processor, model = future.result()
+            if processor is not None and model is not None:
+                tts_processors[lang] = processor
+                tts_models[lang] = model
+
+        # Process translation results
+        for future in concurrent.futures.as_completed(translation_futures):
+            model_key, tokenizer, model = future.result()
+            if tokenizer is not None and model is not None:
+                translation_tokenizers[model_key] = tokenizer
+                translation_models[model_key] = model
+
+    # Log summary of loaded models
+    logger.info("📊 Model initialization summary:")
+    logger.info(f" - ASR model: {'loaded' if asr_model is not None else 'failed'}")
+    logger.info(f" - TTS models loaded: {sum(1 for m in tts_models.values() if m is not None)}/{len(TTS_MODELS)}")
+    logger.info(f" - Translation models loaded: {sum(1 for m in translation_models.values() if m is not None)}/{len(TRANSLATION_MODELS)}")
 
 
 def check_model_status():
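A note on the executor block above: threads are a reasonable fit here because from_pretrained is dominated by network and disk I/O, which release the GIL. One loose end is that asr_future is never read; since init_asr catches its own exceptions and the with-block joins all tasks on exit, nothing is lost, but reading the future would surface unexpected failures. A sketch (the names come from the diff; the check itself is not part of the commit):

# Sketch: surface unexpected init_asr failures instead of discarding the future.
asr_ok = asr_future.result()  # blocks until init_asr returns; re-raises uncaught errors
if not asr_ok:
    logger.warning("⚠️ ASR initialization reported failure")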
@@ -142,9 +197,50 @@ def check_model_status():
         "translation_models": translation_status
     }
 
+# Cache for ASR results
+@lru_cache(maxsize=MAX_CACHE_SIZE)
+def get_cached_transcription(file_hash, language_code):
+    """Retrieve cached transcription result if available"""
+    return asr_cache.get((file_hash, language_code))
+
+def process_audio_file(audio_data, temp_audio_path, output_dir, sample_rate):
+    """Process audio file for ASR (separate from ASR logic)"""
+    wav_path = temp_audio_path
+
+    if not temp_audio_path.lower().endswith(".wav"):
+        wav_path = os.path.join(output_dir, "converted_audio.wav")
+        logger.info(f"🔄 Converting audio to WAV format: {wav_path}")
+        try:
+            audio = AudioSegment.from_file(temp_audio_path)
+            audio = audio.set_frame_rate(sample_rate).set_channels(1)
+            audio.export(wav_path, format="wav")
+        except Exception as e:
+            logger.error(f"❌ Audio conversion failed: {str(e)}")
+            raise Exception(f"Audio conversion failed: {str(e)}")
+
+    # Load and process the WAV file
+    try:
+        waveform, sr = torchaudio.load(wav_path)
+
+        # Resample if needed
+        if sr != sample_rate:
+            waveform = torchaudio.transforms.Resample(sr, sample_rate)(waveform)
+
+        # Normalize waveform
+        waveform = waveform / torch.max(torch.abs(waveform))
+
+        return waveform.squeeze().numpy(), wav_path
+    except Exception as e:
+        logger.error(f"❌ Failed to load or process audio: {str(e)}")
+        raise Exception(f"Audio processing failed: {str(e)}")
+
+def compute_audio_hash(audio_data):
+    """Compute a hash of audio data for caching purposes"""
+    import hashlib
+    return hashlib.md5(audio_data).hexdigest()
 
 def handle_asr_request(request, output_dir, sample_rate):
-    """Handle ASR (Automatic Speech Recognition) requests"""
+    """Handle ASR (Automatic Speech Recognition) requests with optimization"""
     if asr_model is None or asr_processor is None:
         logger.error("❌ ASR endpoint called but models aren't loaded")
         return jsonify({"error": "ASR model not available"}), 503
@@ -165,44 +261,40 @@ def handle_asr_request(request, output_dir, sample_rate):
     lang_code = LANGUAGE_CODES[language]
     logger.info(f"🔄 Processing {language} audio for ASR")
 
+    # Read the file content for hashing
+    audio_content = audio_file.read()
+    audio_hash = compute_audio_hash(audio_content)
+
+    # Check cache first
+    with asr_lock:
+        cached_result = asr_cache.get((audio_hash, lang_code))
+        if cached_result:
+            logger.info(f"✅ Using cached ASR result for {language}")
+            return jsonify({
+                "transcription": cached_result,
+                "language": language,
+                "language_code": lang_code,
+                "from_cache": True
+            })
+
     # Save the uploaded file temporarily
     with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(audio_file.filename)[-1]) as temp_audio:
-        temp_audio.write(audio_file.read())
+        temp_audio.write(audio_content)
         temp_audio_path = temp_audio.name
     logger.debug(f"📁 Temporary audio saved to {temp_audio_path}")
 
-    # Convert to WAV if necessary
-    wav_path = temp_audio_path
-    if not audio_file.filename.lower().endswith(".wav"):
-        wav_path = os.path.join(output_dir, "converted_audio.wav")
-        logger.info(f"🔄 Converting audio to WAV format: {wav_path}")
-        try:
-            audio = AudioSegment.from_file(temp_audio_path)
-            audio = audio.set_frame_rate(sample_rate).set_channels(1)
-            audio.export(wav_path, format="wav")
-        except Exception as e:
-            logger.error(f"❌ Audio conversion failed: {str(e)}")
-            return jsonify({"error": f"Audio conversion failed: {str(e)}"}), 500
-
-    # Load and process the WAV file
+    # Process audio in a separate thread/process
     try:
-        waveform, sr = torchaudio.load(wav_path)
-        logger.debug(f"✅ Audio loaded: {wav_path} (Sample rate: {sr}Hz)")
-
-        # Resample if needed
-        if sr != sample_rate:
-            logger.info(f"🔄 Resampling audio from {sr}Hz to {sample_rate}Hz")
-            waveform = torchaudio.transforms.Resample(sr, sample_rate)(waveform)
-
-        waveform = waveform / torch.max(torch.abs(waveform))
+        with ThreadPoolExecutor(max_workers=2) as executor:
+            future = executor.submit(process_audio_file, audio_content, temp_audio_path, output_dir, sample_rate)
+            waveform, wav_path = future.result()
     except Exception as e:
-        logger.error(f"❌ Failed to load or process audio: {str(e)}")
-        return jsonify({"error": f"Audio processing failed: {str(e)}"}), 500
+        return jsonify({"error": str(e)}), 500
 
     # Process audio for ASR
     try:
         inputs = asr_processor(
-            waveform.squeeze().numpy(),
+            waveform,
             sampling_rate=sample_rate,
             return_tensors="pt",
             language=lang_code
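One caveat on the audio-processing block above: building a fresh ThreadPoolExecutor per request and immediately calling future.result() keeps the request fully synchronous, so the pool adds thread-creation overhead without concurrency. If offloading is the goal, the usual shape is a shared module-level pool; a sketch (audio_executor and process_audio_async are hypothetical, not part of the commit; process_audio_file is from the diff):

from concurrent.futures import ThreadPoolExecutor

audio_executor = ThreadPoolExecutor(max_workers=2)  # created once, reused by all requests

def process_audio_async(audio_content, temp_audio_path, output_dir, sample_rate):
    """Submit decoding to the shared pool; the caller decides when to block."""
    return audio_executor.submit(
        process_audio_file, audio_content, temp_audio_path, output_dir, sample_rate
    )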
@@ -220,6 +312,14 @@ def handle_asr_request(request, output_dir, sample_rate):
         transcription = asr_processor.decode(ids)
 
         logger.info(f"✅ Transcription ({language}): {transcription}")
+
+        # Cache the result
+        with asr_lock:
+            asr_cache[(audio_hash, lang_code)] = transcription
+            # Implement cache size limitation if needed
+            if len(asr_cache) > MAX_CACHE_SIZE:
+                # Remove oldest entry (simplified approach)
+                asr_cache.pop(next(iter(asr_cache)))
 
         # Clean up temp files
         try:
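The eviction above leans on the fact that dicts preserve insertion order (guaranteed since Python 3.7), so next(iter(asr_cache)) removes the oldest insertion. That makes it FIFO rather than LRU, because cache hits never refresh an entry's position. If true LRU behavior is wanted, a sketch with collections.OrderedDict (not part of the commit; the same idea applies to tts_cache and translation_cache):

from collections import OrderedDict

lru = OrderedDict()
MAX_SIZE = 100

def lru_get(key):
    """Return a cached value and mark it most recently used."""
    value = lru.get(key)
    if value is not None:
        lru.move_to_end(key)  # refresh recency on every hit
    return value

def lru_put(key, value):
    lru[key] = value
    lru.move_to_end(key)
    if len(lru) > MAX_SIZE:
        lru.popitem(last=False)  # drop the least recently used entry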
@@ -232,7 +332,8 @@
         return jsonify({
             "transcription": transcription,
             "language": language,
-            "language_code": lang_code
+            "language_code": lang_code,
+            "from_cache": False
         })
     except Exception as e:
         logger.error(f"❌ ASR inference failed: {str(e)}")
@@ -244,8 +345,14 @@
         logger.debug(f"Stack trace: {traceback.format_exc()}")
         return jsonify({"error": f"Internal server error: {str(e)}"}), 500
 
+# Cache key generator for TTS
+def tts_cache_key(text, language):
+    """Generate a cache key for TTS results"""
+    import hashlib
+    return hashlib.md5(f"{text}:{language}".encode()).hexdigest()
+
 def handle_tts_request(request, output_dir):
-    """Handle TTS (Text-to-Speech) requests"""
+    """Handle TTS (Text-to-Speech) requests with optimization"""
     try:
         data = request.get_json()
         if not data:
@@ -268,7 +375,57 @@
         return jsonify({"error": f"TTS model for {language} not available"}), 503
 
    logger.info(f"🔄 Generating TTS for language: {language}, text: '{text_input}'")
+
+    # Generate cache key
+    cache_key = tts_cache_key(text_input, language)
+
+    # Check cache
+    with tts_lock:
+        cached_file = tts_cache.get(cache_key)
+        if cached_file and os.path.exists(cached_file):
+            logger.info(f"✅ Using cached TTS audio for: '{text_input}'")
+            return jsonify({
+                "message": "TTS audio retrieved from cache",
+                "file_url": f"/download/{os.path.basename(cached_file)}",
+                "language": language,
+                "text_length": len(text_input),
+                "from_cache": True
+            })
 
+    # Chunk text if too long (optional optimization for very long texts)
+    MAX_TEXT_LENGTH = 200  # Maximum text length to process in one go
+
+    if len(text_input) > MAX_TEXT_LENGTH:
+        # Simple chunking by splitting on periods
+        chunks = []
+        current_chunk = ""
+
+        for sentence in text_input.split("."):
+            if len(current_chunk) + len(sentence) < MAX_TEXT_LENGTH:
+                current_chunk += sentence + "."
+            else:
+                if current_chunk:
+                    chunks.append(current_chunk)
+                current_chunk = sentence + "."
+
+        if current_chunk:
+            chunks.append(current_chunk)
+
+        logger.info(f"🔄 Text chunked into {len(chunks)} parts for processing")
+
+        # Process chunks and combine results
+        try:
+            processor = tts_processors[language]
+            model = tts_models[language]
+
+            # For simplicity, we'll just use the first chunk in this example
+            # A full implementation would process all chunks and concatenate audio
+            text_input = chunks[0]
+            logger.info(f"⚠️ Using only the first chunk for demonstration: '{text_input}'")
+        except Exception as e:
+            logger.error(f"❌ TTS chunking failed: {str(e)}")
+            return jsonify({"error": f"TTS chunking failed: {str(e)}"}), 500
+
     try:
         processor = tts_processors[language]
         model = tts_models[language]
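The chunking branch above deliberately synthesizes only chunks[0], and its comment promises a full version that concatenates audio. A sketch of that path, assuming the VITS interface used elsewhere in this file (the processor produces token tensors and model(...).waveform carries the audio; synthesize_long_text is a hypothetical helper, not part of the commit):

import numpy as np
import torch

def synthesize_long_text(chunks, processor, model):
    """Hypothetical helper: synthesize each chunk and splice the waveforms."""
    pieces = []
    for chunk in chunks:
        inputs = processor(chunk, return_tensors="pt").to(model.device)
        with torch.no_grad():
            output = model(**inputs)
        pieces.append(output.waveform.squeeze().cpu().numpy())
    # Every piece shares the model's sampling rate, so plain concatenation is valid.
    return np.concatenate(pieces)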
@@ -290,10 +447,22 @@
 
         # Save to file
         try:
-            output_filename = os.path.join(output_dir, f"{language}_output.wav")
+            output_filename = os.path.join(output_dir, f"{language}_{cache_key}.wav")
             sampling_rate = model.config.sampling_rate
             sf.write(output_filename, waveform, sampling_rate)
             logger.info(f"✅ Speech generated! File saved: {output_filename}")
+
+            # Cache the result
+            with tts_lock:
+                tts_cache[cache_key] = output_filename
+                # Implement cache size limitation if needed
+                if len(tts_cache) > MAX_CACHE_SIZE:
+                    oldest_key = next(iter(tts_cache))
+                    try:
+                        os.remove(tts_cache[oldest_key])
+                    except:
+                        pass
+                    tts_cache.pop(oldest_key)
         except Exception as e:
             logger.error(f"❌ Failed to save audio file: {str(e)}")
             return jsonify({"error": f"Failed to save audio file: {str(e)}"}), 500
@@ -302,15 +471,22 @@
             "message": "TTS audio generated",
             "file_url": f"/download/{os.path.basename(output_filename)}",
             "language": language,
-            "text_length": len(text_input)
+            "text_length": len(text_input),
+            "from_cache": False
         })
     except Exception as e:
         logger.error(f"❌ Unhandled exception in TTS endpoint: {str(e)}")
         logger.debug(f"Stack trace: {traceback.format_exc()}")
         return jsonify({"error": f"Internal server error: {str(e)}"}), 500
 
+# Cache key generator for translation
+def translation_cache_key(text, source_lang, target_lang):
+    """Generate a cache key for translation results"""
+    import hashlib
+    return hashlib.md5(f"{text}:{source_lang}:{target_lang}".encode()).hexdigest()
+
 def handle_translation_request(request):
-    """Handle translation requests"""
+    """Handle translation requests with optimization"""
     try:
         data = request.get_json()
         if not data:
@@ -330,110 +506,97 @@ def handle_translation_request(request):
         target_code = LANGUAGE_CODES.get(target_language, target_language)
 
         logger.info(f"🔄 Translating from {source_language} to {target_language}: '{source_text}'")
+
+        # Generate cache key
+        cache_key = translation_cache_key(source_text, source_code, target_code)
+
+        # Check cache
+        with translation_lock:
+            cached_result = translation_cache.get(cache_key)
+            if cached_result:
+                logger.info(f"✅ Using cached translation result")
+                return jsonify({
+                    "translated_text": cached_result,
+                    "source_language": source_language,
+                    "target_language": target_language,
+                    "from_cache": True
+                })
 
-        # Special handling for pam-fil, fil-pam, pam-tgl and tgl-pam using the phi model
-        use_phi_model = False
+        # OPTIMIZED: Simplified language pair determination logic
+        model_key = None
         actual_source_code = source_code
         actual_target_code = target_code
-
-        # Check if we need to use the phi model with fil replacement
-        if (source_code == "pam" and target_code == "fil") or (source_code == "fil" and target_code == "pam"):
-            use_phi_model = True
-        elif (source_code == "pam" and target_code == "tgl"):
-            use_phi_model = True
-            actual_target_code = "fil"  # Replace tgl with fil for the phi model
-        elif (source_code == "tgl" and target_code == "pam"):
-            use_phi_model = True
-            actual_source_code = "fil"  # Replace tgl with fil for the phi model
-
-        if use_phi_model:
+        input_text = source_text
+
+        # Determine which model to use with simplified logic
+        if f"{source_code}-{target_code}" in translation_models:
+            # Direct model exists
+            model_key = f"{source_code}-{target_code}"
+            use_phi_model = False
+        elif (source_code in ["pam", "fil", "tgl"] and target_code in ["pam", "fil", "tgl"]):
+            # Use phi model with appropriate substitutions
             model_key = "phi"
-
-            # Check if we have the phi model
-            if model_key not in translation_models or translation_models[model_key] is None:
-                logger.error(f"❌ Translation model for {model_key} not loaded")
-                return jsonify({"error": f"Translation model not available"}), 503
-
-            try:
-                # Get the phi model and tokenizer
-                model = translation_models[model_key]
-                tokenizer = translation_tokenizers[model_key]
-
-                # Prepend target language token to input
-                input_text = f">>{actual_target_code}<< {source_text}"
-
-                logger.info(f"🔄 Using phi model with input: '{input_text}'")
-
-                # Tokenize the text
-                tokenized = tokenizer(input_text, return_tensors="pt", padding=True)
-                tokenized = {k: v.to(model.device) for k, v in tokenized.items()}
-
-                with torch.no_grad():
-                    translated = model.generate(
-                        **tokenized,
-                        max_length=100,  # Reasonable output length
-                        num_beams=4,  # Same as in training
-                        length_penalty=0.6,  # Same as in training
-                        early_stopping=True,  # Same as in training
-                        repetition_penalty=1.5,  # Add this to prevent repetition
-                        no_repeat_ngram_size=3  # Add this to prevent repetition
-                    )
-
-                # Decode the translation
-                result = tokenizer.decode(translated[0], skip_special_tokens=True)
-
-                logger.info(f"✅ Translation result: '{result}'")
-
-                return jsonify({
-                    "translated_text": result,
-                    "source_language": source_language,
-                    "target_language": target_language
-                })
-            except Exception as e:
-                logger.error(f"❌ Translation processing failed: {str(e)}")
-                logger.debug(f"Stack trace: {traceback.format_exc()}")
-                return jsonify({"error": f"Translation processing failed: {str(e)}"}), 500
+            use_phi_model = True
+            # Replace tgl with fil for the phi model if needed
+            if source_code == "tgl": actual_source_code = "fil"
+            if target_code == "tgl": actual_target_code = "fil"
+            # Prepare input text for phi model
+            input_text = f">>{actual_target_code}<< {source_text}"
         else:
-            # Create the regular language pair key for other language pairs
-            lang_pair = f"{source_code}-{target_code}"
-
-            # Check if we have a model for this language pair
-            if lang_pair not in translation_models:
-                logger.warning(f"⚠️ No translation model available for {lang_pair}")
-                return jsonify(
-                    {"error": f"Translation from {source_language} to {target_language} is not supported yet"}), 400
-
-            if translation_models[lang_pair] is None or translation_tokenizers[lang_pair] is None:
-                logger.error(f"❌ Translation model for {lang_pair} not loaded")
-                return jsonify({"error": f"Translation model not available"}), 503
-
-            try:
-                # Regular translation process for other language pairs
-                model = translation_models[lang_pair]
-                tokenizer = translation_tokenizers[lang_pair]
-
-                # Tokenize the text
-                tokenized = tokenizer(source_text, return_tensors="pt", padding=True)
-                tokenized = {k: v.to(model.device) for k, v in tokenized.items()}
-
-                # Generate translation
-                with torch.no_grad():
-                    translated = model.generate(**tokenized)
-
-                # Decode the translation
-                result = tokenizer.decode(translated[0], skip_special_tokens=True)
-
-                logger.info(f"✅ Translation result: '{result}'")
-
-                return jsonify({
-                    "translated_text": result,
-                    "source_language": source_language,
-                    "target_language": target_language
-                })
-            except Exception as e:
-                logger.error(f"❌ Translation processing failed: {str(e)}")
-                logger.debug(f"Stack trace: {traceback.format_exc()}")
-                return jsonify({"error": f"Translation processing failed: {str(e)}"}), 500
+            logger.warning(f"⚠️ No translation model available for {source_code}-{target_code}")
+            return jsonify(
+                {"error": f"Translation from {source_language} to {target_language} is not supported yet"}), 400
+
+        # Check if model exists and is loaded
+        if model_key not in translation_models or translation_models[model_key] is None:
+            logger.error(f"❌ Translation model for {model_key} not loaded")
+            return jsonify({"error": f"Translation model not available"}), 503
+
+        try:
+            # Get the model and tokenizer
+            model = translation_models[model_key]
+            tokenizer = translation_tokenizers[model_key]
+
+            # Tokenize the text
+            tokenized = tokenizer(input_text, return_tensors="pt", padding=True)
+            tokenized = {k: v.to(model.device) for k, v in tokenized.items()}
+
+            # Apply length-based optimizations
+            max_length = min(100, len(source_text.split()) * 2)  # Adaptive length
+
+            with torch.no_grad():
+                translated = model.generate(
+                    **tokenized,
+                    max_length=max_length,
+                    num_beams=4,
+                    length_penalty=0.6,
+                    early_stopping=True,
+                    repetition_penalty=1.5,
+                    no_repeat_ngram_size=3
+                )
+
+            # Decode the translation
+            result = tokenizer.decode(translated[0], skip_special_tokens=True)
+
+            logger.info(f"✅ Translation result: '{result}'")
+
+            # Cache the result
+            with translation_lock:
+                translation_cache[cache_key] = result
+                # Implement cache size limitation if needed
+                if len(translation_cache) > MAX_CACHE_SIZE:
+                    translation_cache.pop(next(iter(translation_cache)))
+
+            return jsonify({
+                "translated_text": result,
+                "source_language": source_language,
+                "target_language": target_language,
+                "from_cache": False
+            })
+        except Exception as e:
+            logger.error(f"❌ Translation processing failed: {str(e)}")
+            logger.debug(f"Stack trace: {traceback.format_exc()}")
+            return jsonify({"error": f"Translation processing failed: {str(e)}"}), 500
 
     except Exception as e:
         logger.error(f"❌ Unhandled exception in translation endpoint: {str(e)}")