import re

# Everything that preprocess_text deletes: any character that is not a word
# character, whitespace, ASCII punctuation (. , ! ? -), Persian punctuation
# (U+060C comma, U+061B semicolon, U+061F question mark) or the zero-width
# non-joiner (U+200C) that Persian orthography uses inside words.
_DISALLOWED_CHARS_RE = re.compile(r'[^\w\s.,!?\u060C\u061B\u061F\u200C-]')

# Whitespace that precedes a sentence punctuation mark (ASCII or Persian).
_SPACE_BEFORE_PUNCT_RE = re.compile(r'\s+([.,!?\u060C\u061B\u061F])')

# Character classes counted by detect_language.
_FARSI_CHAR_RE = re.compile(r'[\u0600-\u06FF]')
_ENGLISH_CHAR_RE = re.compile(r'[a-zA-Z]')


class TextProcessor:
    """Handles text processing operations for translation."""

    # Maximum number of characters accepted / kept for translation.
    MAX_LENGTH = 512

    # ASCII digit -> Persian (Extended Arabic-Indic) digit.
    PERSIAN_NUMBERS = {
        '0': '۰', '1': '۱', '2': '۲', '3': '۳', '4': '۴',
        '5': '۵', '6': '۶', '7': '۷', '8': '۸', '9': '۹',
    }

    # Special tokens stripped from model output.
    # NOTE(review): the original code replaced three *empty* string literals,
    # which is a no-op; the literals were presumably angle-bracket tokens lost
    # in transit. These are the usual seq2seq special tokens -- confirm
    # against the tokenizer actually in use.
    MODEL_ARTIFACTS = ("<pad>", "</s>", "<unk>")

    @staticmethod
    def preprocess_text(text: str) -> str:
        """
        Clean and prepare text for translation.

        Disallowed characters are removed first and whitespace is collapsed
        afterwards, so deleting a symbol never leaves a double space behind.

        Args:
            text: Input text to process

        Returns:
            Processed text, at most MAX_LENGTH characters long, or "" when
            the input is empty.
        """
        if not text:
            return ""

        cleaned = _DISALLOWED_CHARS_RE.sub('', text)
        # Collapse every whitespace run to one space and trim both ends.
        cleaned = ' '.join(cleaned.split())
        # Truncation can cut right after a space; drop it.
        return cleaned[:TextProcessor.MAX_LENGTH].rstrip()

    @staticmethod
    def postprocess_translation(text: str) -> str:
        """
        Clean up translated text and normalize numbers.

        Args:
            text: Translated text to process

        Returns:
            Text with model artifacts removed, whitespace normalized and
            ASCII digits replaced by Persian digits, or "" when the input
            is empty.
        """
        if not text:
            return ""

        # Clean up model artifacts
        for token in TextProcessor.MODEL_ARTIFACTS:
            text = text.replace(token, "")

        # Re-attach punctuation that was separated from the preceding word.
        text = _SPACE_BEFORE_PUNCT_RE.sub(r'\1', text)
        text = ' '.join(text.split())

        # Convert to Persian numbers in a single pass. The table is built
        # here so that a modified PERSIAN_NUMBERS mapping is honoured.
        # NOTE(review): this runs regardless of the target language.
        digit_table = str.maketrans(TextProcessor.PERSIAN_NUMBERS)
        return text.translate(digit_table).strip()

    @staticmethod
    def detect_language(text: str) -> str:
        """
        Detect if text is primarily English or Farsi.

        Args:
            text: Input text to analyze

        Returns:
            'Farsi' when characters in U+0600-U+06FF outnumber ASCII
            letters, otherwise 'English' (including ties and empty input).
        """
        if not text:
            return "English"

        farsi_count = len(_FARSI_CHAR_RE.findall(text))
        english_count = len(_ENGLISH_CHAR_RE.findall(text))
        return "Farsi" if farsi_count > english_count else "English"

    @staticmethod
    def validate_input(text: str) -> tuple[bool, str]:
        """
        Validate input text length and content.

        Args:
            text: Input text to validate

        Returns:
            Tuple of (is_valid, error_message); error_message is "" when
            the input is valid.
        """
        if not text or not text.strip():
            return False, "Please enter text to translate"

        # The limit applies to the raw input, before any preprocessing.
        if len(text) > TextProcessor.MAX_LENGTH:
            return False, f"Input text is too long (maximum {TextProcessor.MAX_LENGTH} characters)"

        return True, ""


# Expose static methods for backward compatibility
preprocess_text = TextProcessor.preprocess_text
postprocess_translation = TextProcessor.postprocess_translation
detect_language = TextProcessor.detect_language
validate_input = TextProcessor.validate_input