import re
from typing import Optional

import gradio as gr
import langdetect
from deep_translator import GoogleTranslator
from huggingface_hub import InferenceClient
from indic_transliteration import sanscript
from indic_transliteration.detect import detect as detect_script
from indic_transliteration.sanscript import transliterate

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# Regexes that flag a message as romanized Bengali. Compiled once at import
# time instead of on every call. Matching is done on lower-cased text.
_BENGALI_ROMANIZED_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'\b(ami|tumi|apni)\b',  # Common pronouns
        r'\b(ache|achen|thako|thaken)\b',  # Common verbs
        r'\b(kemon|bhalo|kharap)\b',  # Common adjectives
        r'\b(ki|kothay|keno)\b',  # Common question words
    )
)

# Romanized word -> Bengali script. Add more mappings as needed.
_BENGALI_WORD_MAP = {
    'ami': 'আমি',
    'tumi': 'তুমি',
    'apni': 'আপনি',
    'kemon': 'কেমন',
    'achen': 'আছেন',
    'acchen': 'আছেন',
    'bhalo': 'ভালো',
    'achi': 'আছি',
    'ki': 'কি',
    'kothay': 'কোথায়',
    'keno': 'কেন',
}

# One alternation over every mapped word, so the text is scanned once rather
# than once per dictionary entry.
_BENGALI_WORD_RE = re.compile(
    r'\b(' + '|'.join(re.escape(word) for word in _BENGALI_WORD_MAP) + r')\b'
)

# Canned answers. Keys are matched as substrings of the lower-cased message,
# in insertion order; the first hit wins.
_CUSTOM_RESPONSES = {
    "what is ur name?": "xylaria",
    "what is your name?": "xylaria",
    "what's your name?": "xylaria",
    "whats your name": "xylaria",
    "how many 'r' is in strawberry?": "3",
    "who is your developer?": "sk md saad amin",
    "how many r is in strawberry": "3",
    "who is ur dev": "sk md saad amin",
    "who is ur developer": "sk md saad amin",
}


def detect_language_script(text: str) -> tuple[str, Optional[str]]:
    """
    Detect language and script of the input text.

    Returns (language_code, script_type). If language detection fails the
    result is ('en', None). If only script detection fails, the detected
    language is still returned and script_type is None.
    """
    try:
        lang = langdetect.detect(text)
    except Exception:
        # Best effort: fall back to English rather than failing the chat turn.
        return 'en', None

    try:
        script = detect_script(text)
    except Exception:
        script = None
    return lang, script


def is_romanized_indic(text: str) -> bool:
    """
    Check if text appears to be a romanized Indic language.

    This is a basic implementation: only a handful of common romanized
    Bengali words are recognised. Matching is case-insensitive.
    """
    text_lower = text.lower()
    return any(
        pattern.search(text_lower) for pattern in _BENGALI_ROMANIZED_PATTERNS
    )


def romanized_to_bengali(text: str) -> str:
    """
    Convert romanized Bengali text to Bengali script.

    Known words are replaced through the word map; the returned text is
    lower-cased and words without a mapping are left in Latin script. When
    no known word is present, the original text is transliterated as ITRANS
    instead, and returned unchanged if that transliteration fails.
    """
    converted, replaced = _BENGALI_WORD_RE.subn(
        lambda match: _BENGALI_WORD_MAP[match.group(0)], text.lower()
    )
    if replaced:
        return converted

    # No direct mapping found, try using transliteration.
    try:
        return transliterate(text, sanscript.ITRANS, sanscript.BENGALI)
    except Exception:
        return text


def translate_text(text: str, target_lang='en') -> tuple[str, str, bool]:
    """
    Translate text to target language, handling both script and romanized text.

    Returns (translated_text, original_lang, is_transliterated). If the
    translation request fails, the error is printed and the untranslated
    text is returned in place of translated_text.
    """
    original_lang, _script = detect_language_script(text)
    is_transliterated = False

    # Romanized Bengali is typically reported as English by the language
    # detector, so it is converted to Bengali script before translating.
    if original_lang == 'en' and is_romanized_indic(text):
        text = romanized_to_bengali(text)
        original_lang = 'bn'
        is_transliterated = True

    # Only translate if not already in target language.
    if original_lang == target_lang:
        return text, original_lang, is_transliterated

    try:
        translator = GoogleTranslator(source='auto', target=target_lang)
        translated = translator.translate(text)
    except Exception as e:
        print(f"Translation error: {e}")
        return text, original_lang, is_transliterated
    return translated, original_lang, is_transliterated


def check_custom_responses(message: str) -> Optional[str]:
    """
    Check for specific patterns and return custom responses.

    Returns the canned answer for the first known phrase contained in the
    message (case-insensitive), or None when no phrase matches.
    """
    message_lower = message.lower()
    for pattern, response in _CUSTOM_RESPONSES.items():
        if pattern in message_lower:
            return response
    return None


def translate_to_original(text: str, original_lang: str, was_transliterated: bool) -> str:
    """
    Translate response back to original language if needed.

    English ('en') is returned as-is. If the translation request fails, the
    error is printed and the English text is returned.
    was_transliterated is accepted for interface compatibility and is not
    currently used.
    """
    if original_lang == 'en':
        return text

    try:
        translator = GoogleTranslator(source='en', target=original_lang)
        return translator.translate(text)
    except Exception as e:
        print(f"Translation error: {e}")
        return text


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """
    Generate the chatbot reply for a message, yielding it incrementally.

    A canned answer is yielded immediately when the message matches one.
    Otherwise the message and the user turns of the history are translated
    to English, sent to the model, and the accumulated reply is yielded
    after every streamed chunk, translated back to the user's language when
    that language is not English.
    """
    # First check for custom responses.
    custom_response = check_custom_responses(message)
    if custom_response is not None:
        yield custom_response
        return

    # Handle translation and transliteration.
    translated_msg, original_lang, was_transliterated = translate_text(message)

    # Prepare conversation history.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            # Translate user message from history.
            trans_user_msg, _, _ = translate_text(user_msg)
            messages.append({"role": "user", "content": trans_user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": translated_msg})

    # Get response from model.
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if not token:
            # A chunk may carry no text (content is None or empty); there is
            # nothing new to append or to re-translate.
            continue
        response += token

        # Translate accumulated response if original message wasn't in English.
        # NOTE(review): this issues one translation request per streamed
        # chunk; consider translating less often if rate limits are hit.
        if original_lang != 'en':
            yield translate_to_original(response, original_lang, was_transliterated)
        else:
            yield response


demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a friendly Chatbot.",
            label="System message",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max new tokens",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

if __name__ == "__main__":
    demo.launch(share=True)