# API / app.py
# Reality123b's picture
# Update app.py
# 305d245 verified
# raw
# history blame
# 6.96 kB
import gradio as gr
from huggingface_hub import InferenceClient
from deep_translator import GoogleTranslator
from indic_transliteration import sanscript
from indic_transliteration.detect import detect as detect_script
from indic_transliteration.sanscript import transliterate
import langdetect
import re
# Shared Hugging Face inference client for the zephyr-7b-beta model.
# It is created once at import time; respond() uses it to stream chat completions.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
def detect_language_script(text: str) -> tuple[str, str]:
    """
    Detect the language and the script of the input text.

    Args:
        text: Text to inspect.

    Returns:
        (language_code, script_type). ``language_code`` is whatever
        ``langdetect.detect`` reports, or ``'en'`` when the language cannot
        be detected. ``script_type`` is whatever ``detect_script`` reports,
        or ``None`` when script detection fails or the language could not be
        detected in the first place.
    """
    # Blank or non-string input gives the detector nothing to work with;
    # return the same default the failure path produces without calling it.
    if not isinstance(text, str) or not text.strip():
        return 'en', None

    # ``except Exception`` (not a bare ``except``) keeps this best-effort while
    # still letting KeyboardInterrupt / SystemExit propagate.
    try:
        lang = langdetect.detect(text)
    except Exception:
        return 'en', None

    # Script detection is optional: a failure here must not discard the
    # language that was already detected.
    try:
        script = detect_script(text)
    except Exception:
        script = None
    return lang, script
def is_romanized_indic(text: str) -> bool:
    """
    Heuristically decide whether the text is an Indic language typed in Latin letters.

    Only a small set of whole-word romanized Bengali markers is checked
    (case-insensitively), so this is a coarse filter rather than a real
    language classifier.
    """
    lowered = text.lower()
    romanized_bengali_markers = (
        r'\b(ami|tumi|apni)\b',  # pronouns
        r'\b(ache|achen|thako|thaken)\b',  # verbs
        r'\b(kemon|bhalo|kharap)\b',  # adjectives
        r'\b(ki|kothay|keno)\b',  # question words
    )
    # Stop at the first marker that occurs anywhere in the text.
    for marker in romanized_bengali_markers:
        if re.search(marker, lowered) is not None:
            return True
    return False
def romanized_to_bengali(text: str) -> str:
    """
    Convert romanized Bengali text to Bengali script.

    Known romanized words are replaced (case-insensitively, whole words only)
    with their Bengali spelling. In that case the result is the lowercased
    input with those words swapped; words without a mapping stay in Latin
    letters. When no known word occurs at all, the original text is instead
    run through ITRANS -> Bengali transliteration, and returned unchanged if
    that transliteration fails.

    Args:
        text: Romanized input text.

    Returns:
        The converted text, or ``text`` itself when nothing could be converted.
    """
    # Common romanized Bengali words and their Bengali spellings.
    bengali_mappings = {
        'ami': 'আমি',
        'tumi': 'তুমি',
        'apni': 'আপনি',
        'kemon': 'কেমন',
        'achen': 'আছেন',
        'acchen': 'আছেন',
        'bhalo': 'ভালো',
        'achi': 'আছি',
        'ki': 'কি',
        'kothay': 'কোথায়',
        'keno': 'কেন',
        # Add more mappings as needed
    }

    lowered = text.lower()

    # A single alternation replaces every mapped word in one pass; the
    # surrounding \b anchors keep matches to whole words (so "ki" does not
    # fire inside "skiing"), and re.escape keeps future keys literal.
    word_pattern = re.compile(
        r'\b(?:' + '|'.join(re.escape(word) for word in bengali_mappings) + r')\b'
    )
    converted = word_pattern.sub(
        lambda match: bengali_mappings[match.group(0)], lowered
    )
    if converted != lowered:
        return converted

    # No dictionary word matched: fall back to scheme-based transliteration
    # of the original (case-preserved) text. Best effort only, hence the
    # broad handler, but not a bare ``except`` so interrupts still propagate.
    try:
        return transliterate(text, sanscript.ITRANS, sanscript.BENGALI)
    except Exception:
        return text
def translate_text(text: str, target_lang='en') -> tuple[str, str, bool]:
    """
    Translate text into ``target_lang``, accepting native-script or romanized input.

    Text that is labelled ``'en'`` by detection but matches the romanized-Indic
    markers is first converted to Bengali script and treated as ``'bn'``.

    Returns:
        (translated_text, original_lang, is_transliterated). If the text is
        already in the target language, or translation raises, the
        (possibly script-converted) text is returned untranslated.
    """
    source_lang, _script = detect_language_script(text)
    transliterated = False

    # Romanized Indic input: convert to Bengali script before translating.
    if source_lang == 'en' and is_romanized_indic(text):
        text = romanized_to_bengali(text)
        source_lang, transliterated = 'bn', True

    # Already in the requested language: nothing to translate.
    if source_lang == target_lang:
        return text, source_lang, transliterated

    try:
        translated = GoogleTranslator(source='auto', target=target_lang).translate(text)
    except Exception as e:
        # Best effort: report the problem and hand back the untranslated text.
        print(f"Translation error: {e}")
        return text, source_lang, transliterated
    return translated, source_lang, transliterated
def check_custom_responses(message: str) -> str:
    """Return a canned reply when the message contains a known phrase.

    Matching is a case-insensitive substring check, tried in the order the
    phrases are listed below. ``None`` is returned when no phrase occurs.
    """
    canned_replies = (
        ("what is ur name?", "xylaria"),
        ("what is your name?", "xylaria"),
        ("what's your name?", "xylaria"),
        ("whats your name", "xylaria"),
        ("how many 'r' is in strawberry?", "3"),
        ("who is your developer?", "sk md saad amin"),
        ("how many r is in strawberry", "3"),
        ("who is ur dev", "sk md saad amin"),
        ("who is ur developer", "sk md saad amin"),
    )
    lowered = message.lower()
    # First phrase found inside the message wins; fall through to None.
    return next(
        (reply for phrase, reply in canned_replies if phrase in lowered),
        None,
    )
def translate_to_original(text: str, original_lang: str, was_transliterated: bool) -> str:
    """
    Translate an English response back into the user's original language.

    ``was_transliterated`` is accepted but not consulted here: the output is
    in whatever script the translator produces for ``original_lang``.
    If translation raises, the error is printed and ``text`` is returned as-is.
    """
    # English in, English out: no translation round-trip needed.
    if original_lang == 'en':
        return text

    try:
        return GoogleTranslator(source='en', target=original_lang).translate(text)
    except Exception as e:
        print(f"Translation error: {e}")
        return text
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """
    Generate a streamed chat reply for ``message``.

    Canned replies are checked first. Otherwise the message (and every earlier
    user turn in ``history``) is translated to English, the conversation is
    sent to the model, and the growing reply is yielded after each streamed
    chunk, translated back to the user's language when that is not English.

    Args:
        message: The latest user message.
        history: Earlier turns as (user_message, assistant_message) pairs.
        system_message: Content of the system prompt.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion`` as ``temperature``.
        top_p: Forwarded to ``client.chat_completion`` as ``top_p``.

    Yields:
        The reply accumulated so far (not just the newest token).
    """
    # First check for custom responses
    custom_response = check_custom_responses(message)
    if custom_response:
        yield custom_response
        return

    # Handle translation and transliteration
    translated_msg, original_lang, was_transliterated = translate_text(message)

    # Prepare conversation history
    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        if turn[0]:
            # Earlier user turns are stored as typed, so translate them too.
            trans_user_msg, _, _ = translate_text(turn[0])
            messages.append({"role": "user", "content": trans_user_msg})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": translated_msg})

    # Get response from model
    response = ""
    # The loop variable is named ``chunk`` so it does not shadow the
    # ``message`` parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # A stream delta may carry no text (content is optional); treat that
        # as an empty token instead of failing on ``str + None``.
        token = chunk.choices[0].delta.content or ""
        response += token

        # Translate accumulated response if original message wasn't in English.
        # NOTE(review): this re-translates the whole accumulated reply on
        # every chunk; kept as-is to preserve incremental display.
        if original_lang != 'en':
            yield translate_to_original(response, original_lang, was_transliterated)
        else:
            yield response
# Chat UI wired to respond(). The extra widgets are listed in the same order
# as respond()'s trailing parameters: system_message, max_tokens,
# temperature, top_p.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(label="System message", value="You are a friendly Chatbot."),
        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        ),
    ],
)


if __name__ == "__main__":
    # Start the app only when run as a script; share=True is passed to launch().
    demo.launch(share=True)