import gradio as gr
from huggingface_hub import InferenceClient
from deep_translator import GoogleTranslator
from indic_transliteration import sanscript
from indic_transliteration.detect import detect as detect_script
from indic_transliteration.sanscript import transliterate
import langdetect
import re
import time  # used for the streaming delay in bot_response
# Initialize clients
text_client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
image_client = InferenceClient("SG161222/RealVisXL_V3.0")
def detect_language_script(text: str) -> tuple[str, str | None]:
    """Detect the language and script of the input text.
    Returns (language_code, script_type); script_type may be None."""
    try:
        # Use a confidence threshold to avoid false detections
        lang_detect = langdetect.detect_langs(text)
        if lang_detect[0].prob > 0.8:
            # Only accept high-confidence detections
            lang = lang_detect[0].lang
        else:
            lang = 'en'  # Default to English if unsure
        script = None
        try:
            script = detect_script(text)
        except Exception:
            pass
        return lang, script
    except Exception:
        return 'en', None
def is_romanized_indic(text: str) -> bool:
"""Check if text appears to be romanized Indic language.
More strict pattern matching."""
# Common Bengali romanized patterns with word boundaries
bengali_patterns = [
r'\b(ami|tumi|apni)\b', # Common pronouns
r'\b(ache|achen|thako|thaken)\b', # Common verbs
r'\b(kemon|bhalo|kharap)\b', # Common adjectives
r'\b(ki|kothay|keno)\b' # Common question words
]
# Require multiple matches to confirm it's actually Bengali
text_lower = text.lower()
matches = sum(1 for pattern in bengali_patterns if re.search(pattern, text_lower))
return matches >= 2 # Require at least 2 matches to consider it Bengali
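# Example: is_romanized_indic("tumi kemon acho") returns True (it hits both the
# pronoun and adjective patterns), while a lone "ki" returns False (one match).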
def translate_text(text: str, target_lang='en') -> tuple[str, str, bool]:
"""Translate text to target language, with more conservative translation logic."""
# Skip translation for very short inputs or basic greetings
if len(text.split()) <= 2 or text.lower() in ['hello', 'hi', 'hey']:
return text, 'en', False
original_lang, script = detect_language_script(text)
is_transliterated = False
# Only process if confident it's non-English
if original_lang != 'en' and len(text.split()) > 2:
try:
translator = GoogleTranslator(source='auto', target=target_lang)
translated = translator.translate(text)
return translated, original_lang, is_transliterated
except Exception as e:
print(f"Translation error: {e}")
return text, 'en', False
    # Check for romanized Indic text only if it's a longer input
    if original_lang == 'en' and len(text.split()) > 2 and is_romanized_indic(text):
        text = romanized_to_bengali(text)
        # Recursive call with the Bengali-script text; flag that transliteration happened
        translated, detected_lang, _ = translate_text(text, target_lang)
        return translated, detected_lang, True
    return text, 'en', False
def check_custom_responses(message: str) -> str | None:
    """Check for specific patterns and return a custom response, or None if nothing matches."""
message_lower = message.lower()
custom_responses = {
"what is ur name?": "xylaria",
"what is your name?": "xylaria",
"what's your name?": "xylaria",
"whats your name": "xylaria",
"how many 'r' is in strawberry?": "3",
"who is your developer?": "sk md saad amin",
"how many r is in strawberry": "3",
"who is ur dev": "sk md saad amin",
"who is ur developer": "sk md saad amin",
}
for pattern, response in custom_responses.items():
if pattern in message_lower:
return response
return None
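# Example: check_custom_responses("Hey, what's your name?") returns "xylaria"
# because the lookup is a case-insensitive substring match on the message.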
def is_image_request(message: str) -> bool:
"""Detect if the message is requesting image generation."""
image_triggers = [
"generate an image",
"create an image",
"draw",
"make a picture",
"generate a picture",
"create a picture",
"generate art",
"create art",
"make art",
"visualize",
"show me",
]
message_lower = message.lower()
return any(trigger in message_lower for trigger in image_triggers)
def generate_image(prompt: str):
    """Generate an image with the RealVisXL model via the Inference API."""
    try:
        image = image_client.text_to_image(
            prompt,
            negative_prompt="blurry, bad quality, nsfw",
            num_inference_steps=30,
            guidance_scale=7.5
        )
        # text_to_image returns a PIL.Image; save or encode it as needed before display
        return image
    except Exception as e:
        print(f"Image generation error: {e}")
        return None
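# Note: the PIL.Image returned above can be shown directly by a gr.Image component
# if you route it through an extra output in bot_response (see image_output below).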
def romanized_to_bengali(text: str) -> str:
"""Convert romanized Bengali text to Bengali script."""
bengali_mappings = {
'ami': 'আমি',
'tumi': 'তুমি',
'apni': 'আপনি',
'kemon': 'কেমন',
'achen': 'আছেন',
'acchen': 'আছেন',
'bhalo': 'ভালো',
'achi': 'আছি',
'ki': 'কি',
'kothay': 'কোথায়',
'keno': 'কেন',
}
text_lower = text.lower()
for roman, bengali in bengali_mappings.items():
text_lower = re.sub(r'\b' + roman + r'\b', bengali, text_lower)
    if text_lower == text.lower():
        # No dictionary hits; fall back to a generic ITRANS -> Bengali transliteration
        try:
            return transliterate(text, sanscript.ITRANS, sanscript.BENGALI)
        except Exception:
            return text
    return text_lower
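# bot_response below streams tokens from a `respond` generator that is not
# defined elsewhere in this file. The version here is a minimal sketch assuming
# the InferenceClient.chat_completion streaming API; adjust it to your setup.
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a chat completion from the text model, token by token."""
    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    for chunk in text_client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:
            yield token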
def create_chat_interface():
# Custom CSS for better styling
custom_css = """
body {
font-family: 'Inter', sans-serif;
}
.chat-container {
padding-top: 0;
padding-bottom: 0;
}
.chat-messages {
scroll-behavior: smooth;
}
.input-container {
border-top: 1px solid #ccc;
}
.input-container textarea {
border-radius: 12px 0 0 12px;
}
.input-container button {
border-radius: 0 12px 12px 0;
}
.loading {
animation: pulse 1.5s ease-in-out infinite;
}
@keyframes pulse {
0% { opacity: 1; }
50% { opacity: 0.5; }
100% { opacity: 1; }
}
"""
# Create the interface with custom theme
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
# Header
with gr.Row():
gr.HTML("""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Xylaria Chat</title>
<link rel="stylesheet" href="styles.css">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/dist/full.css">
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/vue.js"></script>
</head>
<body>
<div id="app" class="mx-auto max-w-5xl">
<header class="bg-white rounded shadow-lg p-10 text-center mb-10">
<h1 class="text-6xl font-bold text-violet-600 mb-5">✨ Xylaria Chat</h1>
<p class="text-lg font-medium text-gray-600">Your Intelligent Multilingual Assistant</p>
</header>
<section class="bg-white rounded shadow-lg p-10 chat-container">
<div class="chat-messages overflow-y-auto max-h-screen">
<div v-for="(message, index) in messages" :key="index" class="my-4" :class="{ 'flex justify-end': message.type === 'user' }">
<div class="rounded-lg py-4 px-6" :class="{ 'bg-violet-500 text-white': message.type === 'user', 'bg-gray-200': message.type === 'bot' }">{{ message.text }}</div>
</div>
</div>
<div class="input-container flex mt-6">
<textarea v-model="inputText" class="w-full p-4 rounded-lg border-2 border-gray-400 resize-y" rows="3" placeholder="Type a message..."></textarea>
<button @click="sendMessage" class="bg-violet-500 text-white p-3 rounded-lg ml-4 hover:bg-violet-600 transition duration-300">Send</button>
</div>
</section>
</div>
<script src="script.js"></script>
</body>
</html>
""")
# Main chat interface
with gr.Row():
with gr.Column(scale=4):
chatbot = gr.Chatbot(
height=500,
show_label=False,
container=True,
elem_classes=["chat-window"]
)
# Input area with buttons
with gr.Row():
txt = gr.Textbox(
show_label=False,
placeholder="Type your message here...",
container=False
)
send_btn = gr.Button("Send", variant="primary")
clear_btn = gr.Button("Clear")
# Additional features bar
with gr.Row():
audio_input = gr.Audio(source="microphone", type="filepath", label="Voice Input")
image_output = gr.Image(label="Generated Image", visible=False)
# Settings panel (collapsible)
with gr.Accordion("Advanced Settings", open=False):
with gr.Row():
with gr.Column():
system_msg = gr.Textbox(
value="You are a friendly Chatbot who always responds in English unless the user specifically uses another language.",
label="System Message",
lines=2
)
max_tokens = gr.Slider(
minimum=1,
maximum=2048,
value=512,
step=1,
label="Max Tokens"
)
with gr.Column():
temperature = gr.Slider(
minimum=0.1,
maximum=4.0,
value=0.7,
step=0.1,
label="Temperature"
)
top_p = gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
label="Top-p (nucleus sampling)"
)
# Function to handle sending messages
def user_message(message, history):
if message:
return "", history + [[message, None]]
return "", history
        def bot_response(history, system_msg, max_tokens, temperature, top_p):
            if len(history) == 0:
                yield history
                return
            # Get the last user message
            message = history[-1][0]
            # Check for custom responses first; this is a generator, so every
            # early exit must yield the updated history before returning
            custom_response = check_custom_responses(message)
            if custom_response:
                history[-1][1] = custom_response
                yield history
                return
            # Check for an image generation request
            if is_image_request(message):
                try:
                    image = generate_image(message)
                    if image:
                        history[-1][1] = "Here's your generated image!"
                        # Handle image display logic here (e.g. route to image_output)
                    else:
                        history[-1][1] = "Sorry, I couldn't generate the image."
                    yield history
                    return
                except Exception as e:
                    history[-1][1] = f"Sorry, I couldn't generate the image: {str(e)}"
                    yield history
                    return
            # Handle regular text responses
            try:
                translated_msg, original_lang, was_transliterated = translate_text(message)
                response = respond(
                    translated_msg,
                    history[:-1],
                    system_msg,
                    max_tokens,
                    temperature,
                    top_p
                )
                # Stream the response chunk by chunk
                partial_response = ""
                for chunk in response:
                    partial_response += chunk
                    history[-1][1] = partial_response
                    yield history
                    time.sleep(0.02)  # slight delay for smooth streaming
            except Exception as e:
                history[-1][1] = f"An error occurred: {str(e)}"
                yield history
# Event handlers
txt_msg = txt.submit(
user_message,
[txt, chatbot],
[txt, chatbot],
queue=False
).then(
bot_response,
[chatbot, system_msg, max_tokens, temperature, top_p],
chatbot
)
send_btn.click(
user_message,
[txt, chatbot],
[txt, chatbot],
queue=False
).then(
bot_response,
[chatbot, system_msg, max_tokens, temperature, top_p],
chatbot
)
clear_btn.click(lambda: None, None, chatbot, queue=False)
# Handle voice input
def process_audio(audio_file):
# Add your audio transcription logic here
return "Audio input received! (Add your transcription logic)"
audio_input.change(
process_audio,
inputs=[audio_input],
outputs=[txt]
)
return demo
# Create and launch the interface
demo = create_chat_interface()
if __name__ == "__main__":
demo.launch(share=True)