Reality123b committed on
Commit
305d245
·
verified ·
1 Parent(s): f147126

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -24
app.py CHANGED
@@ -1,34 +1,108 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
- from googletrans import Translator
4
- from langdetect import detect
 
 
 
 
5
 
6
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
7
- translator = Translator()
8
 
9
- def detect_and_translate(text: str, target_lang='en') -> tuple[str, str]:
10
  """
11
- Detect language and translate to target language if needed.
12
- Returns tuple of (translated_text, detected_language)
13
  """
14
  try:
15
- detected_lang = detect(text)
16
- if detected_lang != target_lang:
17
- translation = translator.translate(text, dest=target_lang)
18
- return translation.text, detected_lang
19
- return text, detected_lang
 
 
20
  except:
21
- return text, 'en' # Fallback to original text if translation fails
22
 
23
- def translate_to_original(text: str, original_lang: str) -> str:
24
- """Translate response back to original language if needed"""
25
- if original_lang != 'en':
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  try:
27
- translation = translator.translate(text, dest=original_lang)
28
- return translation.text
29
  except:
30
  return text
31
- return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  def check_custom_responses(message: str) -> str:
34
  """Check for specific patterns and return custom responses."""
@@ -50,6 +124,20 @@ def check_custom_responses(message: str) -> str:
50
  return response
51
  return None
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  def respond(
54
  message,
55
  history: list[tuple[str, str]],
@@ -64,15 +152,15 @@ def respond(
64
  yield custom_response
65
  return
66
 
67
- # Detect language and translate to English if needed
68
- translated_msg, detected_lang = detect_and_translate(message)
69
 
70
  # Prepare conversation history
71
  messages = [{"role": "system", "content": system_message}]
72
  for val in history:
73
  if val[0]:
74
- # Translate user message from history if needed
75
- trans_user_msg, _ = detect_and_translate(val[0])
76
  messages.append({"role": "user", "content": trans_user_msg})
77
  if val[1]:
78
  messages.append({"role": "assistant", "content": val[1]})
@@ -92,8 +180,8 @@ def respond(
92
  response += token
93
 
94
  # Translate accumulated response if original message wasn't in English
95
- if detected_lang != 'en':
96
- translated_response = translate_to_original(response, detected_lang)
97
  yield translated_response
98
  else:
99
  yield response
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
+ from deep_translator import GoogleTranslator
4
+ from indic_transliteration import sanscript
5
+ from indic_transliteration.detect import detect as detect_script
6
+ from indic_transliteration.sanscript import transliterate
7
+ import langdetect
8
+ import re
9
 
10
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
11
 
12
def detect_language_script(text: str) -> tuple[str, str]:
    """
    Detect the language and the writing script of the input text.

    Args:
        text: The user-supplied text to analyse.

    Returns:
        (language_code, script_type). ``language_code`` is whatever
        ``langdetect.detect`` reports, or ``'en'`` when detection fails.
        ``script_type`` is the result of ``indic_transliteration``'s
        ``detect`` (imported here as ``detect_script``), or ``None`` when
        script detection fails or language detection failed first.
    """
    # Empty / whitespace-only input carries no signal; return the same
    # fallback the error path uses instead of provoking an exception.
    if not text or not text.strip():
        return 'en', None

    try:
        lang = langdetect.detect(text)
    except Exception:
        # Best-effort: any detector failure falls back to English.
        return 'en', None

    try:
        script = detect_script(text)
    except Exception:
        # Script detection is optional; the language result is still usable.
        script = None

    return lang, script
27
 
28
# Whole-word markers of romanized Bengali, compiled once at import time
# rather than being rebuilt on every call.
_ROMANIZED_BENGALI_RE = re.compile(
    r'\b(?:'
    r'ami|tumi|apni'                 # common pronouns
    r'|ache|achen|thako|thaken'      # common verbs
    r'|kemon|bhalo|kharap'           # common adjectives
    r'|ki|kothay|keno'               # common question words
    r')\b'
)


def is_romanized_indic(text: str) -> bool:
    """
    Check whether text appears to be a romanized Indic language.

    This is a basic keyword heuristic and currently only knows a handful of
    romanized Bengali words; a single whole-word hit (case-insensitive via
    lower-casing) is enough to return True. Extend the pattern to improve
    coverage.

    Args:
        text: The text to inspect.

    Returns:
        True if any known romanized Bengali word occurs as a whole word,
        False otherwise (including for empty or missing input).
    """
    if not text:
        return False
    return _ROMANIZED_BENGALI_RE.search(text.lower()) is not None
43
+
44
# Romanized Bengali word -> Bengali script. Add more mappings as needed.
_BENGALI_WORD_MAP = {
    'ami': 'আমি',
    'tumi': 'তুমি',
    'apni': 'আপনি',
    'kemon': 'কেমন',
    'achen': 'আছেন',
    'acchen': 'আছেন',
    'bhalo': 'ভালো',
    'achi': 'আছি',
    'ki': 'কি',
    'kothay': 'কোথায়',
    'keno': 'কেন',
}

# One whole-word alternation over every key, so the text is scanned once
# instead of once per mapping. Keys are escaped in case future additions
# contain regex metacharacters.
_BENGALI_WORD_RE = re.compile(
    r'\b(?:' + '|'.join(re.escape(word) for word in _BENGALI_WORD_MAP) + r')\b'
)


def romanized_to_bengali(text: str) -> str:
    """
    Convert romanized Bengali text to Bengali script.

    The text is lower-cased and every known word (see ``_BENGALI_WORD_MAP``)
    is replaced by its Bengali spelling; words without a mapping are left
    in lower-cased Latin script. If no word was replaced at all, the
    original text is passed to ``transliterate`` (ITRANS -> Bengali) as a
    fallback.

    Args:
        text: Romanized input text.

    Returns:
        The lower-cased text with known words replaced, or the
        transliteration of the original text when nothing matched, or the
        original text unchanged if transliteration fails or the input is
        empty.
    """
    if not text:
        return text

    converted, replaced = _BENGALI_WORD_RE.subn(
        lambda match: _BENGALI_WORD_MAP[match.group(0)],
        text.lower(),
    )
    if replaced:
        return converted

    # No direct mapping found: try generic transliteration, best-effort.
    try:
        return transliterate(text, sanscript.ITRANS, sanscript.BENGALI)
    except Exception:
        return text
80
+
81
def translate_text(text: str, target_lang='en') -> tuple[str, str, bool]:
    """
    Translate text to the target language, handling script and romanized text.

    Steps:
      1. Detect the language with ``detect_language_script``.
      2. If it was detected as English but ``is_romanized_indic`` matches,
         convert it with ``romanized_to_bengali`` and treat it as Bengali.
      3. Translate with ``GoogleTranslator`` unless the text is already in
         the target language.

    Args:
        text: The text to translate.
        target_lang: Language code to translate into (default ``'en'``).

    Returns:
        (translated_text, original_lang, is_transliterated). When translation
        raises, the (possibly transliterated) untranslated text is returned.
        Empty or whitespace-only input is returned unchanged as
        ``(text, 'en', False)`` without calling any detector or translator.
    """
    # Nothing to detect or translate; avoid the detector and network call.
    if not text or not text.strip():
        return text, 'en', False

    # The detected script is not used here; only the language code matters.
    original_lang, _script = detect_language_script(text)
    is_transliterated = False

    # Handle potential romanized Indic text
    if original_lang == 'en' and is_romanized_indic(text):
        text = romanized_to_bengali(text)
        original_lang = 'bn'
        is_transliterated = True

    # Already in the target language: nothing to translate.
    if original_lang == target_lang:
        return text, original_lang, is_transliterated

    try:
        translator = GoogleTranslator(source='auto', target=target_lang)
        translated = translator.translate(text)
    except Exception as e:
        # Best-effort: report the failure and fall back to the input text.
        print(f"Translation error: {e}")
        return text, original_lang, is_transliterated

    return translated, original_lang, is_transliterated
106
 
107
  def check_custom_responses(message: str) -> str:
108
  """Check for specific patterns and return custom responses."""
 
124
  return response
125
  return None
126
 
127
def translate_to_original(text: str, original_lang: str, was_transliterated: bool) -> str:
    """
    Translate a response back to the user's original language if needed.

    Args:
        text: The English response text.
        original_lang: Language code of the user's message; ``'en'`` means
            no translation is required.
        was_transliterated: Whether the user's message was romanized and
            converted to native script first. Currently unused: the reply is
            returned as the translator produces it and is not converted back
            to romanized form.

    Returns:
        The translated text, or ``text`` unchanged when the original language
        is English, the text is empty/whitespace-only, or translation raises.

    NOTE(review): ``original_lang`` is passed straight to ``GoogleTranslator``;
    confirm that every code the language detector can emit is accepted there.
    """
    # Skip the network call when there is nothing to translate.
    if original_lang == 'en' or not text or not text.strip():
        return text

    try:
        translator = GoogleTranslator(source='en', target=original_lang)
        return translator.translate(text)
    except Exception as e:
        # Best-effort: report the failure and fall back to the English text.
        print(f"Translation error: {e}")
        return text
140
+
141
  def respond(
142
  message,
143
  history: list[tuple[str, str]],
 
152
  yield custom_response
153
  return
154
 
155
+ # Handle translation and transliteration
156
+ translated_msg, original_lang, was_transliterated = translate_text(message)
157
 
158
  # Prepare conversation history
159
  messages = [{"role": "system", "content": system_message}]
160
  for val in history:
161
  if val[0]:
162
+ # Translate user message from history
163
+ trans_user_msg, _, _ = translate_text(val[0])
164
  messages.append({"role": "user", "content": trans_user_msg})
165
  if val[1]:
166
  messages.append({"role": "assistant", "content": val[1]})
 
180
  response += token
181
 
182
  # Translate accumulated response if original message wasn't in English
183
+ if original_lang != 'en':
184
+ translated_response = translate_to_original(response, original_lang, was_transliterated)
185
  yield translated_response
186
  else:
187
  yield response