Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,4 @@
|
|
1 |
-
#
|
2 |
-
|
3 |
-
# Import necessary libraries
|
4 |
import numpy as np
|
5 |
import gradio as gr
|
6 |
from sklearn.cluster import KMeans
|
@@ -24,7 +22,7 @@ caption_pipeline = pipeline(
|
|
24 |
)
|
25 |
|
26 |
# Translation Pipeline
|
27 |
-
# Using facebook/mbart-large-50-many-to-many-mmt for
|
28 |
# This model supports multiple languages and provides better translation quality for Arabic
|
29 |
translation_pipeline = pipeline(
|
30 |
"translation",
|
@@ -246,15 +244,16 @@ def translate_to_arabic(text):
|
|
246 |
result = translation_pipeline(text)
|
247 |
translated_text = result[0]['translation_text']
|
248 |
|
249 |
-
#
|
250 |
-
# This example uses a simple method; for more robust solutions, consider using NLP libraries
|
251 |
words = translated_text.split()
|
|
|
252 |
cleaned_words = []
|
253 |
previous_word = ""
|
254 |
for word in words:
|
255 |
if word != previous_word:
|
256 |
cleaned_words.append(word)
|
257 |
-
|
|
|
258 |
cleaned_translated_text = ' '.join(cleaned_words)
|
259 |
|
260 |
return cleaned_translated_text
|
|
|
1 |
+
# Import Libraries
|
|
|
|
|
2 |
import numpy as np
|
3 |
import gradio as gr
|
4 |
from sklearn.cluster import KMeans
|
|
|
22 |
)
|
23 |
|
24 |
# Translation Pipeline
|
25 |
+
# Using facebook/mbart-large-50-many-to-many-mmt for translations
|
26 |
# This model supports multiple languages and provides better translation quality for Arabic
|
27 |
translation_pipeline = pipeline(
|
28 |
"translation",
|
|
|
244 |
result = translation_pipeline(text)
|
245 |
translated_text = result[0]['translation_text']
|
246 |
|
247 |
+
# Post-processing to remove consecutive repeated words
|
|
|
248 |
words = translated_text.split()
|
249 |
+
seen = set()
|
250 |
cleaned_words = []
|
251 |
previous_word = ""
|
252 |
for word in words:
|
253 |
if word != previous_word:
|
254 |
cleaned_words.append(word)
|
255 |
+
seen.add(word)
|
256 |
+
previous_word = word
|
257 |
cleaned_translated_text = ' '.join(cleaned_words)
|
258 |
|
259 |
return cleaned_translated_text
|