ayajoharji committed on
Commit
e90da55
·
verified ·
1 Parent(s): db66d88

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -1,6 +1,4 @@
1
- # app.py
2
-
3
- # Import necessary libraries
4
  import numpy as np
5
  import gradio as gr
6
  from sklearn.cluster import KMeans
@@ -24,7 +22,7 @@ caption_pipeline = pipeline(
24
  )
25
 
26
  # Translation Pipeline
27
- # Using facebook/mbart-large-50-many-to-many-mmt for higher-quality translations
28
  # This model supports multiple languages and provides better translation quality for Arabic
29
  translation_pipeline = pipeline(
30
  "translation",
@@ -246,15 +244,16 @@ def translate_to_arabic(text):
246
  result = translation_pipeline(text)
247
  translated_text = result[0]['translation_text']
248
 
249
- # Advanced Post-processing to remove repeated words
250
- # This example uses a simple method; for more robust solutions, consider using NLP libraries
251
  words = translated_text.split()
 
252
  cleaned_words = []
253
  previous_word = ""
254
  for word in words:
255
  if word != previous_word:
256
  cleaned_words.append(word)
257
- previous_word = word
 
258
  cleaned_translated_text = ' '.join(cleaned_words)
259
 
260
  return cleaned_translated_text
 
1
+ # Import Libraries
 
 
2
  import numpy as np
3
  import gradio as gr
4
  from sklearn.cluster import KMeans
 
22
  )
23
 
24
  # Translation Pipeline
25
+ # Using facebook/mbart-large-50-many-to-many-mmt for translations
26
  # This model supports multiple languages and provides better translation quality for Arabic
27
  translation_pipeline = pipeline(
28
  "translation",
 
244
  result = translation_pipeline(text)
245
  translated_text = result[0]['translation_text']
246
 
247
+ # Post-processing to remove consecutive repeated words
 
248
  words = translated_text.split()
249
+ seen = set()
250
  cleaned_words = []
251
  previous_word = ""
252
  for word in words:
253
  if word != previous_word:
254
  cleaned_words.append(word)
255
+ seen.add(word)
256
+ previous_word = word
257
  cleaned_translated_text = ' '.join(cleaned_words)
258
 
259
  return cleaned_translated_text