Spaces:

tahirsher
/

Multilingual_Translator-English-Urdu

Sleeping

App Files Community

tahirsher committed on Jan 28

Commit

04f9dd5

verified ·

1 Parent(s): d76d653

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -14

app.py CHANGED Viewed

@@ -2,27 +2,34 @@ import streamlit as st
 import PyPDF2
 import docx2txt
 from transformers import pipeline
-# Initialize Hugging Face Translation Pipelines (Force PyTorch Backend)
-try:
-    translator_en = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en", framework="pt")
-    translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-ur", framework="pt")
-except Exception as e:
-    st.error(f"Failed to initialize translation models. Error: {e}")
 def extract_text_from_pdf(file):
-    """Extract text from PDF."""
     text = ""
     try:
         pdf_reader = PyPDF2.PdfReader(file)
         for page in pdf_reader.pages:
-            text += page.extract_text()
     except Exception as e:
         st.error(f"Error extracting text from PDF: {e}")
     return text
 def extract_text_from_word(file):
-    """Extract text from Word file."""
     try:
         return docx2txt.process(file)
     except Exception as e:
@@ -30,8 +37,8 @@ def extract_text_from_word(file):
         return ""
 def translate_text(text, translator):
-    """Translate text in chunks using the given translator."""
-    max_chunk_size = 512  # Limit due to token constraints
     text_chunks = [text[i:i + max_chunk_size] for i in range(0, len(text), max_chunk_size)]
     translations = []
@@ -66,11 +73,13 @@ if uploaded_file:
     if st.button("Translate"):
         if text_content:
             st.subheader(f"Translated Text ({target_language})")
-            if target_language == "English":
                 translated_text = translate_text(text_content, translator_en)
-            else:
                 translated_text = translate_text(text_content, translator_ur)
             st.text_area("Translation Output", translated_text, height=300)
         else:
             st.warning("No text found to translate. Please upload a valid document.")

 import PyPDF2
 import docx2txt
 from transformers import pipeline
+import sentencepiece
+# Initialize translation pipelines
+def load_translation_models():
+    """Load translation models with error handling."""
+    try:
+        translator_en = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en", framework="pt")
+        translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-ur", framework="pt")
+        return translator_en, translator_ur
+    except Exception as e:
+        st.error(f"Error initializing translation models: {e}")
+        return None, None
+translator_en, translator_ur = load_translation_models()
 def extract_text_from_pdf(file):
+    """Extract text from a PDF file."""
     text = ""
     try:
         pdf_reader = PyPDF2.PdfReader(file)
         for page in pdf_reader.pages:
+            text += page.extract_text() or ""
     except Exception as e:
         st.error(f"Error extracting text from PDF: {e}")
     return text
 def extract_text_from_word(file):
+    """Extract text from a Word file."""
     try:
         return docx2txt.process(file)
     except Exception as e:
         return ""
 def translate_text(text, translator):
+    """Translate text in manageable chunks."""
+    max_chunk_size = 512
     text_chunks = [text[i:i + max_chunk_size] for i in range(0, len(text), max_chunk_size)]
     translations = []
     if st.button("Translate"):
         if text_content:
             st.subheader(f"Translated Text ({target_language})")
+            if target_language == "English" and translator_en:
                 translated_text = translate_text(text_content, translator_en)
+            elif target_language == "Urdu" and translator_ur:
                 translated_text = translate_text(text_content, translator_ur)
+            else:
+                st.warning("Translation model not loaded successfully.")
             st.text_area("Translation Output", translated_text, height=300)
         else:
             st.warning("No text found to translate. Please upload a valid document.")