Spaces:

tahirsher
/

Multilingual_Translator-English-Urdu

Sleeping

App Files Files Community

tahirsher committed on Jan 31

Commit

b57bd69

verified ·

1 Parent(s): 5bb4750

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -14

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import fitz
 from PIL import Image
 from transformers import pipeline
 import streamlit as st
@@ -10,10 +10,12 @@ from langdetect import detect
 trocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-base-printed")
 translator = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
 def extract_text_from_image(image):
     result = trocr_pipeline(image)
     return result[0]['generated_text'] if result else ""
 def extract_from_pdf(pdf_path):
     doc = fitz.open(pdf_path)
     full_text = ""
@@ -22,6 +24,7 @@ def extract_from_pdf(pdf_path):
         full_text += page.get_text() + "\n"
     return full_text.strip()
 def extract_from_word(docx_path):
     doc = Document(docx_path)
     full_text = ""
@@ -29,9 +32,11 @@ def extract_from_word(docx_path):
         full_text += para.text + "\n"
     return full_text.strip()
 def clean_text(text):
     return re.sub(r'[\x00-\x1f\x7f-\x9f]', '', text).strip()
 def translate_text(text):
     if not text.strip():
         return "No text available for translation."
@@ -50,12 +55,22 @@ def translate_text(text):
             translated_text += translated_chunk[0]['translation_text'] + " "
     return translated_text.strip()
 def create_pdf(translated_text, output_path):
     doc = fitz.open()
     page = doc.new_page()
-    page.insert_text((50, 50), translated_text, fontsize=12, fontname="helv")
     doc.save(output_path)
 st.title("Multilingual Document Translator")
 uploaded_file = st.file_uploader("Upload a document (PDF, Word, or Image)", type=["pdf", "docx", "jpg", "jpeg", "png"])
@@ -86,18 +101,21 @@ if uploaded_file is not None:
             st.subheader("Translated Text (English)")
             st.write(translated_text)
-            output_pdf_path = "translated_document.pdf"
-            create_pdf(translated_text, output_pdf_path)
-            with open(output_pdf_path, "rb") as f:
-                st.download_button(
-                    label="Download Translated PDF",
-                    data=f,
-                    file_name="translated_document.pdf",
-                    mime="application/pdf"
-                )
         finally:
             if os.path.exists(temp_file_path):
                 os.remove(temp_file_path)
-            if os.path.exists(output_pdf_path):
-                os.remove(output_pdf_path)

+import fitz  # PyMuPDF for PDF processing
 from PIL import Image
 from transformers import pipeline
 import streamlit as st
 # Both pipelines are built at module level, so they are created when this file is executed.
 # OCR: "image-to-text" pipeline using the microsoft/trocr-base-printed checkpoint.
 trocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-base-printed")
 # Translation: Helsinki-NLP/opus-mt-mul-en checkpoint
 # (presumably multiple source languages -> English; confirm against the model card).
 translator = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
 def extract_text_from_image(image):
     """Run the module-level TrOCR pipeline on *image* and return its text.

     Returns the ``'generated_text'`` field of the first pipeline result,
     or an empty string when the pipeline returns nothing.
     """
     ocr_output = trocr_pipeline(image)
     if not ocr_output:
         return ""
     return ocr_output[0]['generated_text']
 def extract_from_pdf(pdf_path):
     doc = fitz.open(pdf_path)
     full_text = ""
         full_text += page.get_text() + "\n"
     return full_text.strip()
 def extract_from_word(docx_path):
     doc = Document(docx_path)
     full_text = ""
         full_text += para.text + "\n"
     return full_text.strip()
 def clean_text(text):
     """Delete control characters from *text* and trim surrounding whitespace.

     Every character in U+0000-U+001F and U+007F-U+009F is removed (this
     includes tabs and newlines), then leading and trailing whitespace is
     stripped from what remains.
     """
     without_controls = re.sub(r'[\x00-\x1f\x7f-\x9f]', '', text)
     return without_controls.strip()
 def translate_text(text):
     if not text.strip():
         return "No text available for translation."
             translated_text += translated_chunk[0]['translation_text'] + " "
     return translated_text.strip()
 def _wrap_to_width(text, font, fontsize, max_width):
     """Greedily split *text* into lines no wider than *max_width* points.

     Width is measured with ``font.text_length``. Runs of whitespace are
     collapsed to single spaces. A single word wider than *max_width* is
     kept on its own line rather than broken. Empty input yields ``[""]``
     so that blank source lines still take up vertical space.
     """
     lines = []
     current = ""
     for word in text.split():
         candidate = f"{current} {word}" if current else word
         if current and font.text_length(candidate, fontsize=fontsize) > max_width:
             lines.append(current)
             current = word
         else:
             current = candidate
     lines.append(current)
     return lines

 def create_pdf(translated_text, output_path):
     """Render *translated_text* into a new PDF and save it at *output_path*.

     Text is laid out in 10pt Helvetica with 15pt line spacing and a 50pt
     margin. Each newline-separated line is word-wrapped to the printable
     width, and a new page is started whenever the next line would fall
     below the bottom margin.

     Args:
         translated_text: The text to write; may contain newlines.
         output_path: Destination path passed to ``Document.save``.
     """
     margin = 50
     fontsize = 10
     line_height = 15
     # TextWriter.append takes a Font object via ``font=``; it has no
     # ``fontname`` keyword, so the font is created explicitly here and
     # reused for width measurement.
     font = fitz.Font("helv")

     doc = fitz.open()
     try:
         page = doc.new_page()
         writer = fitz.TextWriter(page.rect)
         has_pending_text = False
         max_width = page.rect.width - 2 * margin
         bottom = page.rect.height - margin
         y_position = margin

         for source_line in translated_text.split("\n"):
             for line in _wrap_to_width(source_line, font, fontsize, max_width):
                 if y_position > bottom:
                     # Flush the full page before moving on to a fresh one.
                     if has_pending_text:
                         writer.write_text(page)
                     page = doc.new_page()
                     writer = fitz.TextWriter(page.rect)
                     has_pending_text = False
                     y_position = margin
                 if line:
                     writer.append(
                         (margin, y_position), line, font=font, fontsize=fontsize
                     )
                     has_pending_text = True
                 y_position += line_height

         # A TextWriter only buffers text; nothing reaches the page until
         # write_text() is called.
         if has_pending_text:
             writer.write_text(page)
         doc.save(output_path)
     finally:
         doc.close()
 st.title("Multilingual Document Translator")
 uploaded_file = st.file_uploader("Upload a document (PDF, Word, or Image)", type=["pdf", "docx", "jpg", "jpeg", "png"])
             st.subheader("Translated Text (English)")
             st.write(translated_text)
+            if translated_text.strip():
+                output_pdf_path = "translated_document.pdf"
+                create_pdf(translated_text, output_pdf_path)
+                with open(output_pdf_path, "rb") as f:
+                    st.download_button(
+                        label="Download Translated PDF",
+                        data=f,
+                        file_name="translated_document.pdf",
+                        mime="application/pdf"
+                    )
+            else:
+                st.warning("No content to save in the translated PDF.")
         finally:
             if os.path.exists(temp_file_path):
                 os.remove(temp_file_path)
+            if os.path.exists("translated_document.pdf"):
+                os.remove("translated_document.pdf")