"""Streamlit app that translates uploaded PDF or Word documents to English or Urdu."""

import docx2txt
import PyPDF2
import streamlit as st
from transformers import pipeline


@st.cache_resource
def _load_translator(model_name):
    """Build the translation pipeline for ``model_name`` (PyTorch backend).

    Cached as a Streamlit resource so the model is constructed once per
    server process rather than on every script rerun.
    """
    return pipeline("translation", model=model_name, framework="pt")


def _init_translator(model_name):
    """Return the pipeline for ``model_name``, or ``None`` if loading fails.

    The failure is reported in the UI. Returning ``None`` keeps the
    module-level translator names defined so later code can test them
    instead of hitting a ``NameError``.
    """
    try:
        return _load_translator(model_name)
    except Exception as e:
        st.error(f"Failed to initialize translation models. Error: {e}")
        return None


# Initialize Hugging Face translation pipelines (force PyTorch backend).
# Each model is loaded independently so one failure does not disable the other.
translator_en = _init_translator("Helsinki-NLP/opus-mt-mul-en")
# NOTE(review): confirm this model id is published on the Hugging Face Hub.
translator_ur = _init_translator("Helsinki-NLP/opus-mt-mul-ur")


def extract_text_from_pdf(file):
    """Extract text from a PDF.

    Args:
        file: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        The text of all pages joined by newlines. If reading fails, the
        error is shown in the UI and whatever was extracted before the
        failure is returned (possibly an empty string).
    """
    page_texts = []
    try:
        pdf_reader = PyPDF2.PdfReader(file)
        for page in pdf_reader.pages:
            # Guard against a page yielding None instead of a string.
            page_texts.append(page.extract_text() or "")
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
    # A newline between pages keeps the last word of one page from fusing
    # with the first word of the next.
    return "\n".join(page_texts)


def extract_text_from_word(file):
    """Extract text from a Word (.docx) file.

    Args:
        file: A path or file-like object accepted by ``docx2txt.process``.

    Returns:
        The extracted text, or an empty string if extraction fails (the
        error is shown in the UI).
    """
    try:
        return docx2txt.process(file) or ""
    except Exception as e:
        st.error(f"Error extracting text from Word document: {e}")
        return ""


def _split_into_chunks(text, max_chunk_size):
    """Split ``text`` on whitespace into chunks of at most ``max_chunk_size`` chars.

    Words are never cut in half unless a single word is itself longer than
    ``max_chunk_size``, in which case that word is hard-split.
    """
    chunks = []
    current = ""
    for word in text.split():
        # Hard-split a single token that cannot fit in any chunk.
        while len(word) > max_chunk_size:
            if current:
                chunks.append(current)
                current = ""
            chunks.append(word[:max_chunk_size])
            word = word[max_chunk_size:]
        if not current:
            current = word
        elif len(current) + 1 + len(word) <= max_chunk_size:
            current = f"{current} {word}"
        else:
            chunks.append(current)
            current = word
    if current:
        chunks.append(current)
    return chunks


def translate_text(text, translator, max_chunk_size=512):
    """Translate text in chunks using the given translator.

    Args:
        text: The text to translate.
        translator: A callable that takes a string and returns a list whose
            first item is a dict with a ``'translation_text'`` key.
        max_chunk_size: Maximum number of characters sent per call; a
            character-based limit used because of the model's token
            constraints.

    Returns:
        The translated chunks joined by single spaces. Returns an empty
        string when there is nothing to translate, when ``translator`` is
        ``None``, or when any chunk fails (the error is shown in the UI).
    """
    if translator is None:
        st.error("Error during translation: translation model is not available.")
        return ""

    translations = []
    for chunk in _split_into_chunks(text or "", max_chunk_size):
        try:
            result = translator(chunk)
            translations.append(result[0]['translation_text'])
        except Exception as e:
            st.error(f"Error during translation: {e}")
            return ""
    return " ".join(translations)


# Streamlit UI
st.title("📚 Multilingual Document Translator")
st.write("Translate PDF or Word documents to English and Urdu effortlessly!")

uploaded_file = st.file_uploader("Upload a PDF or Word file", type=["pdf", "docx"])
target_language = st.radio("Select target language for translation", ["English", "Urdu"])

if uploaded_file:
    # Extract text from the uploaded file. Compare the extension
    # case-insensitively so names like "REPORT.PDF" reach the PDF extractor.
    if uploaded_file.name.lower().endswith(".pdf"):
        text_content = extract_text_from_pdf(uploaded_file)
    else:
        text_content = extract_text_from_word(uploaded_file)

    # Whitespace-only output counts as "no content".
    has_text = bool(text_content and text_content.strip())

    # Show extracted text preview
    st.subheader("Extracted Text (Preview)")
    st.write(text_content[:500] if has_text else "No content found in the file.")

    # Perform translation when the user clicks the button
    if st.button("Translate"):
        if has_text:
            translator = translator_en if target_language == "English" else translator_ur
            if translator is None:
                st.error(
                    f"The {target_language} translation model is unavailable. "
                    "Please check the initialization error above."
                )
            else:
                st.subheader(f"Translated Text ({target_language})")
                translated_text = translate_text(text_content, translator)
                st.text_area("Translation Output", translated_text, height=300)
        else:
            st.warning("No text found to translate. Please upload a valid document.")