File size: 2,858 Bytes
ade99c4 464541c ade99c4 464541c ade99c4 464541c ade99c4 464541c ade99c4 464541c ade99c4 464541c ade99c4 464541c ade99c4 464541c ade99c4 464541c ade99c4 464541c ade99c4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import streamlit as st
import PyPDF2
import docx2txt
from transformers import pipeline
# Initialize Hugging Face translation pipelines (force the PyTorch backend).
# Both names are pre-bound to None so that a failed load still leaves them
# defined: passing None on to translate_text() surfaces as a reported
# translation error instead of an unhandled NameError.
# NOTE(review): Streamlit re-executes this script on every interaction, so the
# pipelines are rebuilt on each rerun - consider caching them (for example
# with st.cache_resource, if the installed Streamlit version provides it).
translator_en = None
translator_ur = None
try:
    translator_en = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en", framework="pt")
    translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-ur", framework="pt")
except Exception as e:
    st.error(f"Failed to initialize translation models. Error: {e}")
def extract_text_from_pdf(file):
    """Extract the text of every page in a PDF.

    Args:
        file: The object handed to ``PyPDF2.PdfReader`` (the UI passes the
            uploaded file object).

    Returns:
        The non-empty page texts joined by newlines. If reading fails
        part-way, the error is shown via ``st.error`` and whatever was
        collected up to that point is returned (possibly ``""``).
    """
    page_texts = []
    try:
        pdf_reader = PyPDF2.PdfReader(file)
        for page in pdf_reader.pages:
            page_text = page.extract_text()
            # A page may yield no text (empty string or None); skip it so it
            # neither breaks the join nor adds a blank line.
            if page_text:
                page_texts.append(page_text)
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
    # A newline between pages keeps the last word of one page from fusing
    # with the first word of the next.
    return "\n".join(page_texts)
def extract_text_from_word(file):
    """Return the text of a Word document, or "" if extraction fails.

    The document is read with ``docx2txt.process``. Any exception raised by
    that call is reported to the user through ``st.error`` and swallowed.
    """
    try:
        extracted = docx2txt.process(file)
    except Exception as exc:
        st.error(f"Error extracting text from Word document: {exc}")
        extracted = ""
    return extracted
def translate_text(text, translator, max_chunk_size=512):
    """Translate text chunk by chunk using the given translator.

    The text is split on whitespace into chunks of at most
    ``max_chunk_size`` characters, so no word is cut in half unless the word
    alone is longer than the limit. Runs of whitespace (including newlines)
    are collapsed to single spaces by the splitting step.

    Args:
        text: The text to translate. Empty or ``None`` yields ``""``.
        translator: Callable that takes one chunk and returns a sequence
            whose first item is a mapping with a ``'translation_text'`` key.
        max_chunk_size: Maximum characters per chunk. The character count is
            a stand-in for the model's input-length limit.

    Returns:
        The translated chunks joined by single spaces, or ``""`` if any
        chunk fails (the error is shown via ``st.error``).

    Raises:
        ValueError: If ``max_chunk_size`` is not positive.
    """
    import textwrap  # stdlib; imported here so the block is self-contained

    # Word-boundary chunking: slicing at fixed offsets would cut words in
    # half, and the " ".join below would then put a space inside them.
    text_chunks = textwrap.wrap(
        text or "",
        width=max_chunk_size,
        break_long_words=True,   # hard-split only words longer than the limit
        break_on_hyphens=False,  # keep hyphenated words intact
    )
    translations = []
    for chunk in text_chunks:
        try:
            result = translator(chunk)
            translations.append(result[0]['translation_text'])
        except Exception as e:
            st.error(f"Error during translation: {e}")
            # All-or-nothing: a partial translation is not returned.
            return ""
    return " ".join(translations)
# Streamlit UI: upload a document, preview the extracted text, translate on demand.
st.title("📚 Multilingual Document Translator")
st.write("Translate PDF or Word documents to English and Urdu effortlessly!")
uploaded_file = st.file_uploader("Upload a PDF or Word file", type=["pdf", "docx"])
target_language = st.radio("Select target language for translation", ["English", "Urdu"])
if uploaded_file:
    # Pick the extractor from the file extension. The comparison is
    # case-insensitive so a name such as "REPORT.PDF" is not sent to the
    # Word extractor.
    if uploaded_file.name.lower().endswith(".pdf"):
        text_content = extract_text_from_pdf(uploaded_file)
    else:
        text_content = extract_text_from_word(uploaded_file)
    # Show only the first 500 characters as a preview.
    st.subheader("Extracted Text (Preview)")
    st.write(text_content[:500] if text_content else "No content found in the file.")
    # Perform translation when the user clicks the button.
    if st.button("Translate"):
        if text_content:
            st.subheader(f"Translated Text ({target_language})")
            if target_language == "English":
                translated_text = translate_text(text_content, translator_en)
            else:
                translated_text = translate_text(text_content, translator_ur)
            st.text_area("Translation Output", translated_text, height=300)
        else:
            st.warning("No text found to translate. Please upload a valid document.")
|