Spaces:

tahirsher
/

Multilingual_Translator-English-Urdu

Sleeping

App Files Files Community

Multilingual_Translator-English-Urdu / app.py

tahirsher

Update app.py

59f49e8 verified 5 months ago

raw

history blame

3.19 kB

	import streamlit as st
	import PyPDF2
	import docx2txt
	from transformers import pipeline
	import sentencepiece

	# Load translation models
	@st.cache_resource(show_spinner=False)
	def _load_translation_pipelines():
	    """Build both translation pipelines; the result is cached by Streamlit.

	    Streamlit re-executes the whole script on every widget interaction, so
	    without caching the two models would be rebuilt on each click. This
	    helper deliberately lets exceptions propagate: a call that raises is
	    not cached, so a transient failure is retried on the next rerun.

	    Returns:
	        A ``(translator_en, translator_ur)`` tuple of pipeline objects.
	    """
	    translator_en = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en", framework="pt")
	    translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur", framework="pt")
	    return translator_en, translator_ur


	def load_translation_models():
	    """Load translation models for English and Urdu.

	    Returns:
	        ``(translator_en, translator_ur)`` on success. If the pipelines
	        cannot be created, the error is shown with ``st.error`` and
	        ``(None, None)`` is returned so callers can test each entry.
	    """
	    try:
	        return _load_translation_pipelines()
	    except Exception as e:
	        # Top-level UI boundary: report the failure instead of crashing the app.
	        st.error(f"Error initializing translation models: {e}")
	        return None, None


	translator_en, translator_ur = load_translation_models()

	def extract_text_from_pdf(file):
	    """Extract text from a PDF file.

	    Args:
	        file: Whatever ``PyPDF2.PdfReader`` accepts; the app passes the
	            object returned by ``st.file_uploader``.

	    Returns:
	        The text of all pages joined with newlines, so the last word of one
	        page is not fused with the first word of the next. Pages that yield
	        no text are skipped. If reading fails, the error is shown with
	        ``st.error`` and the text gathered before the failure is returned
	        (possibly an empty string).
	    """
	    page_texts = []
	    try:
	        pdf_reader = PyPDF2.PdfReader(file)
	        for page in pdf_reader.pages:
	            page_text = page.extract_text()
	            # Skip pages whose extraction result is empty or None.
	            if page_text:
	                page_texts.append(page_text)
	    except Exception as e:
	        st.error(f"Error extracting text from PDF: {e}")
	    return "\n".join(page_texts)

	def extract_text_from_word(file):
	    """Return the text of a Word document, or "" if it cannot be read.

	    The document is handed to ``docx2txt.process``; any exception is shown
	    to the user with ``st.error`` and an empty string is returned instead.
	    """
	    try:
	        document_text = docx2txt.process(file)
	    except Exception as e:
	        st.error(f"Error extracting text from Word document: {e}")
	        document_text = ""
	    return document_text

	def translate_text(text, translator, max_chunk_size=512):
	    """Translate text in manageable chunks.

	    The text is split on whitespace into chunks of at most
	    ``max_chunk_size`` characters so that words are never cut in half; only
	    a single word longer than the limit is broken. Newlines and tabs in the
	    input are turned into spaces by the splitting step.

	    Args:
	        text: The text to translate.
	        translator: A callable that takes one string and returns a sequence
	            whose first item is a mapping with a ``'translation_text'`` key.
	        max_chunk_size: Maximum number of characters per chunk (must be > 0).

	    Returns:
	        The translated chunks joined with single spaces. Returns "" when
	        the text is empty or whitespace-only, or when any chunk fails to
	        translate (the error is shown with ``st.error``).

	    Raises:
	        ValueError: If ``max_chunk_size`` is not positive.
	    """
	    import textwrap

	    # Word-aware chunking; hyphenated words are kept intact.
	    text_chunks = textwrap.wrap(
	        text,
	        width=max_chunk_size,
	        break_long_words=True,
	        break_on_hyphens=False,
	    )

	    try:
	        translations = [
	            translator(chunk)[0]['translation_text'] for chunk in text_chunks
	        ]
	    except Exception as e:
	        # A partial translation would be misleading, so discard everything.
	        st.error(f"Error during translation: {e}")
	        return ""
	    return " ".join(translations)

	# Streamlit UI
	st.title("📚 Multilingual Document Translator")
	st.write("Translate PDF or Word documents to English and Urdu effortlessly!")

	uploaded_file = st.file_uploader("Upload a PDF or Word file", type=["pdf", "docx"])
	target_language = st.radio("Select target language for translation", ["English", "Urdu"])

	if uploaded_file:
	    # Extract text from the uploaded file. The extension check is
	    # case-insensitive so that e.g. "REPORT.PDF" is still parsed as a PDF.
	    if uploaded_file.name.lower().endswith(".pdf"):
	        text_content = extract_text_from_pdf(uploaded_file)
	    else:
	        text_content = extract_text_from_word(uploaded_file)

	    # Show extracted text preview
	    st.subheader("Extracted Text (Preview)")
	    st.write(text_content[:500] if text_content else "No content found in the file.")

	    # Perform translation when the user clicks the button
	    if st.button("Translate"):
	        if text_content:
	            # Either entry is None when model loading failed at startup.
	            translators = {"English": translator_en, "Urdu": translator_ur}
	            translator = translators.get(target_language)

	            st.subheader(f"Translated Text ({target_language})")
	            if translator is None:
	                # Warn and stop here: there is no translation to display,
	                # so the output area must not be rendered.
	                st.warning("Translation model not loaded successfully.")
	            else:
	                translated_text = translate_text(text_content, translator)
	                st.text_area("Translation Output", translated_text, height=300)
	        else:
	            st.warning("No text found to translate. Please upload a valid document.")