tahirsher's picture
Update app.py
f53330e verified
raw
history blame
3.58 kB
import tempfile
import textwrap
from pathlib import Path

import pytesseract
import streamlit as st
import torch
from pdf2image import convert_from_path
from PIL import Image
from transformers import AutoModelForImageTextToText, AutoTokenizer, pipeline
# Image-to-Text Model (TrOCR)
def load_image_to_text_model():
    """Load the TrOCR preprocessing object and model used for OCR.

    Returns:
        tuple: ``(processor, model)``. The first element is returned in the
        slot callers name ``tokenizer``; it is a processor that can be called
        on an image to obtain ``pixel_values`` and also exposes
        ``batch_decode`` for turning generated ids back into text.
    """
    # Imported here so this function is self-contained; the module-level
    # transformers import does not bring AutoProcessor into scope.
    from transformers import AutoProcessor

    checkpoint = "microsoft/trocr-large-printed"
    # A plain text tokenizer cannot preprocess images (it would never produce
    # ``pixel_values``); the processor wraps both the image preprocessor and
    # the tokenizer behind the same call/decode interface.
    processor = AutoProcessor.from_pretrained(checkpoint)
    model = AutoModelForImageTextToText.from_pretrained(checkpoint)
    return processor, model
def extract_text_with_trocr(image, tokenizer, model):
    """Extract text from an image using TrOCR.

    Args:
        image: The image to read. Objects exposing a ``mode`` attribute
            other than ``"RGB"`` (e.g. PIL images in RGBA, palette or
            grayscale mode) are converted to RGB first.
        tokenizer: Preprocessing object. It must be callable as
            ``tokenizer(image, return_tensors="pt")`` and return something
            with a ``pixel_values`` attribute, and it must provide
            ``batch_decode``.
        model: Object whose ``generate(pixel_values)`` returns the ids that
            ``tokenizer.batch_decode`` can decode.

    Returns:
        The first decoded string, with special tokens stripped.
    """
    # Uploaded PNGs are frequently RGBA/palette and scans are often
    # grayscale; normalise to 3-channel RGB before preprocessing.
    if getattr(image, "mode", "RGB") != "RGB":
        image = image.convert("RGB")
    pixel_values = tokenizer(image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
# Multilingual Translation Models
def load_translation_models():
    """Create the two translation pipelines used by the app.

    Returns:
        tuple: ``(translator_en, translator_ur)`` -- the pipeline built from
        the ``opus-mt-mul-en`` checkpoint followed by the one built from the
        ``opus-mt-en-ur`` checkpoint, both on the PyTorch framework.
    """
    checkpoints = (
        "Helsinki-NLP/opus-mt-mul-en",
        "Helsinki-NLP/opus-mt-en-ur",
    )
    # Built in order: the English-target pipeline first, then the Urdu one.
    to_english, to_urdu = (
        pipeline("translation", model=checkpoint, framework="pt")
        for checkpoint in checkpoints
    )
    return to_english, to_urdu
# Instantiate the models as module-level globals each time the script runs:
# the OCR helper below reads `tokenizer` / `trocr_model` directly, and the UI
# code picks between `translator_en` and `translator_ur`.
translator_en, translator_ur = load_translation_models()
tokenizer, trocr_model = load_image_to_text_model()
def extract_text_from_pdf_with_ocr(file_path):
    """Run TrOCR over each page of a PDF and return the combined text.

    Args:
        file_path: Path of the PDF handed to ``convert_from_path``.

    Returns:
        The recognised text of every processed page, each followed by a
        newline. If rendering or OCR raises, the error is reported through
        ``st.error`` and whatever was collected so far is returned (possibly
        an empty string).
    """
    page_texts = []
    try:
        # 300 is forwarded as convert_from_path's second positional argument.
        for page in convert_from_path(file_path, 300):
            recognised = extract_text_with_trocr(page, tokenizer, trocr_model)
            page_texts.append(recognised + "\n")
    except Exception as e:
        st.error(f"Error during OCR extraction: {e}")
    return "".join(page_texts)
# Translation Function
def translate_text(text, translator, max_chunk_size=512):
    """Translate ``text`` chunk by chunk and return the joined result.

    The text is cut into chunks of at most ``max_chunk_size`` characters.
    Cuts are made at whitespace so that words are not split in half and then
    re-joined with a stray space; only a single word longer than the limit is
    hard-split. Newlines and tabs inside the text are turned into spaces.

    Args:
        text: The text to translate.
        translator: Callable that takes a string and returns a sequence whose
            first item is a mapping with a ``"translation_text"`` key.
        max_chunk_size: Maximum number of characters per chunk; must be
            positive.

    Returns:
        The chunk translations joined by single spaces. An empty string is
        returned when ``text`` is empty or whitespace-only, or when any chunk
        fails to translate (the error is reported through ``st.error``).
    """
    if not text:
        return ""

    # break_on_hyphens=False keeps hyphenated words in one piece.
    chunks = textwrap.wrap(text, width=max_chunk_size, break_on_hyphens=False)

    translations = []
    for chunk in chunks:
        try:
            result = translator(chunk)
            translations.append(result[0]["translation_text"])
        except Exception as e:
            # Deliberately all-or-nothing: a partial translation would be
            # presented to the user as if it were complete.
            st.error(f"Error during translation: {e}")
            return ""
    return " ".join(translations)
# Streamlit UI
# Page header and the two user inputs: the document to process and the
# language to translate it into.
st.title("📚 Image-Based Document Translator with TrOCR and Translation Models")
st.write("Translate image-based PDF or image files using advanced models.")
# "jpeg" is listed next to "jpg" so that files with the longer extension are
# accepted as well.
uploaded_file = st.file_uploader(
    "Upload a PDF or Image file (JPG/PNG)",
    type=["pdf", "jpg", "jpeg", "png"],
)
target_language = st.radio("Select target language for translation", ["English", "Urdu"])
if uploaded_file:
    # Extension check is case-insensitive so "SCAN.PDF" is handled as a PDF.
    if uploaded_file.name.lower().endswith(".pdf"):
        # The upload only exists in memory, while the PDF helper needs a path
        # on disk. Write the bytes to a private temporary directory under a
        # fixed name rather than trusting the user-supplied file name.
        with tempfile.TemporaryDirectory() as tmp_dir:
            pdf_path = Path(tmp_dir) / "upload.pdf"
            pdf_path.write_bytes(uploaded_file.getvalue())
            text_content = extract_text_from_pdf_with_ocr(str(pdf_path))
    else:
        # Mirror the PDF path: report OCR/decoding failures instead of
        # crashing the page on an unreadable image.
        try:
            text_content = extract_text_with_trocr(Image.open(uploaded_file), tokenizer, trocr_model)
        except Exception as e:
            st.error(f"Error during OCR extraction: {e}")
            text_content = ""

    st.subheader("Extracted Text (Preview)")
    st.write(text_content[:500] if text_content else "No content found in the file.")

    if st.button("Translate"):
        if text_content:
            st.subheader(f"Translated Text ({target_language})")
            translators = {"English": translator_en, "Urdu": translator_ur}
            translator = translators.get(target_language)
            if translator:
                translated_text = translate_text(text_content, translator)
                st.text_area("Translation Output", translated_text, height=300)
            else:
                # Only warn here; there is no translation to display.
                st.warning("Translation model not loaded successfully.")
        else:
            st.warning("No text found to translate. Please upload a valid document.")